contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp

   1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the PPCISelLowering class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "PPCISelLowering.h"
  15 #include "MCTargetDesc/PPCPredicates.h"
  16 #include "PPC.h"
  17 #include "PPCCCState.h"
  18 #include "PPCCallingConv.h"
  19 #include "PPCFrameLowering.h"
  20 #include "PPCInstrInfo.h"
  21 #include "PPCMachineFunctionInfo.h"
  22 #include "PPCPerfectShuffle.h"
  23 #include "PPCRegisterInfo.h"
  24 #include "PPCSubtarget.h"
  25 #include "PPCTargetMachine.h"
  26 #include "llvm/ADT/APFloat.h"
  27 #include "llvm/ADT/APInt.h"
  28 #include "llvm/ADT/ArrayRef.h"
  29 #include "llvm/ADT/DenseMap.h"
  30 #include "llvm/ADT/None.h"
  31 #include "llvm/ADT/STLExtras.h"
  32 #include "llvm/ADT/SmallPtrSet.h"
  33 #include "llvm/ADT/SmallSet.h"
  34 #include "llvm/ADT/SmallVector.h"
  35 #include "llvm/ADT/Statistic.h"
  36 #include "llvm/ADT/StringRef.h"
  37 #include "llvm/ADT/StringSwitch.h"
  38 #include "llvm/CodeGen/CallingConvLower.h"
  39 #include "llvm/CodeGen/ISDOpcodes.h"
  40 #include "llvm/CodeGen/MachineBasicBlock.h"
  41 #include "llvm/CodeGen/MachineFrameInfo.h"
  42 #include "llvm/CodeGen/MachineFunction.h"
  43 #include "llvm/CodeGen/MachineInstr.h"
  44 #include "llvm/CodeGen/MachineInstrBuilder.h"
  45 #include "llvm/CodeGen/MachineJumpTableInfo.h"
  46 #include "llvm/CodeGen/MachineLoopInfo.h"
  47 #include "llvm/CodeGen/MachineMemOperand.h"
  48 #include "llvm/CodeGen/MachineOperand.h"
  49 #include "llvm/CodeGen/MachineRegisterInfo.h"
  50 #include "llvm/CodeGen/MachineValueType.h"
  51 #include "llvm/CodeGen/RuntimeLibcalls.h"
  52 #include "llvm/CodeGen/SelectionDAG.h"
  53 #include "llvm/CodeGen/SelectionDAGNodes.h"
  54 #include "llvm/CodeGen/ValueTypes.h"
  55 #include "llvm/IR/CallSite.h"
  56 #include "llvm/IR/CallingConv.h"
  57 #include "llvm/IR/Constant.h"
  58 #include "llvm/IR/Constants.h"
  59 #include "llvm/IR/DataLayout.h"
  60 #include "llvm/IR/DebugLoc.h"
  61 #include "llvm/IR/DerivedTypes.h"
  62 #include "llvm/IR/Function.h"
  63 #include "llvm/IR/GlobalValue.h"
  64 #include "llvm/IR/IRBuilder.h"
  65 #include "llvm/IR/Instructions.h"
  66 #include "llvm/IR/Intrinsics.h"
  67 #include "llvm/IR/Module.h"
  68 #include "llvm/IR/Type.h"
  69 #include "llvm/IR/Use.h"
  70 #include "llvm/IR/Value.h"
  71 #include "llvm/MC/MCExpr.h"
  72 #include "llvm/MC/MCRegisterInfo.h"
  73 #include "llvm/Support/AtomicOrdering.h"
  74 #include "llvm/Support/BranchProbability.h"
  75 #include "llvm/Support/Casting.h"
  76 #include "llvm/Support/CodeGen.h"
  77 #include "llvm/Support/CommandLine.h"
  78 #include "llvm/Support/Compiler.h"
  79 #include "llvm/Support/Debug.h"
  80 #include "llvm/Support/ErrorHandling.h"
  81 #include "llvm/Support/Format.h"
  82 #include "llvm/Support/KnownBits.h"
  83 #include "llvm/Support/MathExtras.h"
  84 #include "llvm/Support/raw_ostream.h"
  85 #include "llvm/Target/TargetInstrInfo.h"
  86 #include "llvm/Target/TargetLowering.h"
  87 #include "llvm/Target/TargetMachine.h"
  88 #include "llvm/Target/TargetOptions.h"
  89 #include "llvm/Target/TargetRegisterInfo.h"
  90 #include <algorithm>
  91 #include <cassert>
  92 #include <cstdint>
  93 #include <iterator>
  94 #include <list>
  95 #include <utility>
  96 #include <vector>
  97
  98 using namespace llvm;
  99
 100 #define DEBUG_TYPE "ppc-lowering"
 101
 102 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
 103 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
 104
 105 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
 106 cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
 107
 108 static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
 109 cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
 110
 111 static cl::opt<bool> DisableSCO("disable-ppc-sco",
 112 cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
 113
 114 STATISTIC(NumTailCalls, "Number of tail calls");
 115 STATISTIC(NumSiblingCalls, "Number of sibling calls");
 116
 117 // FIXME: Remove this once the bug has been fixed!
 118 extern cl::opt<bool> ANDIGlueBug;
 119
 120 PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
 121                                      const PPCSubtarget &STI)
 122     : TargetLowering(TM), Subtarget(STI) {
 123   // Use _setjmp/_longjmp instead of setjmp/longjmp.
 124   setUseUnderscoreSetJmp(true);
 125   setUseUnderscoreLongJmp(true);
 126
 127   // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
 128   // arguments are at least 4/8 bytes aligned.
 129   bool isPPC64 = Subtarget.isPPC64();
 130   setMinStackArgumentAlignment(isPPC64 ? 8:4);
 131
 132   // Set up the register classes.
 133   addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
 134   if (!useSoftFloat()) {
 135     addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
 136     addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
 137   }
 138
 139   // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
 140   for (MVT VT : MVT::integer_valuetypes()) {
 141     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
 142     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
 143   }
 144
 145   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
 146
 147   // PowerPC has pre-inc load and store's.
 148   setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
 149   setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
 150   setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
 151   setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
 152   setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
 153   setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
 154   setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
 155   setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
 156   setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
 157   setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
 158   setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
 159   setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
 160   setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
 161   setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
 162
 163   if (Subtarget.useCRBits()) {
 164     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 165
 166     if (isPPC64 || Subtarget.hasFPCVT()) {
 167       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
 168       AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
 169                          isPPC64 ? MVT::i64 : MVT::i32);
 170       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
 171       AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
 172                         isPPC64 ? MVT::i64 : MVT::i32);
 173     } else {
 174       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
 175       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
 176     }
 177
 178     // PowerPC does not support direct load/store of condition registers.
 179     setOperationAction(ISD::LOAD, MVT::i1, Custom);
 180     setOperationAction(ISD::STORE, MVT::i1, Custom);
 181
 182     // FIXME: Remove this once the ANDI glue bug is fixed:
 183     if (ANDIGlueBug)
 184       setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
 185
 186     for (MVT VT : MVT::integer_valuetypes()) {
 187       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
 188       setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
 189       setTruncStoreAction(VT, MVT::i1, Expand);
 190     }
 191
 192     addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
 193   }
 194
 195   // This is used in the ppcf128->int sequence.  Note it has different semantics
 196   // from FP_ROUND:  that rounds to nearest, this rounds to zero.
 197   setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
 198
 199   // We do not currently implement these libm ops for PowerPC.
 200   setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
 201   setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
 202   setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
 203   setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
 204   setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
 205   setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
 206
 207   // PowerPC has no SREM/UREM instructions unless we are on P9
 208   // On P9 we may use a hardware instruction to compute the remainder.
 209   // The instructions are not legalized directly because in the cases where the
 210   // result of both the remainder and the division is required it is more
 211   // efficient to compute the remainder from the result of the division rather
 212   // than use the remainder instruction.
 213   if (Subtarget.isISA3_0()) {
 214     setOperationAction(ISD::SREM, MVT::i32, Custom);
 215     setOperationAction(ISD::UREM, MVT::i32, Custom);
 216     setOperationAction(ISD::SREM, MVT::i64, Custom);
 217     setOperationAction(ISD::UREM, MVT::i64, Custom);
 218   } else {
 219     setOperationAction(ISD::SREM, MVT::i32, Expand);
 220     setOperationAction(ISD::UREM, MVT::i32, Expand);
 221     setOperationAction(ISD::SREM, MVT::i64, Expand);
 222     setOperationAction(ISD::UREM, MVT::i64, Expand);
 223   }
 224
 225   // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
 226   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
 227   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
 228   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
 229   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
 230   setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
 231   setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
 232   setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
 233   setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
 234
 235   // We don't support sin/cos/sqrt/fmod/pow
 236   setOperationAction(ISD::FSIN , MVT::f64, Expand);
 237   setOperationAction(ISD::FCOS , MVT::f64, Expand);
 238   setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
 239   setOperationAction(ISD::FREM , MVT::f64, Expand);
 240   setOperationAction(ISD::FPOW , MVT::f64, Expand);
 241   setOperationAction(ISD::FMA  , MVT::f64, Legal);
 242   setOperationAction(ISD::FSIN , MVT::f32, Expand);
 243   setOperationAction(ISD::FCOS , MVT::f32, Expand);
 244   setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
 245   setOperationAction(ISD::FREM , MVT::f32, Expand);
 246   setOperationAction(ISD::FPOW , MVT::f32, Expand);
 247   setOperationAction(ISD::FMA  , MVT::f32, Legal);
 248
 249   setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
 250
 251   // If we're enabling GP optimizations, use hardware square root
 252   if (!Subtarget.hasFSQRT() &&
 253       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
 254         Subtarget.hasFRE()))
 255     setOperationAction(ISD::FSQRT, MVT::f64, Expand);
 256
 257   if (!Subtarget.hasFSQRT() &&
 258       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
 259         Subtarget.hasFRES()))
 260     setOperationAction(ISD::FSQRT, MVT::f32, Expand);
 261
 262   if (Subtarget.hasFCPSGN()) {
 263     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
 264     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
 265   } else {
 266     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
 267     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 268   }
 269
 270   if (Subtarget.hasFPRND()) {
 271     setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
 272     setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
 273     setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
 274     setOperationAction(ISD::FROUND, MVT::f64, Legal);
 275
 276     setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
 277     setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
 278     setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
 279     setOperationAction(ISD::FROUND, MVT::f32, Legal);
 280   }
 281
 282   // PowerPC does not have BSWAP
 283   // CTPOP or CTTZ were introduced in P8/P9 respectivelly
 284   setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
 285   setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
 286   if (Subtarget.isISA3_0()) {
 287     setOperationAction(ISD::CTTZ , MVT::i32  , Legal);
 288     setOperationAction(ISD::CTTZ , MVT::i64  , Legal);
 289   } else {
 290     setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
 291     setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
 292   }
 293
 294   if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
 295     setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
 296     setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
 297   } else {
 298     setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
 299     setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
 300   }
 301
 302   // PowerPC does not have ROTR
 303   setOperationAction(ISD::ROTR, MVT::i32   , Expand);
 304   setOperationAction(ISD::ROTR, MVT::i64   , Expand);
 305
 306   if (!Subtarget.useCRBits()) {
 307     // PowerPC does not have Select
 308     setOperationAction(ISD::SELECT, MVT::i32, Expand);
 309     setOperationAction(ISD::SELECT, MVT::i64, Expand);
 310     setOperationAction(ISD::SELECT, MVT::f32, Expand);
 311     setOperationAction(ISD::SELECT, MVT::f64, Expand);
 312   }
 313
 314   // PowerPC wants to turn select_cc of FP into fsel when possible.
 315   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
 316   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
 317
 318   // PowerPC wants to optimize integer setcc a bit
 319   if (!Subtarget.useCRBits())
 320     setOperationAction(ISD::SETCC, MVT::i32, Custom);
 321
 322   // PowerPC does not have BRCOND which requires SetCC
 323   if (!Subtarget.useCRBits())
 324     setOperationAction(ISD::BRCOND, MVT::Other, Expand);
 325
 326   setOperationAction(ISD::BR_JT,  MVT::Other, Expand);
 327
 328   // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
 329   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
 330
 331   // PowerPC does not have [U|S]INT_TO_FP
 332   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
 333   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
 334
 335   if (Subtarget.hasDirectMove() && isPPC64) {
 336     setOperationAction(ISD::BITCAST, MVT::f32, Legal);
 337     setOperationAction(ISD::BITCAST, MVT::i32, Legal);
 338     setOperationAction(ISD::BITCAST, MVT::i64, Legal);
 339     setOperationAction(ISD::BITCAST, MVT::f64, Legal);
 340   } else {
 341     setOperationAction(ISD::BITCAST, MVT::f32, Expand);
 342     setOperationAction(ISD::BITCAST, MVT::i32, Expand);
 343     setOperationAction(ISD::BITCAST, MVT::i64, Expand);
 344     setOperationAction(ISD::BITCAST, MVT::f64, Expand);
 345   }
 346
 347   // We cannot sextinreg(i1).  Expand to shifts.
 348   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 349
 350   // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
 351   // SjLj exception handling but a light-weight setjmp/longjmp replacement to
 352   // support continuation, user-level threading, and etc.. As a result, no
 353   // other SjLj exception interfaces are implemented and please don't build
 354   // your own exception handling based on them.
 355   // LLVM/Clang supports zero-cost DWARF exception handling.
 356   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
 357   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
 358
 359   // We want to legalize GlobalAddress and ConstantPool nodes into the
 360   // appropriate instructions to materialize the address.
 361   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
 362   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
 363   setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
 364   setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
 365   setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
 366   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
 367   setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
 368   setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
 369   setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
 370   setOperationAction(ISD::JumpTable,     MVT::i64, Custom);
 371
 372   // TRAP is legal.
 373   setOperationAction(ISD::TRAP, MVT::Other, Legal);
 374
 375   // TRAMPOLINE is custom lowered.
 376   setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
 377   setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
 378
 379   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
 380   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
 381
 382   if (Subtarget.isSVR4ABI()) {
 383     if (isPPC64) {
 384       // VAARG always uses double-word chunks, so promote anything smaller.
 385       setOperationAction(ISD::VAARG, MVT::i1, Promote);
 386       AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
 387       setOperationAction(ISD::VAARG, MVT::i8, Promote);
 388       AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
 389       setOperationAction(ISD::VAARG, MVT::i16, Promote);
 390       AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
 391       setOperationAction(ISD::VAARG, MVT::i32, Promote);
 392       AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
 393       setOperationAction(ISD::VAARG, MVT::Other, Expand);
 394     } else {
 395       // VAARG is custom lowered with the 32-bit SVR4 ABI.
 396       setOperationAction(ISD::VAARG, MVT::Other, Custom);
 397       setOperationAction(ISD::VAARG, MVT::i64, Custom);
 398     }
 399   } else
 400     setOperationAction(ISD::VAARG, MVT::Other, Expand);
 401
 402   if (Subtarget.isSVR4ABI() && !isPPC64)
 403     // VACOPY is custom lowered with the 32-bit SVR4 ABI.
 404     setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
 405   else
 406     setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
 407
 408   // Use the default implementation.
 409   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
 410   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
 411   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
 412   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
 413   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);
 414   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
 415   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
 416   setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
 417   setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
 418
 419   // We want to custom lower some of our intrinsics.
 420   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
 421
 422   // To handle counter-based loop conditions.
 423   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
 424
 425   setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
 426   setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
 427   setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
 428   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
 429
 430   // Comparisons that require checking two conditions.
 431   setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
 432   setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
 433   setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
 434   setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
 435   setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
 436   setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
 437   setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
 438   setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
 439   setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
 440   setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
 441   setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
 442   setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
 443
 444   if (Subtarget.has64BitSupport()) {
 445     // They also have instructions for converting between i64 and fp.
 446     setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 447     setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
 448     setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 449     setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
 450     // This is just the low 32 bits of a (signed) fp->i64 conversion.
 451     // We cannot do this with Promote because i64 is not a legal type.
 452     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
 453
 454     if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
 455       setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
 456   } else {
 457     // PowerPC does not have FP_TO_UINT on 32-bit implementations.
 458     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
 459   }
 460
 461   // With the instructions enabled under FPCVT, we can do everything.
 462   if (Subtarget.hasFPCVT()) {
 463     if (Subtarget.has64BitSupport()) {
 464       setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 465       setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
 466       setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 467       setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
 468     }
 469
 470     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
 471     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
 472     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
 473     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
 474   }
 475
 476   if (Subtarget.use64BitRegs()) {
 477     // 64-bit PowerPC implementations can support i64 types directly
 478     addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
 479     // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
 480     setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 481     // 64-bit PowerPC wants to expand i128 shifts itself.
 482     setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
 483     setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
 484     setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
 485   } else {
 486     // 32-bit PowerPC wants to expand i64 shifts itself.
 487     setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
 488     setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
 489     setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
 490   }
 491
 492   if (Subtarget.hasAltivec()) {
 493     // First set operation action for all vector types to expand. Then we
 494     // will selectively turn on ones that can be effectively codegen'd.
 495     for (MVT VT : MVT::vector_valuetypes()) {
 496       // add/sub are legal for all supported vector VT's.
 497       setOperationAction(ISD::ADD, VT, Legal);
 498       setOperationAction(ISD::SUB, VT, Legal);
 499
 500       // Vector instructions introduced in P8
 501       if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
 502         setOperationAction(ISD::CTPOP, VT, Legal);
 503         setOperationAction(ISD::CTLZ, VT, Legal);
 504       }
 505       else {
 506         setOperationAction(ISD::CTPOP, VT, Expand);
 507         setOperationAction(ISD::CTLZ, VT, Expand);
 508       }
 509
 510       // Vector instructions introduced in P9
 511       if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
 512         setOperationAction(ISD::CTTZ, VT, Legal);
 513       else
 514         setOperationAction(ISD::CTTZ, VT, Expand);
 515
 516       // We promote all shuffles to v16i8.
 517       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
 518       AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
 519
 520       // We promote all non-typed operations to v4i32.
 521       setOperationAction(ISD::AND   , VT, Promote);
 522       AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
 523       setOperationAction(ISD::OR    , VT, Promote);
 524       AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
 525       setOperationAction(ISD::XOR   , VT, Promote);
 526       AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
 527       setOperationAction(ISD::LOAD  , VT, Promote);
 528       AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
 529       setOperationAction(ISD::SELECT, VT, Promote);
 530       AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
 531       setOperationAction(ISD::SELECT_CC, VT, Promote);
 532       AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
 533       setOperationAction(ISD::STORE, VT, Promote);
 534       AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
 535
 536       // No other operations are legal.
 537       setOperationAction(ISD::MUL , VT, Expand);
 538       setOperationAction(ISD::SDIV, VT, Expand);
 539       setOperationAction(ISD::SREM, VT, Expand);
 540       setOperationAction(ISD::UDIV, VT, Expand);
 541       setOperationAction(ISD::UREM, VT, Expand);
 542       setOperationAction(ISD::FDIV, VT, Expand);
 543       setOperationAction(ISD::FREM, VT, Expand);
 544       setOperationAction(ISD::FNEG, VT, Expand);
 545       setOperationAction(ISD::FSQRT, VT, Expand);
 546       setOperationAction(ISD::FLOG, VT, Expand);
 547       setOperationAction(ISD::FLOG10, VT, Expand);
 548       setOperationAction(ISD::FLOG2, VT, Expand);
 549       setOperationAction(ISD::FEXP, VT, Expand);
 550       setOperationAction(ISD::FEXP2, VT, Expand);
 551       setOperationAction(ISD::FSIN, VT, Expand);
 552       setOperationAction(ISD::FCOS, VT, Expand);
 553       setOperationAction(ISD::FABS, VT, Expand);
 554       setOperationAction(ISD::FFLOOR, VT, Expand);
 555       setOperationAction(ISD::FCEIL,  VT, Expand);
 556       setOperationAction(ISD::FTRUNC, VT, Expand);
 557       setOperationAction(ISD::FRINT,  VT, Expand);
 558       setOperationAction(ISD::FNEARBYINT, VT, Expand);
 559       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
 560       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
 561       setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
 562       setOperationAction(ISD::MULHU, VT, Expand);
 563       setOperationAction(ISD::MULHS, VT, Expand);
 564       setOperationAction(ISD::UMUL_LOHI, VT, Expand);
 565       setOperationAction(ISD::SMUL_LOHI, VT, Expand);
 566       setOperationAction(ISD::UDIVREM, VT, Expand);
 567       setOperationAction(ISD::SDIVREM, VT, Expand);
 568       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
 569       setOperationAction(ISD::FPOW, VT, Expand);
 570       setOperationAction(ISD::BSWAP, VT, Expand);
 571       setOperationAction(ISD::VSELECT, VT, Expand);
 572       setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
 573       setOperationAction(ISD::ROTL, VT, Expand);
 574       setOperationAction(ISD::ROTR, VT, Expand);
 575
 576       for (MVT InnerVT : MVT::vector_valuetypes()) {
 577         setTruncStoreAction(VT, InnerVT, Expand);
 578         setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
 579         setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
 580         setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
 581       }
 582     }
 583
 584     // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
 585     // with merges, splats, etc.
 586     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
 587
 588     setOperationAction(ISD::AND   , MVT::v4i32, Legal);
 589     setOperationAction(ISD::OR    , MVT::v4i32, Legal);
 590     setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
 591     setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
 592     setOperationAction(ISD::SELECT, MVT::v4i32,
 593                        Subtarget.useCRBits() ? Legal : Expand);
 594     setOperationAction(ISD::STORE , MVT::v4i32, Legal);
 595     setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
 596     setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
 597     setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
 598     setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
 599     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
 600     setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
 601     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
 602     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
 603
 604     addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
 605     addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
 606     addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
 607     addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
 608
 609     setOperationAction(ISD::MUL, MVT::v4f32, Legal);
 610     setOperationAction(ISD::FMA, MVT::v4f32, Legal);
 611
 612     if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
 613       setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
 614       setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
 615     }
 616
 617     if (Subtarget.hasP8Altivec())
 618       setOperationAction(ISD::MUL, MVT::v4i32, Legal);
 619     else
 620       setOperationAction(ISD::MUL, MVT::v4i32, Custom);
 621
 622     setOperationAction(ISD::MUL, MVT::v8i16, Custom);
 623     setOperationAction(ISD::MUL, MVT::v16i8, Custom);
 624
 625     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
 626     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
 627
 628     setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
 629     setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
 630     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
 631     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
 632
 633     // Altivec does not contain unordered floating-point compare instructions
 634     setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
 635     setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
 636     setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
 637     setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
 638
 639     if (Subtarget.hasVSX()) {
 640       setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
 641       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
 642       if (Subtarget.hasP8Vector()) {
 643         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
 644         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
 645       }
 646       if (Subtarget.hasDirectMove() && isPPC64) {
 647         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
 648         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
 649         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
 650         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
 651         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
 652         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
 653         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
 654         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
 655       }
 656       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
 657
 658       setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
 659       setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
 660       setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
 661       setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
 662       setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
 663
 664       setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
 665
 666       setOperationAction(ISD::MUL, MVT::v2f64, Legal);
 667       setOperationAction(ISD::FMA, MVT::v2f64, Legal);
 668
 669       setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
 670       setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
 671
 672       setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
 673       setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
 674       setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
 675       setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
 676       setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
 677
 678       // Share the Altivec comparison restrictions.
 679       setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
 680       setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
 681       setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
 682       setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
 683
 684       setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
 685       setOperationAction(ISD::STORE, MVT::v2f64, Legal);
 686
 687       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
 688
 689       if (Subtarget.hasP8Vector())
 690         addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
 691
 692       addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
 693
 694       addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
 695       addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
 696       addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
 697
 698       if (Subtarget.hasP8Altivec()) {
 699         setOperationAction(ISD::SHL, MVT::v2i64, Legal);
 700         setOperationAction(ISD::SRA, MVT::v2i64, Legal);
 701         setOperationAction(ISD::SRL, MVT::v2i64, Legal);
 702
 703         // 128 bit shifts can be accomplished via 3 instructions for SHL and
 704         // SRL, but not for SRA because of the instructions available:
 705         // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
 706         // doing
 707         setOperationAction(ISD::SHL, MVT::v1i128, Expand);
 708         setOperationAction(ISD::SRL, MVT::v1i128, Expand);
 709         setOperationAction(ISD::SRA, MVT::v1i128, Expand);
 710
 711         setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
 712       }
 713       else {
 714         setOperationAction(ISD::SHL, MVT::v2i64, Expand);
 715         setOperationAction(ISD::SRA, MVT::v2i64, Expand);
 716         setOperationAction(ISD::SRL, MVT::v2i64, Expand);
 717
 718         setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
 719
 720         // VSX v2i64 only supports non-arithmetic operations.
 721         setOperationAction(ISD::ADD, MVT::v2i64, Expand);
 722         setOperationAction(ISD::SUB, MVT::v2i64, Expand);
 723       }
 724
 725       setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
 726       AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
 727       setOperationAction(ISD::STORE, MVT::v2i64, Promote);
 728       AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
 729
 730       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
 731
 732       setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
 733       setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
 734       setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
 735       setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
 736
 737       // Vector operation legalization checks the result type of
 738       // SIGN_EXTEND_INREG, overall legalization checks the inner type.
 739       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
 740       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
 741       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
 742       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
 743
 744       setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
 745       setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
 746       setOperationAction(ISD::FABS, MVT::v4f32, Legal);
 747       setOperationAction(ISD::FABS, MVT::v2f64, Legal);
 748
 749       if (Subtarget.hasDirectMove())
 750         setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
 751       setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
 752
 753       addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
 754     }
 755
 756     if (Subtarget.hasP8Altivec()) {
 757       addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
 758       addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
 759     }
 760
 761     if (Subtarget.hasP9Vector()) {
 762       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
 763       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
 764
 765       // 128 bit shifts can be accomplished via 3 instructions for SHL and
 766       // SRL, but not for SRA because of the instructions available:
 767       // VS{RL} and VS{RL}O.
 768       setOperationAction(ISD::SHL, MVT::v1i128, Legal);
 769       setOperationAction(ISD::SRL, MVT::v1i128, Legal);
 770       setOperationAction(ISD::SRA, MVT::v1i128, Expand);
 771     }
 772   }
 773
 774   if (Subtarget.hasQPX()) {
 775     setOperationAction(ISD::FADD, MVT::v4f64, Legal);
 776     setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
 777     setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
 778     setOperationAction(ISD::FREM, MVT::v4f64, Expand);
 779
 780     setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
 781     setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);
 782
 783     setOperationAction(ISD::LOAD  , MVT::v4f64, Custom);
 784     setOperationAction(ISD::STORE , MVT::v4f64, Custom);
 785
 786     setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
 787     setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);
 788
 789     if (!Subtarget.useCRBits())
 790       setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
 791     setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
 792
 793     setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
 794     setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
 795     setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
 796     setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
 797     setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);
 798     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
 799     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);
 800
 801     setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
 802     setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);
 803
 804     setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
 805     setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand);
 806     setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);
 807
 808     setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
 809     setOperationAction(ISD::FABS , MVT::v4f64, Legal);
 810     setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
 811     setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
 812     setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
 813     setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
 814     setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
 815     setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
 816     setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
 817     setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);
 818
 819     setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
 820     setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);
 821
 822     setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
 823     setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);
 824
 825     addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);
 826
 827     setOperationAction(ISD::FADD, MVT::v4f32, Legal);
 828     setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
 829     setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
 830     setOperationAction(ISD::FREM, MVT::v4f32, Expand);
 831
 832     setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
 833     setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);
 834
 835     setOperationAction(ISD::LOAD  , MVT::v4f32, Custom);
 836     setOperationAction(ISD::STORE , MVT::v4f32, Custom);
 837
 838     if (!Subtarget.useCRBits())
 839       setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
 840     setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
 841
 842     setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
 843     setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
 844     setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
 845     setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
 846     setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);
 847     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
 848     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
 849
 850     setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
 851     setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);
 852
 853     setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
 854     setOperationAction(ISD::FABS , MVT::v4f32, Legal);
 855     setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
 856     setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
 857     setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
 858     setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
 859     setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
 860     setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
 861     setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
 862     setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);
 863
 864     setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
 865     setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
 866
 867     setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
 868     setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);
 869
 870     addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);
 871
 872     setOperationAction(ISD::AND , MVT::v4i1, Legal);
 873     setOperationAction(ISD::OR , MVT::v4i1, Legal);
 874     setOperationAction(ISD::XOR , MVT::v4i1, Legal);
 875
 876     if (!Subtarget.useCRBits())
 877       setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
 878     setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);
 879
 880     setOperationAction(ISD::LOAD  , MVT::v4i1, Custom);
 881     setOperationAction(ISD::STORE , MVT::v4i1, Custom);
 882
 883     setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
 884     setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
 885     setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
 886     setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
 887     setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);
 888     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
 889     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
 890
 891     setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
 892     setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
 893
 894     addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);
 895
 896     setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
 897     setOperationAction(ISD::FCEIL,  MVT::v4f64, Legal);
 898     setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
 899     setOperationAction(ISD::FROUND, MVT::v4f64, Legal);
 900
 901     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
 902     setOperationAction(ISD::FCEIL,  MVT::v4f32, Legal);
 903     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
 904     setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
 905
 906     setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
 907     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
 908
 909     // These need to set FE_INEXACT, and so cannot be vectorized here.
 910     setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
 911     setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
 912
 913     if (TM.Options.UnsafeFPMath) {
 914       setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
 915       setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
 916
 917       setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
 918       setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
 919     } else {
 920       setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
 921       setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);
 922
 923       setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
 924       setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
 925     }
 926   }
 927
 928   if (Subtarget.has64BitSupport())
 929     setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
 930
 931   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
 932
 933   if (!isPPC64) {
 934     setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
 935     setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
 936   }
 937
 938   setBooleanContents(ZeroOrOneBooleanContent);
 939
 940   if (Subtarget.hasAltivec()) {
 941     // Altivec instructions set fields to all zeros or all ones.
 942     setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
 943   }
 944
 945   if (!isPPC64) {
 946     // These libcalls are not available in 32-bit.
 947     setLibcallName(RTLIB::SHL_I128, nullptr);
 948     setLibcallName(RTLIB::SRL_I128, nullptr);
 949     setLibcallName(RTLIB::SRA_I128, nullptr);
 950   }
 951
 952   setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
 953
 954   // We have target-specific dag combine patterns for the following nodes:
 955   setTargetDAGCombine(ISD::SHL);
 956   setTargetDAGCombine(ISD::SRA);
 957   setTargetDAGCombine(ISD::SRL);
 958   setTargetDAGCombine(ISD::SINT_TO_FP);
 959   setTargetDAGCombine(ISD::BUILD_VECTOR);
 960   if (Subtarget.hasFPCVT())
 961     setTargetDAGCombine(ISD::UINT_TO_FP);
 962   setTargetDAGCombine(ISD::LOAD);
 963   setTargetDAGCombine(ISD::STORE);
 964   setTargetDAGCombine(ISD::BR_CC);
 965   if (Subtarget.useCRBits())
 966     setTargetDAGCombine(ISD::BRCOND);
 967   setTargetDAGCombine(ISD::BSWAP);
 968   setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
 969   setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
 970   setTargetDAGCombine(ISD::INTRINSIC_VOID);
 971
 972   setTargetDAGCombine(ISD::SIGN_EXTEND);
 973   setTargetDAGCombine(ISD::ZERO_EXTEND);
 974   setTargetDAGCombine(ISD::ANY_EXTEND);
 975
 976   if (Subtarget.useCRBits()) {
 977     setTargetDAGCombine(ISD::TRUNCATE);
 978     setTargetDAGCombine(ISD::SETCC);
 979     setTargetDAGCombine(ISD::SELECT_CC);
 980   }
 981
 982   // Use reciprocal estimates.
 983   if (TM.Options.UnsafeFPMath) {
 984     setTargetDAGCombine(ISD::FDIV);
 985     setTargetDAGCombine(ISD::FSQRT);
 986   }
 987
 988   // Darwin long double math library functions have $LDBL128 appended.
 989   if (Subtarget.isDarwin()) {
 990     setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
 991     setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
 992     setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
 993     setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
 994     setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
 995     setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
 996     setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
 997     setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
 998     setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
 999     setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
1000   }
1001
1002   // With 32 condition bits, we don't need to sink (and duplicate) compares
1003   // aggressively in CodeGenPrep.
1004   if (Subtarget.useCRBits()) {
1005     setHasMultipleConditionRegisters();
1006     setJumpIsExpensive();
1007   }
1008
1009   setMinFunctionAlignment(2);
1010   if (Subtarget.isDarwin())
1011     setPrefFunctionAlignment(4);
1012
1013   switch (Subtarget.getDarwinDirective()) {
1014   default: break;
1015   case PPC::DIR_970:
1016   case PPC::DIR_A2:
1017   case PPC::DIR_E500mc:
1018   case PPC::DIR_E5500:
1019   case PPC::DIR_PWR4:
1020   case PPC::DIR_PWR5:
1021   case PPC::DIR_PWR5X:
1022   case PPC::DIR_PWR6:
1023   case PPC::DIR_PWR6X:
1024   case PPC::DIR_PWR7:
1025   case PPC::DIR_PWR8:
1026   case PPC::DIR_PWR9:
1027     setPrefFunctionAlignment(4);
1028     setPrefLoopAlignment(4);
1029     break;
1030   }
1031
1032   if (Subtarget.enableMachineScheduler())
1033     setSchedulingPreference(Sched::Source);
1034   else
1035     setSchedulingPreference(Sched::Hybrid);
1036
1037   computeRegisterProperties(STI.getRegisterInfo());
1038
1039   // The Freescale cores do better with aggressive inlining of memcpy and
1040   // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1041   if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
1042       Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
1043     MaxStoresPerMemset = 32;
1044     MaxStoresPerMemsetOptSize = 16;
1045     MaxStoresPerMemcpy = 32;
1046     MaxStoresPerMemcpyOptSize = 8;
1047     MaxStoresPerMemmove = 32;
1048     MaxStoresPerMemmoveOptSize = 8;
1049   } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
1050     // The A2 also benefits from (very) aggressive inlining of memcpy and
1051     // friends. The overhead of a the function call, even when warm, can be
1052     // over one hundred cycles.
1053     MaxStoresPerMemset = 128;
1054     MaxStoresPerMemcpy = 128;
1055     MaxStoresPerMemmove = 128;
1056     MaxLoadsPerMemcmp = 128;
1057   } else {
1058     MaxLoadsPerMemcmp = 8;
1059     MaxLoadsPerMemcmpOptSize = 4;
1060   }
1061 }
1062
1063 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1064 /// the desired ByVal argument alignment.
1065 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
1066                              unsigned MaxMaxAlign) {
1067   if (MaxAlign == MaxMaxAlign)
1068     return;
1069   if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1070     if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
1071       MaxAlign = 32;
1072     else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
1073       MaxAlign = 16;
1074   } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1075     unsigned EltAlign = 0;
1076     getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1077     if (EltAlign > MaxAlign)
1078       MaxAlign = EltAlign;
1079   } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1080     for (auto *EltTy : STy->elements()) {
1081       unsigned EltAlign = 0;
1082       getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1083       if (EltAlign > MaxAlign)
1084         MaxAlign = EltAlign;
1085       if (MaxAlign == MaxMaxAlign)
1086         break;
1087     }
1088   }
1089 }
1090
1091 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1092 /// function arguments in the caller parameter area.
1093 unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1094                                                   const DataLayout &DL) const {
1095   // Darwin passes everything on 4 byte boundary.
1096   if (Subtarget.isDarwin())
1097     return 4;
1098
1099   // 16byte and wider vectors are passed on 16byte boundary.
1100   // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1101   unsigned Align = Subtarget.isPPC64() ? 8 : 4;
1102   if (Subtarget.hasAltivec() || Subtarget.hasQPX())
1103     getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
1104   return Align;
1105 }
1106
1107 bool PPCTargetLowering::useSoftFloat() const {
1108   return Subtarget.useSoftFloat();
1109 }
1110
1111 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1112   switch ((PPCISD::NodeType)Opcode) {
1113   case PPCISD::FIRST_NUMBER:    break;
1114   case PPCISD::FSEL:            return "PPCISD::FSEL";
1115   case PPCISD::FCFID:           return "PPCISD::FCFID";
1116   case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
1117   case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
1118   case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
1119   case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
1120   case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
1121   case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
1122   case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
1123   case PPCISD::FRE:             return "PPCISD::FRE";
1124   case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
1125   case PPCISD::STFIWX:          return "PPCISD::STFIWX";
1126   case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
1127   case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
1128   case PPCISD::VPERM:           return "PPCISD::VPERM";
1129   case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
1130   case PPCISD::XXINSERT:        return "PPCISD::XXINSERT";
1131   case PPCISD::XXREVERSE:       return "PPCISD::XXREVERSE";
1132   case PPCISD::XXPERMDI:        return "PPCISD::XXPERMDI";
1133   case PPCISD::VECSHL:          return "PPCISD::VECSHL";
1134   case PPCISD::CMPB:            return "PPCISD::CMPB";
1135   case PPCISD::Hi:              return "PPCISD::Hi";
1136   case PPCISD::Lo:              return "PPCISD::Lo";
1137   case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
1138   case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
1139   case PPCISD::DYNAREAOFFSET:   return "PPCISD::DYNAREAOFFSET";
1140   case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
1141   case PPCISD::SRL:             return "PPCISD::SRL";
1142   case PPCISD::SRA:             return "PPCISD::SRA";
1143   case PPCISD::SHL:             return "PPCISD::SHL";
1144   case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
1145   case PPCISD::CALL:            return "PPCISD::CALL";
1146   case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
1147   case PPCISD::MTCTR:           return "PPCISD::MTCTR";
1148   case PPCISD::BCTRL:           return "PPCISD::BCTRL";
1149   case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
1150   case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
1151   case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
1152   case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
1153   case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1154   case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
1155   case PPCISD::MFVSR:           return "PPCISD::MFVSR";
1156   case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
1157   case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
1158   case PPCISD::SINT_VEC_TO_FP:  return "PPCISD::SINT_VEC_TO_FP";
1159   case PPCISD::UINT_VEC_TO_FP:  return "PPCISD::UINT_VEC_TO_FP";
1160   case PPCISD::ANDIo_1_EQ_BIT:  return "PPCISD::ANDIo_1_EQ_BIT";
1161   case PPCISD::ANDIo_1_GT_BIT:  return "PPCISD::ANDIo_1_GT_BIT";
1162   case PPCISD::VCMP:            return "PPCISD::VCMP";
1163   case PPCISD::VCMPo:           return "PPCISD::VCMPo";
1164   case PPCISD::LBRX:            return "PPCISD::LBRX";
1165   case PPCISD::STBRX:           return "PPCISD::STBRX";
1166   case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
1167   case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
1168   case PPCISD::LXSIZX:          return "PPCISD::LXSIZX";
1169   case PPCISD::STXSIX:          return "PPCISD::STXSIX";
1170   case PPCISD::VEXTS:           return "PPCISD::VEXTS";
1171   case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
1172   case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
1173   case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
1174   case PPCISD::BDNZ:            return "PPCISD::BDNZ";
1175   case PPCISD::BDZ:             return "PPCISD::BDZ";
1176   case PPCISD::MFFS:            return "PPCISD::MFFS";
1177   case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
1178   case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
1179   case PPCISD::CR6SET:          return "PPCISD::CR6SET";
1180   case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
1181   case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
1182   case PPCISD::PPC32_PICGOT:    return "PPCISD::PPC32_PICGOT";
1183   case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1184   case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
1185   case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
1186   case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
1187   case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
1188   case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
1189   case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1190   case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
1191   case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
1192   case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
1193   case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1194   case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1195   case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
1196   case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
1197   case PPCISD::SC:              return "PPCISD::SC";
1198   case PPCISD::CLRBHRB:         return "PPCISD::CLRBHRB";
1199   case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
1200   case PPCISD::RFEBB:           return "PPCISD::RFEBB";
1201   case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
1202   case PPCISD::SWAP_NO_CHAIN:   return "PPCISD::SWAP_NO_CHAIN";
1203   case PPCISD::QVFPERM:         return "PPCISD::QVFPERM";
1204   case PPCISD::QVGPCI:          return "PPCISD::QVGPCI";
1205   case PPCISD::QVALIGNI:        return "PPCISD::QVALIGNI";
1206   case PPCISD::QVESPLATI:       return "PPCISD::QVESPLATI";
1207   case PPCISD::QBFLT:           return "PPCISD::QBFLT";
1208   case PPCISD::QVLFSb:          return "PPCISD::QVLFSb";
1209   }
1210   return nullptr;
1211 }
1212
1213 EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1214                                           EVT VT) const {
1215   if (!VT.isVector())
1216     return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1217
1218   if (Subtarget.hasQPX())
1219     return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());
1220
1221   return VT.changeVectorElementTypeToInteger();
1222 }
1223
1224 bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
1225   assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1226   return true;
1227 }
1228
1229 //===----------------------------------------------------------------------===//
1230 // Node matching predicates, for use by the tblgen matching code.
1231 //===----------------------------------------------------------------------===//
1232
1233 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1234 static bool isFloatingPointZero(SDValue Op) {
1235   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1236     return CFP->getValueAPF().isZero();
1237   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1238     // Maybe this has already been legalized into the constant pool?
1239     if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1240       if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1241         return CFP->getValueAPF().isZero();
1242   }
1243   return false;
1244 }
1245
1246 /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
1247 /// true if Op is undef or if it matches the specified value.
1248 static bool isConstantOrUndef(int Op, int Val) {
1249   return Op < 0 || Op == Val;
1250 }
1251
1252 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1253 /// VPKUHUM instruction.
1254 /// The ShuffleKind distinguishes between big-endian operations with
1255 /// two different inputs (0), either-endian operations with two identical
1256 /// inputs (1), and little-endian operations with two different inputs (2).
1257 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1258 bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1259                                SelectionDAG &DAG) {
1260   bool IsLE = DAG.getDataLayout().isLittleEndian();
1261   if (ShuffleKind == 0) {
1262     if (IsLE)
1263       return false;
1264     for (unsigned i = 0; i != 16; ++i)
1265       if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1266         return false;
1267   } else if (ShuffleKind == 2) {
1268     if (!IsLE)
1269       return false;
1270     for (unsigned i = 0; i != 16; ++i)
1271       if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1272         return false;
1273   } else if (ShuffleKind == 1) {
1274     unsigned j = IsLE ? 0 : 1;
1275     for (unsigned i = 0; i != 8; ++i)
1276       if (!isConstantOrUndef(N->getMaskElt(i),    i*2+j) ||
1277           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j))
1278         return false;
1279   }
1280   return true;
1281 }
1282
1283 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1284 /// VPKUWUM instruction.
1285 /// The ShuffleKind distinguishes between big-endian operations with
1286 /// two different inputs (0), either-endian operations with two identical
1287 /// inputs (1), and little-endian operations with two different inputs (2).
1288 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1289 bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1290                                SelectionDAG &DAG) {
1291   bool IsLE = DAG.getDataLayout().isLittleEndian();
1292   if (ShuffleKind == 0) {
1293     if (IsLE)
1294       return false;
1295     for (unsigned i = 0; i != 16; i += 2)
1296       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
1297           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
1298         return false;
1299   } else if (ShuffleKind == 2) {
1300     if (!IsLE)
1301       return false;
1302     for (unsigned i = 0; i != 16; i += 2)
1303       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
1304           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1))
1305         return false;
1306   } else if (ShuffleKind == 1) {
1307     unsigned j = IsLE ? 0 : 2;
1308     for (unsigned i = 0; i != 8; i += 2)
1309       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
1310           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1311           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1312           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1))
1313         return false;
1314   }
1315   return true;
1316 }
1317
1318 /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1319 /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1320 /// current subtarget.
1321 ///
1322 /// The ShuffleKind distinguishes between big-endian operations with
1323 /// two different inputs (0), either-endian operations with two identical
1324 /// inputs (1), and little-endian operations with two different inputs (2).
1325 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1326 bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1327                                SelectionDAG &DAG) {
1328   const PPCSubtarget& Subtarget =
1329     static_cast<const PPCSubtarget&>(DAG.getSubtarget());
1330   if (!Subtarget.hasP8Vector())
1331     return false;
1332
1333   bool IsLE = DAG.getDataLayout().isLittleEndian();
1334   if (ShuffleKind == 0) {
1335     if (IsLE)
1336       return false;
1337     for (unsigned i = 0; i != 16; i += 4)
1338       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+4) ||
1339           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+5) ||
1340           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+6) ||
1341           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+7))
1342         return false;
1343   } else if (ShuffleKind == 2) {
1344     if (!IsLE)
1345       return false;
1346     for (unsigned i = 0; i != 16; i += 4)
1347       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
1348           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1) ||
1349           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+2) ||
1350           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+3))
1351         return false;
1352   } else if (ShuffleKind == 1) {
1353     unsigned j = IsLE ? 0 : 4;
1354     for (unsigned i = 0; i != 8; i += 4)
1355       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
1356           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1357           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
1358           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
1359           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1360           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
1361           !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1362           !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1363         return false;
1364   }
1365   return true;
1366 }
1367
1368 /// isVMerge - Common function, used to match vmrg* shuffles.
1369 ///
1370 static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1371                      unsigned LHSStart, unsigned RHSStart) {
1372   if (N->getValueType(0) != MVT::v16i8)
1373     return false;
1374   assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1375          "Unsupported merge size!");
1376
1377   for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
1378     for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
1379       if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1380                              LHSStart+j+i*UnitSize) ||
1381           !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1382                              RHSStart+j+i*UnitSize))
1383         return false;
1384     }
1385   return true;
1386 }
1387
1388 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1389 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1390 /// The ShuffleKind distinguishes between big-endian merges with two
1391 /// different inputs (0), either-endian merges with two identical inputs (1),
1392 /// and little-endian merges with two different inputs (2).  For the latter,
1393 /// the input operands are swapped (see PPCInstrAltivec.td).
1394 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1395                              unsigned ShuffleKind, SelectionDAG &DAG) {
1396   if (DAG.getDataLayout().isLittleEndian()) {
1397     if (ShuffleKind == 1) // unary
1398       return isVMerge(N, UnitSize, 0, 0);
1399     else if (ShuffleKind == 2) // swapped
1400       return isVMerge(N, UnitSize, 0, 16);
1401     else
1402       return false;
1403   } else {
1404     if (ShuffleKind == 1) // unary
1405       return isVMerge(N, UnitSize, 8, 8);
1406     else if (ShuffleKind == 0) // normal
1407       return isVMerge(N, UnitSize, 8, 24);
1408     else
1409       return false;
1410   }
1411 }
1412
1413 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1414 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1415 /// The ShuffleKind distinguishes between big-endian merges with two
1416 /// different inputs (0), either-endian merges with two identical inputs (1),
1417 /// and little-endian merges with two different inputs (2).  For the latter,
1418 /// the input operands are swapped (see PPCInstrAltivec.td).
1419 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1420                              unsigned ShuffleKind, SelectionDAG &DAG) {
1421   if (DAG.getDataLayout().isLittleEndian()) {
1422     if (ShuffleKind == 1) // unary
1423       return isVMerge(N, UnitSize, 8, 8);
1424     else if (ShuffleKind == 2) // swapped
1425       return isVMerge(N, UnitSize, 8, 24);
1426     else
1427       return false;
1428   } else {
1429     if (ShuffleKind == 1) // unary
1430       return isVMerge(N, UnitSize, 0, 0);
1431     else if (ShuffleKind == 0) // normal
1432       return isVMerge(N, UnitSize, 0, 16);
1433     else
1434       return false;
1435   }
1436 }
1437
1438 /**
1439  * \brief Common function used to match vmrgew and vmrgow shuffles
1440  *
1441  * The indexOffset determines whether to look for even or odd words in
1442  * the shuffle mask. This is based on the of the endianness of the target
1443  * machine.
1444  *   - Little Endian:
1445  *     - Use offset of 0 to check for odd elements
1446  *     - Use offset of 4 to check for even elements
1447  *   - Big Endian:
1448  *     - Use offset of 0 to check for even elements
1449  *     - Use offset of 4 to check for odd elements
1450  * A detailed description of the vector element ordering for little endian and
1451  * big endian can be found at
1452  * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1453  * Targeting your applications - what little endian and big endian IBM XL C/C++
1454  * compiler differences mean to you
1455  *
1456  * The mask to the shuffle vector instruction specifies the indices of the
1457  * elements from the two input vectors to place in the result. The elements are
1458  * numbered in array-access order, starting with the first vector. These vectors
1459  * are always of type v16i8, thus each vector will contain 16 elements of size
1460  * 8. More info on the shuffle vector can be found in the
1461  * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1462  * Language Reference.
1463  *
1464  * The RHSStartValue indicates whether the same input vectors are used (unary)
1465  * or two different input vectors are used, based on the following:
1466  *   - If the instruction uses the same vector for both inputs, the range of the
1467  *     indices will be 0 to 15. In this case, the RHSStart value passed should
1468  *     be 0.
1469  *   - If the instruction has two different vectors then the range of the
1470  *     indices will be 0 to 31. In this case, the RHSStart value passed should
1471  *     be 16 (indices 0-15 specify elements in the first vector while indices 16
1472  *     to 31 specify elements in the second vector).
1473  *
1474  * \param[in] N The shuffle vector SD Node to analyze
1475  * \param[in] IndexOffset Specifies whether to look for even or odd elements
1476  * \param[in] RHSStartValue Specifies the starting index for the righthand input
1477  * vector to the shuffle_vector instruction
1478  * \return true iff this shuffle vector represents an even or odd word merge
1479  */
1480 static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1481                      unsigned RHSStartValue) {
1482   if (N->getValueType(0) != MVT::v16i8)
1483     return false;
1484
1485   for (unsigned i = 0; i < 2; ++i)
1486     for (unsigned j = 0; j < 4; ++j)
1487       if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1488                              i*RHSStartValue+j+IndexOffset) ||
1489           !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1490                              i*RHSStartValue+j+IndexOffset+8))
1491         return false;
1492   return true;
1493 }
1494
1495 /**
1496  * \brief Determine if the specified shuffle mask is suitable for the vmrgew or
1497  * vmrgow instructions.
1498  *
1499  * \param[in] N The shuffle vector SD Node to analyze
1500  * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1501  * \param[in] ShuffleKind Identify the type of merge:
1502  *   - 0 = big-endian merge with two different inputs;
1503  *   - 1 = either-endian merge with two identical inputs;
1504  *   - 2 = little-endian merge with two different inputs (inputs are swapped for
1505  *     little-endian merges).
1506  * \param[in] DAG The current SelectionDAG
1507  * \return true iff this shuffle mask
1508  */
1509 bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
1510                               unsigned ShuffleKind, SelectionDAG &DAG) {
1511   if (DAG.getDataLayout().isLittleEndian()) {
1512     unsigned indexOffset = CheckEven ? 4 : 0;
1513     if (ShuffleKind == 1) // Unary
1514       return isVMerge(N, indexOffset, 0);
1515     else if (ShuffleKind == 2) // swapped
1516       return isVMerge(N, indexOffset, 16);
1517     else
1518       return false;
1519   }
1520   else {
1521     unsigned indexOffset = CheckEven ? 0 : 4;
1522     if (ShuffleKind == 1) // Unary
1523       return isVMerge(N, indexOffset, 0);
1524     else if (ShuffleKind == 0) // Normal
1525       return isVMerge(N, indexOffset, 16);
1526     else
1527       return false;
1528   }
1529   return false;
1530 }
1531
1532 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
1533 /// amount, otherwise return -1.
1534 /// The ShuffleKind distinguishes between big-endian operations with two
1535 /// different inputs (0), either-endian operations with two identical inputs
1536 /// (1), and little-endian operations with two different inputs (2).  For the
1537 /// latter, the input operands are swapped (see PPCInstrAltivec.td).
1538 int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
1539                              SelectionDAG &DAG) {
1540   if (N->getValueType(0) != MVT::v16i8)
1541     return -1;
1542
1543   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1544
1545   // Find the first non-undef value in the shuffle mask.
1546   unsigned i;
1547   for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
1548     /*search*/;
1549
1550   if (i == 16) return -1;  // all undef.
1551
1552   // Otherwise, check to see if the rest of the elements are consecutively
1553   // numbered from this value.
1554   unsigned ShiftAmt = SVOp->getMaskElt(i);
1555   if (ShiftAmt < i) return -1;
1556
1557   ShiftAmt -= i;
1558   bool isLE = DAG.getDataLayout().isLittleEndian();
1559
1560   if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
1561     // Check the rest of the elements to see if they are consecutive.
1562     for (++i; i != 16; ++i)
1563       if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
1564         return -1;
1565   } else if (ShuffleKind == 1) {
1566     // Check the rest of the elements to see if they are consecutive.
1567     for (++i; i != 16; ++i)
1568       if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
1569         return -1;
1570   } else
1571     return -1;
1572
1573   if (isLE)
1574     ShiftAmt = 16 - ShiftAmt;
1575
1576   return ShiftAmt;
1577 }
1578
1579 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
1580 /// specifies a splat of a single element that is suitable for input to
1581 /// VSPLTB/VSPLTH/VSPLTW.
1582 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
1583   assert(N->getValueType(0) == MVT::v16i8 &&
1584          (EltSize == 1 || EltSize == 2 || EltSize == 4));
1585
1586   // The consecutive indices need to specify an element, not part of two
1587   // different elements.  So abandon ship early if this isn't the case.
1588   if (N->getMaskElt(0) % EltSize != 0)
1589     return false;
1590
1591   // This is a splat operation if each element of the permute is the same, and
1592   // if the value doesn't reference the second vector.
1593   unsigned ElementBase = N->getMaskElt(0);
1594
1595   // FIXME: Handle UNDEF elements too!
1596   if (ElementBase >= 16)
1597     return false;
1598
1599   // Check that the indices are consecutive, in the case of a multi-byte element
1600   // splatted with a v16i8 mask.
1601   for (unsigned i = 1; i != EltSize; ++i)
1602     if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
1603       return false;
1604
1605   for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
1606     if (N->getMaskElt(i) < 0) continue;
1607     for (unsigned j = 0; j != EltSize; ++j)
1608       if (N->getMaskElt(i+j) != N->getMaskElt(j))
1609         return false;
1610   }
1611   return true;
1612 }
1613
1614 /// Check that the mask is shuffling N byte elements. Within each N byte
1615 /// element of the mask, the indices could be either in increasing or
1616 /// decreasing order as long as they are consecutive.
1617 /// \param[in] N the shuffle vector SD Node to analyze
1618 /// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
1619 /// Word/DoubleWord/QuadWord).
1620 /// \param[in] StepLen the delta indices number among the N byte element, if
1621 /// the mask is in increasing/decreasing order then it is 1/-1.
1622 /// \return true iff the mask is shuffling N byte elements.
1623 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
1624                                    int StepLen) {
1625   assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
1626          "Unexpected element width.");
1627   assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
1628
1629   unsigned NumOfElem = 16 / Width;
1630   unsigned MaskVal[16]; //  Width is never greater than 16
1631   for (unsigned i = 0; i < NumOfElem; ++i) {
1632     MaskVal[0] = N->getMaskElt(i * Width);
1633     if ((StepLen == 1) && (MaskVal[0] % Width)) {
1634       return false;
1635     } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
1636       return false;
1637     }
1638
1639     for (unsigned int j = 1; j < Width; ++j) {
1640       MaskVal[j] = N->getMaskElt(i * Width + j);
1641       if (MaskVal[j] != MaskVal[j-1] + StepLen) {
1642         return false;
1643       }
1644     }
1645   }
1646
1647   return true;
1648 }
1649
1650 bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
1651                           unsigned &InsertAtByte, bool &Swap, bool IsLE) {
1652   if (!isNByteElemShuffleMask(N, 4, 1))
1653     return false;
1654
1655   // Now we look at mask elements 0,4,8,12
1656   unsigned M0 = N->getMaskElt(0) / 4;
1657   unsigned M1 = N->getMaskElt(4) / 4;
1658   unsigned M2 = N->getMaskElt(8) / 4;
1659   unsigned M3 = N->getMaskElt(12) / 4;
1660   unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
1661   unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
1662
1663   // Below, let H and L be arbitrary elements of the shuffle mask
1664   // where H is in the range [4,7] and L is in the range [0,3].
1665   // H, 1, 2, 3 or L, 5, 6, 7
1666   if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
1667       (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
1668     ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
1669     InsertAtByte = IsLE ? 12 : 0;
1670     Swap = M0 < 4;
1671     return true;
1672   }
1673   // 0, H, 2, 3 or 4, L, 6, 7
1674   if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
1675       (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
1676     ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
1677     InsertAtByte = IsLE ? 8 : 4;
1678     Swap = M1 < 4;
1679     return true;
1680   }
1681   // 0, 1, H, 3 or 4, 5, L, 7
1682   if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
1683       (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
1684     ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
1685     InsertAtByte = IsLE ? 4 : 8;
1686     Swap = M2 < 4;
1687     return true;
1688   }
1689   // 0, 1, 2, H or 4, 5, 6, L
1690   if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
1691       (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
1692     ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
1693     InsertAtByte = IsLE ? 0 : 12;
1694     Swap = M3 < 4;
1695     return true;
1696   }
1697
1698   // If both vector operands for the shuffle are the same vector, the mask will
1699   // contain only elements from the first one and the second one will be undef.
1700   if (N->getOperand(1).isUndef()) {
1701     ShiftElts = 0;
1702     Swap = true;
1703     unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
1704     if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
1705       InsertAtByte = IsLE ? 12 : 0;
1706       return true;
1707     }
1708     if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
1709       InsertAtByte = IsLE ? 8 : 4;
1710       return true;
1711     }
1712     if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
1713       InsertAtByte = IsLE ? 4 : 8;
1714       return true;
1715     }
1716     if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
1717       InsertAtByte = IsLE ? 0 : 12;
1718       return true;
1719     }
1720   }
1721
1722   return false;
1723 }
1724
1725 bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
1726                                bool &Swap, bool IsLE) {
1727   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
1728   // Ensure each byte index of the word is consecutive.
1729   if (!isNByteElemShuffleMask(N, 4, 1))
1730     return false;
1731
1732   // Now we look at mask elements 0,4,8,12, which are the beginning of words.
1733   unsigned M0 = N->getMaskElt(0) / 4;
1734   unsigned M1 = N->getMaskElt(4) / 4;
1735   unsigned M2 = N->getMaskElt(8) / 4;
1736   unsigned M3 = N->getMaskElt(12) / 4;
1737
1738   // If both vector operands for the shuffle are the same vector, the mask will
1739   // contain only elements from the first one and the second one will be undef.
1740   if (N->getOperand(1).isUndef()) {
1741     assert(M0 < 4 && "Indexing into an undef vector?");
1742     if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
1743       return false;
1744
1745     ShiftElts = IsLE ? (4 - M0) % 4 : M0;
1746     Swap = false;
1747     return true;
1748   }
1749
1750   // Ensure each word index of the ShuffleVector Mask is consecutive.
1751   if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
1752     return false;
1753
1754   if (IsLE) {
1755     if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
1756       // Input vectors don't need to be swapped if the leading element
1757       // of the result is one of the 3 left elements of the second vector
1758       // (or if there is no shift to be done at all).
1759       Swap = false;
1760       ShiftElts = (8 - M0) % 8;
1761     } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
1762       // Input vectors need to be swapped if the leading element
1763       // of the result is one of the 3 left elements of the first vector
1764       // (or if we're shifting by 4 - thereby simply swapping the vectors).
1765       Swap = true;
1766       ShiftElts = (4 - M0) % 4;
1767     }
1768
1769     return true;
1770   } else {                                          // BE
1771     if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
1772       // Input vectors don't need to be swapped if the leading element
1773       // of the result is one of the 4 elements of the first vector.
1774       Swap = false;
1775       ShiftElts = M0;
1776     } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
1777       // Input vectors need to be swapped if the leading element
1778       // of the result is one of the 4 elements of the right vector.
1779       Swap = true;
1780       ShiftElts = M0 - 4;
1781     }
1782
1783     return true;
1784   }
1785 }
1786
1787 bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
1788   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
1789
1790   if (!isNByteElemShuffleMask(N, Width, -1))
1791     return false;
1792
1793   for (int i = 0; i < 16; i += Width)
1794     if (N->getMaskElt(i) != i + Width - 1)
1795       return false;
1796
1797   return true;
1798 }
1799
1800 bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
1801   return isXXBRShuffleMaskHelper(N, 2);
1802 }
1803
1804 bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
1805   return isXXBRShuffleMaskHelper(N, 4);
1806 }
1807
1808 bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
1809   return isXXBRShuffleMaskHelper(N, 8);
1810 }
1811
1812 bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
1813   return isXXBRShuffleMaskHelper(N, 16);
1814 }
1815
1816 /// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
1817 /// if the inputs to the instruction should be swapped and set \p DM to the
1818 /// value for the immediate.
1819 /// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
1820 /// AND element 0 of the result comes from the first input (LE) or second input
1821 /// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
1822 /// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
1823 /// mask.
1824 bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
1825                                bool &Swap, bool IsLE) {
1826   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
1827
1828   // Ensure each byte index of the double word is consecutive.
1829   if (!isNByteElemShuffleMask(N, 8, 1))
1830     return false;
1831
1832   unsigned M0 = N->getMaskElt(0) / 8;
1833   unsigned M1 = N->getMaskElt(8) / 8;
1834   assert(((M0 | M1) < 4) && "A mask element out of bounds?");
1835
1836   // If both vector operands for the shuffle are the same vector, the mask will
1837   // contain only elements from the first one and the second one will be undef.
1838   if (N->getOperand(1).isUndef()) {
1839     if ((M0 | M1) < 2) {
1840       DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
1841       Swap = false;
1842       return true;
1843     } else
1844       return false;
1845   }
1846
1847   if (IsLE) {
1848     if (M0 > 1 && M1 < 2) {
1849       Swap = false;
1850     } else if (M0 < 2 && M1 > 1) {
1851       M0 = (M0 + 2) % 4;
1852       M1 = (M1 + 2) % 4;
1853       Swap = true;
1854     } else
1855       return false;
1856
1857     // Note: if control flow comes here that means Swap is already set above
1858     DM = (((~M1) & 1) << 1) + ((~M0) & 1);
1859     return true;
1860   } else { // BE
1861     if (M0 < 2 && M1 > 1) {
1862       Swap = false;
1863     } else if (M0 > 1 && M1 < 2) {
1864       M0 = (M0 + 2) % 4;
1865       M1 = (M1 + 2) % 4;
1866       Swap = true;
1867     } else
1868       return false;
1869
1870     // Note: if control flow comes here that means Swap is already set above
1871     DM = (M0 << 1) + (M1 & 1);
1872     return true;
1873   }
1874 }
1875
1876
1877 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
1878 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
1879 unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
1880                                 SelectionDAG &DAG) {
1881   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1882   assert(isSplatShuffleMask(SVOp, EltSize));
1883   if (DAG.getDataLayout().isLittleEndian())
1884     return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
1885   else
1886     return SVOp->getMaskElt(0) / EltSize;
1887 }
1888
1889 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
1890 /// by using a vspltis[bhw] instruction of the specified element size, return
1891 /// the constant being splatted.  The ByteSize field indicates the number of
1892 /// bytes of each element [124] -> [bhw].
1893 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
1894   SDValue OpVal(nullptr, 0);
1895
1896   // If ByteSize of the splat is bigger than the element size of the
1897   // build_vector, then we have a case where we are checking for a splat where
1898   // multiple elements of the buildvector are folded together into a single
1899   // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
1900   unsigned EltSize = 16/N->getNumOperands();
1901   if (EltSize < ByteSize) {
1902     unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
1903     SDValue UniquedVals[4];
1904     assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
1905
1906     // See if all of the elements in the buildvector agree across.
1907     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1908       if (N->getOperand(i).isUndef()) continue;
1909       // If the element isn't a constant, bail fully out.
1910       if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
1911
1912       if (!UniquedVals[i&(Multiple-1)].getNode())
1913         UniquedVals[i&(Multiple-1)] = N->getOperand(i);
1914       else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
1915         return SDValue();  // no match.
1916     }
1917
1918     // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
1919     // either constant or undef values that are identical for each chunk.  See
1920     // if these chunks can form into a larger vspltis*.
1921
1922     // Check to see if all of the leading entries are either 0 or -1.  If
1923     // neither, then this won't fit into the immediate field.
1924     bool LeadingZero = true;
1925     bool LeadingOnes = true;
1926     for (unsigned i = 0; i != Multiple-1; ++i) {
1927       if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.
1928
1929       LeadingZero &= isNullConstant(UniquedVals[i]);
1930       LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
1931     }
1932     // Finally, check the least significant entry.
1933     if (LeadingZero) {
1934       if (!UniquedVals[Multiple-1].getNode())
1935         return DAG.getTargetConstant(0, SDLoc(N), MVT::i32);  // 0,0,0,undef
1936       int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
1937       if (Val < 16)                                   // 0,0,0,4 -> vspltisw(4)
1938         return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
1939     }
1940     if (LeadingOnes) {
1941       if (!UniquedVals[Multiple-1].getNode())
1942         return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
1943       int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
1944       if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
1945         return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
1946     }
1947
1948     return SDValue();
1949   }
1950
1951   // Check to see if this buildvec has a single non-undef value in its elements.
1952   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1953     if (N->getOperand(i).isUndef()) continue;
1954     if (!OpVal.getNode())
1955       OpVal = N->getOperand(i);
1956     else if (OpVal != N->getOperand(i))
1957       return SDValue();
1958   }
1959
1960   if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.
1961
1962   unsigned ValSizeInBytes = EltSize;
1963   uint64_t Value = 0;
1964   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1965     Value = CN->getZExtValue();
1966   } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1967     assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
1968     Value = FloatToBits(CN->getValueAPF().convertToFloat());
1969   }
1970
1971   // If the splat value is larger than the element value, then we can never do
1972   // this splat.  The only case that we could fit the replicated bits into our
1973   // immediate field for would be zero, and we prefer to use vxor for it.
1974   if (ValSizeInBytes < ByteSize) return SDValue();
1975
1976   // If the element value is larger than the splat value, check if it consists
1977   // of a repeated bit pattern of size ByteSize.
1978   if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
1979     return SDValue();
1980
1981   // Properly sign extend the value.
1982   int MaskVal = SignExtend32(Value, ByteSize * 8);
1983
1984   // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
1985   if (MaskVal == 0) return SDValue();
1986
1987   // Finally, if this value fits in a 5 bit sext field, return it
1988   if (SignExtend32<5>(MaskVal) == MaskVal)
1989     return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
1990   return SDValue();
1991 }
1992
1993 /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
1994 /// amount, otherwise return -1.
1995 int PPC::isQVALIGNIShuffleMask(SDNode *N) {
1996   EVT VT = N->getValueType(0);
1997   if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
1998     return -1;
1999
2000   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2001
2002   // Find the first non-undef value in the shuffle mask.
2003   unsigned i;
2004   for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
2005     /*search*/;
2006
2007   if (i == 4) return -1;  // all undef.
2008
2009   // Otherwise, check to see if the rest of the elements are consecutively
2010   // numbered from this value.
2011   unsigned ShiftAmt = SVOp->getMaskElt(i);
2012   if (ShiftAmt < i) return -1;
2013   ShiftAmt -= i;
2014
2015   // Check the rest of the elements to see if they are consecutive.
2016   for (++i; i != 4; ++i)
2017     if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2018       return -1;
2019
2020   return ShiftAmt;
2021 }
2022
2023 //===----------------------------------------------------------------------===//
2024 //  Addressing Mode Selection
2025 //===----------------------------------------------------------------------===//
2026
2027 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2028 /// or 64-bit immediate, and if the value can be accurately represented as a
2029 /// sign extension from a 16-bit value.  If so, this returns true and the
2030 /// immediate.
2031 static bool isIntS16Immediate(SDNode *N, short &Imm) {
2032   if (!isa<ConstantSDNode>(N))
2033     return false;
2034
2035   Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
2036   if (N->getValueType(0) == MVT::i32)
2037     return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2038   else
2039     return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2040 }
2041 static bool isIntS16Immediate(SDValue Op, short &Imm) {
2042   return isIntS16Immediate(Op.getNode(), Imm);
2043 }
2044
2045 /// SelectAddressRegReg - Given the specified addressed, check to see if it
2046 /// can be represented as an indexed [r+r] operation.  Returns false if it
2047 /// can be more efficiently represented with [r+imm].
2048 bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
2049                                             SDValue &Index,
2050                                             SelectionDAG &DAG) const {
2051   short imm = 0;
2052   if (N.getOpcode() == ISD::ADD) {
2053     if (isIntS16Immediate(N.getOperand(1), imm))
2054       return false;    // r+i
2055     if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2056       return false;    // r+i
2057
2058     Base = N.getOperand(0);
2059     Index = N.getOperand(1);
2060     return true;
2061   } else if (N.getOpcode() == ISD::OR) {
2062     if (isIntS16Immediate(N.getOperand(1), imm))
2063       return false;    // r+i can fold it if we can.
2064
2065     // If this is an or of disjoint bitfields, we can codegen this as an add
2066     // (for better address arithmetic) if the LHS and RHS of the OR are provably
2067     // disjoint.
2068     KnownBits LHSKnown, RHSKnown;
2069     DAG.computeKnownBits(N.getOperand(0), LHSKnown);
2070
2071     if (LHSKnown.Zero.getBoolValue()) {
2072       DAG.computeKnownBits(N.getOperand(1), RHSKnown);
2073       // If all of the bits are known zero on the LHS or RHS, the add won't
2074       // carry.
2075       if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2076         Base = N.getOperand(0);
2077         Index = N.getOperand(1);
2078         return true;
2079       }
2080     }
2081   }
2082
2083   return false;
2084 }
2085
2086 // If we happen to be doing an i64 load or store into a stack slot that has
2087 // less than a 4-byte alignment, then the frame-index elimination may need to
2088 // use an indexed load or store instruction (because the offset may not be a
2089 // multiple of 4). The extra register needed to hold the offset comes from the
2090 // register scavenger, and it is possible that the scavenger will need to use
2091 // an emergency spill slot. As a result, we need to make sure that a spill slot
2092 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2093 // stack slot.
2094 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2095   // FIXME: This does not handle the LWA case.
2096   if (VT != MVT::i64)
2097     return;
2098
2099   // NOTE: We'll exclude negative FIs here, which come from argument
2100   // lowering, because there are no known test cases triggering this problem
2101   // using packed structures (or similar). We can remove this exclusion if
2102   // we find such a test case. The reason why this is so test-case driven is
2103   // because this entire 'fixup' is only to prevent crashes (from the
2104   // register scavenger) on not-really-valid inputs. For example, if we have:
2105   //   %a = alloca i1
2106   //   %b = bitcast i1* %a to i64*
2107   //   store i64* a, i64 b
2108   // then the store should really be marked as 'align 1', but is not. If it
2109   // were marked as 'align 1' then the indexed form would have been
2110   // instruction-selected initially, and the problem this 'fixup' is preventing
2111   // won't happen regardless.
2112   if (FrameIdx < 0)
2113     return;
2114
2115   MachineFunction &MF = DAG.getMachineFunction();
2116   MachineFrameInfo &MFI = MF.getFrameInfo();
2117
2118   unsigned Align = MFI.getObjectAlignment(FrameIdx);
2119   if (Align >= 4)
2120     return;
2121
2122   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2123   FuncInfo->setHasNonRISpills();
2124 }
2125
2126 /// Returns true if the address N can be represented by a base register plus
2127 /// a signed 16-bit displacement [r+imm], and if it is not better
2128 /// represented as reg+reg.  If Aligned is true, only accept displacements
2129 /// suitable for STD and friends, i.e. multiples of 4.
2130 bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
2131                                             SDValue &Base,
2132                                             SelectionDAG &DAG,
2133                                             bool Aligned) const {
2134   // FIXME dl should come from parent load or store, not from address
2135   SDLoc dl(N);
2136   // If this can be more profitably realized as r+r, fail.
2137   if (SelectAddressRegReg(N, Disp, Base, DAG))
2138     return false;
2139
2140   if (N.getOpcode() == ISD::ADD) {
2141     short imm = 0;
2142     if (isIntS16Immediate(N.getOperand(1), imm) &&
2143         (!Aligned || (imm & 3) == 0)) {
2144       Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2145       if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2146         Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2147         fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2148       } else {
2149         Base = N.getOperand(0);
2150       }
2151       return true; // [r+i]
2152     } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2153       // Match LOAD (ADD (X, Lo(G))).
2154       assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
2155              && "Cannot handle constant offsets yet!");
2156       Disp = N.getOperand(1).getOperand(0);  // The global address.
2157       assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2158              Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2159              Disp.getOpcode() == ISD::TargetConstantPool ||
2160              Disp.getOpcode() == ISD::TargetJumpTable);
2161       Base = N.getOperand(0);
2162       return true;  // [&g+r]
2163     }
2164   } else if (N.getOpcode() == ISD::OR) {
2165     short imm = 0;
2166     if (isIntS16Immediate(N.getOperand(1), imm) &&
2167         (!Aligned || (imm & 3) == 0)) {
2168       // If this is an or of disjoint bitfields, we can codegen this as an add
2169       // (for better address arithmetic) if the LHS and RHS of the OR are
2170       // provably disjoint.
2171       KnownBits LHSKnown;
2172       DAG.computeKnownBits(N.getOperand(0), LHSKnown);
2173
2174       if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2175         // If all of the bits are known zero on the LHS or RHS, the add won't
2176         // carry.
2177         if (FrameIndexSDNode *FI =
2178               dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2179           Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2180           fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2181         } else {
2182           Base = N.getOperand(0);
2183         }
2184         Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2185         return true;
2186       }
2187     }
2188   } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2189     // Loading from a constant address.
2190
2191     // If this address fits entirely in a 16-bit sext immediate field, codegen
2192     // this as "d, 0"
2193     short Imm;
2194     if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
2195       Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2196       Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2197                              CN->getValueType(0));
2198       return true;
2199     }
2200
2201     // Handle 32-bit sext immediates with LIS + addr mode.
2202     if ((CN->getValueType(0) == MVT::i32 ||
2203          (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2204         (!Aligned || (CN->getZExtValue() & 3) == 0)) {
2205       int Addr = (int)CN->getZExtValue();
2206
2207       // Otherwise, break this down into an LIS + disp.
2208       Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2209
2210       Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2211                                    MVT::i32);
2212       unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2213       Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2214       return true;
2215     }
2216   }
2217
2218   Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2219   if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2220     Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2221     fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2222   } else
2223     Base = N;
2224   return true;      // [r+0]
2225 }
2226
2227 /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
2228 /// represented as an indexed [r+r] operation.
2229 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2230                                                 SDValue &Index,
2231                                                 SelectionDAG &DAG) const {
2232   // Check to see if we can easily represent this as an [r+r] address.  This
2233   // will fail if it thinks that the address is more profitably represented as
2234   // reg+imm, e.g. where imm = 0.
2235   if (SelectAddressRegReg(N, Base, Index, DAG))
2236     return true;
2237
2238   // If the operand is an addition, always emit this as [r+r], since this is
2239   // better (for code size, and execution, as the memop does the add for free)
2240   // than emitting an explicit add.
2241   if (N.getOpcode() == ISD::ADD) {
2242     Base = N.getOperand(0);
2243     Index = N.getOperand(1);
2244     return true;
2245   }
2246
2247   // Otherwise, do it the hard way, using R0 as the base register.
2248   Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2249                          N.getValueType());
2250   Index = N;
2251   return true;
2252 }
2253
2254 /// getPreIndexedAddressParts - returns true by value, base pointer and
2255 /// offset pointer and addressing mode by reference if the node's address
2256 /// can be legally represented as pre-indexed load / store address.
2257 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
2258                                                   SDValue &Offset,
2259                                                   ISD::MemIndexedMode &AM,
2260                                                   SelectionDAG &DAG) const {
2261   if (DisablePPCPreinc) return false;
2262
2263   bool isLoad = true;
2264   SDValue Ptr;
2265   EVT VT;
2266   unsigned Alignment;
2267   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2268     Ptr = LD->getBasePtr();
2269     VT = LD->getMemoryVT();
2270     Alignment = LD->getAlignment();
2271   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2272     Ptr = ST->getBasePtr();
2273     VT  = ST->getMemoryVT();
2274     Alignment = ST->getAlignment();
2275     isLoad = false;
2276   } else
2277     return false;
2278
2279   // PowerPC doesn't have preinc load/store instructions for vectors (except
2280   // for QPX, which does have preinc r+r forms).
2281   if (VT.isVector()) {
2282     if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
2283       return false;
2284     } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
2285       AM = ISD::PRE_INC;
2286       return true;
2287     }
2288   }
2289
2290   if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2291     // Common code will reject creating a pre-inc form if the base pointer
2292     // is a frame index, or if N is a store and the base pointer is either
2293     // the same as or a predecessor of the value being stored.  Check for
2294     // those situations here, and try with swapped Base/Offset instead.
2295     bool Swap = false;
2296
2297     if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2298       Swap = true;
2299     else if (!isLoad) {
2300       SDValue Val = cast<StoreSDNode>(N)->getValue();
2301       if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2302         Swap = true;
2303     }
2304
2305     if (Swap)
2306       std::swap(Base, Offset);
2307
2308     AM = ISD::PRE_INC;
2309     return true;
2310   }
2311
2312   // LDU/STU can only handle immediates that are a multiple of 4.
2313   if (VT != MVT::i64) {
2314     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
2315       return false;
2316   } else {
2317     // LDU/STU need an address with at least 4-byte alignment.
2318     if (Alignment < 4)
2319       return false;
2320
2321     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
2322       return false;
2323   }
2324
2325   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2326     // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
2327     // sext i32 to i64 when addr mode is r+i.
2328     if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2329         LD->getExtensionType() == ISD::SEXTLOAD &&
2330         isa<ConstantSDNode>(Offset))
2331       return false;
2332   }
2333
2334   AM = ISD::PRE_INC;
2335   return true;
2336 }
2337
2338 //===----------------------------------------------------------------------===//
2339 //  LowerOperation implementation
2340 //===----------------------------------------------------------------------===//
2341
2342 /// Return true if we should reference labels using a PICBase, set the HiOpFlags
2343 /// and LoOpFlags to the target MO flags.
2344 static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2345                                unsigned &HiOpFlags, unsigned &LoOpFlags,
2346                                const GlobalValue *GV = nullptr) {
2347   HiOpFlags = PPCII::MO_HA;
2348   LoOpFlags = PPCII::MO_LO;
2349
2350   // Don't use the pic base if not in PIC relocation model.
2351   if (IsPIC) {
2352     HiOpFlags |= PPCII::MO_PIC_FLAG;
2353     LoOpFlags |= PPCII::MO_PIC_FLAG;
2354   }
2355
2356   // If this is a reference to a global value that requires a non-lazy-ptr, make
2357   // sure that instruction lowering adds it.
2358   if (GV && Subtarget.hasLazyResolverStub(GV)) {
2359     HiOpFlags |= PPCII::MO_NLP_FLAG;
2360     LoOpFlags |= PPCII::MO_NLP_FLAG;
2361
2362     if (GV->hasHiddenVisibility()) {
2363       HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2364       LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2365     }
2366   }
2367 }
2368
2369 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2370                              SelectionDAG &DAG) {
2371   SDLoc DL(HiPart);
2372   EVT PtrVT = HiPart.getValueType();
2373   SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2374
2375   SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2376   SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2377
2378   // With PIC, the first instruction is actually "GR+hi(&G)".
2379   if (isPIC)
2380     Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2381                      DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2382
2383   // Generate non-pic code that has direct accesses to the constant pool.
2384   // The address of the global is just (hi(&g)+lo(&g)).
2385   return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2386 }
2387
2388 static void setUsesTOCBasePtr(MachineFunction &MF) {
2389   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2390   FuncInfo->setUsesTOCBasePtr();
2391 }
2392
2393 static void setUsesTOCBasePtr(SelectionDAG &DAG) {
2394   setUsesTOCBasePtr(DAG.getMachineFunction());
2395 }
2396
2397 static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit,
2398                            SDValue GA) {
2399   EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2400   SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) :
2401                 DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2402
2403   SDValue Ops[] = { GA, Reg };
2404   return DAG.getMemIntrinsicNode(
2405       PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2406       MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0, false, true,
2407       false, 0);
2408 }
2409
2410 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2411                                              SelectionDAG &DAG) const {
2412   EVT PtrVT = Op.getValueType();
2413   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2414   const Constant *C = CP->getConstVal();
2415
2416   // 64-bit SVR4 ABI code is always position-independent.
2417   // The actual address of the GlobalValue is stored in the TOC.
2418   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2419     setUsesTOCBasePtr(DAG);
2420     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
2421     return getTOCEntry(DAG, SDLoc(CP), true, GA);
2422   }
2423
2424   unsigned MOHiFlag, MOLoFlag;
2425   bool IsPIC = isPositionIndependent();
2426   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2427
2428   if (IsPIC && Subtarget.isSVR4ABI()) {
2429     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
2430                                            PPCII::MO_PIC_FLAG);
2431     return getTOCEntry(DAG, SDLoc(CP), false, GA);
2432   }
2433
2434   SDValue CPIHi =
2435     DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
2436   SDValue CPILo =
2437     DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
2438   return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
2439 }
2440
2441 // For 64-bit PowerPC, prefer the more compact relative encodings.
2442 // This trades 32 bits per jump table entry for one or two instructions
2443 // on the jump site.
2444 unsigned PPCTargetLowering::getJumpTableEncoding() const {
2445   if (isJumpTableRelative())
2446     return MachineJumpTableInfo::EK_LabelDifference32;
2447
2448   return TargetLowering::getJumpTableEncoding();
2449 }
2450
2451 bool PPCTargetLowering::isJumpTableRelative() const {
2452   if (Subtarget.isPPC64())
2453     return true;
2454   return TargetLowering::isJumpTableRelative();
2455 }
2456
2457 SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
2458                                                     SelectionDAG &DAG) const {
2459   if (!Subtarget.isPPC64())
2460     return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2461
2462   switch (getTargetMachine().getCodeModel()) {
2463   case CodeModel::Default:
2464   case CodeModel::Small:
2465   case CodeModel::Medium:
2466     return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2467   default:
2468     return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
2469                        getPointerTy(DAG.getDataLayout()));
2470   }
2471 }
2472
2473 const MCExpr *
2474 PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
2475                                                 unsigned JTI,
2476                                                 MCContext &Ctx) const {
2477   if (!Subtarget.isPPC64())
2478     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2479
2480   switch (getTargetMachine().getCodeModel()) {
2481   case CodeModel::Default:
2482   case CodeModel::Small:
2483   case CodeModel::Medium:
2484     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2485   default:
2486     return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2487   }
2488 }
2489
2490 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
2491   EVT PtrVT = Op.getValueType();
2492   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2493
2494   // 64-bit SVR4 ABI code is always position-independent.
2495   // The actual address of the GlobalValue is stored in the TOC.
2496   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2497     setUsesTOCBasePtr(DAG);
2498     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
2499     return getTOCEntry(DAG, SDLoc(JT), true, GA);
2500   }
2501
2502   unsigned MOHiFlag, MOLoFlag;
2503   bool IsPIC = isPositionIndependent();
2504   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2505
2506   if (IsPIC && Subtarget.isSVR4ABI()) {
2507     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
2508                                         PPCII::MO_PIC_FLAG);
2509     return getTOCEntry(DAG, SDLoc(GA), false, GA);
2510   }
2511
2512   SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
2513   SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
2514   return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
2515 }
2516
2517 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
2518                                              SelectionDAG &DAG) const {
2519   EVT PtrVT = Op.getValueType();
2520   BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
2521   const BlockAddress *BA = BASDN->getBlockAddress();
2522
2523   // 64-bit SVR4 ABI code is always position-independent.
2524   // The actual BlockAddress is stored in the TOC.
2525   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2526     setUsesTOCBasePtr(DAG);
2527     SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
2528     return getTOCEntry(DAG, SDLoc(BASDN), true, GA);
2529   }
2530
2531   unsigned MOHiFlag, MOLoFlag;
2532   bool IsPIC = isPositionIndependent();
2533   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2534   SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
2535   SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
2536   return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
2537 }
2538
2539 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
2540                                               SelectionDAG &DAG) const {
2541   // FIXME: TLS addresses currently use medium model code sequences,
2542   // which is the most useful form.  Eventually support for small and
2543   // large models could be added if users need it, at the cost of
2544   // additional complexity.
2545   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2546   if (DAG.getTarget().Options.EmulatedTLS)
2547     return LowerToTLSEmulatedModel(GA, DAG);
2548
2549   SDLoc dl(GA);
2550   const GlobalValue *GV = GA->getGlobal();
2551   EVT PtrVT = getPointerTy(DAG.getDataLayout());
2552   bool is64bit = Subtarget.isPPC64();
2553   const Module *M = DAG.getMachineFunction().getFunction()->getParent();
2554   PICLevel::Level picLevel = M->getPICLevel();
2555
2556   TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
2557
2558   if (Model == TLSModel::LocalExec) {
2559     SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2560                                                PPCII::MO_TPREL_HA);
2561     SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2562                                                PPCII::MO_TPREL_LO);
2563     SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
2564                                      is64bit ? MVT::i64 : MVT::i32);
2565     SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
2566     return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
2567   }
2568
2569   if (Model == TLSModel::InitialExec) {
2570     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2571     SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2572                                                 PPCII::MO_TLS);
2573     SDValue GOTPtr;
2574     if (is64bit) {
2575       setUsesTOCBasePtr(DAG);
2576       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2577       GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
2578                            PtrVT, GOTReg, TGA);
2579     } else
2580       GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
2581     SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
2582                                    PtrVT, TGA, GOTPtr);
2583     return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
2584   }
2585
2586   if (Model == TLSModel::GeneralDynamic) {
2587     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2588     SDValue GOTPtr;
2589     if (is64bit) {
2590       setUsesTOCBasePtr(DAG);
2591       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2592       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
2593                                    GOTReg, TGA);
2594     } else {
2595       if (picLevel == PICLevel::SmallPIC)
2596         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2597       else
2598         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2599     }
2600     return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
2601                        GOTPtr, TGA, TGA);
2602   }
2603
2604   if (Model == TLSModel::LocalDynamic) {
2605     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2606     SDValue GOTPtr;
2607     if (is64bit) {
2608       setUsesTOCBasePtr(DAG);
2609       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2610       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
2611                            GOTReg, TGA);
2612     } else {
2613       if (picLevel == PICLevel::SmallPIC)
2614         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2615       else
2616         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2617     }
2618     SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
2619                                   PtrVT, GOTPtr, TGA, TGA);
2620     SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
2621                                       PtrVT, TLSAddr, TGA);
2622     return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
2623   }
2624
2625   llvm_unreachable("Unknown TLS model!");
2626 }
2627
2628 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
2629                                               SelectionDAG &DAG) const {
2630   EVT PtrVT = Op.getValueType();
2631   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
2632   SDLoc DL(GSDN);
2633   const GlobalValue *GV = GSDN->getGlobal();
2634
2635   // 64-bit SVR4 ABI code is always position-independent.
2636   // The actual address of the GlobalValue is stored in the TOC.
2637   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2638     setUsesTOCBasePtr(DAG);
2639     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
2640     return getTOCEntry(DAG, DL, true, GA);
2641   }
2642
2643   unsigned MOHiFlag, MOLoFlag;
2644   bool IsPIC = isPositionIndependent();
2645   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
2646
2647   if (IsPIC && Subtarget.isSVR4ABI()) {
2648     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
2649                                             GSDN->getOffset(),
2650                                             PPCII::MO_PIC_FLAG);
2651     return getTOCEntry(DAG, DL, false, GA);
2652   }
2653
2654   SDValue GAHi =
2655     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
2656   SDValue GALo =
2657     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
2658
2659   SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG);
2660
2661   // If the global reference is actually to a non-lazy-pointer, we have to do an
2662   // extra load to get the address of the global.
2663   if (MOHiFlag & PPCII::MO_NLP_FLAG)
2664     Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
2665   return Ptr;
2666 }
2667
2668 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
2669   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2670   SDLoc dl(Op);
2671
2672   if (Op.getValueType() == MVT::v2i64) {
2673     // When the operands themselves are v2i64 values, we need to do something
2674     // special because VSX has no underlying comparison operations for these.
2675     if (Op.getOperand(0).getValueType() == MVT::v2i64) {
2676       // Equality can be handled by casting to the legal type for Altivec
2677       // comparisons, everything else needs to be expanded.
2678       if (CC == ISD::SETEQ || CC == ISD::SETNE) {
2679         return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
2680                  DAG.getSetCC(dl, MVT::v4i32,
2681                    DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
2682                    DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
2683                    CC));
2684       }
2685
2686       return SDValue();
2687     }
2688
2689     // We handle most of these in the usual way.
2690     return Op;
2691   }
2692
2693   // If we're comparing for equality to zero, expose the fact that this is
2694   // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
2695   // fold the new nodes.
2696   if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
2697     return V;
2698
2699   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2700     // Leave comparisons against 0 and -1 alone for now, since they're usually
2701     // optimized.  FIXME: revisit this when we can custom lower all setcc
2702     // optimizations.
2703     if (C->isAllOnesValue() || C->isNullValue())
2704       return SDValue();
2705   }
2706
2707   // If we have an integer seteq/setne, turn it into a compare against zero
2708   // by xor'ing the rhs with the lhs, which is faster than setting a
2709   // condition register, reading it back out, and masking the correct bit.  The
2710   // normal approach here uses sub to do this instead of xor.  Using xor exposes
2711   // the result to other bit-twiddling opportunities.
2712   EVT LHSVT = Op.getOperand(0).getValueType();
2713   if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
2714     EVT VT = Op.getValueType();
2715     SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
2716                                 Op.getOperand(1));
2717     return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
2718   }
2719   return SDValue();
2720 }
2721
2722 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
2723   SDNode *Node = Op.getNode();
2724   EVT VT = Node->getValueType(0);
2725   EVT PtrVT = getPointerTy(DAG.getDataLayout());
2726   SDValue InChain = Node->getOperand(0);
2727   SDValue VAListPtr = Node->getOperand(1);
2728   const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
2729   SDLoc dl(Node);
2730
2731   assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
2732
2733   // gpr_index
2734   SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
2735                                     VAListPtr, MachinePointerInfo(SV), MVT::i8);
2736   InChain = GprIndex.getValue(1);
2737
2738   if (VT == MVT::i64) {
2739     // Check if GprIndex is even
2740     SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
2741                                  DAG.getConstant(1, dl, MVT::i32));
2742     SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
2743                                 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
2744     SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
2745                                           DAG.getConstant(1, dl, MVT::i32));
2746     // Align GprIndex to be even if it isn't
2747     GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
2748                            GprIndex);
2749   }
2750
2751   // fpr index is 1 byte after gpr
2752   SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2753                                DAG.getConstant(1, dl, MVT::i32));
2754
2755   // fpr
2756   SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
2757                                     FprPtr, MachinePointerInfo(SV), MVT::i8);
2758   InChain = FprIndex.getValue(1);
2759
2760   SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2761                                        DAG.getConstant(8, dl, MVT::i32));
2762
2763   SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2764                                         DAG.getConstant(4, dl, MVT::i32));
2765
2766   // areas
2767   SDValue OverflowArea =
2768       DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
2769   InChain = OverflowArea.getValue(1);
2770
2771   SDValue RegSaveArea =
2772       DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
2773   InChain = RegSaveArea.getValue(1);
2774
2775   // select overflow_area if index > 8
2776   SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
2777                             DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
2778
2779   // adjustment constant gpr_index * 4/8
2780   SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
2781                                     VT.isInteger() ? GprIndex : FprIndex,
2782                                     DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
2783                                                     MVT::i32));
2784
2785   // OurReg = RegSaveArea + RegConstant
2786   SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
2787                                RegConstant);
2788
2789   // Floating types are 32 bytes into RegSaveArea
2790   if (VT.isFloatingPoint())
2791     OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
2792                          DAG.getConstant(32, dl, MVT::i32));
2793
2794   // increase {f,g}pr_index by 1 (or 2 if VT is i64)
2795   SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
2796                                    VT.isInteger() ? GprIndex : FprIndex,
2797                                    DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
2798                                                    MVT::i32));
2799
2800   InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
2801                               VT.isInteger() ? VAListPtr : FprPtr,
2802                               MachinePointerInfo(SV), MVT::i8);
2803
2804   // determine if we should load from reg_save_area or overflow_area
2805   SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
2806
2807   // increase overflow_area by 4/8 if gpr/fpr > 8
2808   SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
2809                                           DAG.getConstant(VT.isInteger() ? 4 : 8,
2810                                           dl, MVT::i32));
2811
2812   OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
2813                              OverflowAreaPlusN);
2814
2815   InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
2816                               MachinePointerInfo(), MVT::i32);
2817
2818   return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
2819 }
2820
2821 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
2822   assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
2823
2824   // We have to copy the entire va_list struct:
2825   // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
2826   return DAG.getMemcpy(Op.getOperand(0), Op,
2827                        Op.getOperand(1), Op.getOperand(2),
2828                        DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true,
2829                        false, MachinePointerInfo(), MachinePointerInfo());
2830 }
2831
2832 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
2833                                                   SelectionDAG &DAG) const {
2834   return Op.getOperand(0);
2835 }
2836
2837 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
2838                                                 SelectionDAG &DAG) const {
2839   SDValue Chain = Op.getOperand(0);
2840   SDValue Trmp = Op.getOperand(1); // trampoline
2841   SDValue FPtr = Op.getOperand(2); // nested function
2842   SDValue Nest = Op.getOperand(3); // 'nest' parameter value
2843   SDLoc dl(Op);
2844
2845   EVT PtrVT = getPointerTy(DAG.getDataLayout());
2846   bool isPPC64 = (PtrVT == MVT::i64);
2847   Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
2848
2849   TargetLowering::ArgListTy Args;
2850   TargetLowering::ArgListEntry Entry;
2851
2852   Entry.Ty = IntPtrTy;
2853   Entry.Node = Trmp; Args.push_back(Entry);
2854
2855   // TrampSize == (isPPC64 ? 48 : 40);
2856   Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
2857                                isPPC64 ? MVT::i64 : MVT::i32);
2858   Args.push_back(Entry);
2859
2860   Entry.Node = FPtr; Args.push_back(Entry);
2861   Entry.Node = Nest; Args.push_back(Entry);
2862
2863   // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
2864   TargetLowering::CallLoweringInfo CLI(DAG);
2865   CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
2866       CallingConv::C, Type::getVoidTy(*DAG.getContext()),
2867       DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
2868
2869   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2870   return CallResult.second;
2871 }
2872
2873 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
2874   MachineFunction &MF = DAG.getMachineFunction();
2875   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2876   EVT PtrVT = getPointerTy(MF.getDataLayout());
2877
2878   SDLoc dl(Op);
2879
2880   if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
2881     // vastart just stores the address of the VarArgsFrameIndex slot into the
2882     // memory location argument.
2883     SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2884     const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2885     return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
2886                         MachinePointerInfo(SV));
2887   }
2888
2889   // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
2890   // We suppose the given va_list is already allocated.
2891   //
2892   // typedef struct {
2893   //  char gpr;     /* index into the array of 8 GPRs
2894   //                 * stored in the register save area
2895   //                 * gpr=0 corresponds to r3,
2896   //                 * gpr=1 to r4, etc.
2897   //                 */
2898   //  char fpr;     /* index into the array of 8 FPRs
2899   //                 * stored in the register save area
2900   //                 * fpr=0 corresponds to f1,
2901   //                 * fpr=1 to f2, etc.
2902   //                 */
2903   //  char *overflow_arg_area;
2904   //                /* location on stack that holds
2905   //                 * the next overflow argument
2906   //                 */
2907   //  char *reg_save_area;
2908   //               /* where r3:r10 and f1:f8 (if saved)
2909   //                * are stored
2910   //                */
2911   // } va_list[1];
2912
2913   SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
2914   SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
2915   SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
2916                                             PtrVT);
2917   SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
2918                                  PtrVT);
2919
2920   uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
2921   SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
2922
2923   uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
2924   SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
2925
2926   uint64_t FPROffset = 1;
2927   SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
2928
2929   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2930
2931   // Store first byte : number of int regs
2932   SDValue firstStore =
2933       DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
2934                         MachinePointerInfo(SV), MVT::i8);
2935   uint64_t nextOffset = FPROffset;
2936   SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
2937                                   ConstFPROffset);
2938
2939   // Store second byte : number of float regs
2940   SDValue secondStore =
2941       DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
2942                         MachinePointerInfo(SV, nextOffset), MVT::i8);
2943   nextOffset += StackOffset;
2944   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
2945
2946   // Store second word : arguments given on stack
2947   SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
2948                                     MachinePointerInfo(SV, nextOffset));
2949   nextOffset += FrameOffset;
2950   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
2951
2952   // Store third word : arguments given in registers
2953   return DAG.getStore(thirdStore, dl, FR, nextPtr,
2954                       MachinePointerInfo(SV, nextOffset));
2955 }
2956
2957 #include "PPCGenCallingConv.inc"
2958
2959 // Function whose sole purpose is to kill compiler warnings
2960 // stemming from unused functions included from PPCGenCallingConv.inc.
2961 CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
2962   return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
2963 }
2964
2965 bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
2966                                       CCValAssign::LocInfo &LocInfo,
2967                                       ISD::ArgFlagsTy &ArgFlags,
2968                                       CCState &State) {
2969   return true;
2970 }
2971
2972 bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
2973                                              MVT &LocVT,
2974                                              CCValAssign::LocInfo &LocInfo,
2975                                              ISD::ArgFlagsTy &ArgFlags,
2976                                              CCState &State) {
2977   static const MCPhysReg ArgRegs[] = {
2978     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
2979     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
2980   };
2981   const unsigned NumArgRegs = array_lengthof(ArgRegs);
2982
2983   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
2984
2985   // Skip one register if the first unallocated register has an even register
2986   // number and there are still argument registers available which have not been
2987   // allocated yet. RegNum is actually an index into ArgRegs, which means we
2988   // need to skip a register if RegNum is odd.
2989   if (RegNum != NumArgRegs && RegNum % 2 == 1) {
2990     State.AllocateReg(ArgRegs[RegNum]);
2991   }
2992
2993   // Always return false here, as this function only makes sure that the first
2994   // unallocated register has an odd register number and does not actually
2995   // allocate a register for the current argument.
2996   return false;
2997 }
2998
2999 bool
3000 llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
3001                                                   MVT &LocVT,
3002                                                   CCValAssign::LocInfo &LocInfo,
3003                                                   ISD::ArgFlagsTy &ArgFlags,
3004                                                   CCState &State) {
3005   static const MCPhysReg ArgRegs[] = {
3006     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3007     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3008   };
3009   const unsigned NumArgRegs = array_lengthof(ArgRegs);
3010
3011   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
3012   int RegsLeft = NumArgRegs - RegNum;
3013
3014   // Skip if there is not enough registers left for long double type (4 gpr regs
3015   // in soft float mode) and put long double argument on the stack.
3016   if (RegNum != NumArgRegs && RegsLeft < 4) {
3017     for (int i = 0; i < RegsLeft; i++) {
3018       State.AllocateReg(ArgRegs[RegNum + i]);
3019     }
3020   }
3021
3022   return false;
3023 }
3024
3025 bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
3026                                                MVT &LocVT,
3027                                                CCValAssign::LocInfo &LocInfo,
3028                                                ISD::ArgFlagsTy &ArgFlags,
3029                                                CCState &State) {
3030   static const MCPhysReg ArgRegs[] = {
3031     PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3032     PPC::F8
3033   };
3034
3035   const unsigned NumArgRegs = array_lengthof(ArgRegs);
3036
3037   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
3038
3039   // If there is only one Floating-point register left we need to put both f64
3040   // values of a split ppc_fp128 value on the stack.
3041   if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
3042     State.AllocateReg(ArgRegs[RegNum]);
3043   }
3044
3045   // Always return false here, as this function only makes sure that the two f64
3046   // values a ppc_fp128 value is split into are both passed in registers or both
3047   // passed on the stack and does not actually allocate a register for the
3048   // current argument.
3049   return false;
3050 }
3051
3052 /// FPR - The set of FP registers that should be allocated for arguments,
3053 /// on Darwin.
3054 static const MCPhysReg FPR[] = {PPC::F1,  PPC::F2,  PPC::F3, PPC::F4, PPC::F5,
3055                                 PPC::F6,  PPC::F7,  PPC::F8, PPC::F9, PPC::F10,
3056                                 PPC::F11, PPC::F12, PPC::F13};
3057
3058 /// QFPR - The set of QPX registers that should be allocated for arguments.
3059 static const MCPhysReg QFPR[] = {
3060     PPC::QF1, PPC::QF2, PPC::QF3,  PPC::QF4,  PPC::QF5,  PPC::QF6, PPC::QF7,
3061     PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
3062
3063 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
3064 /// the stack.
3065 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3066                                        unsigned PtrByteSize) {
3067   unsigned ArgSize = ArgVT.getStoreSize();
3068   if (Flags.isByVal())
3069     ArgSize = Flags.getByValSize();
3070
3071   // Round up to multiples of the pointer size, except for array members,
3072   // which are always packed.
3073   if (!Flags.isInConsecutiveRegs())
3074     ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3075
3076   return ArgSize;
3077 }
3078
3079 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
3080 /// on the stack.
3081 static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3082                                             ISD::ArgFlagsTy Flags,
3083                                             unsigned PtrByteSize) {
3084   unsigned Align = PtrByteSize;
3085
3086   // Altivec parameters are padded to a 16 byte boundary.
3087   if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3088       ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3089       ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3090       ArgVT == MVT::v1i128)
3091     Align = 16;
3092   // QPX vector types stored in double-precision are padded to a 32 byte
3093   // boundary.
3094   else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
3095     Align = 32;
3096
3097   // ByVal parameters are aligned as requested.
3098   if (Flags.isByVal()) {
3099     unsigned BVAlign = Flags.getByValAlign();
3100     if (BVAlign > PtrByteSize) {
3101       if (BVAlign % PtrByteSize != 0)
3102           llvm_unreachable(
3103             "ByVal alignment is not a multiple of the pointer size");
3104
3105       Align = BVAlign;
3106     }
3107   }
3108
3109   // Array members are always packed to their original alignment.
3110   if (Flags.isInConsecutiveRegs()) {
3111     // If the array member was split into multiple registers, the first
3112     // needs to be aligned to the size of the full type.  (Except for
3113     // ppcf128, which is only aligned as its f64 components.)
3114     if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3115       Align = OrigVT.getStoreSize();
3116     else
3117       Align = ArgVT.getStoreSize();
3118   }
3119
3120   return Align;
3121 }
3122
3123 /// CalculateStackSlotUsed - Return whether this argument will use its
3124 /// stack slot (instead of being passed in registers).  ArgOffset,
3125 /// AvailableFPRs, and AvailableVRs must hold the current argument
3126 /// position, and will be updated to account for this argument.
3127 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
3128                                    ISD::ArgFlagsTy Flags,
3129                                    unsigned PtrByteSize,
3130                                    unsigned LinkageSize,
3131                                    unsigned ParamAreaSize,
3132                                    unsigned &ArgOffset,
3133                                    unsigned &AvailableFPRs,
3134                                    unsigned &AvailableVRs, bool HasQPX) {
3135   bool UseMemory = false;
3136
3137   // Respect alignment of argument on the stack.
3138   unsigned Align =
3139     CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3140   ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
3141   // If there's no space left in the argument save area, we must
3142   // use memory (this check also catches zero-sized arguments).
3143   if (ArgOffset >= LinkageSize + ParamAreaSize)
3144     UseMemory = true;
3145
3146   // Allocate argument on the stack.
3147   ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3148   if (Flags.isInConsecutiveRegsLast())
3149     ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3150   // If we overran the argument save area, we must use memory
3151   // (this check catches arguments passed partially in memory)
3152   if (ArgOffset > LinkageSize + ParamAreaSize)
3153     UseMemory = true;
3154
3155   // However, if the argument is actually passed in an FPR or a VR,
3156   // we don't use memory after all.
3157   if (!Flags.isByVal()) {
3158     if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
3159         // QPX registers overlap with the scalar FP registers.
3160         (HasQPX && (ArgVT == MVT::v4f32 ||
3161                     ArgVT == MVT::v4f64 ||
3162                     ArgVT == MVT::v4i1)))
3163       if (AvailableFPRs > 0) {
3164         --AvailableFPRs;
3165         return false;
3166       }
3167     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3168         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3169         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3170         ArgVT == MVT::v1i128)
3171       if (AvailableVRs > 0) {
3172         --AvailableVRs;
3173         return false;
3174       }
3175   }
3176
3177   return UseMemory;
3178 }
3179
3180 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
3181 /// ensure minimum alignment required for target.
3182 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
3183                                      unsigned NumBytes) {
3184   unsigned TargetAlign = Lowering->getStackAlignment();
3185   unsigned AlignMask = TargetAlign - 1;
3186   NumBytes = (NumBytes + AlignMask) & ~AlignMask;
3187   return NumBytes;
3188 }
3189
3190 SDValue PPCTargetLowering::LowerFormalArguments(
3191     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3192     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3193     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3194   if (Subtarget.isSVR4ABI()) {
3195     if (Subtarget.isPPC64())
3196       return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
3197                                          dl, DAG, InVals);
3198     else
3199       return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
3200                                          dl, DAG, InVals);
3201   } else {
3202     return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
3203                                        dl, DAG, InVals);
3204   }
3205 }
3206
3207 SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
3208     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3209     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3210     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3211
3212   // 32-bit SVR4 ABI Stack Frame Layout:
3213   //              +-----------------------------------+
3214   //        +-->  |            Back chain             |
3215   //        |     +-----------------------------------+
3216   //        |     | Floating-point register save area |
3217   //        |     +-----------------------------------+
3218   //        |     |    General register save area     |
3219   //        |     +-----------------------------------+
3220   //        |     |          CR save word             |
3221   //        |     +-----------------------------------+
3222   //        |     |         VRSAVE save word          |
3223   //        |     +-----------------------------------+
3224   //        |     |         Alignment padding         |
3225   //        |     +-----------------------------------+
3226   //        |     |     Vector register save area     |
3227   //        |     +-----------------------------------+
3228   //        |     |       Local variable space        |
3229   //        |     +-----------------------------------+
3230   //        |     |        Parameter list area        |
3231   //        |     +-----------------------------------+
3232   //        |     |           LR save word            |
3233   //        |     +-----------------------------------+
3234   // SP-->  +---  |            Back chain             |
3235   //              +-----------------------------------+
3236   //
3237   // Specifications:
3238   //   System V Application Binary Interface PowerPC Processor Supplement
3239   //   AltiVec Technology Programming Interface Manual
3240
3241   MachineFunction &MF = DAG.getMachineFunction();
3242   MachineFrameInfo &MFI = MF.getFrameInfo();
3243   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3244
3245   EVT PtrVT = getPointerTy(MF.getDataLayout());
3246   // Potential tail calls could cause overwriting of argument stack slots.
3247   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3248                        (CallConv == CallingConv::Fast));
3249   unsigned PtrByteSize = 4;
3250
3251   // Assign locations to all of the incoming arguments.
3252   SmallVector<CCValAssign, 16> ArgLocs;
3253   PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3254                  *DAG.getContext());
3255
3256   // Reserve space for the linkage area on the stack.
3257   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3258   CCInfo.AllocateStack(LinkageSize, PtrByteSize);
3259   if (useSoftFloat())
3260     CCInfo.PreAnalyzeFormalArguments(Ins);
3261
3262   CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
3263   CCInfo.clearWasPPCF128();
3264
3265   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3266     CCValAssign &VA = ArgLocs[i];
3267
3268     // Arguments stored in registers.
3269     if (VA.isRegLoc()) {
3270       const TargetRegisterClass *RC;
3271       EVT ValVT = VA.getValVT();
3272
3273       switch (ValVT.getSimpleVT().SimpleTy) {
3274         default:
3275           llvm_unreachable("ValVT not supported by formal arguments Lowering");
3276         case MVT::i1:
3277         case MVT::i32:
3278           RC = &PPC::GPRCRegClass;
3279           break;
3280         case MVT::f32:
3281           if (Subtarget.hasP8Vector())
3282             RC = &PPC::VSSRCRegClass;
3283           else
3284             RC = &PPC::F4RCRegClass;
3285           break;
3286         case MVT::f64:
3287           if (Subtarget.hasVSX())
3288             RC = &PPC::VSFRCRegClass;
3289           else
3290             RC = &PPC::F8RCRegClass;
3291           break;
3292         case MVT::v16i8:
3293         case MVT::v8i16:
3294         case MVT::v4i32:
3295           RC = &PPC::VRRCRegClass;
3296           break;
3297         case MVT::v4f32:
3298           RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
3299           break;
3300         case MVT::v2f64:
3301         case MVT::v2i64:
3302           RC = &PPC::VRRCRegClass;
3303           break;
3304         case MVT::v4f64:
3305           RC = &PPC::QFRCRegClass;
3306           break;
3307         case MVT::v4i1:
3308           RC = &PPC::QBRCRegClass;
3309           break;
3310       }
3311
3312       // Transform the arguments stored in physical registers into virtual ones.
3313       unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3314       SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
3315                                             ValVT == MVT::i1 ? MVT::i32 : ValVT);
3316
3317       if (ValVT == MVT::i1)
3318         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
3319
3320       InVals.push_back(ArgValue);
3321     } else {
3322       // Argument stored in memory.
3323       assert(VA.isMemLoc());
3324
3325       unsigned ArgSize = VA.getLocVT().getStoreSize();
3326       int FI = MFI.CreateFixedObject(ArgSize, VA.getLocMemOffset(),
3327                                      isImmutable);
3328
3329       // Create load nodes to retrieve arguments from the stack.
3330       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3331       InVals.push_back(
3332           DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
3333     }
3334   }
3335
3336   // Assign locations to all of the incoming aggregate by value arguments.
3337   // Aggregates passed by value are stored in the local variable space of the
3338   // caller's stack frame, right above the parameter list area.
3339   SmallVector<CCValAssign, 16> ByValArgLocs;
3340   CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3341                       ByValArgLocs, *DAG.getContext());
3342
3343   // Reserve stack space for the allocations in CCInfo.
3344   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
3345
3346   CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
3347
3348   // Area that is at least reserved in the caller of this function.
3349   unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
3350   MinReservedArea = std::max(MinReservedArea, LinkageSize);
3351
3352   // Set the size that is at least reserved in caller of this function.  Tail
3353   // call optimized function's reserved stack space needs to be aligned so that
3354   // taking the difference between two stack areas will result in an aligned
3355   // stack.
3356   MinReservedArea =
3357       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3358   FuncInfo->setMinReservedArea(MinReservedArea);
3359
3360   SmallVector<SDValue, 8> MemOps;
3361
3362   // If the function takes variable number of arguments, make a frame index for
3363   // the start of the first vararg value... for expansion of llvm.va_start.
3364   if (isVarArg) {
3365     static const MCPhysReg GPArgRegs[] = {
3366       PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3367       PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3368     };
3369     const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
3370
3371     static const MCPhysReg FPArgRegs[] = {
3372       PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3373       PPC::F8
3374     };
3375     unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
3376
3377     if (useSoftFloat())
3378        NumFPArgRegs = 0;
3379
3380     FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
3381     FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
3382
3383     // Make room for NumGPArgRegs and NumFPArgRegs.
3384     int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
3385                 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
3386
3387     FuncInfo->setVarArgsStackOffset(
3388       MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
3389                             CCInfo.getNextStackOffset(), true));
3390
3391     FuncInfo->setVarArgsFrameIndex(MFI.CreateStackObject(Depth, 8, false));
3392     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3393
3394     // The fixed integer arguments of a variadic function are stored to the
3395     // VarArgsFrameIndex on the stack so that they may be loaded by
3396     // dereferencing the result of va_next.
3397     for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
3398       // Get an existing live-in vreg, or add a new one.
3399       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
3400       if (!VReg)
3401         VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
3402
3403       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3404       SDValue Store =
3405           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3406       MemOps.push_back(Store);
3407       // Increment the address by four for the next argument to store
3408       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3409       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3410     }
3411
3412     // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
3413     // is set.
3414     // The double arguments are stored to the VarArgsFrameIndex
3415     // on the stack.
3416     for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
3417       // Get an existing live-in vreg, or add a new one.
3418       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
3419       if (!VReg)
3420         VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
3421
3422       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
3423       SDValue Store =
3424           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3425       MemOps.push_back(Store);
3426       // Increment the address by eight for the next argument to store
3427       SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
3428                                          PtrVT);
3429       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3430     }
3431   }
3432
3433   if (!MemOps.empty())
3434     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3435
3436   return Chain;
3437 }
3438
3439 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3440 // value to MVT::i64 and then truncate to the correct register size.
3441 SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
3442                                              EVT ObjectVT, SelectionDAG &DAG,
3443                                              SDValue ArgVal,
3444                                              const SDLoc &dl) const {
3445   if (Flags.isSExt())
3446     ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
3447                          DAG.getValueType(ObjectVT));
3448   else if (Flags.isZExt())
3449     ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
3450                          DAG.getValueType(ObjectVT));
3451
3452   return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
3453 }
3454
3455 SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
3456     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3457     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3458     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3459   // TODO: add description of PPC stack frame format, or at least some docs.
3460   //
3461   bool isELFv2ABI = Subtarget.isELFv2ABI();
3462   bool isLittleEndian = Subtarget.isLittleEndian();
3463   MachineFunction &MF = DAG.getMachineFunction();
3464   MachineFrameInfo &MFI = MF.getFrameInfo();
3465   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3466
3467   assert(!(CallConv == CallingConv::Fast && isVarArg) &&
3468          "fastcc not supported on varargs functions");
3469
3470   EVT PtrVT = getPointerTy(MF.getDataLayout());
3471   // Potential tail calls could cause overwriting of argument stack slots.
3472   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3473                        (CallConv == CallingConv::Fast));
3474   unsigned PtrByteSize = 8;
3475   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3476
3477   static const MCPhysReg GPR[] = {
3478     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3479     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3480   };
3481   static const MCPhysReg VR[] = {
3482     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3483     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3484   };
3485
3486   const unsigned Num_GPR_Regs = array_lengthof(GPR);
3487   const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
3488   const unsigned Num_VR_Regs  = array_lengthof(VR);
3489   const unsigned Num_QFPR_Regs = Num_FPR_Regs;
3490
3491   // Do a first pass over the arguments to determine whether the ABI
3492   // guarantees that our caller has allocated the parameter save area
3493   // on its stack frame.  In the ELFv1 ABI, this is always the case;
3494   // in the ELFv2 ABI, it is true if this is a vararg function or if
3495   // any parameter is located in a stack slot.
3496
3497   bool HasParameterArea = !isELFv2ABI || isVarArg;
3498   unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
3499   unsigned NumBytes = LinkageSize;
3500   unsigned AvailableFPRs = Num_FPR_Regs;
3501   unsigned AvailableVRs = Num_VR_Regs;
3502   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3503     if (Ins[i].Flags.isNest())
3504       continue;
3505
3506     if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
3507                                PtrByteSize, LinkageSize, ParamAreaSize,
3508                                NumBytes, AvailableFPRs, AvailableVRs,
3509                                Subtarget.hasQPX()))
3510       HasParameterArea = true;
3511   }
3512
3513   // Add DAG nodes to load the arguments or copy them out of registers.  On
3514   // entry to a function on PPC, the arguments start after the linkage area,
3515   // although the first ones are often in registers.
3516
3517   unsigned ArgOffset = LinkageSize;
3518   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3519   unsigned &QFPR_idx = FPR_idx;
3520   SmallVector<SDValue, 8> MemOps;
3521   Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
3522   unsigned CurArgIdx = 0;
3523   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3524     SDValue ArgVal;
3525     bool needsLoad = false;
3526     EVT ObjectVT = Ins[ArgNo].VT;
3527     EVT OrigVT = Ins[ArgNo].ArgVT;
3528     unsigned ObjSize = ObjectVT.getStoreSize();
3529     unsigned ArgSize = ObjSize;
3530     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3531     if (Ins[ArgNo].isOrigArg()) {
3532       std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
3533       CurArgIdx = Ins[ArgNo].getOrigArgIndex();
3534     }
3535     // We re-align the argument offset for each argument, except when using the
3536     // fast calling convention, when we need to make sure we do that only when
3537     // we'll actually use a stack slot.
3538     unsigned CurArgOffset, Align;
3539     auto ComputeArgOffset = [&]() {
3540       /* Respect alignment of argument on the stack.  */
3541       Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
3542       ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
3543       CurArgOffset = ArgOffset;
3544     };
3545
3546     if (CallConv != CallingConv::Fast) {
3547       ComputeArgOffset();
3548
3549       /* Compute GPR index associated with argument offset.  */
3550       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
3551       GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
3552     }
3553
3554     // FIXME the codegen can be much improved in some cases.
3555     // We do not have to keep everything in memory.
3556     if (Flags.isByVal()) {
3557       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
3558
3559       if (CallConv == CallingConv::Fast)
3560         ComputeArgOffset();
3561
3562       // ObjSize is the true size, ArgSize rounded up to multiple of registers.
3563       ObjSize = Flags.getByValSize();
3564       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3565       // Empty aggregate parameters do not take up registers.  Examples:
3566       //   struct { } a;
3567       //   union  { } b;
3568       //   int c[0];
3569       // etc.  However, we have to provide a place-holder in InVals, so
3570       // pretend we have an 8-byte item at the current address for that
3571       // purpose.
3572       if (!ObjSize) {
3573         int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
3574         SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3575         InVals.push_back(FIN);
3576         continue;
3577       }
3578
3579       // Create a stack object covering all stack doublewords occupied
3580       // by the argument.  If the argument is (fully or partially) on
3581       // the stack, or if the argument is fully in registers but the
3582       // caller has allocated the parameter save anyway, we can refer
3583       // directly to the caller's stack frame.  Otherwise, create a
3584       // local copy in our own frame.
3585       int FI;
3586       if (HasParameterArea ||
3587           ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
3588         FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
3589       else
3590         FI = MFI.CreateStackObject(ArgSize, Align, false);
3591       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3592
3593       // Handle aggregates smaller than 8 bytes.
3594       if (ObjSize < PtrByteSize) {
3595         // The value of the object is its address, which differs from the
3596         // address of the enclosing doubleword on big-endian systems.
3597         SDValue Arg = FIN;
3598         if (!isLittleEndian) {
3599           SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
3600           Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
3601         }
3602         InVals.push_back(Arg);
3603
3604         if (GPR_idx != Num_GPR_Regs) {
3605           unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3606           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3607           SDValue Store;
3608
3609           if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
3610             EVT ObjType = (ObjSize == 1 ? MVT::i8 :
3611                            (ObjSize == 2 ? MVT::i16 : MVT::i32));
3612             Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
3613                                       MachinePointerInfo(&*FuncArg), ObjType);
3614           } else {
3615             // For sizes that don't fit a truncating store (3, 5, 6, 7),
3616             // store the whole register as-is to the parameter save area
3617             // slot.
3618             Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3619                                  MachinePointerInfo(&*FuncArg));
3620           }
3621
3622           MemOps.push_back(Store);
3623         }
3624         // Whether we copied from a register or not, advance the offset
3625         // into the parameter save area by a full doubleword.
3626         ArgOffset += PtrByteSize;
3627         continue;
3628       }
3629
3630       // The value of the object is its address, which is the address of
3631       // its first stack doubleword.
3632       InVals.push_back(FIN);
3633
3634       // Store whatever pieces of the object are in registers to memory.
3635       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
3636         if (GPR_idx == Num_GPR_Regs)
3637           break;
3638
3639         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3640         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3641         SDValue Addr = FIN;
3642         if (j) {
3643           SDValue Off = DAG.getConstant(j, dl, PtrVT);
3644           Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
3645         }
3646         SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
3647                                      MachinePointerInfo(&*FuncArg, j));
3648         MemOps.push_back(Store);
3649         ++GPR_idx;
3650       }
3651       ArgOffset += ArgSize;
3652       continue;
3653     }
3654
3655     switch (ObjectVT.getSimpleVT().SimpleTy) {
3656     default: llvm_unreachable("Unhandled argument type!");
3657     case MVT::i1:
3658     case MVT::i32:
3659     case MVT::i64:
3660       if (Flags.isNest()) {
3661         // The 'nest' parameter, if any, is passed in R11.
3662         unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
3663         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3664
3665         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3666           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3667
3668         break;
3669       }
3670
3671       // These can be scalar arguments or elements of an integer array type
3672       // passed directly.  Clang may use those instead of "byval" aggregate
3673       // types to avoid forcing arguments to memory unnecessarily.
3674       if (GPR_idx != Num_GPR_Regs) {
3675         unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3676         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3677
3678         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3679           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3680           // value to MVT::i64 and then truncate to the correct register size.
3681           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3682       } else {
3683         if (CallConv == CallingConv::Fast)
3684           ComputeArgOffset();
3685
3686         needsLoad = true;
3687         ArgSize = PtrByteSize;
3688       }
3689       if (CallConv != CallingConv::Fast || needsLoad)
3690         ArgOffset += 8;
3691       break;
3692
3693     case MVT::f32:
3694     case MVT::f64:
3695       // These can be scalar arguments or elements of a float array type
3696       // passed directly.  The latter are used to implement ELFv2 homogenous
3697       // float aggregates.
3698       if (FPR_idx != Num_FPR_Regs) {
3699         unsigned VReg;
3700
3701         if (ObjectVT == MVT::f32)
3702           VReg = MF.addLiveIn(FPR[FPR_idx],
3703                               Subtarget.hasP8Vector()
3704                                   ? &PPC::VSSRCRegClass
3705                                   : &PPC::F4RCRegClass);
3706         else
3707           VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
3708                                                 ? &PPC::VSFRCRegClass
3709                                                 : &PPC::F8RCRegClass);
3710
3711         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3712         ++FPR_idx;
3713       } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
3714         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
3715         // once we support fp <-> gpr moves.
3716
3717         // This can only ever happen in the presence of f32 array types,
3718         // since otherwise we never run out of FPRs before running out
3719         // of GPRs.
3720         unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3721         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3722
3723         if (ObjectVT == MVT::f32) {
3724           if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
3725             ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
3726                                  DAG.getConstant(32, dl, MVT::i32));
3727           ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
3728         }
3729
3730         ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
3731       } else {
3732         if (CallConv == CallingConv::Fast)
3733           ComputeArgOffset();
3734
3735         needsLoad = true;
3736       }
3737
3738       // When passing an array of floats, the array occupies consecutive
3739       // space in the argument area; only round up to the next doubleword
3740       // at the end of the array.  Otherwise, each float takes 8 bytes.
3741       if (CallConv != CallingConv::Fast || needsLoad) {
3742         ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
3743         ArgOffset += ArgSize;
3744         if (Flags.isInConsecutiveRegsLast())
3745           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3746       }
3747       break;
3748     case MVT::v4f32:
3749     case MVT::v4i32:
3750     case MVT::v8i16:
3751     case MVT::v16i8:
3752     case MVT::v2f64:
3753     case MVT::v2i64:
3754     case MVT::v1i128:
3755       if (!Subtarget.hasQPX()) {
3756       // These can be scalar arguments or elements of a vector array type
3757       // passed directly.  The latter are used to implement ELFv2 homogenous
3758       // vector aggregates.
3759       if (VR_idx != Num_VR_Regs) {
3760         unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
3761         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3762         ++VR_idx;
3763       } else {
3764         if (CallConv == CallingConv::Fast)
3765           ComputeArgOffset();
3766
3767         needsLoad = true;
3768       }
3769       if (CallConv != CallingConv::Fast || needsLoad)
3770         ArgOffset += 16;
3771       break;
3772       } // not QPX
3773
3774       assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
3775              "Invalid QPX parameter type");
3776       /* fall through */
3777
3778     case MVT::v4f64:
3779     case MVT::v4i1:
3780       // QPX vectors are treated like their scalar floating-point subregisters
3781       // (except that they're larger).
3782       unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
3783       if (QFPR_idx != Num_QFPR_Regs) {
3784         const TargetRegisterClass *RC;
3785         switch (ObjectVT.getSimpleVT().SimpleTy) {
3786         case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
3787         case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
3788         default:         RC = &PPC::QBRCRegClass; break;
3789         }
3790
3791         unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
3792         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3793         ++QFPR_idx;
3794       } else {
3795         if (CallConv == CallingConv::Fast)
3796           ComputeArgOffset();
3797         needsLoad = true;
3798       }
3799       if (CallConv != CallingConv::Fast || needsLoad)
3800         ArgOffset += Sz;
3801       break;
3802     }
3803
3804     // We need to load the argument to a virtual register if we determined
3805     // above that we ran out of physical registers of the appropriate type.
3806     if (needsLoad) {
3807       if (ObjSize < ArgSize && !isLittleEndian)
3808         CurArgOffset += ArgSize - ObjSize;
3809       int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
3810       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3811       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
3812     }
3813
3814     InVals.push_back(ArgVal);
3815   }
3816
3817   // Area that is at least reserved in the caller of this function.
3818   unsigned MinReservedArea;
3819   if (HasParameterArea)
3820     MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
3821   else
3822     MinReservedArea = LinkageSize;
3823
3824   // Set the size that is at least reserved in caller of this function.  Tail
3825   // call optimized functions' reserved stack space needs to be aligned so that
3826   // taking the difference between two stack areas will result in an aligned
3827   // stack.
3828   MinReservedArea =
3829       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3830   FuncInfo->setMinReservedArea(MinReservedArea);
3831
3832   // If the function takes variable number of arguments, make a frame index for
3833   // the start of the first vararg value... for expansion of llvm.va_start.
3834   if (isVarArg) {
3835     int Depth = ArgOffset;
3836
3837     FuncInfo->setVarArgsFrameIndex(
3838       MFI.CreateFixedObject(PtrByteSize, Depth, true));
3839     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3840
3841     // If this function is vararg, store any remaining integer argument regs
3842     // to their spots on the stack so that they may be loaded by dereferencing
3843     // the result of va_next.
3844     for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
3845          GPR_idx < Num_GPR_Regs; ++GPR_idx) {
3846       unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3847       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3848       SDValue Store =
3849           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3850       MemOps.push_back(Store);
3851       // Increment the address by four for the next argument to store
3852       SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
3853       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3854     }
3855   }
3856
3857   if (!MemOps.empty())
3858     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3859
3860   return Chain;
3861 }
3862
3863 SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
3864     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3865     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3866     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3867   // TODO: add description of PPC stack frame format, or at least some docs.
3868   //
3869   MachineFunction &MF = DAG.getMachineFunction();
3870   MachineFrameInfo &MFI = MF.getFrameInfo();
3871   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3872
3873   EVT PtrVT = getPointerTy(MF.getDataLayout());
3874   bool isPPC64 = PtrVT == MVT::i64;
3875   // Potential tail calls could cause overwriting of argument stack slots.
3876   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3877                        (CallConv == CallingConv::Fast));
3878   unsigned PtrByteSize = isPPC64 ? 8 : 4;
3879   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3880   unsigned ArgOffset = LinkageSize;
3881   // Area that is at least reserved in caller of this function.
3882   unsigned MinReservedArea = ArgOffset;
3883
3884   static const MCPhysReg GPR_32[] = {           // 32-bit registers.
3885     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3886     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3887   };
3888   static const MCPhysReg GPR_64[] = {           // 64-bit registers.
3889     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3890     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3891   };
3892   static const MCPhysReg VR[] = {
3893     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3894     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3895   };
3896
3897   const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
3898   const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
3899   const unsigned Num_VR_Regs  = array_lengthof( VR);
3900
3901   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3902
3903   const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
3904
3905   // In 32-bit non-varargs functions, the stack space for vectors is after the
3906   // stack space for non-vectors.  We do not use this space unless we have
3907   // too many vectors to fit in registers, something that only occurs in
3908   // constructed examples:), but we have to walk the arglist to figure
3909   // that out...for the pathological case, compute VecArgOffset as the
3910   // start of the vector parameter area.  Computing VecArgOffset is the
3911   // entire point of the following loop.
3912   unsigned VecArgOffset = ArgOffset;
3913   if (!isVarArg && !isPPC64) {
3914     for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
3915          ++ArgNo) {
3916       EVT ObjectVT = Ins[ArgNo].VT;
3917       ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3918
3919       if (Flags.isByVal()) {
3920         // ObjSize is the true size, ArgSize rounded up to multiple of regs.
3921         unsigned ObjSize = Flags.getByValSize();
3922         unsigned ArgSize =
3923                 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3924         VecArgOffset += ArgSize;
3925         continue;
3926       }
3927
3928       switch(ObjectVT.getSimpleVT().SimpleTy) {
3929       default: llvm_unreachable("Unhandled argument type!");
3930       case MVT::i1:
3931       case MVT::i32:
3932       case MVT::f32:
3933         VecArgOffset += 4;
3934         break;
3935       case MVT::i64:  // PPC64
3936       case MVT::f64:
3937         // FIXME: We are guaranteed to be !isPPC64 at this point.
3938         // Does MVT::i64 apply?
3939         VecArgOffset += 8;
3940         break;
3941       case MVT::v4f32:
3942       case MVT::v4i32:
3943       case MVT::v8i16:
3944       case MVT::v16i8:
3945         // Nothing to do, we're only looking at Nonvector args here.
3946         break;
3947       }
3948     }
3949   }
3950   // We've found where the vector parameter area in memory is.  Skip the
3951   // first 12 parameters; these don't use that memory.
3952   VecArgOffset = ((VecArgOffset+15)/16)*16;
3953   VecArgOffset += 12*16;
3954
3955   // Add DAG nodes to load the arguments or copy them out of registers.  On
3956   // entry to a function on PPC, the arguments start after the linkage area,
3957   // although the first ones are often in registers.
3958
3959   SmallVector<SDValue, 8> MemOps;
3960   unsigned nAltivecParamsAtEnd = 0;
3961   Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
3962   unsigned CurArgIdx = 0;
3963   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3964     SDValue ArgVal;
3965     bool needsLoad = false;
3966     EVT ObjectVT = Ins[ArgNo].VT;
3967     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
3968     unsigned ArgSize = ObjSize;
3969     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3970     if (Ins[ArgNo].isOrigArg()) {
3971       std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
3972       CurArgIdx = Ins[ArgNo].getOrigArgIndex();
3973     }
3974     unsigned CurArgOffset = ArgOffset;
3975
3976     // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
3977     if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
3978         ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
3979       if (isVarArg || isPPC64) {
3980         MinReservedArea = ((MinReservedArea+15)/16)*16;
3981         MinReservedArea += CalculateStackSlotSize(ObjectVT,
3982                                                   Flags,
3983                                                   PtrByteSize);
3984       } else  nAltivecParamsAtEnd++;
3985     } else
3986       // Calculate min reserved area.
3987       MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
3988                                                 Flags,
3989                                                 PtrByteSize);
3990
3991     // FIXME the codegen can be much improved in some cases.
3992     // We do not have to keep everything in memory.
3993     if (Flags.isByVal()) {
3994       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
3995
3996       // ObjSize is the true size, ArgSize rounded up to multiple of registers.
3997       ObjSize = Flags.getByValSize();
3998       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3999       // Objects of size 1 and 2 are right justified, everything else is
4000       // left justified.  This means the memory address is adjusted forwards.
4001       if (ObjSize==1 || ObjSize==2) {
4002         CurArgOffset = CurArgOffset + (4 - ObjSize);
4003       }
4004       // The value of the object is its address.
4005       int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
4006       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4007       InVals.push_back(FIN);
4008       if (ObjSize==1 || ObjSize==2) {
4009         if (GPR_idx != Num_GPR_Regs) {
4010           unsigned VReg;
4011           if (isPPC64)
4012             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4013           else
4014             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4015           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4016           EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
4017           SDValue Store =
4018               DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
4019                                 MachinePointerInfo(&*FuncArg), ObjType);
4020           MemOps.push_back(Store);
4021           ++GPR_idx;
4022         }
4023
4024         ArgOffset += PtrByteSize;
4025
4026         continue;
4027       }
4028       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4029         // Store whatever pieces of the object are in registers
4030         // to memory.  ArgOffset will be the address of the beginning
4031         // of the object.
4032         if (GPR_idx != Num_GPR_Regs) {
4033           unsigned VReg;
4034           if (isPPC64)
4035             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4036           else
4037             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4038           int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4039           SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4040           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4041           SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4042                                        MachinePointerInfo(&*FuncArg, j));
4043           MemOps.push_back(Store);
4044           ++GPR_idx;
4045           ArgOffset += PtrByteSize;
4046         } else {
4047           ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
4048           break;
4049         }
4050       }
4051       continue;
4052     }
4053
4054     switch (ObjectVT.getSimpleVT().SimpleTy) {
4055     default: llvm_unreachable("Unhandled argument type!");
4056     case MVT::i1:
4057     case MVT::i32:
4058       if (!isPPC64) {
4059         if (GPR_idx != Num_GPR_Regs) {
4060           unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4061           ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4062
4063           if (ObjectVT == MVT::i1)
4064             ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
4065
4066           ++GPR_idx;
4067         } else {
4068           needsLoad = true;
4069           ArgSize = PtrByteSize;
4070         }
4071         // All int arguments reserve stack space in the Darwin ABI.
4072         ArgOffset += PtrByteSize;
4073         break;
4074       }
4075       LLVM_FALLTHROUGH;
4076     case MVT::i64:  // PPC64
4077       if (GPR_idx != Num_GPR_Regs) {
4078         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4079         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4080
4081         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4082           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4083           // value to MVT::i64 and then truncate to the correct register size.
4084           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4085
4086         ++GPR_idx;
4087       } else {
4088         needsLoad = true;
4089         ArgSize = PtrByteSize;
4090       }
4091       // All int arguments reserve stack space in the Darwin ABI.
4092       ArgOffset += 8;
4093       break;
4094
4095     case MVT::f32:
4096     case MVT::f64:
4097       // Every 4 bytes of argument space consumes one of the GPRs available for
4098       // argument passing.
4099       if (GPR_idx != Num_GPR_Regs) {
4100         ++GPR_idx;
4101         if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
4102           ++GPR_idx;
4103       }
4104       if (FPR_idx != Num_FPR_Regs) {
4105         unsigned VReg;
4106
4107         if (ObjectVT == MVT::f32)
4108           VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
4109         else
4110           VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
4111
4112         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4113         ++FPR_idx;
4114       } else {
4115         needsLoad = true;
4116       }
4117
4118       // All FP arguments reserve stack space in the Darwin ABI.
4119       ArgOffset += isPPC64 ? 8 : ObjSize;
4120       break;
4121     case MVT::v4f32:
4122     case MVT::v4i32:
4123     case MVT::v8i16:
4124     case MVT::v16i8:
4125       // Note that vector arguments in registers don't reserve stack space,
4126       // except in varargs functions.
4127       if (VR_idx != Num_VR_Regs) {
4128         unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4129         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4130         if (isVarArg) {
4131           while ((ArgOffset % 16) != 0) {
4132             ArgOffset += PtrByteSize;
4133             if (GPR_idx != Num_GPR_Regs)
4134               GPR_idx++;
4135           }
4136           ArgOffset += 16;
4137           GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
4138         }
4139         ++VR_idx;
4140       } else {
4141         if (!isVarArg && !isPPC64) {
4142           // Vectors go after all the nonvectors.
4143           CurArgOffset = VecArgOffset;
4144           VecArgOffset += 16;
4145         } else {
4146           // Vectors are aligned.
4147           ArgOffset = ((ArgOffset+15)/16)*16;
4148           CurArgOffset = ArgOffset;
4149           ArgOffset += 16;
4150         }
4151         needsLoad = true;
4152       }
4153       break;
4154     }
4155
4156     // We need to load the argument to a virtual register if we determined above
4157     // that we ran out of physical registers of the appropriate type.
4158     if (needsLoad) {
4159       int FI = MFI.CreateFixedObject(ObjSize,
4160                                      CurArgOffset + (ArgSize - ObjSize),
4161                                      isImmutable);
4162       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4163       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4164     }
4165
4166     InVals.push_back(ArgVal);
4167   }
4168
4169   // Allow for Altivec parameters at the end, if needed.
4170   if (nAltivecParamsAtEnd) {
4171     MinReservedArea = ((MinReservedArea+15)/16)*16;
4172     MinReservedArea += 16*nAltivecParamsAtEnd;
4173   }
4174
4175   // Area that is at least reserved in the caller of this function.
4176   MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
4177
4178   // Set the size that is at least reserved in caller of this function.  Tail
4179   // call optimized functions' reserved stack space needs to be aligned so that
4180   // taking the difference between two stack areas will result in an aligned
4181   // stack.
4182   MinReservedArea =
4183       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4184   FuncInfo->setMinReservedArea(MinReservedArea);
4185
4186   // If the function takes variable number of arguments, make a frame index for
4187   // the start of the first vararg value... for expansion of llvm.va_start.
4188   if (isVarArg) {
4189     int Depth = ArgOffset;
4190
4191     FuncInfo->setVarArgsFrameIndex(
4192       MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
4193                             Depth, true));
4194     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4195
4196     // If this function is vararg, store any remaining integer argument regs
4197     // to their spots on the stack so that they may be loaded by dereferencing
4198     // the result of va_next.
4199     for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
4200       unsigned VReg;
4201
4202       if (isPPC64)
4203         VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4204       else
4205         VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4206
4207       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4208       SDValue Store =
4209           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4210       MemOps.push_back(Store);
4211       // Increment the address by four for the next argument to store
4212       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4213       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4214     }
4215   }
4216
4217   if (!MemOps.empty())
4218     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4219
4220   return Chain;
4221 }
4222
4223 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4224 /// adjusted to accommodate the arguments for the tailcall.
4225 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4226                                    unsigned ParamSize) {
4227
4228   if (!isTailCall) return 0;
4229
4230   PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4231   unsigned CallerMinReservedArea = FI->getMinReservedArea();
4232   int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4233   // Remember only if the new adjustement is bigger.
4234   if (SPDiff < FI->getTailCallSPDelta())
4235     FI->setTailCallSPDelta(SPDiff);
4236
4237   return SPDiff;
4238 }
4239
4240 static bool isFunctionGlobalAddress(SDValue Callee);
4241
4242 static bool
4243 resideInSameSection(const Function *Caller, SDValue Callee,
4244                     const TargetMachine &TM) {
4245   // If !G, Callee can be an external symbol.
4246   GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4247   if (!G)
4248     return false;
4249
4250   const GlobalValue *GV = G->getGlobal();
4251   if (!GV->isStrongDefinitionForLinker())
4252     return false;
4253
4254   // Any explicitly-specified sections and section prefixes must also match.
4255   // Also, if we're using -ffunction-sections, then each function is always in
4256   // a different section (the same is true for COMDAT functions).
4257   if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4258       GV->getSection() != Caller->getSection())
4259     return false;
4260   if (const auto *F = dyn_cast<Function>(GV)) {
4261     if (F->getSectionPrefix() != Caller->getSectionPrefix())
4262       return false;
4263   }
4264
4265   // If the callee might be interposed, then we can't assume the ultimate call
4266   // target will be in the same section. Even in cases where we can assume that
4267   // interposition won't happen, in any case where the linker might insert a
4268   // stub to allow for interposition, we must generate code as though
4269   // interposition might occur. To understand why this matters, consider a
4270   // situation where: a -> b -> c where the arrows indicate calls. b and c are
4271   // in the same section, but a is in a different module (i.e. has a different
4272   // TOC base pointer). If the linker allows for interposition between b and c,
4273   // then it will generate a stub for the call edge between b and c which will
4274   // save the TOC pointer into the designated stack slot allocated by b. If we
4275   // return true here, and therefore allow a tail call between b and c, that
4276   // stack slot won't exist and the b -> c stub will end up saving b'c TOC base
4277   // pointer into the stack slot allocated by a (where the a -> b stub saved
4278   // a's TOC base pointer). If we're not considering a tail call, but rather,
4279   // whether a nop is needed after the call instruction in b, because the linker
4280   // will insert a stub, it might complain about a missing nop if we omit it
4281   // (although many don't complain in this case).
4282   if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4283     return false;
4284
4285   return true;
4286 }
4287
4288 static bool
4289 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4290                             const SmallVectorImpl<ISD::OutputArg> &Outs) {
4291   assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64());
4292
4293   const unsigned PtrByteSize = 8;
4294   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4295
4296   static const MCPhysReg GPR[] = {
4297     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4298     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4299   };
4300   static const MCPhysReg VR[] = {
4301     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4302     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4303   };
4304
4305   const unsigned NumGPRs = array_lengthof(GPR);
4306   const unsigned NumFPRs = 13;
4307   const unsigned NumVRs = array_lengthof(VR);
4308   const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4309
4310   unsigned NumBytes = LinkageSize;
4311   unsigned AvailableFPRs = NumFPRs;
4312   unsigned AvailableVRs = NumVRs;
4313
4314   for (const ISD::OutputArg& Param : Outs) {
4315     if (Param.Flags.isNest()) continue;
4316
4317     if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
4318                                PtrByteSize, LinkageSize, ParamAreaSize,
4319                                NumBytes, AvailableFPRs, AvailableVRs,
4320                                Subtarget.hasQPX()))
4321       return true;
4322   }
4323   return false;
4324 }
4325
4326 static bool
4327 hasSameArgumentList(const Function *CallerFn, ImmutableCallSite *CS) {
4328   if (CS->arg_size() != CallerFn->arg_size())
4329     return false;
4330
4331   ImmutableCallSite::arg_iterator CalleeArgIter = CS->arg_begin();
4332   ImmutableCallSite::arg_iterator CalleeArgEnd = CS->arg_end();
4333   Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4334
4335   for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4336     const Value* CalleeArg = *CalleeArgIter;
4337     const Value* CallerArg = &(*CallerArgIter);
4338     if (CalleeArg == CallerArg)
4339       continue;
4340
4341     // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4342     //        tail call @callee([4 x i64] undef, [4 x i64] %b)
4343     //      }
4344     // 1st argument of callee is undef and has the same type as caller.
4345     if (CalleeArg->getType() == CallerArg->getType() &&
4346         isa<UndefValue>(CalleeArg))
4347       continue;
4348
4349     return false;
4350   }
4351
4352   return true;
4353 }
4354
4355 bool
4356 PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4357                                     SDValue Callee,
4358                                     CallingConv::ID CalleeCC,
4359                                     ImmutableCallSite *CS,
4360                                     bool isVarArg,
4361                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
4362                                     const SmallVectorImpl<ISD::InputArg> &Ins,
4363                                     SelectionDAG& DAG) const {
4364   bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4365
4366   if (DisableSCO && !TailCallOpt) return false;
4367
4368   // Variadic argument functions are not supported.
4369   if (isVarArg) return false;
4370
4371   MachineFunction &MF = DAG.getMachineFunction();
4372   CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
4373
4374   // Tail or Sibling call optimization (TCO/SCO) needs callee and caller has
4375   // the same calling convention
4376   if (CallerCC != CalleeCC) return false;
4377
4378   // SCO support C calling convention
4379   if (CalleeCC != CallingConv::Fast && CalleeCC != CallingConv::C)
4380     return false;
4381
4382   // Caller contains any byval parameter is not supported.
4383   if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4384     return false;
4385
4386   // Callee contains any byval parameter is not supported, too.
4387   // Note: This is a quick work around, because in some cases, e.g.
4388   // caller's stack size > callee's stack size, we are still able to apply
4389   // sibling call optimization. See: https://reviews.llvm.org/D23441#513574
4390   if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4391     return false;
4392
4393   // No TCO/SCO on indirect call because Caller have to restore its TOC
4394   if (!isFunctionGlobalAddress(Callee) &&
4395       !isa<ExternalSymbolSDNode>(Callee))
4396     return false;
4397
4398   // Check if Callee resides in the same section, because for now, PPC64 SVR4
4399   // ABI (ELFv1/ELFv2) doesn't allow tail calls to a symbol resides in another
4400   // section.
4401   // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4402   if (!resideInSameSection(MF.getFunction(), Callee, getTargetMachine()))
4403     return false;
4404
4405   // TCO allows altering callee ABI, so we don't have to check further.
4406   if (CalleeCC == CallingConv::Fast && TailCallOpt)
4407     return true;
4408
4409   if (DisableSCO) return false;
4410
4411   // If callee use the same argument list that caller is using, then we can
4412   // apply SCO on this case. If it is not, then we need to check if callee needs
4413   // stack for passing arguments.
4414   if (!hasSameArgumentList(MF.getFunction(), CS) &&
4415       needStackSlotPassParameters(Subtarget, Outs)) {
4416     return false;
4417   }
4418
4419   return true;
4420 }
4421
4422 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
4423 /// for tail call optimization. Targets which want to do tail call
4424 /// optimization should implement this function.
4425 bool
4426 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4427                                                      CallingConv::ID CalleeCC,
4428                                                      bool isVarArg,
4429                                       const SmallVectorImpl<ISD::InputArg> &Ins,
4430                                                      SelectionDAG& DAG) const {
4431   if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4432     return false;
4433
4434   // Variable argument functions are not supported.
4435   if (isVarArg)
4436     return false;
4437
4438   MachineFunction &MF = DAG.getMachineFunction();
4439   CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
4440   if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4441     // Functions containing by val parameters are not supported.
4442     for (unsigned i = 0; i != Ins.size(); i++) {
4443        ISD::ArgFlagsTy Flags = Ins[i].Flags;
4444        if (Flags.isByVal()) return false;
4445     }
4446
4447     // Non-PIC/GOT tail calls are supported.
4448     if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4449       return true;
4450
4451     // At the moment we can only do local tail calls (in same module, hidden
4452     // or protected) if we are generating PIC.
4453     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4454       return G->getGlobal()->hasHiddenVisibility()
4455           || G->getGlobal()->hasProtectedVisibility();
4456   }
4457
4458   return false;
4459 }
4460
4461 /// isCallCompatibleAddress - Return the immediate to use if the specified
4462 /// 32-bit value is representable in the immediate field of a BxA instruction.
4463 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4464   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4465   if (!C) return nullptr;
4466
4467   int Addr = C->getZExtValue();
4468   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
4469       SignExtend32<26>(Addr) != Addr)
4470     return nullptr;  // Top 6 bits have to be sext of immediate.
4471
4472   return DAG
4473       .getConstant(
4474           (int)C->getZExtValue() >> 2, SDLoc(Op),
4475           DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
4476       .getNode();
4477 }
4478
4479 namespace {
4480
4481 struct TailCallArgumentInfo {
4482   SDValue Arg;
4483   SDValue FrameIdxOp;
4484   int FrameIdx = 0;
4485
4486   TailCallArgumentInfo() = default;
4487 };
4488
4489 } // end anonymous namespace
4490
4491 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4492 static void StoreTailCallArgumentsToStackSlot(
4493     SelectionDAG &DAG, SDValue Chain,
4494     const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
4495     SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
4496   for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4497     SDValue Arg = TailCallArgs[i].Arg;
4498     SDValue FIN = TailCallArgs[i].FrameIdxOp;
4499     int FI = TailCallArgs[i].FrameIdx;
4500     // Store relative to framepointer.
4501     MemOpChains.push_back(DAG.getStore(
4502         Chain, dl, Arg, FIN,
4503         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4504   }
4505 }
4506
4507 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4508 /// the appropriate stack slot for the tail call optimized function call.
4509 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
4510                                              SDValue OldRetAddr, SDValue OldFP,
4511                                              int SPDiff, const SDLoc &dl) {
4512   if (SPDiff) {
4513     // Calculate the new stack slot for the return address.
4514     MachineFunction &MF = DAG.getMachineFunction();
4515     const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
4516     const PPCFrameLowering *FL = Subtarget.getFrameLowering();
4517     bool isPPC64 = Subtarget.isPPC64();
4518     int SlotSize = isPPC64 ? 8 : 4;
4519     int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
4520     int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
4521                                                          NewRetAddrLoc, true);
4522     EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4523     SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
4524     Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
4525                          MachinePointerInfo::getFixedStack(MF, NewRetAddr));
4526
4527     // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
4528     // slot as the FP is never overwritten.
4529     if (Subtarget.isDarwinABI()) {
4530       int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
4531       int NewFPIdx = MF.getFrameInfo().CreateFixedObject(SlotSize, NewFPLoc,
4532                                                          true);
4533       SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
4534       Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
4535                            MachinePointerInfo::getFixedStack(
4536                                DAG.getMachineFunction(), NewFPIdx));
4537     }
4538   }
4539   return Chain;
4540 }
4541
4542 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
4543 /// the position of the argument.
4544 static void
4545 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
4546                          SDValue Arg, int SPDiff, unsigned ArgOffset,
4547                      SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
4548   int Offset = ArgOffset + SPDiff;
4549   uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
4550   int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4551   EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4552   SDValue FIN = DAG.getFrameIndex(FI, VT);
4553   TailCallArgumentInfo Info;
4554   Info.Arg = Arg;
4555   Info.FrameIdxOp = FIN;
4556   Info.FrameIdx = FI;
4557   TailCallArguments.push_back(Info);
4558 }
4559
4560 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
4561 /// stack slot. Returns the chain as result and the loaded frame pointers in
4562 /// LROpOut/FPOpout. Used when tail calling.
4563 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
4564     SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
4565     SDValue &FPOpOut, const SDLoc &dl) const {
4566   if (SPDiff) {
4567     // Load the LR and FP stack slot for later adjusting.
4568     EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
4569     LROpOut = getReturnAddrFrameIndex(DAG);
4570     LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
4571     Chain = SDValue(LROpOut.getNode(), 1);
4572
4573     // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
4574     // slot as the FP is never overwritten.
4575     if (Subtarget.isDarwinABI()) {
4576       FPOpOut = getFramePointerFrameIndex(DAG);
4577       FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo());
4578       Chain = SDValue(FPOpOut.getNode(), 1);
4579     }
4580   }
4581   return Chain;
4582 }
4583
4584 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
4585 /// by "Src" to address "Dst" of size "Size".  Alignment information is
4586 /// specified by the specific parameter attribute. The copy will be passed as
4587 /// a byval function parameter.
4588 /// Sometimes what we are copying is the end of a larger object, the part that
4589 /// does not fit in registers.
4590 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
4591                                          SDValue Chain, ISD::ArgFlagsTy Flags,
4592                                          SelectionDAG &DAG, const SDLoc &dl) {
4593   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
4594   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
4595                        false, false, false, MachinePointerInfo(),
4596                        MachinePointerInfo());
4597 }
4598
4599 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
4600 /// tail calls.
4601 static void LowerMemOpCallTo(
4602     SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
4603     SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
4604     bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
4605     SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
4606   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4607   if (!isTailCall) {
4608     if (isVector) {
4609       SDValue StackPtr;
4610       if (isPPC64)
4611         StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4612       else
4613         StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4614       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
4615                            DAG.getConstant(ArgOffset, dl, PtrVT));
4616     }
4617     MemOpChains.push_back(
4618         DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
4619     // Calculate and remember argument location.
4620   } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
4621                                   TailCallArguments);
4622 }
4623
4624 static void
4625 PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
4626                 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
4627                 SDValue FPOp,
4628                 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
4629   // Emit a sequence of copyto/copyfrom virtual registers for arguments that
4630   // might overwrite each other in case of tail call optimization.
4631   SmallVector<SDValue, 8> MemOpChains2;
4632   // Do not flag preceding copytoreg stuff together with the following stuff.
4633   InFlag = SDValue();
4634   StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
4635                                     MemOpChains2, dl);
4636   if (!MemOpChains2.empty())
4637     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4638
4639   // Store the return address to the appropriate stack slot.
4640   Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
4641
4642   // Emit callseq_end just before tailcall node.
4643   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4644                              DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4645   InFlag = Chain.getValue(1);
4646 }
4647
4648 // Is this global address that of a function that can be called by name? (as
4649 // opposed to something that must hold a descriptor for an indirect call).
4650 static bool isFunctionGlobalAddress(SDValue Callee) {
4651   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
4652     if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
4653         Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
4654       return false;
4655
4656     return G->getGlobal()->getValueType()->isFunctionTy();
4657   }
4658
4659   return false;
4660 }
4661
4662 static unsigned
4663 PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
4664             SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall,
4665             bool isPatchPoint, bool hasNest,
4666             SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
4667             SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
4668             ImmutableCallSite *CS, const PPCSubtarget &Subtarget) {
4669   bool isPPC64 = Subtarget.isPPC64();
4670   bool isSVR4ABI = Subtarget.isSVR4ABI();
4671   bool isELFv2ABI = Subtarget.isELFv2ABI();
4672
4673   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4674   NodeTys.push_back(MVT::Other);   // Returns a chain
4675   NodeTys.push_back(MVT::Glue);    // Returns a flag for retval copy to use.
4676
4677   unsigned CallOpc = PPCISD::CALL;
4678
4679   bool needIndirectCall = true;
4680   if (!isSVR4ABI || !isPPC64)
4681     if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
4682       // If this is an absolute destination address, use the munged value.
4683       Callee = SDValue(Dest, 0);
4684       needIndirectCall = false;
4685     }
4686
4687   // PC-relative references to external symbols should go through $stub, unless
4688   // we're building with the leopard linker or later, which automatically
4689   // synthesizes these stubs.
4690   const TargetMachine &TM = DAG.getTarget();
4691   const Module *Mod = DAG.getMachineFunction().getFunction()->getParent();
4692   const GlobalValue *GV = nullptr;
4693   if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
4694     GV = G->getGlobal();
4695   bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
4696   bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64;
4697
4698   if (isFunctionGlobalAddress(Callee)) {
4699     GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
4700     // A call to a TLS address is actually an indirect call to a
4701     // thread-specific pointer.
4702     unsigned OpFlags = 0;
4703     if (UsePlt)
4704       OpFlags = PPCII::MO_PLT;
4705
4706     // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
4707     // every direct call is) turn it into a TargetGlobalAddress /
4708     // TargetExternalSymbol node so that legalize doesn't hack it.
4709     Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
4710                                         Callee.getValueType(), 0, OpFlags);
4711     needIndirectCall = false;
4712   }
4713
4714   if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
4715     unsigned char OpFlags = 0;
4716
4717     if (UsePlt)
4718       OpFlags = PPCII::MO_PLT;
4719
4720     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
4721                                          OpFlags);
4722     needIndirectCall = false;
4723   }
4724
4725   if (isPatchPoint) {
4726     // We'll form an invalid direct call when lowering a patchpoint; the full
4727     // sequence for an indirect call is complicated, and many of the
4728     // instructions introduced might have side effects (and, thus, can't be
4729     // removed later). The call itself will be removed as soon as the
4730     // argument/return lowering is complete, so the fact that it has the wrong
4731     // kind of operands should not really matter.
4732     needIndirectCall = false;
4733   }
4734
4735   if (needIndirectCall) {
4736     // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
4737     // to do the call, we can't use PPCISD::CALL.
4738     SDValue MTCTROps[] = {Chain, Callee, InFlag};
4739
4740     if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
4741       // Function pointers in the 64-bit SVR4 ABI do not point to the function
4742       // entry point, but to the function descriptor (the function entry point
4743       // address is part of the function descriptor though).
4744       // The function descriptor is a three doubleword structure with the
4745       // following fields: function entry point, TOC base address and
4746       // environment pointer.
4747       // Thus for a call through a function pointer, the following actions need
4748       // to be performed:
4749       //   1. Save the TOC of the caller in the TOC save area of its stack
4750       //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
4751       //   2. Load the address of the function entry point from the function
4752       //      descriptor.
4753       //   3. Load the TOC of the callee from the function descriptor into r2.
4754       //   4. Load the environment pointer from the function descriptor into
4755       //      r11.
4756       //   5. Branch to the function entry point address.
4757       //   6. On return of the callee, the TOC of the caller needs to be
4758       //      restored (this is done in FinishCall()).
4759       //
4760       // The loads are scheduled at the beginning of the call sequence, and the
4761       // register copies are flagged together to ensure that no other
4762       // operations can be scheduled in between. E.g. without flagging the
4763       // copies together, a TOC access in the caller could be scheduled between
4764       // the assignment of the callee TOC and the branch to the callee, which
4765       // results in the TOC access going through the TOC of the callee instead
4766       // of going through the TOC of the caller, which leads to incorrect code.
4767
4768       // Load the address of the function entry point from the function
4769       // descriptor.
4770       SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
4771       if (LDChain.getValueType() == MVT::Glue)
4772         LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);
4773
4774       auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
4775                           ? (MachineMemOperand::MODereferenceable |
4776                              MachineMemOperand::MOInvariant)
4777                           : MachineMemOperand::MONone;
4778
4779       MachinePointerInfo MPI(CS ? CS->getCalledValue() : nullptr);
4780       SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
4781                                         /* Alignment = */ 8, MMOFlags);
4782
4783       // Load environment pointer into r11.
4784       SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
4785       SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
4786       SDValue LoadEnvPtr =
4787           DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16),
4788                       /* Alignment = */ 8, MMOFlags);
4789
4790       SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
4791       SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
4792       SDValue TOCPtr =
4793           DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8),
4794                       /* Alignment = */ 8, MMOFlags);
4795
4796       setUsesTOCBasePtr(DAG);
4797       SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
4798                                         InFlag);
4799       Chain = TOCVal.getValue(0);
4800       InFlag = TOCVal.getValue(1);
4801
4802       // If the function call has an explicit 'nest' parameter, it takes the
4803       // place of the environment pointer.
4804       if (!hasNest) {
4805         SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
4806                                           InFlag);
4807
4808         Chain = EnvVal.getValue(0);
4809         InFlag = EnvVal.getValue(1);
4810       }
4811
4812       MTCTROps[0] = Chain;
4813       MTCTROps[1] = LoadFuncPtr;
4814       MTCTROps[2] = InFlag;
4815     }
4816
4817     Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
4818                         makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
4819     InFlag = Chain.getValue(1);
4820
4821     NodeTys.clear();
4822     NodeTys.push_back(MVT::Other);
4823     NodeTys.push_back(MVT::Glue);
4824     Ops.push_back(Chain);
4825     CallOpc = PPCISD::BCTRL;
4826     Callee.setNode(nullptr);
4827     // Add use of X11 (holding environment pointer)
4828     if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest)
4829       Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
4830     // Add CTR register as callee so a bctr can be emitted later.
4831     if (isTailCall)
4832       Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
4833   }
4834
4835   // If this is a direct call, pass the chain and the callee.
4836   if (Callee.getNode()) {
4837     Ops.push_back(Chain);
4838     Ops.push_back(Callee);
4839   }
4840   // If this is a tail call add stack pointer delta.
4841   if (isTailCall)
4842     Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
4843
4844   // Add argument registers to the end of the list so that they are known live
4845   // into the call.
4846   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
4847     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
4848                                   RegsToPass[i].second.getValueType()));
4849
4850   // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
4851   // into the call.
4852   if (isSVR4ABI && isPPC64 && !isPatchPoint) {
4853     setUsesTOCBasePtr(DAG);
4854     Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
4855   }
4856
4857   return CallOpc;
4858 }
4859
4860 SDValue PPCTargetLowering::LowerCallResult(
4861     SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
4862     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4863     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4864   SmallVector<CCValAssign, 16> RVLocs;
4865   CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
4866                     *DAG.getContext());
4867   CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
4868
4869   // Copy all of the result registers out of their specified physreg.
4870   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4871     CCValAssign &VA = RVLocs[i];
4872     assert(VA.isRegLoc() && "Can only return in registers!");
4873
4874     SDValue Val = DAG.getCopyFromReg(Chain, dl,
4875                                      VA.getLocReg(), VA.getLocVT(), InFlag);
4876     Chain = Val.getValue(1);
4877     InFlag = Val.getValue(2);
4878
4879     switch (VA.getLocInfo()) {
4880     default: llvm_unreachable("Unknown loc info!");
4881     case CCValAssign::Full: break;
4882     case CCValAssign::AExt:
4883       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4884       break;
4885     case CCValAssign::ZExt:
4886       Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
4887                         DAG.getValueType(VA.getValVT()));
4888       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4889       break;
4890     case CCValAssign::SExt:
4891       Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
4892                         DAG.getValueType(VA.getValVT()));
4893       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4894       break;
4895     }
4896
4897     InVals.push_back(Val);
4898   }
4899
4900   return Chain;
4901 }
4902
4903 SDValue PPCTargetLowering::FinishCall(
4904     CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg,
4905     bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
4906     SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag,
4907     SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
4908     unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
4909     SmallVectorImpl<SDValue> &InVals, ImmutableCallSite *CS) const {
4910   std::vector<EVT> NodeTys;
4911   SmallVector<SDValue, 8> Ops;
4912   unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
4913                                  SPDiff, isTailCall, isPatchPoint, hasNest,
4914                                  RegsToPass, Ops, NodeTys, CS, Subtarget);
4915
4916   // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
4917   if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
4918     Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
4919
4920   // When performing tail call optimization the callee pops its arguments off
4921   // the stack. Account for this here so these bytes can be pushed back on in
4922   // PPCFrameLowering::eliminateCallFramePseudoInstr.
4923   int BytesCalleePops =
4924     (CallConv == CallingConv::Fast &&
4925      getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
4926
4927   // Add a register mask operand representing the call-preserved registers.
4928   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4929   const uint32_t *Mask =
4930       TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
4931   assert(Mask && "Missing call preserved mask for calling convention");
4932   Ops.push_back(DAG.getRegisterMask(Mask));
4933
4934   if (InFlag.getNode())
4935     Ops.push_back(InFlag);
4936
4937   // Emit tail call.
4938   if (isTailCall) {
4939     assert(((Callee.getOpcode() == ISD::Register &&
4940              cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
4941             Callee.getOpcode() == ISD::TargetExternalSymbol ||
4942             Callee.getOpcode() == ISD::TargetGlobalAddress ||
4943             isa<ConstantSDNode>(Callee)) &&
4944     "Expecting an global address, external symbol, absolute value or register");
4945
4946     DAG.getMachineFunction().getFrameInfo().setHasTailCall();
4947     return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
4948   }
4949
4950   // Add a NOP immediately after the branch instruction when using the 64-bit
4951   // SVR4 ABI. At link time, if caller and callee are in a different module and
4952   // thus have a different TOC, the call will be replaced with a call to a stub
4953   // function which saves the current TOC, loads the TOC of the callee and
4954   // branches to the callee. The NOP will be replaced with a load instruction
4955   // which restores the TOC of the caller from the TOC save slot of the current
4956   // stack frame. If caller and callee belong to the same module (and have the
4957   // same TOC), the NOP will remain unchanged.
4958
4959   MachineFunction &MF = DAG.getMachineFunction();
4960   if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64() &&
4961       !isPatchPoint) {
4962     if (CallOpc == PPCISD::BCTRL) {
4963       // This is a call through a function pointer.
4964       // Restore the caller TOC from the save area into R2.
4965       // See PrepareCall() for more information about calls through function
4966       // pointers in the 64-bit SVR4 ABI.
4967       // We are using a target-specific load with r2 hard coded, because the
4968       // result of a target-independent load would never go directly into r2,
4969       // since r2 is a reserved register (which prevents the register allocator
4970       // from allocating it), resulting in an additional register being
4971       // allocated and an unnecessary move instruction being generated.
4972       CallOpc = PPCISD::BCTRL_LOAD_TOC;
4973
4974       EVT PtrVT = getPointerTy(DAG.getDataLayout());
4975       SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
4976       unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
4977       SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
4978       SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
4979
4980       // The address needs to go after the chain input but before the flag (or
4981       // any other variadic arguments).
4982       Ops.insert(std::next(Ops.begin()), AddTOC);
4983     } else if (CallOpc == PPCISD::CALL &&
4984       !resideInSameSection(MF.getFunction(), Callee, DAG.getTarget())) {
4985       // Otherwise insert NOP for non-local calls.
4986       CallOpc = PPCISD::CALL_NOP;
4987     }
4988   }
4989
4990   Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
4991   InFlag = Chain.getValue(1);
4992
4993   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4994                              DAG.getIntPtrConstant(BytesCalleePops, dl, true),
4995                              InFlag, dl);
4996   if (!Ins.empty())
4997     InFlag = Chain.getValue(1);
4998
4999   return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
5000                          Ins, dl, DAG, InVals);
5001 }
5002
5003 SDValue
5004 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5005                              SmallVectorImpl<SDValue> &InVals) const {
5006   SelectionDAG &DAG                     = CLI.DAG;
5007   SDLoc &dl                             = CLI.DL;
5008   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5009   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
5010   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
5011   SDValue Chain                         = CLI.Chain;
5012   SDValue Callee                        = CLI.Callee;
5013   bool &isTailCall                      = CLI.IsTailCall;
5014   CallingConv::ID CallConv              = CLI.CallConv;
5015   bool isVarArg                         = CLI.IsVarArg;
5016   bool isPatchPoint                     = CLI.IsPatchPoint;
5017   ImmutableCallSite *CS                 = CLI.CS;
5018
5019   if (isTailCall) {
5020     if (Subtarget.useLongCalls() && !(CS && CS->isMustTailCall()))
5021       isTailCall = false;
5022     else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5023       isTailCall =
5024         IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS,
5025                                                  isVarArg, Outs, Ins, DAG);
5026     else
5027       isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
5028                                                      Ins, DAG);
5029     if (isTailCall) {
5030       ++NumTailCalls;
5031       if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5032         ++NumSiblingCalls;
5033
5034       assert(isa<GlobalAddressSDNode>(Callee) &&
5035              "Callee should be an llvm::Function object.");
5036       DEBUG(
5037         const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5038         const unsigned Width = 80 - strlen("TCO caller: ")
5039                                   - strlen(", callee linkage: 0, 0");
5040         dbgs() << "TCO caller: "
5041                << left_justify(DAG.getMachineFunction().getName(), Width)
5042                << ", callee linkage: "
5043                << GV->getVisibility() << ", " << GV->getLinkage() << "\n"
5044       );
5045     }
5046   }
5047
5048   if (!isTailCall && CS && CS->isMustTailCall())
5049     report_fatal_error("failed to perform tail call elimination on a call "
5050                        "site marked musttail");
5051
5052   // When long calls (i.e. indirect calls) are always used, calls are always
5053   // made via function pointer. If we have a function name, first translate it
5054   // into a pointer.
5055   if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5056       !isTailCall)
5057     Callee = LowerGlobalAddress(Callee, DAG);
5058
5059   if (Subtarget.isSVR4ABI()) {
5060     if (Subtarget.isPPC64())
5061       return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
5062                               isTailCall, isPatchPoint, Outs, OutVals, Ins,
5063                               dl, DAG, InVals, CS);
5064     else
5065       return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
5066                               isTailCall, isPatchPoint, Outs, OutVals, Ins,
5067                               dl, DAG, InVals, CS);
5068   }
5069
5070   return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
5071                           isTailCall, isPatchPoint, Outs, OutVals, Ins,
5072                           dl, DAG, InVals, CS);
5073 }
5074
5075 SDValue PPCTargetLowering::LowerCall_32SVR4(
5076     SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5077     bool isTailCall, bool isPatchPoint,
5078     const SmallVectorImpl<ISD::OutputArg> &Outs,
5079     const SmallVectorImpl<SDValue> &OutVals,
5080     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5081     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5082     ImmutableCallSite *CS) const {
5083   // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5084   // of the 32-bit SVR4 ABI stack frame layout.
5085
5086   assert((CallConv == CallingConv::C ||
5087           CallConv == CallingConv::Fast) && "Unknown calling convention!");
5088
5089   unsigned PtrByteSize = 4;
5090
5091   MachineFunction &MF = DAG.getMachineFunction();
5092
5093   // Mark this function as potentially containing a function that contains a
5094   // tail call. As a consequence the frame pointer will be used for dynamicalloc
5095   // and restoring the callers stack pointer in this functions epilog. This is
5096   // done because by tail calling the called function might overwrite the value
5097   // in this function's (MF) stack pointer stack slot 0(SP).
5098   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5099       CallConv == CallingConv::Fast)
5100     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5101
5102   // Count how many bytes are to be pushed on the stack, including the linkage
5103   // area, parameter list area and the part of the local variable space which
5104   // contains copies of aggregates which are passed by value.
5105
5106   // Assign locations to all of the outgoing arguments.
5107   SmallVector<CCValAssign, 16> ArgLocs;
5108   PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
5109
5110   // Reserve space for the linkage area on the stack.
5111   CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5112                        PtrByteSize);
5113   if (useSoftFloat())
5114     CCInfo.PreAnalyzeCallOperands(Outs);
5115
5116   if (isVarArg) {
5117     // Handle fixed and variable vector arguments differently.
5118     // Fixed vector arguments go into registers as long as registers are
5119     // available. Variable vector arguments always go into memory.
5120     unsigned NumArgs = Outs.size();
5121
5122     for (unsigned i = 0; i != NumArgs; ++i) {
5123       MVT ArgVT = Outs[i].VT;
5124       ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5125       bool Result;
5126
5127       if (Outs[i].IsFixed) {
5128         Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5129                                CCInfo);
5130       } else {
5131         Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5132                                       ArgFlags, CCInfo);
5133       }
5134
5135       if (Result) {
5136 #ifndef NDEBUG
5137         errs() << "Call operand #" << i << " has unhandled type "
5138              << EVT(ArgVT).getEVTString() << "\n";
5139 #endif
5140         llvm_unreachable(nullptr);
5141       }
5142     }
5143   } else {
5144     // All arguments are treated the same.
5145     CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5146   }
5147   CCInfo.clearWasPPCF128();
5148
5149   // Assign locations to all of the outgoing aggregate by value arguments.
5150   SmallVector<CCValAssign, 16> ByValArgLocs;
5151   CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext());
5152
5153   // Reserve stack space for the allocations in CCInfo.
5154   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
5155
5156   CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5157
5158   // Size of the linkage area, parameter list area and the part of the local
5159   // space variable where copies of aggregates which are passed by value are
5160   // stored.
5161   unsigned NumBytes = CCByValInfo.getNextStackOffset();
5162
5163   // Calculate by how many bytes the stack has to be adjusted in case of tail
5164   // call optimization.
5165   int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5166
5167   // Adjust the stack pointer for the new arguments...
5168   // These operations are automatically eliminated by the prolog/epilog pass
5169   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5170   SDValue CallSeqStart = Chain;
5171
5172   // Load the return address and frame pointer so it can be moved somewhere else
5173   // later.
5174   SDValue LROp, FPOp;
5175   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5176
5177   // Set up a copy of the stack pointer for use loading and storing any
5178   // arguments that may not fit in the registers available for argument
5179   // passing.
5180   SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5181
5182   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5183   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5184   SmallVector<SDValue, 8> MemOpChains;
5185
5186   bool seenFloatArg = false;
5187   // Walk the register/memloc assignments, inserting copies/loads.
5188   for (unsigned i = 0, j = 0, e = ArgLocs.size();
5189        i != e;
5190        ++i) {
5191     CCValAssign &VA = ArgLocs[i];
5192     SDValue Arg = OutVals[i];
5193     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5194
5195     if (Flags.isByVal()) {
5196       // Argument is an aggregate which is passed by value, thus we need to
5197       // create a copy of it in the local variable space of the current stack
5198       // frame (which is the stack frame of the caller) and pass the address of
5199       // this copy to the callee.
5200       assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5201       CCValAssign &ByValVA = ByValArgLocs[j++];
5202       assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5203
5204       // Memory reserved in the local variable space of the callers stack frame.
5205       unsigned LocMemOffset = ByValVA.getLocMemOffset();
5206
5207       SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5208       PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5209                            StackPtr, PtrOff);
5210
5211       // Create a copy of the argument in the local area of the current
5212       // stack frame.
5213       SDValue MemcpyCall =
5214         CreateCopyOfByValArgument(Arg, PtrOff,
5215                                   CallSeqStart.getNode()->getOperand(0),
5216                                   Flags, DAG, dl);
5217
5218       // This must go outside the CALLSEQ_START..END.
5219       SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5220                                                      SDLoc(MemcpyCall));
5221       DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5222                              NewCallSeqStart.getNode());
5223       Chain = CallSeqStart = NewCallSeqStart;
5224
5225       // Pass the address of the aggregate copy on the stack either in a
5226       // physical register or in the parameter list area of the current stack
5227       // frame to the callee.
5228       Arg = PtrOff;
5229     }
5230
5231     if (VA.isRegLoc()) {
5232       if (Arg.getValueType() == MVT::i1)
5233         Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);
5234
5235       seenFloatArg |= VA.getLocVT().isFloatingPoint();
5236       // Put argument in a physical register.
5237       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5238     } else {
5239       // Put argument in the parameter list area of the current stack frame.
5240       assert(VA.isMemLoc());
5241       unsigned LocMemOffset = VA.getLocMemOffset();
5242
5243       if (!isTailCall) {
5244         SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5245         PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5246                              StackPtr, PtrOff);
5247
5248         MemOpChains.push_back(
5249             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5250       } else {
5251         // Calculate and remember argument location.
5252         CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5253                                  TailCallArguments);
5254       }
5255     }
5256   }
5257
5258   if (!MemOpChains.empty())
5259     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5260
5261   // Build a sequence of copy-to-reg nodes chained together with token chain
5262   // and flag operands which copy the outgoing args into the appropriate regs.
5263   SDValue InFlag;
5264   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5265     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5266                              RegsToPass[i].second, InFlag);
5267     InFlag = Chain.getValue(1);
5268   }
5269
5270   // Set CR bit 6 to true if this is a vararg call with floating args passed in
5271   // registers.
5272   if (isVarArg) {
5273     SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
5274     SDValue Ops[] = { Chain, InFlag };
5275
5276     Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5277                         dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5278
5279     InFlag = Chain.getValue(1);
5280   }
5281
5282   if (isTailCall)
5283     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5284                     TailCallArguments);
5285
5286   return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
5287                     /* unused except on PPC64 ELFv1 */ false, DAG,
5288                     RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
5289                     NumBytes, Ins, InVals, CS);
5290 }
5291
5292 // Copy an argument into memory, being careful to do this outside the
5293 // call sequence for the call to which the argument belongs.
5294 SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5295     SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5296     SelectionDAG &DAG, const SDLoc &dl) const {
5297   SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5298                         CallSeqStart.getNode()->getOperand(0),
5299                         Flags, DAG, dl);
5300   // The MEMCPY must go outside the CALLSEQ_START..END.
5301   int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5302   SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
5303                                                  SDLoc(MemcpyCall));
5304   DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5305                          NewCallSeqStart.getNode());
5306   return NewCallSeqStart;
5307 }
5308
5309 SDValue PPCTargetLowering::LowerCall_64SVR4(
5310     SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5311     bool isTailCall, bool isPatchPoint,
5312     const SmallVectorImpl<ISD::OutputArg> &Outs,
5313     const SmallVectorImpl<SDValue> &OutVals,
5314     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5315     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5316     ImmutableCallSite *CS) const {
5317   bool isELFv2ABI = Subtarget.isELFv2ABI();
5318   bool isLittleEndian = Subtarget.isLittleEndian();
5319   unsigned NumOps = Outs.size();
5320   bool hasNest = false;
5321   bool IsSibCall = false;
5322
5323   EVT PtrVT = getPointerTy(DAG.getDataLayout());
5324   unsigned PtrByteSize = 8;
5325
5326   MachineFunction &MF = DAG.getMachineFunction();
5327
5328   if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5329     IsSibCall = true;
5330
5331   // Mark this function as potentially containing a function that contains a
5332   // tail call. As a consequence the frame pointer will be used for dynamicalloc
5333   // and restoring the callers stack pointer in this functions epilog. This is
5334   // done because by tail calling the called function might overwrite the value
5335   // in this function's (MF) stack pointer stack slot 0(SP).
5336   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5337       CallConv == CallingConv::Fast)
5338     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5339
5340   assert(!(CallConv == CallingConv::Fast && isVarArg) &&
5341          "fastcc not supported on varargs functions");
5342
5343   // Count how many bytes are to be pushed on the stack, including the linkage
5344   // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
5345   // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5346   // area is 32 bytes reserved space for [SP][CR][LR][TOC].
5347   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5348   unsigned NumBytes = LinkageSize;
5349   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5350   unsigned &QFPR_idx = FPR_idx;
5351
5352   static const MCPhysReg GPR[] = {
5353     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5354     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5355   };
5356   static const MCPhysReg VR[] = {
5357     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5358     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5359   };
5360
5361   const unsigned NumGPRs = array_lengthof(GPR);
5362   const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
5363   const unsigned NumVRs  = array_lengthof(VR);
5364   const unsigned NumQFPRs = NumFPRs;
5365
5366   // On ELFv2, we can avoid allocating the parameter area if all the arguments
5367   // can be passed to the callee in registers.
5368   // For the fast calling convention, there is another check below.
5369   // Note: We should keep consistent with LowerFormalArguments_64SVR4()
5370   bool HasParameterArea = !isELFv2ABI || isVarArg || CallConv == CallingConv::Fast;
5371   if (!HasParameterArea) {
5372     unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5373     unsigned AvailableFPRs = NumFPRs;
5374     unsigned AvailableVRs = NumVRs;
5375     unsigned NumBytesTmp = NumBytes;
5376     for (unsigned i = 0; i != NumOps; ++i) {
5377       if (Outs[i].Flags.isNest()) continue;
5378       if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
5379                                 PtrByteSize, LinkageSize, ParamAreaSize,
5380                                 NumBytesTmp, AvailableFPRs, AvailableVRs,
5381                                 Subtarget.hasQPX()))
5382         HasParameterArea = true;
5383     }
5384   }
5385
5386   // When using the fast calling convention, we don't provide backing for
5387   // arguments that will be in registers.
5388   unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
5389
5390   // Add up all the space actually used.
5391   for (unsigned i = 0; i != NumOps; ++i) {
5392     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5393     EVT ArgVT = Outs[i].VT;
5394     EVT OrigVT = Outs[i].ArgVT;
5395
5396     if (Flags.isNest())
5397       continue;
5398
5399     if (CallConv == CallingConv::Fast) {
5400       if (Flags.isByVal())
5401         NumGPRsUsed += (Flags.getByValSize()+7)/8;
5402       else
5403         switch (ArgVT.getSimpleVT().SimpleTy) {
5404         default: llvm_unreachable("Unexpected ValueType for argument!");
5405         case MVT::i1:
5406         case MVT::i32:
5407         case MVT::i64:
5408           if (++NumGPRsUsed <= NumGPRs)
5409             continue;
5410           break;
5411         case MVT::v4i32:
5412         case MVT::v8i16:
5413         case MVT::v16i8:
5414         case MVT::v2f64:
5415         case MVT::v2i64:
5416         case MVT::v1i128:
5417           if (++NumVRsUsed <= NumVRs)
5418             continue;
5419           break;
5420         case MVT::v4f32:
5421           // When using QPX, this is handled like a FP register, otherwise, it
5422           // is an Altivec register.
5423           if (Subtarget.hasQPX()) {
5424             if (++NumFPRsUsed <= NumFPRs)
5425               continue;
5426           } else {
5427             if (++NumVRsUsed <= NumVRs)
5428               continue;
5429           }
5430           break;
5431         case MVT::f32:
5432         case MVT::f64:
5433         case MVT::v4f64: // QPX
5434         case MVT::v4i1:  // QPX
5435           if (++NumFPRsUsed <= NumFPRs)
5436             continue;
5437           break;
5438         }
5439     }
5440
5441     /* Respect alignment of argument on the stack.  */
5442     unsigned Align =
5443       CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5444     NumBytes = ((NumBytes + Align - 1) / Align) * Align;
5445
5446     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
5447     if (Flags.isInConsecutiveRegsLast())
5448       NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
5449   }
5450
5451   unsigned NumBytesActuallyUsed = NumBytes;
5452
5453   // In the old ELFv1 ABI,
5454   // the prolog code of the callee may store up to 8 GPR argument registers to
5455   // the stack, allowing va_start to index over them in memory if its varargs.
5456   // Because we cannot tell if this is needed on the caller side, we have to
5457   // conservatively assume that it is needed.  As such, make sure we have at
5458   // least enough stack space for the caller to store the 8 GPRs.
5459   // In the ELFv2 ABI, we allocate the parameter area iff a callee
5460   // really requires memory operands, e.g. a vararg function.
5461   if (HasParameterArea)
5462     NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
5463   else
5464     NumBytes = LinkageSize;
5465
5466   // Tail call needs the stack to be aligned.
5467   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5468       CallConv == CallingConv::Fast)
5469     NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
5470
5471   int SPDiff = 0;
5472
5473   // Calculate by how many bytes the stack has to be adjusted in case of tail
5474   // call optimization.
5475   if (!IsSibCall)
5476     SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5477
5478   // To protect arguments on the stack from being clobbered in a tail call,
5479   // force all the loads to happen before doing any other lowering.
5480   if (isTailCall)
5481     Chain = DAG.getStackArgumentTokenFactor(Chain);
5482
5483   // Adjust the stack pointer for the new arguments...
5484   // These operations are automatically eliminated by the prolog/epilog pass
5485   if (!IsSibCall)
5486     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5487   SDValue CallSeqStart = Chain;
5488
5489   // Load the return address and frame pointer so it can be move somewhere else
5490   // later.
5491   SDValue LROp, FPOp;
5492   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5493
5494   // Set up a copy of the stack pointer for use loading and storing any
5495   // arguments that may not fit in the registers available for argument
5496   // passing.
5497   SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5498
5499   // Figure out which arguments are going to go in registers, and which in
5500   // memory.  Also, if this is a vararg function, floating point operations
5501   // must be stored to our stack, and loaded into integer regs as well, if
5502   // any integer regs are available for argument passing.
5503   unsigned ArgOffset = LinkageSize;
5504
5505   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5506   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5507
5508   SmallVector<SDValue, 8> MemOpChains;
5509   for (unsigned i = 0; i != NumOps; ++i) {
5510     SDValue Arg = OutVals[i];
5511     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5512     EVT ArgVT = Outs[i].VT;
5513     EVT OrigVT = Outs[i].ArgVT;
5514
5515     // PtrOff will be used to store the current argument to the stack if a
5516     // register cannot be found for it.
5517     SDValue PtrOff;
5518
5519     // We re-align the argument offset for each argument, except when using the
5520     // fast calling convention, when we need to make sure we do that only when
5521     // we'll actually use a stack slot.
5522     auto ComputePtrOff = [&]() {
5523       /* Respect alignment of argument on the stack.  */
5524       unsigned Align =
5525         CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5526       ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
5527
5528       PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
5529
5530       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5531     };
5532
5533     if (CallConv != CallingConv::Fast) {
5534       ComputePtrOff();
5535
5536       /* Compute GPR index associated with argument offset.  */
5537       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
5538       GPR_idx = std::min(GPR_idx, NumGPRs);
5539     }
5540
5541     // Promote integers to 64-bit values.
5542     if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
5543       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
5544       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
5545       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
5546     }
5547
5548     // FIXME memcpy is used way more than necessary.  Correctness first.
5549     // Note: "by value" is code for passing a structure by value, not
5550     // basic types.
5551     if (Flags.isByVal()) {
5552       // Note: Size includes alignment padding, so
5553       //   struct x { short a; char b; }
5554       // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
5555       // These are the proper values we need for right-justifying the
5556       // aggregate in a parameter register.
5557       unsigned Size = Flags.getByValSize();
5558
5559       // An empty aggregate parameter takes up no storage and no
5560       // registers.
5561       if (Size == 0)
5562         continue;
5563
5564       if (CallConv == CallingConv::Fast)
5565         ComputePtrOff();
5566
5567       // All aggregates smaller than 8 bytes must be passed right-justified.
5568       if (Size==1 || Size==2 || Size==4) {
5569         EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
5570         if (GPR_idx != NumGPRs) {
5571           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
5572                                         MachinePointerInfo(), VT);
5573           MemOpChains.push_back(Load.getValue(1));
5574           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5575
5576           ArgOffset += PtrByteSize;
5577           continue;
5578         }
5579       }
5580
5581       if (GPR_idx == NumGPRs && Size < 8) {
5582         SDValue AddPtr = PtrOff;
5583         if (!isLittleEndian) {
5584           SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
5585                                           PtrOff.getValueType());
5586           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5587         }
5588         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5589                                                           CallSeqStart,
5590                                                           Flags, DAG, dl);
5591         ArgOffset += PtrByteSize;
5592         continue;
5593       }
5594       // Copy entire object into memory.  There are cases where gcc-generated
5595       // code assumes it is there, even if it could be put entirely into
5596       // registers.  (This is not what the doc says.)
5597
5598       // FIXME: The above statement is likely due to a misunderstanding of the
5599       // documents.  All arguments must be copied into the parameter area BY
5600       // THE CALLEE in the event that the callee takes the address of any
5601       // formal argument.  That has not yet been implemented.  However, it is
5602       // reasonable to use the stack area as a staging area for the register
5603       // load.
5604
5605       // Skip this for small aggregates, as we will use the same slot for a
5606       // right-justified copy, below.
5607       if (Size >= 8)
5608         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
5609                                                           CallSeqStart,
5610                                                           Flags, DAG, dl);
5611
5612       // When a register is available, pass a small aggregate right-justified.
5613       if (Size < 8 && GPR_idx != NumGPRs) {
5614         // The easiest way to get this right-justified in a register
5615         // is to copy the structure into the rightmost portion of a
5616         // local variable slot, then load the whole slot into the
5617         // register.
5618         // FIXME: The memcpy seems to produce pretty awful code for
5619         // small aggregates, particularly for packed ones.
5620         // FIXME: It would be preferable to use the slot in the
5621         // parameter save area instead of a new local variable.
5622         SDValue AddPtr = PtrOff;
5623         if (!isLittleEndian) {
5624           SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
5625           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5626         }
5627         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5628                                                           CallSeqStart,
5629                                                           Flags, DAG, dl);
5630
5631         // Load the slot into the register.
5632         SDValue Load =
5633             DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
5634         MemOpChains.push_back(Load.getValue(1));
5635         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5636
5637         // Done with this argument.
5638         ArgOffset += PtrByteSize;
5639         continue;
5640       }
5641
5642       // For aggregates larger than PtrByteSize, copy the pieces of the
5643       // object that fit into registers from the parameter save area.
5644       for (unsigned j=0; j<Size; j+=PtrByteSize) {
5645         SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
5646         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
5647         if (GPR_idx != NumGPRs) {
5648           SDValue Load =
5649               DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
5650           MemOpChains.push_back(Load.getValue(1));
5651           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5652           ArgOffset += PtrByteSize;
5653         } else {
5654           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
5655           break;
5656         }
5657       }
5658       continue;
5659     }
5660
5661     switch (Arg.getSimpleValueType().SimpleTy) {
5662     default: llvm_unreachable("Unexpected ValueType for argument!");
5663     case MVT::i1:
5664     case MVT::i32:
5665     case MVT::i64:
5666       if (Flags.isNest()) {
5667         // The 'nest' parameter, if any, is passed in R11.
5668         RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
5669         hasNest = true;
5670         break;
5671       }
5672
5673       // These can be scalar arguments or elements of an integer array type
5674       // passed directly.  Clang may use those instead of "byval" aggregate
5675       // types to avoid forcing arguments to memory unnecessarily.
5676       if (GPR_idx != NumGPRs) {
5677         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
5678       } else {
5679         if (CallConv == CallingConv::Fast)
5680           ComputePtrOff();
5681
5682         assert(HasParameterArea &&
5683                "Parameter area must exist to pass an argument in memory.");
5684         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5685                          true, isTailCall, false, MemOpChains,
5686                          TailCallArguments, dl);
5687         if (CallConv == CallingConv::Fast)
5688           ArgOffset += PtrByteSize;
5689       }
5690       if (CallConv != CallingConv::Fast)
5691         ArgOffset += PtrByteSize;
5692       break;
5693     case MVT::f32:
5694     case MVT::f64: {
5695       // These can be scalar arguments or elements of a float array type
5696       // passed directly.  The latter are used to implement ELFv2 homogenous
5697       // float aggregates.
5698
5699       // Named arguments go into FPRs first, and once they overflow, the
5700       // remaining arguments go into GPRs and then the parameter save area.
5701       // Unnamed arguments for vararg functions always go to GPRs and
5702       // then the parameter save area.  For now, put all arguments to vararg
5703       // routines always in both locations (FPR *and* GPR or stack slot).
5704       bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
5705       bool NeededLoad = false;
5706
5707       // First load the argument into the next available FPR.
5708       if (FPR_idx != NumFPRs)
5709         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
5710
5711       // Next, load the argument into GPR or stack slot if needed.
5712       if (!NeedGPROrStack)
5713         ;
5714       else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
5715         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
5716         // once we support fp <-> gpr moves.
5717
5718         // In the non-vararg case, this can only ever happen in the
5719         // presence of f32 array types, since otherwise we never run
5720         // out of FPRs before running out of GPRs.
5721         SDValue ArgVal;
5722
5723         // Double values are always passed in a single GPR.
5724         if (Arg.getValueType() != MVT::f32) {
5725           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
5726
5727         // Non-array float values are extended and passed in a GPR.
5728         } else if (!Flags.isInConsecutiveRegs()) {
5729           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5730           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
5731
5732         // If we have an array of floats, we collect every odd element
5733         // together with its predecessor into one GPR.
5734         } else if (ArgOffset % PtrByteSize != 0) {
5735           SDValue Lo, Hi;
5736           Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
5737           Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5738           if (!isLittleEndian)
5739             std::swap(Lo, Hi);
5740           ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
5741
5742         // The final element, if even, goes into the first half of a GPR.
5743         } else if (Flags.isInConsecutiveRegsLast()) {
5744           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5745           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
5746           if (!isLittleEndian)
5747             ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
5748                                  DAG.getConstant(32, dl, MVT::i32));
5749
5750         // Non-final even elements are skipped; they will be handled
5751         // together the with subsequent argument on the next go-around.
5752         } else
5753           ArgVal = SDValue();
5754
5755         if (ArgVal.getNode())
5756           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
5757       } else {
5758         if (CallConv == CallingConv::Fast)
5759           ComputePtrOff();
5760
5761         // Single-precision floating-point values are mapped to the
5762         // second (rightmost) word of the stack doubleword.
5763         if (Arg.getValueType() == MVT::f32 &&
5764             !isLittleEndian && !Flags.isInConsecutiveRegs()) {
5765           SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
5766           PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
5767         }
5768
5769         assert(HasParameterArea &&
5770                "Parameter area must exist to pass an argument in memory.");
5771         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5772                          true, isTailCall, false, MemOpChains,
5773                          TailCallArguments, dl);
5774
5775         NeededLoad = true;
5776       }
5777       // When passing an array of floats, the array occupies consecutive
5778       // space in the argument area; only round up to the next doubleword
5779       // at the end of the array.  Otherwise, each float takes 8 bytes.
5780       if (CallConv != CallingConv::Fast || NeededLoad) {
5781         ArgOffset += (Arg.getValueType() == MVT::f32 &&
5782                       Flags.isInConsecutiveRegs()) ? 4 : 8;
5783         if (Flags.isInConsecutiveRegsLast())
5784           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
5785       }
5786       break;
5787     }
5788     case MVT::v4f32:
5789     case MVT::v4i32:
5790     case MVT::v8i16:
5791     case MVT::v16i8:
5792     case MVT::v2f64:
5793     case MVT::v2i64:
5794     case MVT::v1i128:
5795       if (!Subtarget.hasQPX()) {
5796       // These can be scalar arguments or elements of a vector array type
5797       // passed directly.  The latter are used to implement ELFv2 homogenous
5798       // vector aggregates.
5799
5800       // For a varargs call, named arguments go into VRs or on the stack as
5801       // usual; unnamed arguments always go to the stack or the corresponding
5802       // GPRs when within range.  For now, we always put the value in both
5803       // locations (or even all three).
5804       if (isVarArg) {
5805         assert(HasParameterArea &&
5806                "Parameter area must exist if we have a varargs call.");
5807         // We could elide this store in the case where the object fits
5808         // entirely in R registers.  Maybe later.
5809         SDValue Store =
5810             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
5811         MemOpChains.push_back(Store);
5812         if (VR_idx != NumVRs) {
5813           SDValue Load =
5814               DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
5815           MemOpChains.push_back(Load.getValue(1));
5816           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
5817         }
5818         ArgOffset += 16;
5819         for (unsigned i=0; i<16; i+=PtrByteSize) {
5820           if (GPR_idx == NumGPRs)
5821             break;
5822           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
5823                                    DAG.getConstant(i, dl, PtrVT));
5824           SDValue Load =
5825               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
5826           MemOpChains.push_back(Load.getValue(1));
5827           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5828         }
5829         break;
5830       }
5831
5832       // Non-varargs Altivec params go into VRs or on the stack.
5833       if (VR_idx != NumVRs) {
5834         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
5835       } else {
5836         if (CallConv == CallingConv::Fast)
5837           ComputePtrOff();
5838
5839         assert(HasParameterArea &&
5840                "Parameter area must exist to pass an argument in memory.");
5841         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5842                          true, isTailCall, true, MemOpChains,
5843                          TailCallArguments, dl);
5844         if (CallConv == CallingConv::Fast)
5845           ArgOffset += 16;
5846       }
5847
5848       if (CallConv != CallingConv::Fast)
5849         ArgOffset += 16;
5850       break;
5851       } // not QPX
5852
5853       assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
5854              "Invalid QPX parameter type");
5855
5856       /* fall through */
5857     case MVT::v4f64:
5858     case MVT::v4i1: {
5859       bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
5860       if (isVarArg) {
5861         assert(HasParameterArea &&
5862                "Parameter area must exist if we have a varargs call.");
5863         // We could elide this store in the case where the object fits
5864         // entirely in R registers.  Maybe later.
5865         SDValue Store =
5866             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
5867         MemOpChains.push_back(Store);
5868         if (QFPR_idx != NumQFPRs) {
5869           SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl, Store,
5870                                      PtrOff, MachinePointerInfo());
5871           MemOpChains.push_back(Load.getValue(1));
5872           RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
5873         }
5874         ArgOffset += (IsF32 ? 16 : 32);
5875         for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
5876           if (GPR_idx == NumGPRs)
5877             break;
5878           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
5879                                    DAG.getConstant(i, dl, PtrVT));
5880           SDValue Load =
5881               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
5882           MemOpChains.push_back(Load.getValue(1));
5883           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5884         }
5885         break;
5886       }
5887
5888       // Non-varargs QPX params go into registers or on the stack.
5889       if (QFPR_idx != NumQFPRs) {
5890         RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
5891       } else {
5892         if (CallConv == CallingConv::Fast)
5893           ComputePtrOff();
5894
5895         assert(HasParameterArea &&
5896                "Parameter area must exist to pass an argument in memory.");
5897         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5898                          true, isTailCall, true, MemOpChains,
5899                          TailCallArguments, dl);
5900         if (CallConv == CallingConv::Fast)
5901           ArgOffset += (IsF32 ? 16 : 32);
5902       }
5903
5904       if (CallConv != CallingConv::Fast)
5905         ArgOffset += (IsF32 ? 16 : 32);
5906       break;
5907       }
5908     }
5909   }
5910
5911   assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
5912          "mismatch in size of parameter area");
5913   (void)NumBytesActuallyUsed;
5914
5915   if (!MemOpChains.empty())
5916     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5917
5918   // Check if this is an indirect call (MTCTR/BCTRL).
5919   // See PrepareCall() for more information about calls through function
5920   // pointers in the 64-bit SVR4 ABI.
5921   if (!isTailCall && !isPatchPoint &&
5922       !isFunctionGlobalAddress(Callee) &&
5923       !isa<ExternalSymbolSDNode>(Callee)) {
5924     // Load r2 into a virtual register and store it to the TOC save area.
5925     setUsesTOCBasePtr(DAG);
5926     SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
5927     // TOC save area offset.
5928     unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5929     SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5930     SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5931     Chain = DAG.getStore(
5932         Val.getValue(1), dl, Val, AddPtr,
5933         MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
5934     // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
5935     // This does not mean the MTCTR instruction must use R12; it's easier
5936     // to model this as an extra parameter, so do that.
5937     if (isELFv2ABI && !isPatchPoint)
5938       RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
5939   }
5940
5941   // Build a sequence of copy-to-reg nodes chained together with token chain
5942   // and flag operands which copy the outgoing args into the appropriate regs.
5943   SDValue InFlag;
5944   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5945     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5946                              RegsToPass[i].second, InFlag);
5947     InFlag = Chain.getValue(1);
5948   }
5949
5950   if (isTailCall && !IsSibCall)
5951     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5952                     TailCallArguments);
5953
5954   return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, hasNest,
5955                     DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee,
5956                     SPDiff, NumBytes, Ins, InVals, CS);
5957 }
5958
5959 SDValue PPCTargetLowering::LowerCall_Darwin(
5960     SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5961     bool isTailCall, bool isPatchPoint,
5962     const SmallVectorImpl<ISD::OutputArg> &Outs,
5963     const SmallVectorImpl<SDValue> &OutVals,
5964     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5965     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5966     ImmutableCallSite *CS) const {
5967   unsigned NumOps = Outs.size();
5968
5969   EVT PtrVT = getPointerTy(DAG.getDataLayout());
5970   bool isPPC64 = PtrVT == MVT::i64;
5971   unsigned PtrByteSize = isPPC64 ? 8 : 4;
5972
5973   MachineFunction &MF = DAG.getMachineFunction();
5974
5975   // Mark this function as potentially containing a function that contains a
5976   // tail call. As a consequence the frame pointer will be used for dynamicalloc
5977   // and restoring the callers stack pointer in this functions epilog. This is
5978   // done because by tail calling the called function might overwrite the value
5979   // in this function's (MF) stack pointer stack slot 0(SP).
5980   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5981       CallConv == CallingConv::Fast)
5982     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5983
5984   // Count how many bytes are to be pushed on the stack, including the linkage
5985   // area, and parameter passing area.  We start with 24/48 bytes, which is
5986   // prereserved space for [SP][CR][LR][3 x unused].
5987   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5988   unsigned NumBytes = LinkageSize;
5989
5990   // Add up all the space actually used.
5991   // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
5992   // they all go in registers, but we must reserve stack space for them for
5993   // possible use by the caller.  In varargs or 64-bit calls, parameters are
5994   // assigned stack space in order, with padding so Altivec parameters are
5995   // 16-byte aligned.
5996   unsigned nAltivecParamsAtEnd = 0;
5997   for (unsigned i = 0; i != NumOps; ++i) {
5998     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5999     EVT ArgVT = Outs[i].VT;
6000     // Varargs Altivec parameters are padded to a 16 byte boundary.
6001     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
6002         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
6003         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
6004       if (!isVarArg && !isPPC64) {
6005         // Non-varargs Altivec parameters go after all the non-Altivec
6006         // parameters; handle those later so we know how much padding we need.
6007         nAltivecParamsAtEnd++;
6008         continue;
6009       }
6010       // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
6011       NumBytes = ((NumBytes+15)/16)*16;
6012     }
6013     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6014   }
6015
6016   // Allow for Altivec parameters at the end, if needed.
6017   if (nAltivecParamsAtEnd) {
6018     NumBytes = ((NumBytes+15)/16)*16;
6019     NumBytes += 16*nAltivecParamsAtEnd;
6020   }
6021
6022   // The prolog code of the callee may store up to 8 GPR argument registers to
6023   // the stack, allowing va_start to index over them in memory if its varargs.
6024   // Because we cannot tell if this is needed on the caller side, we have to
6025   // conservatively assume that it is needed.  As such, make sure we have at
6026   // least enough stack space for the caller to store the 8 GPRs.
6027   NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6028
6029   // Tail call needs the stack to be aligned.
6030   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6031       CallConv == CallingConv::Fast)
6032     NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6033
6034   // Calculate by how many bytes the stack has to be adjusted in case of tail
6035   // call optimization.
6036   int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
6037
6038   // To protect arguments on the stack from being clobbered in a tail call,
6039   // force all the loads to happen before doing any other lowering.
6040   if (isTailCall)
6041     Chain = DAG.getStackArgumentTokenFactor(Chain);
6042
6043   // Adjust the stack pointer for the new arguments...
6044   // These operations are automatically eliminated by the prolog/epilog pass
6045   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6046   SDValue CallSeqStart = Chain;
6047
6048   // Load the return address and frame pointer so it can be move somewhere else
6049   // later.
6050   SDValue LROp, FPOp;
6051   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6052
6053   // Set up a copy of the stack pointer for use loading and storing any
6054   // arguments that may not fit in the registers available for argument
6055   // passing.
6056   SDValue StackPtr;
6057   if (isPPC64)
6058     StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6059   else
6060     StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6061
6062   // Figure out which arguments are going to go in registers, and which in
6063   // memory.  Also, if this is a vararg function, floating point operations
6064   // must be stored to our stack, and loaded into integer regs as well, if
6065   // any integer regs are available for argument passing.
6066   unsigned ArgOffset = LinkageSize;
6067   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6068
6069   static const MCPhysReg GPR_32[] = {           // 32-bit registers.
6070     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6071     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
6072   };
6073   static const MCPhysReg GPR_64[] = {           // 64-bit registers.
6074     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6075     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6076   };
6077   static const MCPhysReg VR[] = {
6078     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6079     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6080   };
6081   const unsigned NumGPRs = array_lengthof(GPR_32);
6082   const unsigned NumFPRs = 13;
6083   const unsigned NumVRs  = array_lengthof(VR);
6084
6085   const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
6086
6087   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6088   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6089
6090   SmallVector<SDValue, 8> MemOpChains;
6091   for (unsigned i = 0; i != NumOps; ++i) {
6092     SDValue Arg = OutVals[i];
6093     ISD::ArgFlagsTy Flags = Outs[i].Flags;
6094
6095     // PtrOff will be used to store the current argument to the stack if a
6096     // register cannot be found for it.
6097     SDValue PtrOff;
6098
6099     PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6100
6101     PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6102
6103     // On PPC64, promote integers to 64-bit values.
6104     if (isPPC64 && Arg.getValueType() == MVT::i32) {
6105       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6106       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6107       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6108     }
6109
6110     // FIXME memcpy is used way more than necessary.  Correctness first.
6111     // Note: "by value" is code for passing a structure by value, not
6112     // basic types.
6113     if (Flags.isByVal()) {
6114       unsigned Size = Flags.getByValSize();
6115       // Very small objects are passed right-justified.  Everything else is
6116       // passed left-justified.
6117       if (Size==1 || Size==2) {
6118         EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
6119         if (GPR_idx != NumGPRs) {
6120           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6121                                         MachinePointerInfo(), VT);
6122           MemOpChains.push_back(Load.getValue(1));
6123           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6124
6125           ArgOffset += PtrByteSize;
6126         } else {
6127           SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6128                                           PtrOff.getValueType());
6129           SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6130           Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6131                                                             CallSeqStart,
6132                                                             Flags, DAG, dl);
6133           ArgOffset += PtrByteSize;
6134         }
6135         continue;
6136       }
6137       // Copy entire object into memory.  There are cases where gcc-generated
6138       // code assumes it is there, even if it could be put entirely into
6139       // registers.  (This is not what the doc says.)
6140       Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6141                                                         CallSeqStart,
6142                                                         Flags, DAG, dl);
6143
6144       // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
6145       // copy the pieces of the object that fit into registers from the
6146       // parameter save area.
6147       for (unsigned j=0; j<Size; j+=PtrByteSize) {
6148         SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6149         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6150         if (GPR_idx != NumGPRs) {
6151           SDValue Load =
6152               DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6153           MemOpChains.push_back(Load.getValue(1));
6154           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6155           ArgOffset += PtrByteSize;
6156         } else {
6157           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6158           break;
6159         }
6160       }
6161       continue;
6162     }
6163
6164     switch (Arg.getSimpleValueType().SimpleTy) {
6165     default: llvm_unreachable("Unexpected ValueType for argument!");
6166     case MVT::i1:
6167     case MVT::i32:
6168     case MVT::i64:
6169       if (GPR_idx != NumGPRs) {
6170         if (Arg.getValueType() == MVT::i1)
6171           Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
6172
6173         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6174       } else {
6175         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6176                          isPPC64, isTailCall, false, MemOpChains,
6177                          TailCallArguments, dl);
6178       }
6179       ArgOffset += PtrByteSize;
6180       break;
6181     case MVT::f32:
6182     case MVT::f64:
6183       if (FPR_idx != NumFPRs) {
6184         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6185
6186         if (isVarArg) {
6187           SDValue Store =
6188               DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6189           MemOpChains.push_back(Store);
6190
6191           // Float varargs are always shadowed in available integer registers
6192           if (GPR_idx != NumGPRs) {
6193             SDValue Load =
6194                 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
6195             MemOpChains.push_back(Load.getValue(1));
6196             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6197           }
6198           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
6199             SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6200             PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6201             SDValue Load =
6202                 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
6203             MemOpChains.push_back(Load.getValue(1));
6204             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6205           }
6206         } else {
6207           // If we have any FPRs remaining, we may also have GPRs remaining.
6208           // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
6209           // GPRs.
6210           if (GPR_idx != NumGPRs)
6211             ++GPR_idx;
6212           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
6213               !isPPC64)  // PPC64 has 64-bit GPR's obviously :)
6214             ++GPR_idx;
6215         }
6216       } else
6217         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6218                          isPPC64, isTailCall, false, MemOpChains,
6219                          TailCallArguments, dl);
6220       if (isPPC64)
6221         ArgOffset += 8;
6222       else
6223         ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
6224       break;
6225     case MVT::v4f32:
6226     case MVT::v4i32:
6227     case MVT::v8i16:
6228     case MVT::v16i8:
6229       if (isVarArg) {
6230         // These go aligned on the stack, or in the corresponding R registers
6231         // when within range.  The Darwin PPC ABI doc claims they also go in
6232         // V registers; in fact gcc does this only for arguments that are
6233         // prototyped, not for those that match the ...  We do it for all
6234         // arguments, seems to work.
6235         while (ArgOffset % 16 !=0) {
6236           ArgOffset += PtrByteSize;
6237           if (GPR_idx != NumGPRs)
6238             GPR_idx++;
6239         }
6240         // We could elide this store in the case where the object fits
6241         // entirely in R registers.  Maybe later.
6242         PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
6243                              DAG.getConstant(ArgOffset, dl, PtrVT));
6244         SDValue Store =
6245             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6246         MemOpChains.push_back(Store);
6247         if (VR_idx != NumVRs) {
6248           SDValue Load =
6249               DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6250           MemOpChains.push_back(Load.getValue(1));
6251           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6252         }
6253         ArgOffset += 16;
6254         for (unsigned i=0; i<16; i+=PtrByteSize) {
6255           if (GPR_idx == NumGPRs)
6256             break;
6257           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6258                                    DAG.getConstant(i, dl, PtrVT));
6259           SDValue Load =
6260               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6261           MemOpChains.push_back(Load.getValue(1));
6262           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6263         }
6264         break;
6265       }
6266
6267       // Non-varargs Altivec params generally go in registers, but have
6268       // stack space allocated at the end.
6269       if (VR_idx != NumVRs) {
6270         // Doesn't have GPR space allocated.
6271         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6272       } else if (nAltivecParamsAtEnd==0) {
6273         // We are emitting Altivec params in order.
6274         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6275                          isPPC64, isTailCall, true, MemOpChains,
6276                          TailCallArguments, dl);
6277         ArgOffset += 16;
6278       }
6279       break;
6280     }
6281   }
6282   // If all Altivec parameters fit in registers, as they usually do,
6283   // they get stack space following the non-Altivec parameters.  We
6284   // don't track this here because nobody below needs it.
6285   // If there are more Altivec parameters than fit in registers emit
6286   // the stores here.
6287   if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
6288     unsigned j = 0;
6289     // Offset is aligned; skip 1st 12 params which go in V registers.
6290     ArgOffset = ((ArgOffset+15)/16)*16;
6291     ArgOffset += 12*16;
6292     for (unsigned i = 0; i != NumOps; ++i) {
6293       SDValue Arg = OutVals[i];
6294       EVT ArgType = Outs[i].VT;
6295       if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
6296           ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
6297         if (++j > NumVRs) {
6298           SDValue PtrOff;
6299           // We are emitting Altivec params in order.
6300           LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6301                            isPPC64, isTailCall, true, MemOpChains,
6302                            TailCallArguments, dl);
6303           ArgOffset += 16;
6304         }
6305       }
6306     }
6307   }
6308
6309   if (!MemOpChains.empty())
6310     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6311
6312   // On Darwin, R12 must contain the address of an indirect callee.  This does
6313   // not mean the MTCTR instruction must use R12; it's easier to model this as
6314   // an extra parameter, so do that.
6315   if (!isTailCall &&
6316       !isFunctionGlobalAddress(Callee) &&
6317       !isa<ExternalSymbolSDNode>(Callee) &&
6318       !isBLACompatibleAddress(Callee, DAG))
6319     RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
6320                                                    PPC::R12), Callee));
6321
6322   // Build a sequence of copy-to-reg nodes chained together with token chain
6323   // and flag operands which copy the outgoing args into the appropriate regs.
6324   SDValue InFlag;
6325   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6326     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6327                              RegsToPass[i].second, InFlag);
6328     InFlag = Chain.getValue(1);
6329   }
6330
6331   if (isTailCall)
6332     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6333                     TailCallArguments);
6334
6335   return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
6336                     /* unused except on PPC64 ELFv1 */ false, DAG,
6337                     RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
6338                     NumBytes, Ins, InVals, CS);
6339 }
6340
6341 bool
6342 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
6343                                   MachineFunction &MF, bool isVarArg,
6344                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
6345                                   LLVMContext &Context) const {
6346   SmallVector<CCValAssign, 16> RVLocs;
6347   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
6348   return CCInfo.CheckReturn(Outs, RetCC_PPC);
6349 }
6350
6351 SDValue
6352 PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
6353                                bool isVarArg,
6354                                const SmallVectorImpl<ISD::OutputArg> &Outs,
6355                                const SmallVectorImpl<SDValue> &OutVals,
6356                                const SDLoc &dl, SelectionDAG &DAG) const {
6357   SmallVector<CCValAssign, 16> RVLocs;
6358   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
6359                  *DAG.getContext());
6360   CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
6361
6362   SDValue Flag;
6363   SmallVector<SDValue, 4> RetOps(1, Chain);
6364
6365   // Copy the result values into the output registers.
6366   for (unsigned i = 0; i != RVLocs.size(); ++i) {
6367     CCValAssign &VA = RVLocs[i];
6368     assert(VA.isRegLoc() && "Can only return in registers!");
6369
6370     SDValue Arg = OutVals[i];
6371
6372     switch (VA.getLocInfo()) {
6373     default: llvm_unreachable("Unknown loc info!");
6374     case CCValAssign::Full: break;
6375     case CCValAssign::AExt:
6376       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
6377       break;
6378     case CCValAssign::ZExt:
6379       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
6380       break;
6381     case CCValAssign::SExt:
6382       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
6383       break;
6384     }
6385
6386     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
6387     Flag = Chain.getValue(1);
6388     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
6389   }
6390
6391   const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
6392   const MCPhysReg *I =
6393     TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
6394   if (I) {
6395     for (; *I; ++I) {
6396
6397       if (PPC::G8RCRegClass.contains(*I))
6398         RetOps.push_back(DAG.getRegister(*I, MVT::i64));
6399       else if (PPC::F8RCRegClass.contains(*I))
6400         RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
6401       else if (PPC::CRRCRegClass.contains(*I))
6402         RetOps.push_back(DAG.getRegister(*I, MVT::i1));
6403       else if (PPC::VRRCRegClass.contains(*I))
6404         RetOps.push_back(DAG.getRegister(*I, MVT::Other));
6405       else
6406         llvm_unreachable("Unexpected register class in CSRsViaCopy!");
6407     }
6408   }
6409
6410   RetOps[0] = Chain;  // Update chain.
6411
6412   // Add the flag if we have it.
6413   if (Flag.getNode())
6414     RetOps.push_back(Flag);
6415
6416   return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
6417 }
6418
6419 SDValue
6420 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
6421                                                 SelectionDAG &DAG) const {
6422   SDLoc dl(Op);
6423
6424   // Get the corect type for integers.
6425   EVT IntVT = Op.getValueType();
6426
6427   // Get the inputs.
6428   SDValue Chain = Op.getOperand(0);
6429   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
6430   // Build a DYNAREAOFFSET node.
6431   SDValue Ops[2] = {Chain, FPSIdx};
6432   SDVTList VTs = DAG.getVTList(IntVT);
6433   return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
6434 }
6435
6436 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
6437                                              SelectionDAG &DAG) const {
6438   // When we pop the dynamic allocation we need to restore the SP link.
6439   SDLoc dl(Op);
6440
6441   // Get the corect type for pointers.
6442   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6443
6444   // Construct the stack pointer operand.
6445   bool isPPC64 = Subtarget.isPPC64();
6446   unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
6447   SDValue StackPtr = DAG.getRegister(SP, PtrVT);
6448
6449   // Get the operands for the STACKRESTORE.
6450   SDValue Chain = Op.getOperand(0);
6451   SDValue SaveSP = Op.getOperand(1);
6452
6453   // Load the old link SP.
6454   SDValue LoadLinkSP =
6455       DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
6456
6457   // Restore the stack pointer.
6458   Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
6459
6460   // Store the old link SP.
6461   return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
6462 }
6463
6464 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
6465   MachineFunction &MF = DAG.getMachineFunction();
6466   bool isPPC64 = Subtarget.isPPC64();
6467   EVT PtrVT = getPointerTy(MF.getDataLayout());
6468
6469   // Get current frame pointer save index.  The users of this index will be
6470   // primarily DYNALLOC instructions.
6471   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
6472   int RASI = FI->getReturnAddrSaveIndex();
6473
6474   // If the frame pointer save index hasn't been defined yet.
6475   if (!RASI) {
6476     // Find out what the fix offset of the frame pointer save area.
6477     int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
6478     // Allocate the frame index for frame pointer save area.
6479     RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
6480     // Save the result.
6481     FI->setReturnAddrSaveIndex(RASI);
6482   }
6483   return DAG.getFrameIndex(RASI, PtrVT);
6484 }
6485
6486 SDValue
6487 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
6488   MachineFunction &MF = DAG.getMachineFunction();
6489   bool isPPC64 = Subtarget.isPPC64();
6490   EVT PtrVT = getPointerTy(MF.getDataLayout());
6491
6492   // Get current frame pointer save index.  The users of this index will be
6493   // primarily DYNALLOC instructions.
6494   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
6495   int FPSI = FI->getFramePointerSaveIndex();
6496
6497   // If the frame pointer save index hasn't been defined yet.
6498   if (!FPSI) {
6499     // Find out what the fix offset of the frame pointer save area.
6500     int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
6501     // Allocate the frame index for frame pointer save area.
6502     FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
6503     // Save the result.
6504     FI->setFramePointerSaveIndex(FPSI);
6505   }
6506   return DAG.getFrameIndex(FPSI, PtrVT);
6507 }
6508
6509 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
6510                                                    SelectionDAG &DAG) const {
6511   // Get the inputs.
6512   SDValue Chain = Op.getOperand(0);
6513   SDValue Size  = Op.getOperand(1);
6514   SDLoc dl(Op);
6515
6516   // Get the corect type for pointers.
6517   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6518   // Negate the size.
6519   SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
6520                                 DAG.getConstant(0, dl, PtrVT), Size);
6521   // Construct a node for the frame pointer save index.
6522   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
6523   // Build a DYNALLOC node.
6524   SDValue Ops[3] = { Chain, NegSize, FPSIdx };
6525   SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
6526   return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
6527 }
6528
6529 SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
6530                                                      SelectionDAG &DAG) const {
6531   MachineFunction &MF = DAG.getMachineFunction();
6532
6533   bool isPPC64 = Subtarget.isPPC64();
6534   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6535
6536   int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
6537   return DAG.getFrameIndex(FI, PtrVT);
6538 }
6539
6540 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
6541                                                SelectionDAG &DAG) const {
6542   SDLoc DL(Op);
6543   return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
6544                      DAG.getVTList(MVT::i32, MVT::Other),
6545                      Op.getOperand(0), Op.getOperand(1));
6546 }
6547
6548 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
6549                                                 SelectionDAG &DAG) const {
6550   SDLoc DL(Op);
6551   return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
6552                      Op.getOperand(0), Op.getOperand(1));
6553 }
6554
6555 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
6556   if (Op.getValueType().isVector())
6557     return LowerVectorLoad(Op, DAG);
6558
6559   assert(Op.getValueType() == MVT::i1 &&
6560          "Custom lowering only for i1 loads");
6561
6562   // First, load 8 bits into 32 bits, then truncate to 1 bit.
6563
6564   SDLoc dl(Op);
6565   LoadSDNode *LD = cast<LoadSDNode>(Op);
6566
6567   SDValue Chain = LD->getChain();
6568   SDValue BasePtr = LD->getBasePtr();
6569   MachineMemOperand *MMO = LD->getMemOperand();
6570
6571   SDValue NewLD =
6572       DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
6573                      BasePtr, MVT::i8, MMO);
6574   SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
6575
6576   SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
6577   return DAG.getMergeValues(Ops, dl);
6578 }
6579
6580 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
6581   if (Op.getOperand(1).getValueType().isVector())
6582     return LowerVectorStore(Op, DAG);
6583
6584   assert(Op.getOperand(1).getValueType() == MVT::i1 &&
6585          "Custom lowering only for i1 stores");
6586
6587   // First, zero extend to 32 bits, then use a truncating store to 8 bits.
6588
6589   SDLoc dl(Op);
6590   StoreSDNode *ST = cast<StoreSDNode>(Op);
6591
6592   SDValue Chain = ST->getChain();
6593   SDValue BasePtr = ST->getBasePtr();
6594   SDValue Value = ST->getValue();
6595   MachineMemOperand *MMO = ST->getMemOperand();
6596
6597   Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
6598                       Value);
6599   return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
6600 }
6601
6602 // FIXME: Remove this once the ANDI glue bug is fixed:
6603 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
6604   assert(Op.getValueType() == MVT::i1 &&
6605          "Custom lowering only for i1 results");
6606
6607   SDLoc DL(Op);
6608   return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
6609                      Op.getOperand(0));
6610 }
6611
6612 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
6613 /// possible.
6614 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
6615   // Not FP? Not a fsel.
6616   if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
6617       !Op.getOperand(2).getValueType().isFloatingPoint())
6618     return Op;
6619
6620   // We might be able to do better than this under some circumstances, but in
6621   // general, fsel-based lowering of select is a finite-math-only optimization.
6622   // For more information, see section F.3 of the 2.06 ISA specification.
6623   if (!DAG.getTarget().Options.NoInfsFPMath ||
6624       !DAG.getTarget().Options.NoNaNsFPMath)
6625     return Op;
6626   // TODO: Propagate flags from the select rather than global settings.
6627   SDNodeFlags Flags;
6628   Flags.setNoInfs(true);
6629   Flags.setNoNaNs(true);
6630
6631   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
6632
6633   EVT ResVT = Op.getValueType();
6634   EVT CmpVT = Op.getOperand(0).getValueType();
6635   SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
6636   SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
6637   SDLoc dl(Op);
6638
6639   // If the RHS of the comparison is a 0.0, we don't need to do the
6640   // subtraction at all.
6641   SDValue Sel1;
6642   if (isFloatingPointZero(RHS))
6643     switch (CC) {
6644     default: break;       // SETUO etc aren't handled by fsel.
6645     case ISD::SETNE:
6646       std::swap(TV, FV);
6647     case ISD::SETEQ:
6648       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
6649         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
6650       Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
6651       if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
6652         Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
6653       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
6654                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
6655     case ISD::SETULT:
6656     case ISD::SETLT:
6657       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
6658     case ISD::SETOGE:
6659     case ISD::SETGE:
6660       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
6661         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
6662       return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
6663     case ISD::SETUGT:
6664     case ISD::SETGT:
6665       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
6666     case ISD::SETOLE:
6667     case ISD::SETLE:
6668       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
6669         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
6670       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
6671                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
6672     }
6673
6674   SDValue Cmp;
6675   switch (CC) {
6676   default: break;       // SETUO etc aren't handled by fsel.
6677   case ISD::SETNE:
6678     std::swap(TV, FV);
6679   case ISD::SETEQ:
6680     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
6681     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
6682       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6683     Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
6684     if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
6685       Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
6686     return DAG.getNode(PPCISD::FSEL, dl, ResVT,
6687                        DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
6688   case ISD::SETULT:
6689   case ISD::SETLT:
6690     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
6691     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
6692       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6693     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
6694   case ISD::SETOGE:
6695   case ISD::SETGE:
6696     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
6697     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
6698       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6699     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
6700   case ISD::SETUGT:
6701   case ISD::SETGT:
6702     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
6703     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
6704       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6705     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
6706   case ISD::SETOLE:
6707   case ISD::SETLE:
6708     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
6709     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
6710       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6711     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
6712   }
6713   return Op;
6714 }
6715
6716 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
6717                                                SelectionDAG &DAG,
6718                                                const SDLoc &dl) const {
6719   assert(Op.getOperand(0).getValueType().isFloatingPoint());
6720   SDValue Src = Op.getOperand(0);
6721   if (Src.getValueType() == MVT::f32)
6722     Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
6723
6724   SDValue Tmp;
6725   switch (Op.getSimpleValueType().SimpleTy) {
6726   default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
6727   case MVT::i32:
6728     Tmp = DAG.getNode(
6729         Op.getOpcode() == ISD::FP_TO_SINT
6730             ? PPCISD::FCTIWZ
6731             : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
6732         dl, MVT::f64, Src);
6733     break;
6734   case MVT::i64:
6735     assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
6736            "i64 FP_TO_UINT is supported only with FPCVT");
6737     Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
6738                                                         PPCISD::FCTIDUZ,
6739                       dl, MVT::f64, Src);
6740     break;
6741   }
6742
6743   // Convert the FP value to an int value through memory.
6744   bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
6745     (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
6746   SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
6747   int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
6748   MachinePointerInfo MPI =
6749       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
6750
6751   // Emit a store to the stack slot.
6752   SDValue Chain;
6753   if (i32Stack) {
6754     MachineFunction &MF = DAG.getMachineFunction();
6755     MachineMemOperand *MMO =
6756       MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
6757     SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
6758     Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
6759               DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
6760   } else
6761     Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI);
6762
6763   // Result is a load from the stack slot.  If loading 4 bytes, make sure to
6764   // add in a bias on big endian.
6765   if (Op.getValueType() == MVT::i32 && !i32Stack) {
6766     FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
6767                         DAG.getConstant(4, dl, FIPtr.getValueType()));
6768     MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
6769   }
6770
6771   RLI.Chain = Chain;
6772   RLI.Ptr = FIPtr;
6773   RLI.MPI = MPI;
6774 }
6775
6776 /// \brief Custom lowers floating point to integer conversions to use
6777 /// the direct move instructions available in ISA 2.07 to avoid the
6778 /// need for load/store combinations.
6779 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
6780                                                     SelectionDAG &DAG,
6781                                                     const SDLoc &dl) const {
6782   assert(Op.getOperand(0).getValueType().isFloatingPoint());
6783   SDValue Src = Op.getOperand(0);
6784
6785   if (Src.getValueType() == MVT::f32)
6786     Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
6787
6788   SDValue Tmp;
6789   switch (Op.getSimpleValueType().SimpleTy) {
6790   default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
6791   case MVT::i32:
6792     Tmp = DAG.getNode(
6793         Op.getOpcode() == ISD::FP_TO_SINT
6794             ? PPCISD::FCTIWZ
6795             : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
6796         dl, MVT::f64, Src);
6797     Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
6798     break;
6799   case MVT::i64:
6800     assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
6801            "i64 FP_TO_UINT is supported only with FPCVT");
6802     Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
6803                                                         PPCISD::FCTIDUZ,
6804                       dl, MVT::f64, Src);
6805     Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
6806     break;
6807   }
6808   return Tmp;
6809 }
6810
6811 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
6812                                           const SDLoc &dl) const {
6813   if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
6814     return LowerFP_TO_INTDirectMove(Op, DAG, dl);
6815
6816   ReuseLoadInfo RLI;
6817   LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
6818
6819   return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
6820                      RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
6821 }
6822
6823 // We're trying to insert a regular store, S, and then a load, L. If the
6824 // incoming value, O, is a load, we might just be able to have our load use the
6825 // address used by O. However, we don't know if anything else will store to
6826 // that address before we can load from it. To prevent this situation, we need
6827 // to insert our load, L, into the chain as a peer of O. To do this, we give L
6828 // the same chain operand as O, we create a token factor from the chain results
6829 // of O and L, and we replace all uses of O's chain result with that token
6830 // factor (see spliceIntoChain below for this last part).
6831 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
6832                                             ReuseLoadInfo &RLI,
6833                                             SelectionDAG &DAG,
6834                                             ISD::LoadExtType ET) const {
6835   SDLoc dl(Op);
6836   if (ET == ISD::NON_EXTLOAD &&
6837       (Op.getOpcode() == ISD::FP_TO_UINT ||
6838        Op.getOpcode() == ISD::FP_TO_SINT) &&
6839       isOperationLegalOrCustom(Op.getOpcode(),
6840                                Op.getOperand(0).getValueType())) {
6841
6842     LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
6843     return true;
6844   }
6845
6846   LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
6847   if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
6848       LD->isNonTemporal())
6849     return false;
6850   if (LD->getMemoryVT() != MemVT)
6851     return false;
6852
6853   RLI.Ptr = LD->getBasePtr();
6854   if (LD->isIndexed() && !LD->getOffset().isUndef()) {
6855     assert(LD->getAddressingMode() == ISD::PRE_INC &&
6856            "Non-pre-inc AM on PPC?");
6857     RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
6858                           LD->getOffset());
6859   }
6860
6861   RLI.Chain = LD->getChain();
6862   RLI.MPI = LD->getPointerInfo();
6863   RLI.IsDereferenceable = LD->isDereferenceable();
6864   RLI.IsInvariant = LD->isInvariant();
6865   RLI.Alignment = LD->getAlignment();
6866   RLI.AAInfo = LD->getAAInfo();
6867   RLI.Ranges = LD->getRanges();
6868
6869   RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
6870   return true;
6871 }
6872
6873 // Given the head of the old chain, ResChain, insert a token factor containing
6874 // it and NewResChain, and make users of ResChain now be users of that token
6875 // factor.
6876 // TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
6877 void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
6878                                         SDValue NewResChain,
6879                                         SelectionDAG &DAG) const {
6880   if (!ResChain)
6881     return;
6882
6883   SDLoc dl(NewResChain);
6884
6885   SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
6886                            NewResChain, DAG.getUNDEF(MVT::Other));
6887   assert(TF.getNode() != NewResChain.getNode() &&
6888          "A new TF really is required here");
6889
6890   DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
6891   DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
6892 }
6893
6894 /// \brief Analyze profitability of direct move
6895 /// prefer float load to int load plus direct move
6896 /// when there is no integer use of int load
6897 bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
6898   SDNode *Origin = Op.getOperand(0).getNode();
6899   if (Origin->getOpcode() != ISD::LOAD)
6900     return true;
6901
6902   // If there is no LXSIBZX/LXSIHZX, like Power8,
6903   // prefer direct move if the memory size is 1 or 2 bytes.
6904   MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
6905   if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
6906     return true;
6907
6908   for (SDNode::use_iterator UI = Origin->use_begin(),
6909                             UE = Origin->use_end();
6910        UI != UE; ++UI) {
6911
6912     // Only look at the users of the loaded value.
6913     if (UI.getUse().get().getResNo() != 0)
6914       continue;
6915
6916     if (UI->getOpcode() != ISD::SINT_TO_FP &&
6917         UI->getOpcode() != ISD::UINT_TO_FP)
6918       return true;
6919   }
6920
6921   return false;
6922 }
6923
6924 /// \brief Custom lowers integer to floating point conversions to use
6925 /// the direct move instructions available in ISA 2.07 to avoid the
6926 /// need for load/store combinations.
6927 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
6928                                                     SelectionDAG &DAG,
6929                                                     const SDLoc &dl) const {
6930   assert((Op.getValueType() == MVT::f32 ||
6931           Op.getValueType() == MVT::f64) &&
6932          "Invalid floating point type as target of conversion");
6933   assert(Subtarget.hasFPCVT() &&
6934          "Int to FP conversions with direct moves require FPCVT");
6935   SDValue FP;
6936   SDValue Src = Op.getOperand(0);
6937   bool SinglePrec = Op.getValueType() == MVT::f32;
6938   bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
6939   bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
6940   unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
6941                              (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);
6942
6943   if (WordInt) {
6944     FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
6945                      dl, MVT::f64, Src);
6946     FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
6947   }
6948   else {
6949     FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
6950     FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
6951   }
6952
6953   return FP;
6954 }
6955
6956 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
6957                                           SelectionDAG &DAG) const {
6958   SDLoc dl(Op);
6959
6960   if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
6961     if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
6962       return SDValue();
6963
6964     SDValue Value = Op.getOperand(0);
6965     // The values are now known to be -1 (false) or 1 (true). To convert this
6966     // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
6967     // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
6968     Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
6969
6970     SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
6971
6972     Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
6973
6974     if (Op.getValueType() != MVT::v4f64)
6975       Value = DAG.getNode(ISD::FP_ROUND, dl,
6976                           Op.getValueType(), Value,
6977                           DAG.getIntPtrConstant(1, dl));
6978     return Value;
6979   }
6980
6981   // Don't handle ppc_fp128 here; let it be lowered to a libcall.
6982   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
6983     return SDValue();
6984
6985   if (Op.getOperand(0).getValueType() == MVT::i1)
6986     return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
6987                        DAG.getConstantFP(1.0, dl, Op.getValueType()),
6988                        DAG.getConstantFP(0.0, dl, Op.getValueType()));
6989
6990   // If we have direct moves, we can do all the conversion, skip the store/load
6991   // however, without FPCVT we can't do most conversions.
6992   if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
6993       Subtarget.isPPC64() && Subtarget.hasFPCVT())
6994     return LowerINT_TO_FPDirectMove(Op, DAG, dl);
6995
6996   assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
6997          "UINT_TO_FP is supported only with FPCVT");
6998
6999   // If we have FCFIDS, then use it when converting to single-precision.
7000   // Otherwise, convert to double-precision and then round.
7001   unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
7002                        ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
7003                                                             : PPCISD::FCFIDS)
7004                        : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
7005                                                             : PPCISD::FCFID);
7006   MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
7007                   ? MVT::f32
7008                   : MVT::f64;
7009
7010   if (Op.getOperand(0).getValueType() == MVT::i64) {
7011     SDValue SINT = Op.getOperand(0);
7012     // When converting to single-precision, we actually need to convert
7013     // to double-precision first and then round to single-precision.
7014     // To avoid double-rounding effects during that operation, we have
7015     // to prepare the input operand.  Bits that might be truncated when
7016     // converting to double-precision are replaced by a bit that won't
7017     // be lost at this stage, but is below the single-precision rounding
7018     // position.
7019     //
7020     // However, if -enable-unsafe-fp-math is in effect, accept double
7021     // rounding to avoid the extra overhead.
7022     if (Op.getValueType() == MVT::f32 &&
7023         !Subtarget.hasFPCVT() &&
7024         !DAG.getTarget().Options.UnsafeFPMath) {
7025
7026       // Twiddle input to make sure the low 11 bits are zero.  (If this
7027       // is the case, we are guaranteed the value will fit into the 53 bit
7028       // mantissa of an IEEE double-precision value without rounding.)
7029       // If any of those low 11 bits were not zero originally, make sure
7030       // bit 12 (value 2048) is set instead, so that the final rounding
7031       // to single-precision gets the correct result.
7032       SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
7033                                   SINT, DAG.getConstant(2047, dl, MVT::i64));
7034       Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
7035                           Round, DAG.getConstant(2047, dl, MVT::i64));
7036       Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
7037       Round = DAG.getNode(ISD::AND, dl, MVT::i64,
7038                           Round, DAG.getConstant(-2048, dl, MVT::i64));
7039
7040       // However, we cannot use that value unconditionally: if the magnitude
7041       // of the input value is small, the bit-twiddling we did above might
7042       // end up visibly changing the output.  Fortunately, in that case, we
7043       // don't need to twiddle bits since the original input will convert
7044       // exactly to double-precision floating-point already.  Therefore,
7045       // construct a conditional to use the original value if the top 11
7046       // bits are all sign-bit copies, and use the rounded value computed
7047       // above otherwise.
7048       SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
7049                                  SINT, DAG.getConstant(53, dl, MVT::i32));
7050       Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
7051                          Cond, DAG.getConstant(1, dl, MVT::i64));
7052       Cond = DAG.getSetCC(dl, MVT::i32,
7053                           Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
7054
7055       SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
7056     }
7057
7058     ReuseLoadInfo RLI;
7059     SDValue Bits;
7060
7061     MachineFunction &MF = DAG.getMachineFunction();
7062     if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
7063       Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
7064                          RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
7065       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
7066     } else if (Subtarget.hasLFIWAX() &&
7067                canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
7068       MachineMemOperand *MMO =
7069         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7070                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7071       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7072       Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
7073                                      DAG.getVTList(MVT::f64, MVT::Other),
7074                                      Ops, MVT::i32, MMO);
7075       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
7076     } else if (Subtarget.hasFPCVT() &&
7077                canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
7078       MachineMemOperand *MMO =
7079         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7080                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7081       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7082       Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
7083                                      DAG.getVTList(MVT::f64, MVT::Other),
7084                                      Ops, MVT::i32, MMO);
7085       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
7086     } else if (((Subtarget.hasLFIWAX() &&
7087                  SINT.getOpcode() == ISD::SIGN_EXTEND) ||
7088                 (Subtarget.hasFPCVT() &&
7089                  SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
7090                SINT.getOperand(0).getValueType() == MVT::i32) {
7091       MachineFrameInfo &MFI = MF.getFrameInfo();
7092       EVT PtrVT = getPointerTy(DAG.getDataLayout());
7093
7094       int FrameIdx = MFI.CreateStackObject(4, 4, false);
7095       SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7096
7097       SDValue Store =
7098           DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
7099                        MachinePointerInfo::getFixedStack(
7100                            DAG.getMachineFunction(), FrameIdx));
7101
7102       assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
7103              "Expected an i32 store");
7104
7105       RLI.Ptr = FIdx;
7106       RLI.Chain = Store;
7107       RLI.MPI =
7108           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
7109       RLI.Alignment = 4;
7110
7111       MachineMemOperand *MMO =
7112         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7113                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7114       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7115       Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
7116                                      PPCISD::LFIWZX : PPCISD::LFIWAX,
7117                                      dl, DAG.getVTList(MVT::f64, MVT::Other),
7118                                      Ops, MVT::i32, MMO);
7119     } else
7120       Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
7121
7122     SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
7123
7124     if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
7125       FP = DAG.getNode(ISD::FP_ROUND, dl,
7126                        MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
7127     return FP;
7128   }
7129
7130   assert(Op.getOperand(0).getValueType() == MVT::i32 &&
7131          "Unhandled INT_TO_FP type in custom expander!");
7132   // Since we only generate this in 64-bit mode, we can take advantage of
7133   // 64-bit registers.  In particular, sign extend the input value into the
7134   // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
7135   // then lfd it and fcfid it.
7136   MachineFunction &MF = DAG.getMachineFunction();
7137   MachineFrameInfo &MFI = MF.getFrameInfo();
7138   EVT PtrVT = getPointerTy(MF.getDataLayout());
7139
7140   SDValue Ld;
7141   if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
7142     ReuseLoadInfo RLI;
7143     bool ReusingLoad;
7144     if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
7145                                             DAG))) {
7146       int FrameIdx = MFI.CreateStackObject(4, 4, false);
7147       SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7148
7149       SDValue Store =
7150           DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
7151                        MachinePointerInfo::getFixedStack(
7152                            DAG.getMachineFunction(), FrameIdx));
7153
7154       assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
7155              "Expected an i32 store");
7156
7157       RLI.Ptr = FIdx;
7158       RLI.Chain = Store;
7159       RLI.MPI =
7160           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
7161       RLI.Alignment = 4;
7162     }
7163
7164     MachineMemOperand *MMO =
7165       MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7166                               RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7167     SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7168     Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
7169                                    PPCISD::LFIWZX : PPCISD::LFIWAX,
7170                                  dl, DAG.getVTList(MVT::f64, MVT::Other),
7171                                  Ops, MVT::i32, MMO);
7172     if (ReusingLoad)
7173       spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
7174   } else {
7175     assert(Subtarget.isPPC64() &&
7176            "i32->FP without LFIWAX supported only on PPC64");
7177
7178     int FrameIdx = MFI.CreateStackObject(8, 8, false);
7179     SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7180
7181     SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
7182                                 Op.getOperand(0));
7183
7184     // STD the extended value into the stack slot.
7185     SDValue Store = DAG.getStore(
7186         DAG.getEntryNode(), dl, Ext64, FIdx,
7187         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
7188
7189     // Load the value as a double.
7190     Ld = DAG.getLoad(
7191         MVT::f64, dl, Store, FIdx,
7192         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
7193   }
7194
7195   // FCFID it and return it.
7196   SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
7197   if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
7198     FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
7199                      DAG.getIntPtrConstant(0, dl));
7200   return FP;
7201 }
7202
7203 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
7204                                             SelectionDAG &DAG) const {
7205   SDLoc dl(Op);
7206   /*
7207    The rounding mode is in bits 30:31 of FPSR, and has the following
7208    settings:
7209      00 Round to nearest
7210      01 Round to 0
7211      10 Round to +inf
7212      11 Round to -inf
7213
7214   FLT_ROUNDS, on the other hand, expects the following:
7215     -1 Undefined
7216      0 Round to 0
7217      1 Round to nearest
7218      2 Round to +inf
7219      3 Round to -inf
7220
7221   To perform the conversion, we do:
7222     ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
7223   */
7224
7225   MachineFunction &MF = DAG.getMachineFunction();
7226   EVT VT = Op.getValueType();
7227   EVT PtrVT = getPointerTy(MF.getDataLayout());
7228
7229   // Save FP Control Word to register
7230   EVT NodeTys[] = {
7231     MVT::f64,    // return register
7232     MVT::Glue    // unused in this context
7233   };
7234   SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);
7235
7236   // Save FP register to stack slot
7237   int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false);
7238   SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
7239   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot,
7240                                MachinePointerInfo());
7241
7242   // Load FP Control Word from low 32 bits of stack slot.
7243   SDValue Four = DAG.getConstant(4, dl, PtrVT);
7244   SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
7245   SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo());
7246
7247   // Transform as necessary
7248   SDValue CWD1 =
7249     DAG.getNode(ISD::AND, dl, MVT::i32,
7250                 CWD, DAG.getConstant(3, dl, MVT::i32));
7251   SDValue CWD2 =
7252     DAG.getNode(ISD::SRL, dl, MVT::i32,
7253                 DAG.getNode(ISD::AND, dl, MVT::i32,
7254                             DAG.getNode(ISD::XOR, dl, MVT::i32,
7255                                         CWD, DAG.getConstant(3, dl, MVT::i32)),
7256                             DAG.getConstant(3, dl, MVT::i32)),
7257                 DAG.getConstant(1, dl, MVT::i32));
7258
7259   SDValue RetVal =
7260     DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
7261
7262   return DAG.getNode((VT.getSizeInBits() < 16 ?
7263                       ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
7264 }
7265
7266 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
7267   EVT VT = Op.getValueType();
7268   unsigned BitWidth = VT.getSizeInBits();
7269   SDLoc dl(Op);
7270   assert(Op.getNumOperands() == 3 &&
7271          VT == Op.getOperand(1).getValueType() &&
7272          "Unexpected SHL!");
7273
7274   // Expand into a bunch of logical ops.  Note that these ops
7275   // depend on the PPC behavior for oversized shift amounts.
7276   SDValue Lo = Op.getOperand(0);
7277   SDValue Hi = Op.getOperand(1);
7278   SDValue Amt = Op.getOperand(2);
7279   EVT AmtVT = Amt.getValueType();
7280
7281   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
7282                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
7283   SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
7284   SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
7285   SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
7286   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
7287                              DAG.getConstant(-BitWidth, dl, AmtVT));
7288   SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
7289   SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
7290   SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
7291   SDValue OutOps[] = { OutLo, OutHi };
7292   return DAG.getMergeValues(OutOps, dl);
7293 }
7294
7295 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
7296   EVT VT = Op.getValueType();
7297   SDLoc dl(Op);
7298   unsigned BitWidth = VT.getSizeInBits();
7299   assert(Op.getNumOperands() == 3 &&
7300          VT == Op.getOperand(1).getValueType() &&
7301          "Unexpected SRL!");
7302
7303   // Expand into a bunch of logical ops.  Note that these ops
7304   // depend on the PPC behavior for oversized shift amounts.
7305   SDValue Lo = Op.getOperand(0);
7306   SDValue Hi = Op.getOperand(1);
7307   SDValue Amt = Op.getOperand(2);
7308   EVT AmtVT = Amt.getValueType();
7309
7310   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
7311                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
7312   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
7313   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
7314   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
7315   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
7316                              DAG.getConstant(-BitWidth, dl, AmtVT));
7317   SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
7318   SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
7319   SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
7320   SDValue OutOps[] = { OutLo, OutHi };
7321   return DAG.getMergeValues(OutOps, dl);
7322 }
7323
7324 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
7325   SDLoc dl(Op);
7326   EVT VT = Op.getValueType();
7327   unsigned BitWidth = VT.getSizeInBits();
7328   assert(Op.getNumOperands() == 3 &&
7329          VT == Op.getOperand(1).getValueType() &&
7330          "Unexpected SRA!");
7331
7332   // Expand into a bunch of logical ops, followed by a select_cc.
7333   SDValue Lo = Op.getOperand(0);
7334   SDValue Hi = Op.getOperand(1);
7335   SDValue Amt = Op.getOperand(2);
7336   EVT AmtVT = Amt.getValueType();
7337
7338   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
7339                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
7340   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
7341   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
7342   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
7343   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
7344                              DAG.getConstant(-BitWidth, dl, AmtVT));
7345   SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
7346   SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
7347   SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
7348                                   Tmp4, Tmp6, ISD::SETLE);
7349   SDValue OutOps[] = { OutLo, OutHi };
7350   return DAG.getMergeValues(OutOps, dl);
7351 }
7352
7353 //===----------------------------------------------------------------------===//
7354 // Vector related lowering.
7355 //
7356
7357 /// BuildSplatI - Build a canonical splati of Val with an element size of
7358 /// SplatSize.  Cast the result to VT.
7359 static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
7360                            SelectionDAG &DAG, const SDLoc &dl) {
7361   assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
7362
7363   static const MVT VTys[] = { // canonical VT to use for each size.
7364     MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
7365   };
7366
7367   EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
7368
7369   // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
7370   if (Val == -1)
7371     SplatSize = 1;
7372
7373   EVT CanonicalVT = VTys[SplatSize-1];
7374
7375   // Build a canonical splat for this value.
7376   return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
7377 }
7378
7379 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
7380 /// specified intrinsic ID.
7381 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
7382                                 const SDLoc &dl, EVT DestVT = MVT::Other) {
7383   if (DestVT == MVT::Other) DestVT = Op.getValueType();
7384   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
7385                      DAG.getConstant(IID, dl, MVT::i32), Op);
7386 }
7387
7388 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
7389 /// specified intrinsic ID.
7390 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
7391                                 SelectionDAG &DAG, const SDLoc &dl,
7392                                 EVT DestVT = MVT::Other) {
7393   if (DestVT == MVT::Other) DestVT = LHS.getValueType();
7394   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
7395                      DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
7396 }
7397
7398 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
7399 /// specified intrinsic ID.
7400 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
7401                                 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
7402                                 EVT DestVT = MVT::Other) {
7403   if (DestVT == MVT::Other) DestVT = Op0.getValueType();
7404   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
7405                      DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
7406 }
7407
7408 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
7409 /// amount.  The result has the specified value type.
7410 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
7411                            SelectionDAG &DAG, const SDLoc &dl) {
7412   // Force LHS/RHS to be the right type.
7413   LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
7414   RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
7415
7416   int Ops[16];
7417   for (unsigned i = 0; i != 16; ++i)
7418     Ops[i] = i + Amt;
7419   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
7420   return DAG.getNode(ISD::BITCAST, dl, VT, T);
7421 }
7422
7423 /// Do we have an efficient pattern in a .td file for this node?
7424 ///
7425 /// \param V - pointer to the BuildVectorSDNode being matched
7426 /// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
7427 ///
7428 /// There are some patterns where it is beneficial to keep a BUILD_VECTOR
7429 /// node as a BUILD_VECTOR node rather than expanding it. The patterns where
7430 /// the opposite is true (expansion is beneficial) are:
7431 /// - The node builds a vector out of integers that are not 32 or 64-bits
7432 /// - The node builds a vector out of constants
7433 /// - The node is a "load-and-splat"
7434 /// In all other cases, we will choose to keep the BUILD_VECTOR.
7435 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
7436                                             bool HasDirectMove) {
7437   EVT VecVT = V->getValueType(0);
7438   bool RightType = VecVT == MVT::v2f64 || VecVT == MVT::v4f32 ||
7439     (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
7440   if (!RightType)
7441     return false;
7442
7443   bool IsSplat = true;
7444   bool IsLoad = false;
7445   SDValue Op0 = V->getOperand(0);
7446
7447   // This function is called in a block that confirms the node is not a constant
7448   // splat. So a constant BUILD_VECTOR here means the vector is built out of
7449   // different constants.
7450   if (V->isConstant())
7451     return false;
7452   for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
7453     if (V->getOperand(i).isUndef())
7454       return false;
7455     // We want to expand nodes that represent load-and-splat even if the
7456     // loaded value is a floating point truncation or conversion to int.
7457     if (V->getOperand(i).getOpcode() == ISD::LOAD ||
7458         (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
7459          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
7460         (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
7461          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
7462         (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
7463          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
7464       IsLoad = true;
7465     // If the operands are different or the input is not a load and has more
7466     // uses than just this BV node, then it isn't a splat.
7467     if (V->getOperand(i) != Op0 ||
7468         (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
7469       IsSplat = false;
7470   }
7471   return !(IsSplat && IsLoad);
7472 }
7473
7474 // If this is a case we can't handle, return null and let the default
7475 // expansion code take care of it.  If we CAN select this case, and if it
7476 // selects to a single instruction, return Op.  Otherwise, if we can codegen
7477 // this case more efficiently than a constant pool load, lower it to the
7478 // sequence of ops that should be used.
7479 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
7480                                              SelectionDAG &DAG) const {
7481   SDLoc dl(Op);
7482   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
7483   assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
7484
7485   if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
7486     // We first build an i32 vector, load it into a QPX register,
7487     // then convert it to a floating-point vector and compare it
7488     // to a zero vector to get the boolean result.
7489     MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
7490     int FrameIdx = MFI.CreateStackObject(16, 16, false);
7491     MachinePointerInfo PtrInfo =
7492         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
7493     EVT PtrVT = getPointerTy(DAG.getDataLayout());
7494     SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7495
7496     assert(BVN->getNumOperands() == 4 &&
7497       "BUILD_VECTOR for v4i1 does not have 4 operands");
7498
7499     bool IsConst = true;
7500     for (unsigned i = 0; i < 4; ++i) {
7501       if (BVN->getOperand(i).isUndef()) continue;
7502       if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
7503         IsConst = false;
7504         break;
7505       }
7506     }
7507
7508     if (IsConst) {
7509       Constant *One =
7510         ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
7511       Constant *NegOne =
7512         ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);
7513
7514       Constant *CV[4];
7515       for (unsigned i = 0; i < 4; ++i) {
7516         if (BVN->getOperand(i).isUndef())
7517           CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
7518         else if (isNullConstant(BVN->getOperand(i)))
7519           CV[i] = NegOne;
7520         else
7521           CV[i] = One;
7522       }
7523
7524       Constant *CP = ConstantVector::get(CV);
7525       SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()),
7526                                           16 /* alignment */);
7527
7528       SDValue Ops[] = {DAG.getEntryNode(), CPIdx};
7529       SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other});
7530       return DAG.getMemIntrinsicNode(
7531           PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32,
7532           MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
7533     }
7534
7535     SmallVector<SDValue, 4> Stores;
7536     for (unsigned i = 0; i < 4; ++i) {
7537       if (BVN->getOperand(i).isUndef()) continue;
7538
7539       unsigned Offset = 4*i;
7540       SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
7541       Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
7542
7543       unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
7544       if (StoreSize > 4) {
7545         Stores.push_back(
7546             DAG.getTruncStore(DAG.getEntryNode(), dl, BVN->getOperand(i), Idx,
7547                               PtrInfo.getWithOffset(Offset), MVT::i32));
7548       } else {
7549         SDValue StoreValue = BVN->getOperand(i);
7550         if (StoreSize < 4)
7551           StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);
7552
7553         Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, StoreValue, Idx,
7554                                       PtrInfo.getWithOffset(Offset)));
7555       }
7556     }
7557
7558     SDValue StoreChain;
7559     if (!Stores.empty())
7560       StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
7561     else
7562       StoreChain = DAG.getEntryNode();
7563
7564     // Now load from v4i32 into the QPX register; this will extend it to
7565     // v4i64 but not yet convert it to a floating point. Nevertheless, this
7566     // is typed as v4f64 because the QPX register integer states are not
7567     // explicitly represented.
7568
7569     SDValue Ops[] = {StoreChain,
7570                      DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32),
7571                      FIdx};
7572     SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other});
7573
7574     SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,
7575       dl, VTs, Ops, MVT::v4i32, PtrInfo);
7576     LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
7577       DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32),
7578       LoadedVect);
7579
7580     SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64);
7581
7582     return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
7583   }
7584
7585   // All other QPX vectors are handled by generic code.
7586   if (Subtarget.hasQPX())
7587     return SDValue();
7588
7589   // Check if this is a splat of a constant value.
7590   APInt APSplatBits, APSplatUndef;
7591   unsigned SplatBitSize;
7592   bool HasAnyUndefs;
7593   if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
7594                              HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
7595       SplatBitSize > 32) {
7596     // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
7597     // lowered to VSX instructions under certain conditions.
7598     // Without VSX, there is no pattern more efficient than expanding the node.
7599     if (Subtarget.hasVSX() &&
7600         haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove()))
7601       return Op;
7602     return SDValue();
7603   }
7604
7605   unsigned SplatBits = APSplatBits.getZExtValue();
7606   unsigned SplatUndef = APSplatUndef.getZExtValue();
7607   unsigned SplatSize = SplatBitSize / 8;
7608
7609   // First, handle single instruction cases.
7610
7611   // All zeros?
7612   if (SplatBits == 0) {
7613     // Canonicalize all zero vectors to be v4i32.
7614     if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
7615       SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
7616       Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
7617     }
7618     return Op;
7619   }
7620
7621   // We have XXSPLTIB for constant splats one byte wide
7622   if (Subtarget.hasP9Vector() && SplatSize == 1) {
7623     // This is a splat of 1-byte elements with some elements potentially undef.
7624     // Rather than trying to match undef in the SDAG patterns, ensure that all
7625     // elements are the same constant.
7626     if (HasAnyUndefs || ISD::isBuildVectorAllOnes(BVN)) {
7627       SmallVector<SDValue, 16> Ops(16, DAG.getConstant(SplatBits,
7628                                                        dl, MVT::i32));
7629       SDValue NewBV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
7630       if (Op.getValueType() != MVT::v16i8)
7631         return DAG.getBitcast(Op.getValueType(), NewBV);
7632       return NewBV;
7633     }
7634     return Op;
7635   }
7636
7637   // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
7638   int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
7639                     (32-SplatBitSize));
7640   if (SextVal >= -16 && SextVal <= 15)
7641     return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
7642
7643   // Two instruction sequences.
7644
7645   // If this value is in the range [-32,30] and is even, use:
7646   //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
7647   // If this value is in the range [17,31] and is odd, use:
7648   //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
7649   // If this value is in the range [-31,-17] and is odd, use:
7650   //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
7651   // Note the last two are three-instruction sequences.
7652   if (SextVal >= -32 && SextVal <= 31) {
7653     // To avoid having these optimizations undone by constant folding,
7654     // we convert to a pseudo that will be expanded later into one of
7655     // the above forms.
7656     SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
7657     EVT VT = (SplatSize == 1 ? MVT::v16i8 :
7658               (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
7659     SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
7660     SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
7661     if (VT == Op.getValueType())
7662       return RetVal;
7663     else
7664       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
7665   }
7666
7667   // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
7668   // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
7669   // for fneg/fabs.
7670   if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
7671     // Make -1 and vspltisw -1:
7672     SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
7673
7674     // Make the VSLW intrinsic, computing 0x8000_0000.
7675     SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
7676                                    OnesV, DAG, dl);
7677
7678     // xor by OnesV to invert it.
7679     Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
7680     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7681   }
7682
7683   // Check to see if this is a wide variety of vsplti*, binop self cases.
7684   static const signed char SplatCsts[] = {
7685     -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
7686     -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
7687   };
7688
7689   for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
7690     // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
7691     // cases which are ambiguous (e.g. formation of 0x8000_0000).  'vsplti -1'
7692     int i = SplatCsts[idx];
7693
7694     // Figure out what shift amount will be used by altivec if shifted by i in
7695     // this splat size.
7696     unsigned TypeShiftAmt = i & (SplatBitSize-1);
7697
7698     // vsplti + shl self.
7699     if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
7700       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7701       static const unsigned IIDs[] = { // Intrinsic to use for each size.
7702         Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
7703         Intrinsic::ppc_altivec_vslw
7704       };
7705       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7706       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7707     }
7708
7709     // vsplti + srl self.
7710     if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
7711       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7712       static const unsigned IIDs[] = { // Intrinsic to use for each size.
7713         Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
7714         Intrinsic::ppc_altivec_vsrw
7715       };
7716       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7717       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7718     }
7719
7720     // vsplti + sra self.
7721     if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
7722       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7723       static const unsigned IIDs[] = { // Intrinsic to use for each size.
7724         Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
7725         Intrinsic::ppc_altivec_vsraw
7726       };
7727       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7728       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7729     }
7730
7731     // vsplti + rol self.
7732     if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
7733                          ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
7734       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
7735       static const unsigned IIDs[] = { // Intrinsic to use for each size.
7736         Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
7737         Intrinsic::ppc_altivec_vrlw
7738       };
7739       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
7740       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
7741     }
7742
7743     // t = vsplti c, result = vsldoi t, t, 1
7744     if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
7745       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
7746       unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
7747       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
7748     }
7749     // t = vsplti c, result = vsldoi t, t, 2
7750     if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
7751       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
7752       unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
7753       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
7754     }
7755     // t = vsplti c, result = vsldoi t, t, 3
7756     if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
7757       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
7758       unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
7759       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
7760     }
7761   }
7762
7763   return SDValue();
7764 }
7765
7766 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
7767 /// the specified operations to build the shuffle.
7768 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
7769                                       SDValue RHS, SelectionDAG &DAG,
7770                                       const SDLoc &dl) {
7771   unsigned OpNum = (PFEntry >> 26) & 0x0F;
7772   unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
7773   unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
7774
7775   enum {
7776     OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
7777     OP_VMRGHW,
7778     OP_VMRGLW,
7779     OP_VSPLTISW0,
7780     OP_VSPLTISW1,
7781     OP_VSPLTISW2,
7782     OP_VSPLTISW3,
7783     OP_VSLDOI4,
7784     OP_VSLDOI8,
7785     OP_VSLDOI12
7786   };
7787
7788   if (OpNum == OP_COPY) {
7789     if (LHSID == (1*9+2)*9+3) return LHS;
7790     assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
7791     return RHS;
7792   }
7793
7794   SDValue OpLHS, OpRHS;
7795   OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
7796   OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
7797
7798   int ShufIdxs[16];
7799   switch (OpNum) {
7800   default: llvm_unreachable("Unknown i32 permute!");
7801   case OP_VMRGHW:
7802     ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
7803     ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
7804     ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
7805     ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
7806     break;
7807   case OP_VMRGLW:
7808     ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
7809     ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
7810     ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
7811     ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
7812     break;
7813   case OP_VSPLTISW0:
7814     for (unsigned i = 0; i != 16; ++i)
7815       ShufIdxs[i] = (i&3)+0;
7816     break;
7817   case OP_VSPLTISW1:
7818     for (unsigned i = 0; i != 16; ++i)
7819       ShufIdxs[i] = (i&3)+4;
7820     break;
7821   case OP_VSPLTISW2:
7822     for (unsigned i = 0; i != 16; ++i)
7823       ShufIdxs[i] = (i&3)+8;
7824     break;
7825   case OP_VSPLTISW3:
7826     for (unsigned i = 0; i != 16; ++i)
7827       ShufIdxs[i] = (i&3)+12;
7828     break;
7829   case OP_VSLDOI4:
7830     return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
7831   case OP_VSLDOI8:
7832     return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
7833   case OP_VSLDOI12:
7834     return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
7835   }
7836   EVT VT = OpLHS.getValueType();
7837   OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
7838   OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
7839   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
7840   return DAG.getNode(ISD::BITCAST, dl, VT, T);
7841 }
7842
7843 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
7844 /// is a shuffle we can handle in a single instruction, return it.  Otherwise,
7845 /// return the code it can be lowered into.  Worst case, it can always be
7846 /// lowered into a vperm.
7847 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
7848                                                SelectionDAG &DAG) const {
7849   SDLoc dl(Op);
7850   SDValue V1 = Op.getOperand(0);
7851   SDValue V2 = Op.getOperand(1);
7852   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
7853   EVT VT = Op.getValueType();
7854   bool isLittleEndian = Subtarget.isLittleEndian();
7855
7856   unsigned ShiftElts, InsertAtByte;
7857   bool Swap;
7858   if (Subtarget.hasP9Vector() &&
7859       PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
7860                            isLittleEndian)) {
7861     if (Swap)
7862       std::swap(V1, V2);
7863     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
7864     SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
7865     if (ShiftElts) {
7866       SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
7867                                 DAG.getConstant(ShiftElts, dl, MVT::i32));
7868       SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Shl,
7869                                 DAG.getConstant(InsertAtByte, dl, MVT::i32));
7870       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
7871     }
7872     SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Conv2,
7873                               DAG.getConstant(InsertAtByte, dl, MVT::i32));
7874     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
7875   }
7876
7877
7878   if (Subtarget.hasVSX() &&
7879       PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
7880     if (Swap)
7881       std::swap(V1, V2);
7882     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
7883     SDValue Conv2 =
7884         DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
7885
7886     SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
7887                               DAG.getConstant(ShiftElts, dl, MVT::i32));
7888     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
7889   }
7890
7891   if (Subtarget.hasVSX() &&
7892     PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
7893     if (Swap)
7894       std::swap(V1, V2);
7895     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
7896     SDValue Conv2 =
7897         DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
7898
7899     SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
7900                               DAG.getConstant(ShiftElts, dl, MVT::i32));
7901     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
7902   }
7903
7904   if (Subtarget.hasP9Vector()) {
7905      if (PPC::isXXBRHShuffleMask(SVOp)) {
7906       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
7907       SDValue ReveHWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v8i16, Conv);
7908       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
7909     } else if (PPC::isXXBRWShuffleMask(SVOp)) {
7910       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
7911       SDValue ReveWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v4i32, Conv);
7912       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
7913     } else if (PPC::isXXBRDShuffleMask(SVOp)) {
7914       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
7915       SDValue ReveDWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Conv);
7916       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
7917     } else if (PPC::isXXBRQShuffleMask(SVOp)) {
7918       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
7919       SDValue ReveQWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v1i128, Conv);
7920       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
7921     }
7922   }
7923
7924   if (Subtarget.hasVSX()) {
7925     if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
7926       int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
7927
7928       // If the source for the shuffle is a scalar_to_vector that came from a
7929       // 32-bit load, it will have used LXVWSX so we don't need to splat again.
7930       if (Subtarget.hasP9Vector() &&
7931           ((isLittleEndian && SplatIdx == 3) ||
7932            (!isLittleEndian && SplatIdx == 0))) {
7933         SDValue Src = V1.getOperand(0);
7934         if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR &&
7935             Src.getOperand(0).getOpcode() == ISD::LOAD &&
7936             Src.getOperand(0).hasOneUse())
7937           return V1;
7938       }
7939       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
7940       SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
7941                                   DAG.getConstant(SplatIdx, dl, MVT::i32));
7942       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
7943     }
7944
7945     // Left shifts of 8 bytes are actually swaps. Convert accordingly.
7946     if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
7947       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
7948       SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
7949       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
7950     }
7951   }
7952
7953   if (Subtarget.hasQPX()) {
7954     if (VT.getVectorNumElements() != 4)
7955       return SDValue();
7956
7957     if (V2.isUndef()) V2 = V1;
7958
7959     int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
7960     if (AlignIdx != -1) {
7961       return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
7962                          DAG.getConstant(AlignIdx, dl, MVT::i32));
7963     } else if (SVOp->isSplat()) {
7964       int SplatIdx = SVOp->getSplatIndex();
7965       if (SplatIdx >= 4) {
7966         std::swap(V1, V2);
7967         SplatIdx -= 4;
7968       }
7969
7970       return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
7971                          DAG.getConstant(SplatIdx, dl, MVT::i32));
7972     }
7973
7974     // Lower this into a qvgpci/qvfperm pair.
7975
7976     // Compute the qvgpci literal
7977     unsigned idx = 0;
7978     for (unsigned i = 0; i < 4; ++i) {
7979       int m = SVOp->getMaskElt(i);
7980       unsigned mm = m >= 0 ? (unsigned) m : i;
7981       idx |= mm << (3-i)*3;
7982     }
7983
7984     SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
7985                              DAG.getConstant(idx, dl, MVT::i32));
7986     return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
7987   }
7988
7989   // Cases that are handled by instructions that take permute immediates
7990   // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
7991   // selected by the instruction selector.
7992   if (V2.isUndef()) {
7993     if (PPC::isSplatShuffleMask(SVOp, 1) ||
7994         PPC::isSplatShuffleMask(SVOp, 2) ||
7995         PPC::isSplatShuffleMask(SVOp, 4) ||
7996         PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
7997         PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
7998         PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
7999         PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
8000         PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
8001         PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
8002         PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
8003         PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
8004         PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
8005         (Subtarget.hasP8Altivec() && (
8006          PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
8007          PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
8008          PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
8009       return Op;
8010     }
8011   }
8012
8013   // Altivec has a variety of "shuffle immediates" that take two vector inputs
8014   // and produce a fixed permutation.  If any of these match, do not lower to
8015   // VPERM.
8016   unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
8017   if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
8018       PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
8019       PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
8020       PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
8021       PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
8022       PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
8023       PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
8024       PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
8025       PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
8026       (Subtarget.hasP8Altivec() && (
8027        PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
8028        PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
8029        PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
8030     return Op;
8031
8032   // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
8033   // perfect shuffle table to emit an optimal matching sequence.
8034   ArrayRef<int> PermMask = SVOp->getMask();
8035
8036   unsigned PFIndexes[4];
8037   bool isFourElementShuffle = true;
8038   for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
8039     unsigned EltNo = 8;   // Start out undef.
8040     for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
8041       if (PermMask[i*4+j] < 0)
8042         continue;   // Undef, ignore it.
8043
8044       unsigned ByteSource = PermMask[i*4+j];
8045       if ((ByteSource & 3) != j) {
8046         isFourElementShuffle = false;
8047         break;
8048       }
8049
8050       if (EltNo == 8) {
8051         EltNo = ByteSource/4;
8052       } else if (EltNo != ByteSource/4) {
8053         isFourElementShuffle = false;
8054         break;
8055       }
8056     }
8057     PFIndexes[i] = EltNo;
8058   }
8059
8060   // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
8061   // perfect shuffle vector to determine if it is cost effective to do this as
8062   // discrete instructions, or whether we should use a vperm.
8063   // For now, we skip this for little endian until such time as we have a
8064   // little-endian perfect shuffle table.
8065   if (isFourElementShuffle && !isLittleEndian) {
8066     // Compute the index in the perfect shuffle table.
8067     unsigned PFTableIndex =
8068       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8069
8070     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
8071     unsigned Cost  = (PFEntry >> 30);
8072
8073     // Determining when to avoid vperm is tricky.  Many things affect the cost
8074     // of vperm, particularly how many times the perm mask needs to be computed.
8075     // For example, if the perm mask can be hoisted out of a loop or is already
8076     // used (perhaps because there are multiple permutes with the same shuffle
8077     // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
8078     // the loop requires an extra register.
8079     //
8080     // As a compromise, we only emit discrete instructions if the shuffle can be
8081     // generated in 3 or fewer operations.  When we have loop information
8082     // available, if this block is within a loop, we should avoid using vperm
8083     // for 3-operation perms and use a constant pool load instead.
8084     if (Cost < 3)
8085       return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
8086   }
8087
8088   // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
8089   // vector that will get spilled to the constant pool.
8090   if (V2.isUndef()) V2 = V1;
8091
8092   // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
8093   // that it is in input element units, not in bytes.  Convert now.
8094
8095   // For little endian, the order of the input vectors is reversed, and
8096   // the permutation mask is complemented with respect to 31.  This is
8097   // necessary to produce proper semantics with the big-endian-biased vperm
8098   // instruction.
8099   EVT EltVT = V1.getValueType().getVectorElementType();
8100   unsigned BytesPerElement = EltVT.getSizeInBits()/8;
8101
8102   SmallVector<SDValue, 16> ResultMask;
8103   for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
8104     unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
8105
8106     for (unsigned j = 0; j != BytesPerElement; ++j)
8107       if (isLittleEndian)
8108         ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
8109                                              dl, MVT::i32));
8110       else
8111         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
8112                                              MVT::i32));
8113   }
8114
8115   SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
8116   if (isLittleEndian)
8117     return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
8118                        V2, V1, VPermMask);
8119   else
8120     return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
8121                        V1, V2, VPermMask);
8122 }
8123
8124 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a
8125 /// vector comparison.  If it is, return true and fill in Opc/isDot with
8126 /// information about the intrinsic.
8127 static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
8128                                  bool &isDot, const PPCSubtarget &Subtarget) {
8129   unsigned IntrinsicID =
8130       cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
8131   CompareOpc = -1;
8132   isDot = false;
8133   switch (IntrinsicID) {
8134   default:
8135     return false;
8136   // Comparison predicates.
8137   case Intrinsic::ppc_altivec_vcmpbfp_p:
8138     CompareOpc = 966;
8139     isDot = true;
8140     break;
8141   case Intrinsic::ppc_altivec_vcmpeqfp_p:
8142     CompareOpc = 198;
8143     isDot = true;
8144     break;
8145   case Intrinsic::ppc_altivec_vcmpequb_p:
8146     CompareOpc = 6;
8147     isDot = true;
8148     break;
8149   case Intrinsic::ppc_altivec_vcmpequh_p:
8150     CompareOpc = 70;
8151     isDot = true;
8152     break;
8153   case Intrinsic::ppc_altivec_vcmpequw_p:
8154     CompareOpc = 134;
8155     isDot = true;
8156     break;
8157   case Intrinsic::ppc_altivec_vcmpequd_p:
8158     if (Subtarget.hasP8Altivec()) {
8159       CompareOpc = 199;
8160       isDot = true;
8161     } else
8162       return false;
8163     break;
8164   case Intrinsic::ppc_altivec_vcmpneb_p:
8165   case Intrinsic::ppc_altivec_vcmpneh_p:
8166   case Intrinsic::ppc_altivec_vcmpnew_p:
8167   case Intrinsic::ppc_altivec_vcmpnezb_p:
8168   case Intrinsic::ppc_altivec_vcmpnezh_p:
8169   case Intrinsic::ppc_altivec_vcmpnezw_p:
8170     if (Subtarget.hasP9Altivec()) {
8171       switch (IntrinsicID) {
8172       default:
8173         llvm_unreachable("Unknown comparison intrinsic.");
8174       case Intrinsic::ppc_altivec_vcmpneb_p:
8175         CompareOpc = 7;
8176         break;
8177       case Intrinsic::ppc_altivec_vcmpneh_p:
8178         CompareOpc = 71;
8179         break;
8180       case Intrinsic::ppc_altivec_vcmpnew_p:
8181         CompareOpc = 135;
8182         break;
8183       case Intrinsic::ppc_altivec_vcmpnezb_p:
8184         CompareOpc = 263;
8185         break;
8186       case Intrinsic::ppc_altivec_vcmpnezh_p:
8187         CompareOpc = 327;
8188         break;
8189       case Intrinsic::ppc_altivec_vcmpnezw_p:
8190         CompareOpc = 391;
8191         break;
8192       }
8193       isDot = true;
8194     } else
8195       return false;
8196     break;
8197   case Intrinsic::ppc_altivec_vcmpgefp_p:
8198     CompareOpc = 454;
8199     isDot = true;
8200     break;
8201   case Intrinsic::ppc_altivec_vcmpgtfp_p:
8202     CompareOpc = 710;
8203     isDot = true;
8204     break;
8205   case Intrinsic::ppc_altivec_vcmpgtsb_p:
8206     CompareOpc = 774;
8207     isDot = true;
8208     break;
8209   case Intrinsic::ppc_altivec_vcmpgtsh_p:
8210     CompareOpc = 838;
8211     isDot = true;
8212     break;
8213   case Intrinsic::ppc_altivec_vcmpgtsw_p:
8214     CompareOpc = 902;
8215     isDot = true;
8216     break;
8217   case Intrinsic::ppc_altivec_vcmpgtsd_p:
8218     if (Subtarget.hasP8Altivec()) {
8219       CompareOpc = 967;
8220       isDot = true;
8221     } else
8222       return false;
8223     break;
8224   case Intrinsic::ppc_altivec_vcmpgtub_p:
8225     CompareOpc = 518;
8226     isDot = true;
8227     break;
8228   case Intrinsic::ppc_altivec_vcmpgtuh_p:
8229     CompareOpc = 582;
8230     isDot = true;
8231     break;
8232   case Intrinsic::ppc_altivec_vcmpgtuw_p:
8233     CompareOpc = 646;
8234     isDot = true;
8235     break;
8236   case Intrinsic::ppc_altivec_vcmpgtud_p:
8237     if (Subtarget.hasP8Altivec()) {
8238       CompareOpc = 711;
8239       isDot = true;
8240     } else
8241       return false;
8242     break;
8243
8244   // VSX predicate comparisons use the same infrastructure
8245   case Intrinsic::ppc_vsx_xvcmpeqdp_p:
8246   case Intrinsic::ppc_vsx_xvcmpgedp_p:
8247   case Intrinsic::ppc_vsx_xvcmpgtdp_p:
8248   case Intrinsic::ppc_vsx_xvcmpeqsp_p:
8249   case Intrinsic::ppc_vsx_xvcmpgesp_p:
8250   case Intrinsic::ppc_vsx_xvcmpgtsp_p:
8251     if (Subtarget.hasVSX()) {
8252       switch (IntrinsicID) {
8253       case Intrinsic::ppc_vsx_xvcmpeqdp_p:
8254         CompareOpc = 99;
8255         break;
8256       case Intrinsic::ppc_vsx_xvcmpgedp_p:
8257         CompareOpc = 115;
8258         break;
8259       case Intrinsic::ppc_vsx_xvcmpgtdp_p:
8260         CompareOpc = 107;
8261         break;
8262       case Intrinsic::ppc_vsx_xvcmpeqsp_p:
8263         CompareOpc = 67;
8264         break;
8265       case Intrinsic::ppc_vsx_xvcmpgesp_p:
8266         CompareOpc = 83;
8267         break;
8268       case Intrinsic::ppc_vsx_xvcmpgtsp_p:
8269         CompareOpc = 75;
8270         break;
8271       }
8272       isDot = true;
8273     } else
8274       return false;
8275     break;
8276
8277   // Normal Comparisons.
8278   case Intrinsic::ppc_altivec_vcmpbfp:
8279     CompareOpc = 966;
8280     break;
8281   case Intrinsic::ppc_altivec_vcmpeqfp:
8282     CompareOpc = 198;
8283     break;
8284   case Intrinsic::ppc_altivec_vcmpequb:
8285     CompareOpc = 6;
8286     break;
8287   case Intrinsic::ppc_altivec_vcmpequh:
8288     CompareOpc = 70;
8289     break;
8290   case Intrinsic::ppc_altivec_vcmpequw:
8291     CompareOpc = 134;
8292     break;
8293   case Intrinsic::ppc_altivec_vcmpequd:
8294     if (Subtarget.hasP8Altivec())
8295       CompareOpc = 199;
8296     else
8297       return false;
8298     break;
8299   case Intrinsic::ppc_altivec_vcmpneb:
8300   case Intrinsic::ppc_altivec_vcmpneh:
8301   case Intrinsic::ppc_altivec_vcmpnew:
8302   case Intrinsic::ppc_altivec_vcmpnezb:
8303   case Intrinsic::ppc_altivec_vcmpnezh:
8304   case Intrinsic::ppc_altivec_vcmpnezw:
8305     if (Subtarget.hasP9Altivec())
8306       switch (IntrinsicID) {
8307       default:
8308         llvm_unreachable("Unknown comparison intrinsic.");
8309       case Intrinsic::ppc_altivec_vcmpneb:
8310         CompareOpc = 7;
8311         break;
8312       case Intrinsic::ppc_altivec_vcmpneh:
8313         CompareOpc = 71;
8314         break;
8315       case Intrinsic::ppc_altivec_vcmpnew:
8316         CompareOpc = 135;
8317         break;
8318       case Intrinsic::ppc_altivec_vcmpnezb:
8319         CompareOpc = 263;
8320         break;
8321       case Intrinsic::ppc_altivec_vcmpnezh:
8322         CompareOpc = 327;
8323         break;
8324       case Intrinsic::ppc_altivec_vcmpnezw:
8325         CompareOpc = 391;
8326         break;
8327       }
8328     else
8329       return false;
8330     break;
8331   case Intrinsic::ppc_altivec_vcmpgefp:
8332     CompareOpc = 454;
8333     break;
8334   case Intrinsic::ppc_altivec_vcmpgtfp:
8335     CompareOpc = 710;
8336     break;
8337   case Intrinsic::ppc_altivec_vcmpgtsb:
8338     CompareOpc = 774;
8339     break;
8340   case Intrinsic::ppc_altivec_vcmpgtsh:
8341     CompareOpc = 838;
8342     break;
8343   case Intrinsic::ppc_altivec_vcmpgtsw:
8344     CompareOpc = 902;
8345     break;
8346   case Intrinsic::ppc_altivec_vcmpgtsd:
8347     if (Subtarget.hasP8Altivec())
8348       CompareOpc = 967;
8349     else
8350       return false;
8351     break;
8352   case Intrinsic::ppc_altivec_vcmpgtub:
8353     CompareOpc = 518;
8354     break;
8355   case Intrinsic::ppc_altivec_vcmpgtuh:
8356     CompareOpc = 582;
8357     break;
8358   case Intrinsic::ppc_altivec_vcmpgtuw:
8359     CompareOpc = 646;
8360     break;
8361   case Intrinsic::ppc_altivec_vcmpgtud:
8362     if (Subtarget.hasP8Altivec())
8363       CompareOpc = 711;
8364     else
8365       return false;
8366     break;
8367   }
8368   return true;
8369 }
8370
8371 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
8372 /// lower, do it, otherwise return null.
8373 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
8374                                                    SelectionDAG &DAG) const {
8375   unsigned IntrinsicID =
8376     cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
8377
8378   if (IntrinsicID == Intrinsic::thread_pointer) {
8379     // Reads the thread pointer register, used for __builtin_thread_pointer.
8380     bool is64bit = Subtarget.isPPC64();
8381     return DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
8382                            is64bit ? MVT::i64 : MVT::i32);
8383   }
8384
8385   // If this is a lowered altivec predicate compare, CompareOpc is set to the
8386   // opcode number of the comparison.
8387   SDLoc dl(Op);
8388   int CompareOpc;
8389   bool isDot;
8390   if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
8391     return SDValue();    // Don't custom lower most intrinsics.
8392
8393   // If this is a non-dot comparison, make the VCMP node and we are done.
8394   if (!isDot) {
8395     SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
8396                               Op.getOperand(1), Op.getOperand(2),
8397                               DAG.getConstant(CompareOpc, dl, MVT::i32));
8398     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
8399   }
8400
8401   // Create the PPCISD altivec 'dot' comparison node.
8402   SDValue Ops[] = {
8403     Op.getOperand(2),  // LHS
8404     Op.getOperand(3),  // RHS
8405     DAG.getConstant(CompareOpc, dl, MVT::i32)
8406   };
8407   EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
8408   SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
8409
8410   // Now that we have the comparison, emit a copy from the CR to a GPR.
8411   // This is flagged to the above dot comparison.
8412   SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
8413                                 DAG.getRegister(PPC::CR6, MVT::i32),
8414                                 CompNode.getValue(1));
8415
8416   // Unpack the result based on how the target uses it.
8417   unsigned BitNo;   // Bit # of CR6.
8418   bool InvertBit;   // Invert result?
8419   switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
8420   default:  // Can't happen, don't crash on invalid number though.
8421   case 0:   // Return the value of the EQ bit of CR6.
8422     BitNo = 0; InvertBit = false;
8423     break;
8424   case 1:   // Return the inverted value of the EQ bit of CR6.
8425     BitNo = 0; InvertBit = true;
8426     break;
8427   case 2:   // Return the value of the LT bit of CR6.
8428     BitNo = 2; InvertBit = false;
8429     break;
8430   case 3:   // Return the inverted value of the LT bit of CR6.
8431     BitNo = 2; InvertBit = true;
8432     break;
8433   }
8434
8435   // Shift the bit into the low position.
8436   Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
8437                       DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
8438   // Isolate the bit.
8439   Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
8440                       DAG.getConstant(1, dl, MVT::i32));
8441
8442   // If we are supposed to, toggle the bit.
8443   if (InvertBit)
8444     Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
8445                         DAG.getConstant(1, dl, MVT::i32));
8446   return Flags;
8447 }
8448
8449 SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
8450                                                SelectionDAG &DAG) const {
8451   // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
8452   // the beginning of the argument list.
8453   int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
8454   SDLoc DL(Op);
8455   switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
8456   case Intrinsic::ppc_cfence: {
8457     assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
8458     assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
8459     return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
8460                                       DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
8461                                                   Op.getOperand(ArgStart + 1)),
8462                                       Op.getOperand(0)),
8463                    0);
8464   }
8465   default:
8466     break;
8467   }
8468   return SDValue();
8469 }
8470
8471 SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const {
8472   // Check for a DIV with the same operands as this REM.
8473   for (auto UI : Op.getOperand(1)->uses()) {
8474     if ((Op.getOpcode() == ISD::SREM && UI->getOpcode() == ISD::SDIV) ||
8475         (Op.getOpcode() == ISD::UREM && UI->getOpcode() == ISD::UDIV))
8476       if (UI->getOperand(0) == Op.getOperand(0) &&
8477           UI->getOperand(1) == Op.getOperand(1))
8478         return SDValue();
8479   }
8480   return Op;
8481 }
8482
8483 SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
8484                                                   SelectionDAG &DAG) const {
8485   SDLoc dl(Op);
8486   // For v2i64 (VSX), we can pattern patch the v2i32 case (using fp <-> int
8487   // instructions), but for smaller types, we need to first extend up to v2i32
8488   // before doing going farther.
8489   if (Op.getValueType() == MVT::v2i64) {
8490     EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
8491     if (ExtVT != MVT::v2i32) {
8492       Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
8493       Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op,
8494                        DAG.getValueType(EVT::getVectorVT(*DAG.getContext(),
8495                                         ExtVT.getVectorElementType(), 4)));
8496       Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op);
8497       Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op,
8498                        DAG.getValueType(MVT::v2i32));
8499     }
8500
8501     return Op;
8502   }
8503
8504   return SDValue();
8505 }
8506
8507 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
8508                                                  SelectionDAG &DAG) const {
8509   SDLoc dl(Op);
8510   // Create a stack slot that is 16-byte aligned.
8511   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
8512   int FrameIdx = MFI.CreateStackObject(16, 16, false);
8513   EVT PtrVT = getPointerTy(DAG.getDataLayout());
8514   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8515
8516   // Store the input value into Value#0 of the stack slot.
8517   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
8518                                MachinePointerInfo());
8519   // Load it out.
8520   return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
8521 }
8522
8523 SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
8524                                                   SelectionDAG &DAG) const {
8525   assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
8526          "Should only be called for ISD::INSERT_VECTOR_ELT");
8527   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
8528   // We have legal lowering for constant indices but not for variable ones.
8529   if (C)
8530     return Op;
8531   return SDValue();
8532 }
8533
8534 SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
8535                                                    SelectionDAG &DAG) const {
8536   SDLoc dl(Op);
8537   SDNode *N = Op.getNode();
8538
8539   assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
8540          "Unknown extract_vector_elt type");
8541
8542   SDValue Value = N->getOperand(0);
8543
8544   // The first part of this is like the store lowering except that we don't
8545   // need to track the chain.
8546
8547   // The values are now known to be -1 (false) or 1 (true). To convert this
8548   // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
8549   // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
8550   Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
8551
8552   // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
8553   // understand how to form the extending load.
8554   SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
8555
8556   Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
8557
8558   // Now convert to an integer and store.
8559   Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
8560     DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
8561     Value);
8562
8563   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
8564   int FrameIdx = MFI.CreateStackObject(16, 16, false);
8565   MachinePointerInfo PtrInfo =
8566       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8567   EVT PtrVT = getPointerTy(DAG.getDataLayout());
8568   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8569
8570   SDValue StoreChain = DAG.getEntryNode();
8571   SDValue Ops[] = {StoreChain,
8572                    DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
8573                    Value, FIdx};
8574   SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);
8575
8576   StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
8577     dl, VTs, Ops, MVT::v4i32, PtrInfo);
8578
8579   // Extract the value requested.
8580   unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
8581   SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
8582   Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
8583
8584   SDValue IntVal =
8585       DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset));
8586
8587   if (!Subtarget.useCRBits())
8588     return IntVal;
8589
8590   return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
8591 }
8592
8593 /// Lowering for QPX v4i1 loads
8594 SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
8595                                            SelectionDAG &DAG) const {
8596   SDLoc dl(Op);
8597   LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
8598   SDValue LoadChain = LN->getChain();
8599   SDValue BasePtr = LN->getBasePtr();
8600
8601   if (Op.getValueType() == MVT::v4f64 ||
8602       Op.getValueType() == MVT::v4f32) {
8603     EVT MemVT = LN->getMemoryVT();
8604     unsigned Alignment = LN->getAlignment();
8605
8606     // If this load is properly aligned, then it is legal.
8607     if (Alignment >= MemVT.getStoreSize())
8608       return Op;
8609
8610     EVT ScalarVT = Op.getValueType().getScalarType(),
8611         ScalarMemVT = MemVT.getScalarType();
8612     unsigned Stride = ScalarMemVT.getStoreSize();
8613
8614     SDValue Vals[4], LoadChains[4];
8615     for (unsigned Idx = 0; Idx < 4; ++Idx) {
8616       SDValue Load;
8617       if (ScalarVT != ScalarMemVT)
8618         Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
8619                               BasePtr,
8620                               LN->getPointerInfo().getWithOffset(Idx * Stride),
8621                               ScalarMemVT, MinAlign(Alignment, Idx * Stride),
8622                               LN->getMemOperand()->getFlags(), LN->getAAInfo());
8623       else
8624         Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
8625                            LN->getPointerInfo().getWithOffset(Idx * Stride),
8626                            MinAlign(Alignment, Idx * Stride),
8627                            LN->getMemOperand()->getFlags(), LN->getAAInfo());
8628
8629       if (Idx == 0 && LN->isIndexed()) {
8630         assert(LN->getAddressingMode() == ISD::PRE_INC &&
8631                "Unknown addressing mode on vector load");
8632         Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
8633                                   LN->getAddressingMode());
8634       }
8635
8636       Vals[Idx] = Load;
8637       LoadChains[Idx] = Load.getValue(1);
8638
8639       BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
8640                             DAG.getConstant(Stride, dl,
8641                                             BasePtr.getValueType()));
8642     }
8643
8644     SDValue TF =  DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
8645     SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals);
8646
8647     if (LN->isIndexed()) {
8648       SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
8649       return DAG.getMergeValues(RetOps, dl);
8650     }
8651
8652     SDValue RetOps[] = { Value, TF };
8653     return DAG.getMergeValues(RetOps, dl);
8654   }
8655
8656   assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
8657   assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");
8658
8659   // To lower v4i1 from a byte array, we load the byte elements of the
8660   // vector and then reuse the BUILD_VECTOR logic.
8661
8662   SDValue VectElmts[4], VectElmtChains[4];
8663   for (unsigned i = 0; i < 4; ++i) {
8664     SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
8665     Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
8666
8667     VectElmts[i] = DAG.getExtLoad(
8668         ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx,
8669         LN->getPointerInfo().getWithOffset(i), MVT::i8,
8670         /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo());
8671     VectElmtChains[i] = VectElmts[i].getValue(1);
8672   }
8673
8674   LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
8675   SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts);
8676
8677   SDValue RVals[] = { Value, LoadChain };
8678   return DAG.getMergeValues(RVals, dl);
8679 }
8680
8681 /// Lowering for QPX v4i1 stores
8682 SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
8683                                             SelectionDAG &DAG) const {
8684   SDLoc dl(Op);
8685   StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
8686   SDValue StoreChain = SN->getChain();
8687   SDValue BasePtr = SN->getBasePtr();
8688   SDValue Value = SN->getValue();
8689
8690   if (Value.getValueType() == MVT::v4f64 ||
8691       Value.getValueType() == MVT::v4f32) {
8692     EVT MemVT = SN->getMemoryVT();
8693     unsigned Alignment = SN->getAlignment();
8694
8695     // If this store is properly aligned, then it is legal.
8696     if (Alignment >= MemVT.getStoreSize())
8697       return Op;
8698
8699     EVT ScalarVT = Value.getValueType().getScalarType(),
8700         ScalarMemVT = MemVT.getScalarType();
8701     unsigned Stride = ScalarMemVT.getStoreSize();
8702
8703     SDValue Stores[4];
8704     for (unsigned Idx = 0; Idx < 4; ++Idx) {
8705       SDValue Ex = DAG.getNode(
8706           ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
8707           DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout())));
8708       SDValue Store;
8709       if (ScalarVT != ScalarMemVT)
8710         Store =
8711             DAG.getTruncStore(StoreChain, dl, Ex, BasePtr,
8712                               SN->getPointerInfo().getWithOffset(Idx * Stride),
8713                               ScalarMemVT, MinAlign(Alignment, Idx * Stride),
8714                               SN->getMemOperand()->getFlags(), SN->getAAInfo());
8715       else
8716         Store = DAG.getStore(StoreChain, dl, Ex, BasePtr,
8717                              SN->getPointerInfo().getWithOffset(Idx * Stride),
8718                              MinAlign(Alignment, Idx * Stride),
8719                              SN->getMemOperand()->getFlags(), SN->getAAInfo());
8720
8721       if (Idx == 0 && SN->isIndexed()) {
8722         assert(SN->getAddressingMode() == ISD::PRE_INC &&
8723                "Unknown addressing mode on vector store");
8724         Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(),
8725                                     SN->getAddressingMode());
8726       }
8727
8728       BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
8729                             DAG.getConstant(Stride, dl,
8730                                             BasePtr.getValueType()));
8731       Stores[Idx] = Store;
8732     }
8733
8734     SDValue TF =  DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
8735
8736     if (SN->isIndexed()) {
8737       SDValue RetOps[] = { TF, Stores[0].getValue(1) };
8738       return DAG.getMergeValues(RetOps, dl);
8739     }
8740
8741     return TF;
8742   }
8743
8744   assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported");
8745   assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower");
8746
8747   // The values are now known to be -1 (false) or 1 (true). To convert this
8748   // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
8749   // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
8750   Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
8751
8752   // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
8753   // understand how to form the extending load.
8754   SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
8755
8756   Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
8757
8758   // Now convert to an integer and store.
8759   Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
8760     DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
8761     Value);
8762
8763   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
8764   int FrameIdx = MFI.CreateStackObject(16, 16, false);
8765   MachinePointerInfo PtrInfo =
8766       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8767   EVT PtrVT = getPointerTy(DAG.getDataLayout());
8768   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8769
8770   SDValue Ops[] = {StoreChain,
8771                    DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
8772                    Value, FIdx};
8773   SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);
8774
8775   StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
8776     dl, VTs, Ops, MVT::v4i32, PtrInfo);
8777
8778   // Move data into the byte array.
8779   SDValue Loads[4], LoadChains[4];
8780   for (unsigned i = 0; i < 4; ++i) {
8781     unsigned Offset = 4*i;
8782     SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
8783     Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
8784
8785     Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
8786                            PtrInfo.getWithOffset(Offset));
8787     LoadChains[i] = Loads[i].getValue(1);
8788   }
8789
8790   StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
8791
8792   SDValue Stores[4];
8793   for (unsigned i = 0; i < 4; ++i) {
8794     SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
8795     Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
8796
8797     Stores[i] = DAG.getTruncStore(
8798         StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i),
8799         MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(),
8800         SN->getAAInfo());
8801   }
8802
8803   StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
8804
8805   return StoreChain;
8806 }
8807
8808 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
8809   SDLoc dl(Op);
8810   if (Op.getValueType() == MVT::v4i32) {
8811     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8812
8813     SDValue Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG, dl);
8814     SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt.
8815
8816     SDValue RHSSwap =   // = vrlw RHS, 16
8817       BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
8818
8819     // Shrinkify inputs to v8i16.
8820     LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
8821     RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
8822     RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
8823
8824     // Low parts multiplied together, generating 32-bit results (we ignore the
8825     // top parts).
8826     SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
8827                                         LHS, RHS, DAG, dl, MVT::v4i32);
8828
8829     SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
8830                                       LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
8831     // Shift the high parts up 16 bits.
8832     HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
8833                               Neg16, DAG, dl);
8834     return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
8835   } else if (Op.getValueType() == MVT::v8i16) {
8836     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8837
8838     SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
8839
8840     return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
8841                             LHS, RHS, Zero, DAG, dl);
8842   } else if (Op.getValueType() == MVT::v16i8) {
8843     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8844     bool isLittleEndian = Subtarget.isLittleEndian();
8845
8846     // Multiply the even 8-bit parts, producing 16-bit sums.
8847     SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
8848                                            LHS, RHS, DAG, dl, MVT::v8i16);
8849     EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
8850
8851     // Multiply the odd 8-bit parts, producing 16-bit sums.
8852     SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
8853                                           LHS, RHS, DAG, dl, MVT::v8i16);
8854     OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
8855
8856     // Merge the results together.  Because vmuleub and vmuloub are
8857     // instructions with a big-endian bias, we must reverse the
8858     // element numbering and reverse the meaning of "odd" and "even"
8859     // when generating little endian code.
8860     int Ops[16];
8861     for (unsigned i = 0; i != 8; ++i) {
8862       if (isLittleEndian) {
8863         Ops[i*2  ] = 2*i;
8864         Ops[i*2+1] = 2*i+16;
8865       } else {
8866         Ops[i*2  ] = 2*i+1;
8867         Ops[i*2+1] = 2*i+1+16;
8868       }
8869     }
8870     if (isLittleEndian)
8871       return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
8872     else
8873       return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
8874   } else {
8875     llvm_unreachable("Unknown mul to lower!");
8876   }
8877 }
8878
8879 /// LowerOperation - Provide custom lowering hooks for some operations.
8880 ///
8881 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
8882   switch (Op.getOpcode()) {
8883   default: llvm_unreachable("Wasn't expecting to be able to lower this!");
8884   case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
8885   case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
8886   case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
8887   case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
8888   case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
8889   case ISD::SETCC:              return LowerSETCC(Op, DAG);
8890   case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
8891   case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
8892   case ISD::VASTART:
8893     return LowerVASTART(Op, DAG);
8894
8895   case ISD::VAARG:
8896     return LowerVAARG(Op, DAG);
8897
8898   case ISD::VACOPY:
8899     return LowerVACOPY(Op, DAG);
8900
8901   case ISD::STACKRESTORE:
8902     return LowerSTACKRESTORE(Op, DAG);
8903
8904   case ISD::DYNAMIC_STACKALLOC:
8905     return LowerDYNAMIC_STACKALLOC(Op, DAG);
8906
8907   case ISD::GET_DYNAMIC_AREA_OFFSET:
8908     return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
8909
8910   case ISD::EH_DWARF_CFA:
8911     return LowerEH_DWARF_CFA(Op, DAG);
8912
8913   case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
8914   case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);
8915
8916   case ISD::LOAD:               return LowerLOAD(Op, DAG);
8917   case ISD::STORE:              return LowerSTORE(Op, DAG);
8918   case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
8919   case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
8920   case ISD::FP_TO_UINT:
8921   case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG,
8922                                                       SDLoc(Op));
8923   case ISD::UINT_TO_FP:
8924   case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
8925   case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
8926
8927   // Lower 64-bit shifts.
8928   case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
8929   case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
8930   case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);
8931
8932   // Vector-related lowering.
8933   case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
8934   case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
8935   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
8936   case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
8937   case ISD::SIGN_EXTEND_INREG:  return LowerSIGN_EXTEND_INREG(Op, DAG);
8938   case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
8939   case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
8940   case ISD::MUL:                return LowerMUL(Op, DAG);
8941
8942   // For counter-based loop handling.
8943   case ISD::INTRINSIC_W_CHAIN:  return SDValue();
8944
8945   // Frame & Return address.
8946   case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
8947   case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
8948
8949   case ISD::INTRINSIC_VOID:
8950     return LowerINTRINSIC_VOID(Op, DAG);
8951   case ISD::SREM:
8952   case ISD::UREM:
8953     return LowerREM(Op, DAG);
8954   }
8955 }
8956
8957 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
8958                                            SmallVectorImpl<SDValue>&Results,
8959                                            SelectionDAG &DAG) const {
8960   SDLoc dl(N);
8961   switch (N->getOpcode()) {
8962   default:
8963     llvm_unreachable("Do not know how to custom type legalize this operation!");
8964   case ISD::READCYCLECOUNTER: {
8965     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
8966     SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
8967
8968     Results.push_back(RTB);
8969     Results.push_back(RTB.getValue(1));
8970     Results.push_back(RTB.getValue(2));
8971     break;
8972   }
8973   case ISD::INTRINSIC_W_CHAIN: {
8974     if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
8975         Intrinsic::ppc_is_decremented_ctr_nonzero)
8976       break;
8977
8978     assert(N->getValueType(0) == MVT::i1 &&
8979            "Unexpected result type for CTR decrement intrinsic");
8980     EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
8981                                  N->getValueType(0));
8982     SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
8983     SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
8984                                  N->getOperand(1));
8985
8986     Results.push_back(NewInt);
8987     Results.push_back(NewInt.getValue(1));
8988     break;
8989   }
8990   case ISD::VAARG: {
8991     if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
8992       return;
8993
8994     EVT VT = N->getValueType(0);
8995
8996     if (VT == MVT::i64) {
8997       SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
8998
8999       Results.push_back(NewNode);
9000       Results.push_back(NewNode.getValue(1));
9001     }
9002     return;
9003   }
9004   case ISD::FP_ROUND_INREG: {
9005     assert(N->getValueType(0) == MVT::ppcf128);
9006     assert(N->getOperand(0).getValueType() == MVT::ppcf128);
9007     SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
9008                              MVT::f64, N->getOperand(0),
9009                              DAG.getIntPtrConstant(0, dl));
9010     SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
9011                              MVT::f64, N->getOperand(0),
9012                              DAG.getIntPtrConstant(1, dl));
9013
9014     // Add the two halves of the long double in round-to-zero mode.
9015     SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
9016
9017     // We know the low half is about to be thrown away, so just use something
9018     // convenient.
9019     Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
9020                                 FPreg, FPreg));
9021     return;
9022   }
9023   case ISD::FP_TO_SINT:
9024   case ISD::FP_TO_UINT:
9025     // LowerFP_TO_INT() can only handle f32 and f64.
9026     if (N->getOperand(0).getValueType() == MVT::ppcf128)
9027       return;
9028     Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
9029     return;
9030   }
9031 }
9032
9033 //===----------------------------------------------------------------------===//
9034 //  Other Lowering Code
9035 //===----------------------------------------------------------------------===//
9036
9037 static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
9038   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
9039   Function *Func = Intrinsic::getDeclaration(M, Id);
9040   return Builder.CreateCall(Func, {});
9041 }
9042
9043 // The mappings for emitLeading/TrailingFence is taken from
9044 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
9045 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
9046                                                  Instruction *Inst,
9047                                                  AtomicOrdering Ord) const {
9048   if (Ord == AtomicOrdering::SequentiallyConsistent)
9049     return callIntrinsic(Builder, Intrinsic::ppc_sync);
9050   if (isReleaseOrStronger(Ord))
9051     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
9052   return nullptr;
9053 }
9054
9055 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
9056                                                   Instruction *Inst,
9057                                                   AtomicOrdering Ord) const {
9058   if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
9059     // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
9060     // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
9061     // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
9062     if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
9063       return Builder.CreateCall(
9064           Intrinsic::getDeclaration(
9065               Builder.GetInsertBlock()->getParent()->getParent(),
9066               Intrinsic::ppc_cfence, {Inst->getType()}),
9067           {Inst});
9068     // FIXME: Can use isync for rmw operation.
9069     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
9070   }
9071   return nullptr;
9072 }
9073
9074 MachineBasicBlock *
9075 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
9076                                     unsigned AtomicSize,
9077                                     unsigned BinOpcode,
9078                                     unsigned CmpOpcode,
9079                                     unsigned CmpPred) const {
9080   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
9081   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
9082
9083   auto LoadMnemonic = PPC::LDARX;
9084   auto StoreMnemonic = PPC::STDCX;
9085   switch (AtomicSize) {
9086   default:
9087     llvm_unreachable("Unexpected size of atomic entity");
9088   case 1:
9089     LoadMnemonic = PPC::LBARX;
9090     StoreMnemonic = PPC::STBCX;
9091     assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
9092     break;
9093   case 2:
9094     LoadMnemonic = PPC::LHARX;
9095     StoreMnemonic = PPC::STHCX;
9096     assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
9097     break;
9098   case 4:
9099     LoadMnemonic = PPC::LWARX;
9100     StoreMnemonic = PPC::STWCX;
9101     break;
9102   case 8:
9103     LoadMnemonic = PPC::LDARX;
9104     StoreMnemonic = PPC::STDCX;
9105     break;
9106   }
9107
9108   const BasicBlock *LLVM_BB = BB->getBasicBlock();
9109   MachineFunction *F = BB->getParent();
9110   MachineFunction::iterator It = ++BB->getIterator();
9111
9112   unsigned dest = MI.getOperand(0).getReg();
9113   unsigned ptrA = MI.getOperand(1).getReg();
9114   unsigned ptrB = MI.getOperand(2).getReg();
9115   unsigned incr = MI.getOperand(3).getReg();
9116   DebugLoc dl = MI.getDebugLoc();
9117
9118   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
9119   MachineBasicBlock *loop2MBB =
9120     CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
9121   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
9122   F->insert(It, loopMBB);
9123   if (CmpOpcode)
9124     F->insert(It, loop2MBB);
9125   F->insert(It, exitMBB);
9126   exitMBB->splice(exitMBB->begin(), BB,
9127                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
9128   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
9129
9130   MachineRegisterInfo &RegInfo = F->getRegInfo();
9131   unsigned TmpReg = (!BinOpcode) ? incr :
9132     RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
9133                                            : &PPC::GPRCRegClass);
9134
9135   //  thisMBB:
9136   //   ...
9137   //   fallthrough --> loopMBB
9138   BB->addSuccessor(loopMBB);
9139
9140   //  loopMBB:
9141   //   l[wd]arx dest, ptr
9142   //   add r0, dest, incr
9143   //   st[wd]cx. r0, ptr
9144   //   bne- loopMBB
9145   //   fallthrough --> exitMBB
9146
9147   // For max/min...
9148   //  loopMBB:
9149   //   l[wd]arx dest, ptr
9150   //   cmpl?[wd] incr, dest
9151   //   bgt exitMBB
9152   //  loop2MBB:
9153   //   st[wd]cx. dest, ptr
9154   //   bne- loopMBB
9155   //   fallthrough --> exitMBB
9156
9157   BB = loopMBB;
9158   BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
9159     .addReg(ptrA).addReg(ptrB);
9160   if (BinOpcode)
9161     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
9162   if (CmpOpcode) {
9163     // Signed comparisons of byte or halfword values must be sign-extended.
9164     if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
9165       unsigned ExtReg =  RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
9166       BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
9167               ExtReg).addReg(dest);
9168       BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
9169         .addReg(incr).addReg(ExtReg);
9170     } else
9171       BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
9172         .addReg(incr).addReg(dest);
9173
9174     BuildMI(BB, dl, TII->get(PPC::BCC))
9175       .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
9176     BB->addSuccessor(loop2MBB);
9177     BB->addSuccessor(exitMBB);
9178     BB = loop2MBB;
9179   }
9180   BuildMI(BB, dl, TII->get(StoreMnemonic))
9181     .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
9182   BuildMI(BB, dl, TII->get(PPC::BCC))
9183     .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
9184   BB->addSuccessor(loopMBB);
9185   BB->addSuccessor(exitMBB);
9186
9187   //  exitMBB:
9188   //   ...
9189   BB = exitMBB;
9190   return BB;
9191 }
9192
9193 MachineBasicBlock *
9194 PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
9195                                             MachineBasicBlock *BB,
9196                                             bool is8bit, // operation
9197                                             unsigned BinOpcode,
9198                                             unsigned CmpOpcode,
9199                                             unsigned CmpPred) const {
9200   // If we support part-word atomic mnemonics, just use them
9201   if (Subtarget.hasPartwordAtomics())
9202     return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode,
9203                             CmpOpcode, CmpPred);
9204
9205   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
9206   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
9207   // In 64 bit mode we have to use 64 bits for addresses, even though the
9208   // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
9209   // registers without caring whether they're 32 or 64, but here we're
9210   // doing actual arithmetic on the addresses.
9211   bool is64bit = Subtarget.isPPC64();
9212   bool isLittleEndian = Subtarget.isLittleEndian();
9213   unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
9214
9215   const BasicBlock *LLVM_BB = BB->getBasicBlock();
9216   MachineFunction *F = BB->getParent();
9217   MachineFunction::iterator It = ++BB->getIterator();
9218
9219   unsigned dest = MI.getOperand(0).getReg();
9220   unsigned ptrA = MI.getOperand(1).getReg();
9221   unsigned ptrB = MI.getOperand(2).getReg();
9222   unsigned incr = MI.getOperand(3).getReg();
9223   DebugLoc dl = MI.getDebugLoc();
9224
9225   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
9226   MachineBasicBlock *loop2MBB =
9227     CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
9228   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
9229   F->insert(It, loopMBB);
9230   if (CmpOpcode)
9231     F->insert(It, loop2MBB);
9232   F->insert(It, exitMBB);
9233   exitMBB->splice(exitMBB->begin(), BB,
9234                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
9235   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
9236
9237   MachineRegisterInfo &RegInfo = F->getRegInfo();
9238   const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
9239                                           : &PPC::GPRCRegClass;
9240   unsigned PtrReg = RegInfo.createVirtualRegister(RC);
9241   unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
9242   unsigned ShiftReg =
9243     isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
9244   unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
9245   unsigned MaskReg = RegInfo.createVirtualRegister(RC);
9246   unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
9247   unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
9248   unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
9249   unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
9250   unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
9251   unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
9252   unsigned Ptr1Reg;
9253   unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);
9254
9255   //  thisMBB:
9256   //   ...
9257   //   fallthrough --> loopMBB
9258   BB->addSuccessor(loopMBB);
9259
9260   // The 4-byte load must be aligned, while a char or short may be
9261   // anywhere in the word.  Hence all this nasty bookkeeping code.
9262   //   add ptr1, ptrA, ptrB [copy if ptrA==0]
9263   //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
9264   //   xori shift, shift1, 24 [16]
9265   //   rlwinm ptr, ptr1, 0, 0, 29
9266   //   slw incr2, incr, shift
9267   //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
9268   //   slw mask, mask2, shift
9269   //  loopMBB:
9270   //   lwarx tmpDest, ptr
9271   //   add tmp, tmpDest, incr2
9272   //   andc tmp2, tmpDest, mask
9273   //   and tmp3, tmp, mask
9274   //   or tmp4, tmp3, tmp2
9275   //   stwcx. tmp4, ptr
9276   //   bne- loopMBB
9277   //   fallthrough --> exitMBB
9278   //   srw dest, tmpDest, shift
9279   if (ptrA != ZeroReg) {
9280     Ptr1Reg = RegInfo.createVirtualRegister(RC);
9281     BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
9282       .addReg(ptrA).addReg(ptrB);
9283   } else {
9284     Ptr1Reg = ptrB;
9285   }
9286   BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
9287       .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
9288   if (!isLittleEndian)
9289     BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
9290         .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
9291   if (is64bit)
9292     BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
9293       .addReg(Ptr1Reg).addImm(0).addImm(61);
9294   else
9295     BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
9296       .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
9297   BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
9298       .addReg(incr).addReg(ShiftReg);
9299   if (is8bit)
9300     BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
9301   else {
9302     BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
9303     BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535);
9304   }
9305   BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
9306       .addReg(Mask2Reg).addReg(ShiftReg);
9307
9308   BB = loopMBB;
9309   BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
9310     .addReg(ZeroReg).addReg(PtrReg);
9311   if (BinOpcode)
9312     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
9313       .addReg(Incr2Reg).addReg(TmpDestReg);
9314   BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
9315     .addReg(TmpDestReg).addReg(MaskReg);
9316   BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
9317     .addReg(TmpReg).addReg(MaskReg);
9318   if (CmpOpcode) {
9319     // For unsigned comparisons, we can directly compare the shifted values.
9320     // For signed comparisons we shift and sign extend.
9321     unsigned SReg = RegInfo.createVirtualRegister(RC);
9322     BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), SReg)
9323       .addReg(TmpDestReg).addReg(MaskReg);
9324     unsigned ValueReg = SReg;
9325     unsigned CmpReg = Incr2Reg;
9326     if (CmpOpcode == PPC::CMPW) {
9327       ValueReg = RegInfo.createVirtualRegister(RC);
9328       BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
9329         .addReg(SReg).addReg(ShiftReg);
9330       unsigned ValueSReg = RegInfo.createVirtualRegister(RC);
9331       BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
9332         .addReg(ValueReg);
9333       ValueReg = ValueSReg;
9334       CmpReg = incr;
9335     }
9336     BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
9337       .addReg(CmpReg).addReg(ValueReg);
9338     BuildMI(BB, dl, TII->get(PPC::BCC))
9339       .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
9340     BB->addSuccessor(loop2MBB);
9341     BB->addSuccessor(exitMBB);
9342     BB = loop2MBB;
9343   }
9344   BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
9345     .addReg(Tmp3Reg).addReg(Tmp2Reg);
9346   BuildMI(BB, dl, TII->get(PPC::STWCX))
9347     .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
9348   BuildMI(BB, dl, TII->get(PPC::BCC))
9349     .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
9350   BB->addSuccessor(loopMBB);
9351   BB->addSuccessor(exitMBB);
9352
9353   //  exitMBB:
9354   //   ...
9355   BB = exitMBB;
9356   BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
9357     .addReg(ShiftReg);
9358   return BB;
9359 }
9360
9361 llvm::MachineBasicBlock *
9362 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
9363                                     MachineBasicBlock *MBB) const {
9364   DebugLoc DL = MI.getDebugLoc();
9365   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
9366   const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
9367
9368   MachineFunction *MF = MBB->getParent();
9369   MachineRegisterInfo &MRI = MF->getRegInfo();
9370
9371   const BasicBlock *BB = MBB->getBasicBlock();
9372   MachineFunction::iterator I = ++MBB->getIterator();
9373
9374   // Memory Reference
9375   MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
9376   MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
9377
9378   unsigned DstReg = MI.getOperand(0).getReg();
9379   const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
9380   assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
9381   unsigned mainDstReg = MRI.createVirtualRegister(RC);
9382   unsigned restoreDstReg = MRI.createVirtualRegister(RC);
9383
9384   MVT PVT = getPointerTy(MF->getDataLayout());
9385   assert((PVT == MVT::i64 || PVT == MVT::i32) &&
9386          "Invalid Pointer Size!");
9387   // For v = setjmp(buf), we generate
9388   //
9389   // thisMBB:
9390   //  SjLjSetup mainMBB
9391   //  bl mainMBB
9392   //  v_restore = 1
9393   //  b sinkMBB
9394   //
9395   // mainMBB:
9396   //  buf[LabelOffset] = LR
9397   //  v_main = 0
9398   //
9399   // sinkMBB:
9400   //  v = phi(main, restore)
9401   //
9402
9403   MachineBasicBlock *thisMBB = MBB;
9404   MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
9405   MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
9406   MF->insert(I, mainMBB);
9407   MF->insert(I, sinkMBB);
9408
9409   MachineInstrBuilder MIB;
9410
9411   // Transfer the remainder of BB and its successor edges to sinkMBB.
9412   sinkMBB->splice(sinkMBB->begin(), MBB,
9413                   std::next(MachineBasicBlock::iterator(MI)), MBB->end());
9414   sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
9415
9416   // Note that the structure of the jmp_buf used here is not compatible
9417   // with that used by libc, and is not designed to be. Specifically, it
9418   // stores only those 'reserved' registers that LLVM does not otherwise
9419   // understand how to spill. Also, by convention, by the time this
9420   // intrinsic is called, Clang has already stored the frame address in the
9421   // first slot of the buffer and stack address in the third. Following the
9422   // X86 target code, we'll store the jump address in the second slot. We also
9423   // need to save the TOC pointer (R2) to handle jumps between shared
9424   // libraries, and that will be stored in the fourth slot. The thread
9425   // identifier (R13) is not affected.
9426
9427   // thisMBB:
9428   const int64_t LabelOffset = 1 * PVT.getStoreSize();
9429   const int64_t TOCOffset   = 3 * PVT.getStoreSize();
9430   const int64_t BPOffset    = 4 * PVT.getStoreSize();
9431
9432   // Prepare IP either in reg.
9433   const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
9434   unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
9435   unsigned BufReg = MI.getOperand(1).getReg();
9436
9437   if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
9438     setUsesTOCBasePtr(*MBB->getParent());
9439     MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
9440             .addReg(PPC::X2)
9441             .addImm(TOCOffset)
9442             .addReg(BufReg);
9443     MIB.setMemRefs(MMOBegin, MMOEnd);
9444   }
9445
9446   // Naked functions never have a base pointer, and so we use r1. For all
9447   // other functions, this decision must be delayed until during PEI.
9448   unsigned BaseReg;
9449   if (MF->getFunction()->hasFnAttribute(Attribute::Naked))
9450     BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
9451   else
9452     BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
9453
9454   MIB = BuildMI(*thisMBB, MI, DL,
9455                 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
9456             .addReg(BaseReg)
9457             .addImm(BPOffset)
9458             .addReg(BufReg);
9459   MIB.setMemRefs(MMOBegin, MMOEnd);
9460
9461   // Setup
9462   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
9463   MIB.addRegMask(TRI->getNoPreservedMask());
9464
9465   BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
9466
9467   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
9468           .addMBB(mainMBB);
9469   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
9470
9471   thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
9472   thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
9473
9474   // mainMBB:
9475   //  mainDstReg = 0
9476   MIB =
9477       BuildMI(mainMBB, DL,
9478               TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
9479
9480   // Store IP
9481   if (Subtarget.isPPC64()) {
9482     MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
9483             .addReg(LabelReg)
9484             .addImm(LabelOffset)
9485             .addReg(BufReg);
9486   } else {
9487     MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
9488             .addReg(LabelReg)
9489             .addImm(LabelOffset)
9490             .addReg(BufReg);
9491   }
9492
9493   MIB.setMemRefs(MMOBegin, MMOEnd);
9494
9495   BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
9496   mainMBB->addSuccessor(sinkMBB);
9497
9498   // sinkMBB:
9499   BuildMI(*sinkMBB, sinkMBB->begin(), DL,
9500           TII->get(PPC::PHI), DstReg)
9501     .addReg(mainDstReg).addMBB(mainMBB)
9502     .addReg(restoreDstReg).addMBB(thisMBB);
9503
9504   MI.eraseFromParent();
9505   return sinkMBB;
9506 }
9507
9508 MachineBasicBlock *
9509 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
9510                                      MachineBasicBlock *MBB) const {
9511   DebugLoc DL = MI.getDebugLoc();
9512   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
9513
9514   MachineFunction *MF = MBB->getParent();
9515   MachineRegisterInfo &MRI = MF->getRegInfo();
9516
9517   // Memory Reference
9518   MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
9519   MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
9520
9521   MVT PVT = getPointerTy(MF->getDataLayout());
9522   assert((PVT == MVT::i64 || PVT == MVT::i32) &&
9523          "Invalid Pointer Size!");
9524
9525   const TargetRegisterClass *RC =
9526     (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
9527   unsigned Tmp = MRI.createVirtualRegister(RC);
9528   // Since FP is only updated here but NOT referenced, it's treated as GPR.
9529   unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
9530   unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
9531   unsigned BP =
9532       (PVT == MVT::i64)
9533           ? PPC::X30
9534           : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
9535                                                               : PPC::R30);
9536
9537   MachineInstrBuilder MIB;
9538
9539   const int64_t LabelOffset = 1 * PVT.getStoreSize();
9540   const int64_t SPOffset    = 2 * PVT.getStoreSize();
9541   const int64_t TOCOffset   = 3 * PVT.getStoreSize();
9542   const int64_t BPOffset    = 4 * PVT.getStoreSize();
9543
9544   unsigned BufReg = MI.getOperand(0).getReg();
9545
9546   // Reload FP (the jumped-to function may not have had a
9547   // frame pointer, and if so, then its r31 will be restored
9548   // as necessary).
9549   if (PVT == MVT::i64) {
9550     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
9551             .addImm(0)
9552             .addReg(BufReg);
9553   } else {
9554     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
9555             .addImm(0)
9556             .addReg(BufReg);
9557   }
9558   MIB.setMemRefs(MMOBegin, MMOEnd);
9559
9560   // Reload IP
9561   if (PVT == MVT::i64) {
9562     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
9563             .addImm(LabelOffset)
9564             .addReg(BufReg);
9565   } else {
9566     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
9567             .addImm(LabelOffset)
9568             .addReg(BufReg);
9569   }
9570   MIB.setMemRefs(MMOBegin, MMOEnd);
9571
9572   // Reload SP
9573   if (PVT == MVT::i64) {
9574     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
9575             .addImm(SPOffset)
9576             .addReg(BufReg);
9577   } else {
9578     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
9579             .addImm(SPOffset)
9580             .addReg(BufReg);
9581   }
9582   MIB.setMemRefs(MMOBegin, MMOEnd);
9583
9584   // Reload BP
9585   if (PVT == MVT::i64) {
9586     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
9587             .addImm(BPOffset)
9588             .addReg(BufReg);
9589   } else {
9590     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
9591             .addImm(BPOffset)
9592             .addReg(BufReg);
9593   }
9594   MIB.setMemRefs(MMOBegin, MMOEnd);
9595
9596   // Reload TOC
9597   if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
9598     setUsesTOCBasePtr(*MBB->getParent());
9599     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
9600             .addImm(TOCOffset)
9601             .addReg(BufReg);
9602
9603     MIB.setMemRefs(MMOBegin, MMOEnd);
9604   }
9605
9606   // Jump
9607   BuildMI(*MBB, MI, DL,
9608           TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
9609   BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
9610
9611   MI.eraseFromParent();
9612   return MBB;
9613 }
9614
9615 MachineBasicBlock *
9616 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
9617                                                MachineBasicBlock *BB) const {
9618   if (MI.getOpcode() == TargetOpcode::STACKMAP ||
9619       MI.getOpcode() == TargetOpcode::PATCHPOINT) {
9620     if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() &&
9621         MI.getOpcode() == TargetOpcode::PATCHPOINT) {
9622       // Call lowering should have added an r2 operand to indicate a dependence
9623       // on the TOC base pointer value. It can't however, because there is no
9624       // way to mark the dependence as implicit there, and so the stackmap code
9625       // will confuse it with a regular operand. Instead, add the dependence
9626       // here.
9627       setUsesTOCBasePtr(*BB->getParent());
9628       MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
9629     }
9630
9631     return emitPatchPoint(MI, BB);
9632   }
9633
9634   if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
9635       MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
9636     return emitEHSjLjSetJmp(MI, BB);
9637   } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
9638              MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
9639     return emitEHSjLjLongJmp(MI, BB);
9640   }
9641
9642   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
9643
9644   // To "insert" these instructions we actually have to insert their
9645   // control-flow patterns.
9646   const BasicBlock *LLVM_BB = BB->getBasicBlock();
9647   MachineFunction::iterator It = ++BB->getIterator();
9648
9649   MachineFunction *F = BB->getParent();
9650
9651   if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
9652        MI.getOpcode() == PPC::SELECT_CC_I8 ||
9653        MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8) {
9654     SmallVector<MachineOperand, 2> Cond;
9655     if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
9656         MI.getOpcode() == PPC::SELECT_CC_I8)
9657       Cond.push_back(MI.getOperand(4));
9658     else
9659       Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
9660     Cond.push_back(MI.getOperand(1));
9661
9662     DebugLoc dl = MI.getDebugLoc();
9663     TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
9664                       MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
9665   } else if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
9666              MI.getOpcode() == PPC::SELECT_CC_I8 ||
9667              MI.getOpcode() == PPC::SELECT_CC_F4 ||
9668              MI.getOpcode() == PPC::SELECT_CC_F8 ||
9669              MI.getOpcode() == PPC::SELECT_CC_QFRC ||
9670              MI.getOpcode() == PPC::SELECT_CC_QSRC ||
9671              MI.getOpcode() == PPC::SELECT_CC_QBRC ||
9672              MI.getOpcode() == PPC::SELECT_CC_VRRC ||
9673              MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
9674              MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
9675              MI.getOpcode() == PPC::SELECT_CC_VSRC ||
9676              MI.getOpcode() == PPC::SELECT_I4 ||
9677              MI.getOpcode() == PPC::SELECT_I8 ||
9678              MI.getOpcode() == PPC::SELECT_F4 ||
9679              MI.getOpcode() == PPC::SELECT_F8 ||
9680              MI.getOpcode() == PPC::SELECT_QFRC ||
9681              MI.getOpcode() == PPC::SELECT_QSRC ||
9682              MI.getOpcode() == PPC::SELECT_QBRC ||
9683              MI.getOpcode() == PPC::SELECT_VRRC ||
9684              MI.getOpcode() == PPC::SELECT_VSFRC ||
9685              MI.getOpcode() == PPC::SELECT_VSSRC ||
9686              MI.getOpcode() == PPC::SELECT_VSRC) {
9687     // The incoming instruction knows the destination vreg to set, the
9688     // condition code register to branch on, the true/false values to
9689     // select between, and a branch opcode to use.
9690
9691     //  thisMBB:
9692     //  ...
9693     //   TrueVal = ...
9694     //   cmpTY ccX, r1, r2
9695     //   bCC copy1MBB
9696     //   fallthrough --> copy0MBB
9697     MachineBasicBlock *thisMBB = BB;
9698     MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
9699     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
9700     DebugLoc dl = MI.getDebugLoc();
9701     F->insert(It, copy0MBB);
9702     F->insert(It, sinkMBB);
9703
9704     // Transfer the remainder of BB and its successor edges to sinkMBB.
9705     sinkMBB->splice(sinkMBB->begin(), BB,
9706                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
9707     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
9708
9709     // Next, add the true and fallthrough blocks as its successors.
9710     BB->addSuccessor(copy0MBB);
9711     BB->addSuccessor(sinkMBB);
9712
9713     if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
9714         MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
9715         MI.getOpcode() == PPC::SELECT_QFRC ||
9716         MI.getOpcode() == PPC::SELECT_QSRC ||
9717         MI.getOpcode() == PPC::SELECT_QBRC ||
9718         MI.getOpcode() == PPC::SELECT_VRRC ||
9719         MI.getOpcode() == PPC::SELECT_VSFRC ||
9720         MI.getOpcode() == PPC::SELECT_VSSRC ||
9721         MI.getOpcode() == PPC::SELECT_VSRC) {
9722       BuildMI(BB, dl, TII->get(PPC::BC))
9723           .addReg(MI.getOperand(1).getReg())
9724           .addMBB(sinkMBB);
9725     } else {
9726       unsigned SelectPred = MI.getOperand(4).getImm();
9727       BuildMI(BB, dl, TII->get(PPC::BCC))
9728           .addImm(SelectPred)
9729           .addReg(MI.getOperand(1).getReg())
9730           .addMBB(sinkMBB);
9731     }
9732
9733     //  copy0MBB:
9734     //   %FalseValue = ...
9735     //   # fallthrough to sinkMBB
9736     BB = copy0MBB;
9737
9738     // Update machine-CFG edges
9739     BB->addSuccessor(sinkMBB);
9740
9741     //  sinkMBB:
9742     //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
9743     //  ...
9744     BB = sinkMBB;
9745     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
9746         .addReg(MI.getOperand(3).getReg())
9747         .addMBB(copy0MBB)
9748         .addReg(MI.getOperand(2).getReg())
9749         .addMBB(thisMBB);
9750   } else if (MI.getOpcode() == PPC::ReadTB) {
9751     // To read the 64-bit time-base register on a 32-bit target, we read the
9752     // two halves. Should the counter have wrapped while it was being read, we
9753     // need to try again.
9754     // ...
9755     // readLoop:
9756     // mfspr Rx,TBU # load from TBU
9757     // mfspr Ry,TB  # load from TB
9758     // mfspr Rz,TBU # load from TBU
9759     // cmpw crX,Rx,Rz # check if 'old'='new'
9760     // bne readLoop   # branch if they're not equal
9761     // ...
9762
9763     MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
9764     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
9765     DebugLoc dl = MI.getDebugLoc();
9766     F->insert(It, readMBB);
9767     F->insert(It, sinkMBB);
9768
9769     // Transfer the remainder of BB and its successor edges to sinkMBB.
9770     sinkMBB->splice(sinkMBB->begin(), BB,
9771                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
9772     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
9773
9774     BB->addSuccessor(readMBB);
9775     BB = readMBB;
9776
9777     MachineRegisterInfo &RegInfo = F->getRegInfo();
9778     unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
9779     unsigned LoReg = MI.getOperand(0).getReg();
9780     unsigned HiReg = MI.getOperand(1).getReg();
9781
9782     BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
9783     BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
9784     BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
9785
9786     unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
9787
9788     BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
9789       .addReg(HiReg).addReg(ReadAgainReg);
9790     BuildMI(BB, dl, TII->get(PPC::BCC))
9791       .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB);
9792
9793     BB->addSuccessor(readMBB);
9794     BB->addSuccessor(sinkMBB);
9795   } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
9796     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
9797   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
9798     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
9799   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
9800     BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
9801   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
9802     BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
9803
9804   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
9805     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
9806   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
9807     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
9808   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
9809     BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
9810   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
9811     BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
9812
9813   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
9814     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
9815   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
9816     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
9817   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
9818     BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
9819   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
9820     BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
9821
9822   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
9823     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
9824   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
9825     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
9826   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
9827     BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
9828   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
9829     BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
9830
9831   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
9832     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
9833   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
9834     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
9835   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
9836     BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
9837   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
9838     BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
9839
9840   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
9841     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
9842   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
9843     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
9844   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
9845     BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
9846   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
9847     BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
9848
9849   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
9850     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
9851   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
9852     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
9853   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
9854     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
9855   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
9856     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
9857
9858   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
9859     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
9860   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
9861     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
9862   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
9863     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
9864   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
9865     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
9866
9867   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
9868     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
9869   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
9870     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
9871   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
9872     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
9873   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
9874     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
9875
9876   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
9877     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
9878   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
9879     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
9880   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
9881     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
9882   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
9883     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
9884
9885   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
9886     BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
9887   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
9888     BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
9889   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
9890     BB = EmitAtomicBinary(MI, BB, 4, 0);
9891   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
9892     BB = EmitAtomicBinary(MI, BB, 8, 0);
9893   else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
9894            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
9895            (Subtarget.hasPartwordAtomics() &&
9896             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
9897            (Subtarget.hasPartwordAtomics() &&
9898             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
9899     bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
9900
9901     auto LoadMnemonic = PPC::LDARX;
9902     auto StoreMnemonic = PPC::STDCX;
9903     switch (MI.getOpcode()) {
9904     default:
9905       llvm_unreachable("Compare and swap of unknown size");
9906     case PPC::ATOMIC_CMP_SWAP_I8:
9907       LoadMnemonic = PPC::LBARX;
9908       StoreMnemonic = PPC::STBCX;
9909       assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
9910       break;
9911     case PPC::ATOMIC_CMP_SWAP_I16:
9912       LoadMnemonic = PPC::LHARX;
9913       StoreMnemonic = PPC::STHCX;
9914       assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
9915       break;
9916     case PPC::ATOMIC_CMP_SWAP_I32:
9917       LoadMnemonic = PPC::LWARX;
9918       StoreMnemonic = PPC::STWCX;
9919       break;
9920     case PPC::ATOMIC_CMP_SWAP_I64:
9921       LoadMnemonic = PPC::LDARX;
9922       StoreMnemonic = PPC::STDCX;
9923       break;
9924     }
9925     unsigned dest = MI.getOperand(0).getReg();
9926     unsigned ptrA = MI.getOperand(1).getReg();
9927     unsigned ptrB = MI.getOperand(2).getReg();
9928     unsigned oldval = MI.getOperand(3).getReg();
9929     unsigned newval = MI.getOperand(4).getReg();
9930     DebugLoc dl = MI.getDebugLoc();
9931
9932     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
9933     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
9934     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
9935     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
9936     F->insert(It, loop1MBB);
9937     F->insert(It, loop2MBB);
9938     F->insert(It, midMBB);
9939     F->insert(It, exitMBB);
9940     exitMBB->splice(exitMBB->begin(), BB,
9941                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
9942     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
9943
9944     //  thisMBB:
9945     //   ...
9946     //   fallthrough --> loopMBB
9947     BB->addSuccessor(loop1MBB);
9948
9949     // loop1MBB:
9950     //   l[bhwd]arx dest, ptr
9951     //   cmp[wd] dest, oldval
9952     //   bne- midMBB
9953     // loop2MBB:
9954     //   st[bhwd]cx. newval, ptr
9955     //   bne- loopMBB
9956     //   b exitBB
9957     // midMBB:
9958     //   st[bhwd]cx. dest, ptr
9959     // exitBB:
9960     BB = loop1MBB;
9961     BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
9962       .addReg(ptrA).addReg(ptrB);
9963     BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
9964       .addReg(oldval).addReg(dest);
9965     BuildMI(BB, dl, TII->get(PPC::BCC))
9966       .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
9967     BB->addSuccessor(loop2MBB);
9968     BB->addSuccessor(midMBB);
9969
9970     BB = loop2MBB;
9971     BuildMI(BB, dl, TII->get(StoreMnemonic))
9972       .addReg(newval).addReg(ptrA).addReg(ptrB);
9973     BuildMI(BB, dl, TII->get(PPC::BCC))
9974       .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
9975     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
9976     BB->addSuccessor(loop1MBB);
9977     BB->addSuccessor(exitMBB);
9978
9979     BB = midMBB;
9980     BuildMI(BB, dl, TII->get(StoreMnemonic))
9981       .addReg(dest).addReg(ptrA).addReg(ptrB);
9982     BB->addSuccessor(exitMBB);
9983
9984     //  exitMBB:
9985     //   ...
9986     BB = exitMBB;
9987   } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
9988              MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
9989     // We must use 64-bit registers for addresses when targeting 64-bit,
9990     // since we're actually doing arithmetic on them.  Other registers
9991     // can be 32-bit.
9992     bool is64bit = Subtarget.isPPC64();
9993     bool isLittleEndian = Subtarget.isLittleEndian();
9994     bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
9995
9996     unsigned dest = MI.getOperand(0).getReg();
9997     unsigned ptrA = MI.getOperand(1).getReg();
9998     unsigned ptrB = MI.getOperand(2).getReg();
9999     unsigned oldval = MI.getOperand(3).getReg();
10000     unsigned newval = MI.getOperand(4).getReg();
10001     DebugLoc dl = MI.getDebugLoc();
10002
10003     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
10004     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
10005     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
10006     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
10007     F->insert(It, loop1MBB);
10008     F->insert(It, loop2MBB);
10009     F->insert(It, midMBB);
10010     F->insert(It, exitMBB);
10011     exitMBB->splice(exitMBB->begin(), BB,
10012                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
10013     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
10014
10015     MachineRegisterInfo &RegInfo = F->getRegInfo();
10016     const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
10017                                             : &PPC::GPRCRegClass;
10018     unsigned PtrReg = RegInfo.createVirtualRegister(RC);
10019     unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
10020     unsigned ShiftReg =
10021       isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
10022     unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
10023     unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
10024     unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
10025     unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
10026     unsigned MaskReg = RegInfo.createVirtualRegister(RC);
10027     unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
10028     unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
10029     unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
10030     unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
10031     unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
10032     unsigned Ptr1Reg;
10033     unsigned TmpReg = RegInfo.createVirtualRegister(RC);
10034     unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
10035     //  thisMBB:
10036     //   ...
10037     //   fallthrough --> loopMBB
10038     BB->addSuccessor(loop1MBB);
10039
10040     // The 4-byte load must be aligned, while a char or short may be
10041     // anywhere in the word.  Hence all this nasty bookkeeping code.
10042     //   add ptr1, ptrA, ptrB [copy if ptrA==0]
10043     //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
10044     //   xori shift, shift1, 24 [16]
10045     //   rlwinm ptr, ptr1, 0, 0, 29
10046     //   slw newval2, newval, shift
10047     //   slw oldval2, oldval,shift
10048     //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
10049     //   slw mask, mask2, shift
10050     //   and newval3, newval2, mask
10051     //   and oldval3, oldval2, mask
10052     // loop1MBB:
10053     //   lwarx tmpDest, ptr
10054     //   and tmp, tmpDest, mask
10055     //   cmpw tmp, oldval3
10056     //   bne- midMBB
10057     // loop2MBB:
10058     //   andc tmp2, tmpDest, mask
10059     //   or tmp4, tmp2, newval3
10060     //   stwcx. tmp4, ptr
10061     //   bne- loop1MBB
10062     //   b exitBB
10063     // midMBB:
10064     //   stwcx. tmpDest, ptr
10065     // exitBB:
10066     //   srw dest, tmpDest, shift
10067     if (ptrA != ZeroReg) {
10068       Ptr1Reg = RegInfo.createVirtualRegister(RC);
10069       BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
10070         .addReg(ptrA).addReg(ptrB);
10071     } else {
10072       Ptr1Reg = ptrB;
10073     }
10074     BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
10075         .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
10076     if (!isLittleEndian)
10077       BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
10078           .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
10079     if (is64bit)
10080       BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
10081         .addReg(Ptr1Reg).addImm(0).addImm(61);
10082     else
10083       BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
10084         .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
10085     BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
10086         .addReg(newval).addReg(ShiftReg);
10087     BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
10088         .addReg(oldval).addReg(ShiftReg);
10089     if (is8bit)
10090       BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
10091     else {
10092       BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
10093       BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
10094         .addReg(Mask3Reg).addImm(65535);
10095     }
10096     BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
10097         .addReg(Mask2Reg).addReg(ShiftReg);
10098     BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
10099         .addReg(NewVal2Reg).addReg(MaskReg);
10100     BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
10101         .addReg(OldVal2Reg).addReg(MaskReg);
10102
10103     BB = loop1MBB;
10104     BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
10105         .addReg(ZeroReg).addReg(PtrReg);
10106     BuildMI(BB, dl, TII->get(PPC::AND),TmpReg)
10107         .addReg(TmpDestReg).addReg(MaskReg);
10108     BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
10109         .addReg(TmpReg).addReg(OldVal3Reg);
10110     BuildMI(BB, dl, TII->get(PPC::BCC))
10111         .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
10112     BB->addSuccessor(loop2MBB);
10113     BB->addSuccessor(midMBB);
10114
10115     BB = loop2MBB;
10116     BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg)
10117         .addReg(TmpDestReg).addReg(MaskReg);
10118     BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg)
10119         .addReg(Tmp2Reg).addReg(NewVal3Reg);
10120     BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
10121         .addReg(ZeroReg).addReg(PtrReg);
10122     BuildMI(BB, dl, TII->get(PPC::BCC))
10123       .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
10124     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
10125     BB->addSuccessor(loop1MBB);
10126     BB->addSuccessor(exitMBB);
10127
10128     BB = midMBB;
10129     BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
10130       .addReg(ZeroReg).addReg(PtrReg);
10131     BB->addSuccessor(exitMBB);
10132
10133     //  exitMBB:
10134     //   ...
10135     BB = exitMBB;
10136     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
10137       .addReg(ShiftReg);
10138   } else if (MI.getOpcode() == PPC::FADDrtz) {
10139     // This pseudo performs an FADD with rounding mode temporarily forced
10140     // to round-to-zero.  We emit this via custom inserter since the FPSCR
10141     // is not modeled at the SelectionDAG level.
10142     unsigned Dest = MI.getOperand(0).getReg();
10143     unsigned Src1 = MI.getOperand(1).getReg();
10144     unsigned Src2 = MI.getOperand(2).getReg();
10145     DebugLoc dl = MI.getDebugLoc();
10146
10147     MachineRegisterInfo &RegInfo = F->getRegInfo();
10148     unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
10149
10150     // Save FPSCR value.
10151     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
10152
10153     // Set rounding mode to round-to-zero.
10154     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
10155     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
10156
10157     // Perform addition.
10158     BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
10159
10160     // Restore FPSCR value.
10161     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
10162   } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
10163              MI.getOpcode() == PPC::ANDIo_1_GT_BIT ||
10164              MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
10165              MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) {
10166     unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
10167                        MI.getOpcode() == PPC::ANDIo_1_GT_BIT8)
10168                           ? PPC::ANDIo8
10169                           : PPC::ANDIo;
10170     bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
10171                  MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8);
10172
10173     MachineRegisterInfo &RegInfo = F->getRegInfo();
10174     unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ?
10175                                                   &PPC::GPRCRegClass :
10176                                                   &PPC::G8RCRegClass);
10177
10178     DebugLoc dl = MI.getDebugLoc();
10179     BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
10180         .addReg(MI.getOperand(1).getReg())
10181         .addImm(1);
10182     BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
10183             MI.getOperand(0).getReg())
10184         .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
10185   } else if (MI.getOpcode() == PPC::TCHECK_RET) {
10186     DebugLoc Dl = MI.getDebugLoc();
10187     MachineRegisterInfo &RegInfo = F->getRegInfo();
10188     unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
10189     BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
10190     return BB;
10191   } else {
10192     llvm_unreachable("Unexpected instr type to insert");
10193   }
10194
10195   MI.eraseFromParent(); // The pseudo instruction is gone now.
10196   return BB;
10197 }
10198
10199 //===----------------------------------------------------------------------===//
10200 // Target Optimization Hooks
10201 //===----------------------------------------------------------------------===//
10202
10203 static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
10204   // For the estimates, convergence is quadratic, so we essentially double the
10205   // number of digits correct after every iteration. For both FRE and FRSQRTE,
10206   // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
10207   // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
10208   int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
10209   if (VT.getScalarType() == MVT::f64)
10210     RefinementSteps++;
10211   return RefinementSteps;
10212 }
10213
10214 SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
10215                                            int Enabled, int &RefinementSteps,
10216                                            bool &UseOneConstNR,
10217                                            bool Reciprocal) const {
10218   EVT VT = Operand.getValueType();
10219   if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
10220       (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
10221       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
10222       (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
10223       (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
10224       (VT == MVT::v4f64 && Subtarget.hasQPX())) {
10225     if (RefinementSteps == ReciprocalEstimate::Unspecified)
10226       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
10227
10228     UseOneConstNR = true;
10229     return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
10230   }
10231   return SDValue();
10232 }
10233
10234 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
10235                                             int Enabled,
10236                                             int &RefinementSteps) const {
10237   EVT VT = Operand.getValueType();
10238   if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
10239       (VT == MVT::f64 && Subtarget.hasFRE()) ||
10240       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
10241       (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
10242       (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
10243       (VT == MVT::v4f64 && Subtarget.hasQPX())) {
10244     if (RefinementSteps == ReciprocalEstimate::Unspecified)
10245       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
10246     return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
10247   }
10248   return SDValue();
10249 }
10250
10251 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
10252   // Note: This functionality is used only when unsafe-fp-math is enabled, and
10253   // on cores with reciprocal estimates (which are used when unsafe-fp-math is
10254   // enabled for division), this functionality is redundant with the default
10255   // combiner logic (once the division -> reciprocal/multiply transformation
10256   // has taken place). As a result, this matters more for older cores than for
10257   // newer ones.
10258
10259   // Combine multiple FDIVs with the same divisor into multiple FMULs by the
10260   // reciprocal if there are two or more FDIVs (for embedded cores with only
10261   // one FP pipeline) for three or more FDIVs (for generic OOO cores).
10262   switch (Subtarget.getDarwinDirective()) {
10263   default:
10264     return 3;
10265   case PPC::DIR_440:
10266   case PPC::DIR_A2:
10267   case PPC::DIR_E500mc:
10268   case PPC::DIR_E5500:
10269     return 2;
10270   }
10271 }
10272
10273 // isConsecutiveLSLoc needs to work even if all adds have not yet been
10274 // collapsed, and so we need to look through chains of them.
10275 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
10276                                      int64_t& Offset, SelectionDAG &DAG) {
10277   if (DAG.isBaseWithConstantOffset(Loc)) {
10278     Base = Loc.getOperand(0);
10279     Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
10280
10281     // The base might itself be a base plus an offset, and if so, accumulate
10282     // that as well.
10283     getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
10284   }
10285 }
10286
10287 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
10288                             unsigned Bytes, int Dist,
10289                             SelectionDAG &DAG) {
10290   if (VT.getSizeInBits() / 8 != Bytes)
10291     return false;
10292
10293   SDValue BaseLoc = Base->getBasePtr();
10294   if (Loc.getOpcode() == ISD::FrameIndex) {
10295     if (BaseLoc.getOpcode() != ISD::FrameIndex)
10296       return false;
10297     const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10298     int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
10299     int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
10300     int FS  = MFI.getObjectSize(FI);
10301     int BFS = MFI.getObjectSize(BFI);
10302     if (FS != BFS || FS != (int)Bytes) return false;
10303     return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
10304   }
10305
10306   SDValue Base1 = Loc, Base2 = BaseLoc;
10307   int64_t Offset1 = 0, Offset2 = 0;
10308   getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
10309   getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
10310   if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
10311     return true;
10312
10313   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10314   const GlobalValue *GV1 = nullptr;
10315   const GlobalValue *GV2 = nullptr;
10316   Offset1 = 0;
10317   Offset2 = 0;
10318   bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
10319   bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
10320   if (isGA1 && isGA2 && GV1 == GV2)
10321     return Offset1 == (Offset2 + Dist*Bytes);
10322   return false;
10323 }
10324
10325 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
10326 // not enforce equality of the chain operands.
10327 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
10328                             unsigned Bytes, int Dist,
10329                             SelectionDAG &DAG) {
10330   if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
10331     EVT VT = LS->getMemoryVT();
10332     SDValue Loc = LS->getBasePtr();
10333     return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
10334   }
10335
10336   if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
10337     EVT VT;
10338     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
10339     default: return false;
10340     case Intrinsic::ppc_qpx_qvlfd:
10341     case Intrinsic::ppc_qpx_qvlfda:
10342       VT = MVT::v4f64;
10343       break;
10344     case Intrinsic::ppc_qpx_qvlfs:
10345     case Intrinsic::ppc_qpx_qvlfsa:
10346       VT = MVT::v4f32;
10347       break;
10348     case Intrinsic::ppc_qpx_qvlfcd:
10349     case Intrinsic::ppc_qpx_qvlfcda:
10350       VT = MVT::v2f64;
10351       break;
10352     case Intrinsic::ppc_qpx_qvlfcs:
10353     case Intrinsic::ppc_qpx_qvlfcsa:
10354       VT = MVT::v2f32;
10355       break;
10356     case Intrinsic::ppc_qpx_qvlfiwa:
10357     case Intrinsic::ppc_qpx_qvlfiwz:
10358     case Intrinsic::ppc_altivec_lvx:
10359     case Intrinsic::ppc_altivec_lvxl:
10360     case Intrinsic::ppc_vsx_lxvw4x:
10361     case Intrinsic::ppc_vsx_lxvw4x_be:
10362       VT = MVT::v4i32;
10363       break;
10364     case Intrinsic::ppc_vsx_lxvd2x:
10365     case Intrinsic::ppc_vsx_lxvd2x_be:
10366       VT = MVT::v2f64;
10367       break;
10368     case Intrinsic::ppc_altivec_lvebx:
10369       VT = MVT::i8;
10370       break;
10371     case Intrinsic::ppc_altivec_lvehx:
10372       VT = MVT::i16;
10373       break;
10374     case Intrinsic::ppc_altivec_lvewx:
10375       VT = MVT::i32;
10376       break;
10377     }
10378
10379     return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
10380   }
10381
10382   if (N->getOpcode() == ISD::INTRINSIC_VOID) {
10383     EVT VT;
10384     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
10385     default: return false;
10386     case Intrinsic::ppc_qpx_qvstfd:
10387     case Intrinsic::ppc_qpx_qvstfda:
10388       VT = MVT::v4f64;
10389       break;
10390     case Intrinsic::ppc_qpx_qvstfs:
10391     case Intrinsic::ppc_qpx_qvstfsa:
10392       VT = MVT::v4f32;
10393       break;
10394     case Intrinsic::ppc_qpx_qvstfcd:
10395     case Intrinsic::ppc_qpx_qvstfcda:
10396       VT = MVT::v2f64;
10397       break;
10398     case Intrinsic::ppc_qpx_qvstfcs:
10399     case Intrinsic::ppc_qpx_qvstfcsa:
10400       VT = MVT::v2f32;
10401       break;
10402     case Intrinsic::ppc_qpx_qvstfiw:
10403     case Intrinsic::ppc_qpx_qvstfiwa:
10404     case Intrinsic::ppc_altivec_stvx:
10405     case Intrinsic::ppc_altivec_stvxl:
10406     case Intrinsic::ppc_vsx_stxvw4x:
10407       VT = MVT::v4i32;
10408       break;
10409     case Intrinsic::ppc_vsx_stxvd2x:
10410       VT = MVT::v2f64;
10411       break;
10412     case Intrinsic::ppc_vsx_stxvw4x_be:
10413       VT = MVT::v4i32;
10414       break;
10415     case Intrinsic::ppc_vsx_stxvd2x_be:
10416       VT = MVT::v2f64;
10417       break;
10418     case Intrinsic::ppc_altivec_stvebx:
10419       VT = MVT::i8;
10420       break;
10421     case Intrinsic::ppc_altivec_stvehx:
10422       VT = MVT::i16;
10423       break;
10424     case Intrinsic::ppc_altivec_stvewx:
10425       VT = MVT::i32;
10426       break;
10427     }
10428
10429     return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
10430   }
10431
10432   return false;
10433 }
10434
10435 // Return true is there is a nearyby consecutive load to the one provided
10436 // (regardless of alignment). We search up and down the chain, looking though
10437 // token factors and other loads (but nothing else). As a result, a true result
10438 // indicates that it is safe to create a new consecutive load adjacent to the
10439 // load provided.
10440 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
10441   SDValue Chain = LD->getChain();
10442   EVT VT = LD->getMemoryVT();
10443
10444   SmallSet<SDNode *, 16> LoadRoots;
10445   SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
10446   SmallSet<SDNode *, 16> Visited;
10447
10448   // First, search up the chain, branching to follow all token-factor operands.
10449   // If we find a consecutive load, then we're done, otherwise, record all
10450   // nodes just above the top-level loads and token factors.
10451   while (!Queue.empty()) {
10452     SDNode *ChainNext = Queue.pop_back_val();
10453     if (!Visited.insert(ChainNext).second)
10454       continue;
10455
10456     if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
10457       if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
10458         return true;
10459
10460       if (!Visited.count(ChainLD->getChain().getNode()))
10461         Queue.push_back(ChainLD->getChain().getNode());
10462     } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
10463       for (const SDUse &O : ChainNext->ops())
10464         if (!Visited.count(O.getNode()))
10465           Queue.push_back(O.getNode());
10466     } else
10467       LoadRoots.insert(ChainNext);
10468   }
10469
10470   // Second, search down the chain, starting from the top-level nodes recorded
10471   // in the first phase. These top-level nodes are the nodes just above all
10472   // loads and token factors. Starting with their uses, recursively look though
10473   // all loads (just the chain uses) and token factors to find a consecutive
10474   // load.
10475   Visited.clear();
10476   Queue.clear();
10477
10478   for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
10479        IE = LoadRoots.end(); I != IE; ++I) {
10480     Queue.push_back(*I);
10481
10482     while (!Queue.empty()) {
10483       SDNode *LoadRoot = Queue.pop_back_val();
10484       if (!Visited.insert(LoadRoot).second)
10485         continue;
10486
10487       if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
10488         if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
10489           return true;
10490
10491       for (SDNode::use_iterator UI = LoadRoot->use_begin(),
10492            UE = LoadRoot->use_end(); UI != UE; ++UI)
10493         if (((isa<MemSDNode>(*UI) &&
10494             cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
10495             UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
10496           Queue.push_back(*UI);
10497     }
10498   }
10499
10500   return false;
10501 }
10502
10503 /// This function is called when we have proved that a SETCC node can be replaced
10504 /// by subtraction (and other supporting instructions) so that the result of
10505 /// comparison is kept in a GPR instead of CR. This function is purely for
10506 /// codegen purposes and has some flags to guide the codegen process.
10507 static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
10508                                      bool Swap, SDLoc &DL, SelectionDAG &DAG) {
10509   assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
10510
10511   // Zero extend the operands to the largest legal integer. Originally, they
10512   // must be of a strictly smaller size.
10513   auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
10514                          DAG.getConstant(Size, DL, MVT::i32));
10515   auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
10516                          DAG.getConstant(Size, DL, MVT::i32));
10517
10518   // Swap if needed. Depends on the condition code.
10519   if (Swap)
10520     std::swap(Op0, Op1);
10521
10522   // Subtract extended integers.
10523   auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
10524
10525   // Move the sign bit to the least significant position and zero out the rest.
10526   // Now the least significant bit carries the result of original comparison.
10527   auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
10528                              DAG.getConstant(Size - 1, DL, MVT::i32));
10529   auto Final = Shifted;
10530
10531   // Complement the result if needed. Based on the condition code.
10532   if (Complement)
10533     Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
10534                         DAG.getConstant(1, DL, MVT::i64));
10535
10536   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
10537 }
10538
10539 SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
10540                                                   DAGCombinerInfo &DCI) const {
10541   assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
10542
10543   SelectionDAG &DAG = DCI.DAG;
10544   SDLoc DL(N);
10545
10546   // Size of integers being compared has a critical role in the following
10547   // analysis, so we prefer to do this when all types are legal.
10548   if (!DCI.isAfterLegalizeVectorOps())
10549     return SDValue();
10550
10551   // If all users of SETCC extend its value to a legal integer type
10552   // then we replace SETCC with a subtraction
10553   for (SDNode::use_iterator UI = N->use_begin(),
10554        UE = N->use_end(); UI != UE; ++UI) {
10555     if (UI->getOpcode() != ISD::ZERO_EXTEND)
10556       return SDValue();
10557   }
10558
10559   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
10560   auto OpSize = N->getOperand(0).getValueSizeInBits();
10561
10562   unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
10563
10564   if (OpSize < Size) {
10565     switch (CC) {
10566     default: break;
10567     case ISD::SETULT:
10568       return generateEquivalentSub(N, Size, false, false, DL, DAG);
10569     case ISD::SETULE:
10570       return generateEquivalentSub(N, Size, true, true, DL, DAG);
10571     case ISD::SETUGT:
10572       return generateEquivalentSub(N, Size, false, true, DL, DAG);
10573     case ISD::SETUGE:
10574       return generateEquivalentSub(N, Size, true, false, DL, DAG);
10575     }
10576   }
10577
10578   return SDValue();
10579 }
10580
10581 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
10582                                                   DAGCombinerInfo &DCI) const {
10583   SelectionDAG &DAG = DCI.DAG;
10584   SDLoc dl(N);
10585
10586   assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
10587   // If we're tracking CR bits, we need to be careful that we don't have:
10588   //   trunc(binary-ops(zext(x), zext(y)))
10589   // or
10590   //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
10591   // such that we're unnecessarily moving things into GPRs when it would be
10592   // better to keep them in CR bits.
10593
10594   // Note that trunc here can be an actual i1 trunc, or can be the effective
10595   // truncation that comes from a setcc or select_cc.
10596   if (N->getOpcode() == ISD::TRUNCATE &&
10597       N->getValueType(0) != MVT::i1)
10598     return SDValue();
10599
10600   if (N->getOperand(0).getValueType() != MVT::i32 &&
10601       N->getOperand(0).getValueType() != MVT::i64)
10602     return SDValue();
10603
10604   if (N->getOpcode() == ISD::SETCC ||
10605       N->getOpcode() == ISD::SELECT_CC) {
10606     // If we're looking at a comparison, then we need to make sure that the
10607     // high bits (all except for the first) don't matter the result.
10608     ISD::CondCode CC =
10609       cast<CondCodeSDNode>(N->getOperand(
10610         N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
10611     unsigned OpBits = N->getOperand(0).getValueSizeInBits();
10612
10613     if (ISD::isSignedIntSetCC(CC)) {
10614       if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
10615           DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
10616         return SDValue();
10617     } else if (ISD::isUnsignedIntSetCC(CC)) {
10618       if (!DAG.MaskedValueIsZero(N->getOperand(0),
10619                                  APInt::getHighBitsSet(OpBits, OpBits-1)) ||
10620           !DAG.MaskedValueIsZero(N->getOperand(1),
10621                                  APInt::getHighBitsSet(OpBits, OpBits-1)))
10622         return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
10623                                              : SDValue());
10624     } else {
10625       // This is neither a signed nor an unsigned comparison, just make sure
10626       // that the high bits are equal.
10627       KnownBits Op1Known, Op2Known;
10628       DAG.computeKnownBits(N->getOperand(0), Op1Known);
10629       DAG.computeKnownBits(N->getOperand(1), Op2Known);
10630
10631       // We don't really care about what is known about the first bit (if
10632       // anything), so clear it in all masks prior to comparing them.
10633       Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0);
10634       Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0);
10635
10636       if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One)
10637         return SDValue();
10638     }
10639   }
10640
10641   // We now know that the higher-order bits are irrelevant, we just need to
10642   // make sure that all of the intermediate operations are bit operations, and
10643   // all inputs are extensions.
10644   if (N->getOperand(0).getOpcode() != ISD::AND &&
10645       N->getOperand(0).getOpcode() != ISD::OR  &&
10646       N->getOperand(0).getOpcode() != ISD::XOR &&
10647       N->getOperand(0).getOpcode() != ISD::SELECT &&
10648       N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
10649       N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
10650       N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
10651       N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
10652       N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
10653     return SDValue();
10654
10655   if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
10656       N->getOperand(1).getOpcode() != ISD::AND &&
10657       N->getOperand(1).getOpcode() != ISD::OR  &&
10658       N->getOperand(1).getOpcode() != ISD::XOR &&
10659       N->getOperand(1).getOpcode() != ISD::SELECT &&
10660       N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
10661       N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
10662       N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
10663       N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
10664       N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
10665     return SDValue();
10666
10667   SmallVector<SDValue, 4> Inputs;
10668   SmallVector<SDValue, 8> BinOps, PromOps;
10669   SmallPtrSet<SDNode *, 16> Visited;
10670
10671   for (unsigned i = 0; i < 2; ++i) {
10672     if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
10673           N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
10674           N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
10675           N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
10676         isa<ConstantSDNode>(N->getOperand(i)))
10677       Inputs.push_back(N->getOperand(i));
10678     else
10679       BinOps.push_back(N->getOperand(i));
10680
10681     if (N->getOpcode() == ISD::TRUNCATE)
10682       break;
10683   }
10684
10685   // Visit all inputs, collect all binary operations (and, or, xor and
10686   // select) that are all fed by extensions.
10687   while (!BinOps.empty()) {
10688     SDValue BinOp = BinOps.back();
10689     BinOps.pop_back();
10690
10691     if (!Visited.insert(BinOp.getNode()).second)
10692       continue;
10693
10694     PromOps.push_back(BinOp);
10695
10696     for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
10697       // The condition of the select is not promoted.
10698       if (BinOp.getOpcode() == ISD::SELECT && i == 0)
10699         continue;
10700       if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
10701         continue;
10702
10703       if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
10704             BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
10705             BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
10706            BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
10707           isa<ConstantSDNode>(BinOp.getOperand(i))) {
10708         Inputs.push_back(BinOp.getOperand(i));
10709       } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
10710                  BinOp.getOperand(i).getOpcode() == ISD::OR  ||
10711                  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
10712                  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
10713                  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
10714                  BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
10715                  BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
10716                  BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
10717                  BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
10718         BinOps.push_back(BinOp.getOperand(i));
10719       } else {
10720         // We have an input that is not an extension or another binary
10721         // operation; we'll abort this transformation.
10722         return SDValue();
10723       }
10724     }
10725   }
10726
10727   // Make sure that this is a self-contained cluster of operations (which
10728   // is not quite the same thing as saying that everything has only one
10729   // use).
10730   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
10731     if (isa<ConstantSDNode>(Inputs[i]))
10732       continue;
10733
10734     for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
10735                               UE = Inputs[i].getNode()->use_end();
10736          UI != UE; ++UI) {
10737       SDNode *User = *UI;
10738       if (User != N && !Visited.count(User))
10739         return SDValue();
10740
10741       // Make sure that we're not going to promote the non-output-value
10742       // operand(s) or SELECT or SELECT_CC.
10743       // FIXME: Although we could sometimes handle this, and it does occur in
10744       // practice that one of the condition inputs to the select is also one of
10745       // the outputs, we currently can't deal with this.
10746       if (User->getOpcode() == ISD::SELECT) {
10747         if (User->getOperand(0) == Inputs[i])
10748           return SDValue();
10749       } else if (User->getOpcode() == ISD::SELECT_CC) {
10750         if (User->getOperand(0) == Inputs[i] ||
10751             User->getOperand(1) == Inputs[i])
10752           return SDValue();
10753       }
10754     }
10755   }
10756
10757   for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
10758     for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
10759                               UE = PromOps[i].getNode()->use_end();
10760          UI != UE; ++UI) {
10761       SDNode *User = *UI;
10762       if (User != N && !Visited.count(User))
10763         return SDValue();
10764
10765       // Make sure that we're not going to promote the non-output-value
10766       // operand(s) or SELECT or SELECT_CC.
10767       // FIXME: Although we could sometimes handle this, and it does occur in
10768       // practice that one of the condition inputs to the select is also one of
10769       // the outputs, we currently can't deal with this.
10770       if (User->getOpcode() == ISD::SELECT) {
10771         if (User->getOperand(0) == PromOps[i])
10772           return SDValue();
10773       } else if (User->getOpcode() == ISD::SELECT_CC) {
10774         if (User->getOperand(0) == PromOps[i] ||
10775             User->getOperand(1) == PromOps[i])
10776           return SDValue();
10777       }
10778     }
10779   }
10780
10781   // Replace all inputs with the extension operand.
10782   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
10783     // Constants may have users outside the cluster of to-be-promoted nodes,
10784     // and so we need to replace those as we do the promotions.
10785     if (isa<ConstantSDNode>(Inputs[i]))
10786       continue;
10787     else
10788       DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
10789   }
10790
10791   std::list<HandleSDNode> PromOpHandles;
10792   for (auto &PromOp : PromOps)
10793     PromOpHandles.emplace_back(PromOp);
10794
10795   // Replace all operations (these are all the same, but have a different
10796   // (i1) return type). DAG.getNode will validate that the types of
10797   // a binary operator match, so go through the list in reverse so that
10798   // we've likely promoted both operands first. Any intermediate truncations or
10799   // extensions disappear.
10800   while (!PromOpHandles.empty()) {
10801     SDValue PromOp = PromOpHandles.back().getValue();
10802     PromOpHandles.pop_back();
10803
10804     if (PromOp.getOpcode() == ISD::TRUNCATE ||
10805         PromOp.getOpcode() == ISD::SIGN_EXTEND ||
10806         PromOp.getOpcode() == ISD::ZERO_EXTEND ||
10807         PromOp.getOpcode() == ISD::ANY_EXTEND) {
10808       if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
10809           PromOp.getOperand(0).getValueType() != MVT::i1) {
10810         // The operand is not yet ready (see comment below).
10811         PromOpHandles.emplace_front(PromOp);
10812         continue;
10813       }
10814
10815       SDValue RepValue = PromOp.getOperand(0);
10816       if (isa<ConstantSDNode>(RepValue))
10817         RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
10818
10819       DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
10820       continue;
10821     }
10822
10823     unsigned C;
10824     switch (PromOp.getOpcode()) {
10825     default:             C = 0; break;
10826     case ISD::SELECT:    C = 1; break;
10827     case ISD::SELECT_CC: C = 2; break;
10828     }
10829
10830     if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
10831          PromOp.getOperand(C).getValueType() != MVT::i1) ||
10832         (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
10833          PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
10834       // The to-be-promoted operands of this node have not yet been
10835       // promoted (this should be rare because we're going through the
10836       // list backward, but if one of the operands has several users in
10837       // this cluster of to-be-promoted nodes, it is possible).
10838       PromOpHandles.emplace_front(PromOp);
10839       continue;
10840     }
10841
10842     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
10843                                 PromOp.getNode()->op_end());
10844
10845     // If there are any constant inputs, make sure they're replaced now.
10846     for (unsigned i = 0; i < 2; ++i)
10847       if (isa<ConstantSDNode>(Ops[C+i]))
10848         Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
10849
10850     DAG.ReplaceAllUsesOfValueWith(PromOp,
10851       DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
10852   }
10853
10854   // Now we're left with the initial truncation itself.
10855   if (N->getOpcode() == ISD::TRUNCATE)
10856     return N->getOperand(0);
10857
10858   // Otherwise, this is a comparison. The operands to be compared have just
10859   // changed type (to i1), but everything else is the same.
10860   return SDValue(N, 0);
10861 }
10862
10863 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
10864                                                   DAGCombinerInfo &DCI) const {
10865   SelectionDAG &DAG = DCI.DAG;
10866   SDLoc dl(N);
10867
10868   // If we're tracking CR bits, we need to be careful that we don't have:
10869   //   zext(binary-ops(trunc(x), trunc(y)))
10870   // or
10871   //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
10872   // such that we're unnecessarily moving things into CR bits that can more
10873   // efficiently stay in GPRs. Note that if we're not certain that the high
10874   // bits are set as required by the final extension, we still may need to do
10875   // some masking to get the proper behavior.
10876
10877   // This same functionality is important on PPC64 when dealing with
10878   // 32-to-64-bit extensions; these occur often when 32-bit values are used as
10879   // the return values of functions. Because it is so similar, it is handled
10880   // here as well.
10881
10882   if (N->getValueType(0) != MVT::i32 &&
10883       N->getValueType(0) != MVT::i64)
10884     return SDValue();
10885
10886   if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
10887         (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
10888     return SDValue();
10889
10890   if (N->getOperand(0).getOpcode() != ISD::AND &&
10891       N->getOperand(0).getOpcode() != ISD::OR  &&
10892       N->getOperand(0).getOpcode() != ISD::XOR &&
10893       N->getOperand(0).getOpcode() != ISD::SELECT &&
10894       N->getOperand(0).getOpcode() != ISD::SELECT_CC)
10895     return SDValue();
10896
10897   SmallVector<SDValue, 4> Inputs;
10898   SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
10899   SmallPtrSet<SDNode *, 16> Visited;
10900
10901   // Visit all inputs, collect all binary operations (and, or, xor and
10902   // select) that are all fed by truncations.
10903   while (!BinOps.empty()) {
10904     SDValue BinOp = BinOps.back();
10905     BinOps.pop_back();
10906
10907     if (!Visited.insert(BinOp.getNode()).second)
10908       continue;
10909
10910     PromOps.push_back(BinOp);
10911
10912     for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
10913       // The condition of the select is not promoted.
10914       if (BinOp.getOpcode() == ISD::SELECT && i == 0)
10915         continue;
10916       if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
10917         continue;
10918
10919       if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
10920           isa<ConstantSDNode>(BinOp.getOperand(i))) {
10921         Inputs.push_back(BinOp.getOperand(i));
10922       } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
10923                  BinOp.getOperand(i).getOpcode() == ISD::OR  ||
10924                  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
10925                  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
10926                  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
10927         BinOps.push_back(BinOp.getOperand(i));
10928       } else {
10929         // We have an input that is not a truncation or another binary
10930         // operation; we'll abort this transformation.
10931         return SDValue();
10932       }
10933     }
10934   }
10935
10936   // The operands of a select that must be truncated when the select is
10937   // promoted because the operand is actually part of the to-be-promoted set.
10938   DenseMap<SDNode *, EVT> SelectTruncOp[2];
10939
10940   // Make sure that this is a self-contained cluster of operations (which
10941   // is not quite the same thing as saying that everything has only one
10942   // use).
10943   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
10944     if (isa<ConstantSDNode>(Inputs[i]))
10945       continue;
10946
10947     for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
10948                               UE = Inputs[i].getNode()->use_end();
10949          UI != UE; ++UI) {
10950       SDNode *User = *UI;
10951       if (User != N && !Visited.count(User))
10952         return SDValue();
10953
10954       // If we're going to promote the non-output-value operand(s) or SELECT or
10955       // SELECT_CC, record them for truncation.
10956       if (User->getOpcode() == ISD::SELECT) {
10957         if (User->getOperand(0) == Inputs[i])
10958           SelectTruncOp[0].insert(std::make_pair(User,
10959                                     User->getOperand(0).getValueType()));
10960       } else if (User->getOpcode() == ISD::SELECT_CC) {
10961         if (User->getOperand(0) == Inputs[i])
10962           SelectTruncOp[0].insert(std::make_pair(User,
10963                                     User->getOperand(0).getValueType()));
10964         if (User->getOperand(1) == Inputs[i])
10965           SelectTruncOp[1].insert(std::make_pair(User,
10966                                     User->getOperand(1).getValueType()));
10967       }
10968     }
10969   }
10970
10971   for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
10972     for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
10973                               UE = PromOps[i].getNode()->use_end();
10974          UI != UE; ++UI) {
10975       SDNode *User = *UI;
10976       if (User != N && !Visited.count(User))
10977         return SDValue();
10978
10979       // If we're going to promote the non-output-value operand(s) or SELECT or
10980       // SELECT_CC, record them for truncation.
10981       if (User->getOpcode() == ISD::SELECT) {
10982         if (User->getOperand(0) == PromOps[i])
10983           SelectTruncOp[0].insert(std::make_pair(User,
10984                                     User->getOperand(0).getValueType()));
10985       } else if (User->getOpcode() == ISD::SELECT_CC) {
10986         if (User->getOperand(0) == PromOps[i])
10987           SelectTruncOp[0].insert(std::make_pair(User,
10988                                     User->getOperand(0).getValueType()));
10989         if (User->getOperand(1) == PromOps[i])
10990           SelectTruncOp[1].insert(std::make_pair(User,
10991                                     User->getOperand(1).getValueType()));
10992       }
10993     }
10994   }
10995
10996   unsigned PromBits = N->getOperand(0).getValueSizeInBits();
10997   bool ReallyNeedsExt = false;
10998   if (N->getOpcode() != ISD::ANY_EXTEND) {
10999     // If all of the inputs are not already sign/zero extended, then
11000     // we'll still need to do that at the end.
11001     for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
11002       if (isa<ConstantSDNode>(Inputs[i]))
11003         continue;
11004
11005       unsigned OpBits =
11006         Inputs[i].getOperand(0).getValueSizeInBits();
11007       assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
11008
11009       if ((N->getOpcode() == ISD::ZERO_EXTEND &&
11010            !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
11011                                   APInt::getHighBitsSet(OpBits,
11012                                                         OpBits-PromBits))) ||
11013           (N->getOpcode() == ISD::SIGN_EXTEND &&
11014            DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
11015              (OpBits-(PromBits-1)))) {
11016         ReallyNeedsExt = true;
11017         break;
11018       }
11019     }
11020   }
11021
11022   // Replace all inputs, either with the truncation operand, or a
11023   // truncation or extension to the final output type.
11024   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
11025     // Constant inputs need to be replaced with the to-be-promoted nodes that
11026     // use them because they might have users outside of the cluster of
11027     // promoted nodes.
11028     if (isa<ConstantSDNode>(Inputs[i]))
11029       continue;
11030
11031     SDValue InSrc = Inputs[i].getOperand(0);
11032     if (Inputs[i].getValueType() == N->getValueType(0))
11033       DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
11034     else if (N->getOpcode() == ISD::SIGN_EXTEND)
11035       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
11036         DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
11037     else if (N->getOpcode() == ISD::ZERO_EXTEND)
11038       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
11039         DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
11040     else
11041       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
11042         DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
11043   }
11044
11045   std::list<HandleSDNode> PromOpHandles;
11046   for (auto &PromOp : PromOps)
11047     PromOpHandles.emplace_back(PromOp);
11048
11049   // Replace all operations (these are all the same, but have a different
11050   // (promoted) return type). DAG.getNode will validate that the types of
11051   // a binary operator match, so go through the list in reverse so that
11052   // we've likely promoted both operands first.
11053   while (!PromOpHandles.empty()) {
11054     SDValue PromOp = PromOpHandles.back().getValue();
11055     PromOpHandles.pop_back();
11056
11057     unsigned C;
11058     switch (PromOp.getOpcode()) {
11059     default:             C = 0; break;
11060     case ISD::SELECT:    C = 1; break;
11061     case ISD::SELECT_CC: C = 2; break;
11062     }
11063
11064     if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
11065          PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
11066         (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
11067          PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
11068       // The to-be-promoted operands of this node have not yet been
11069       // promoted (this should be rare because we're going through the
11070       // list backward, but if one of the operands has several users in
11071       // this cluster of to-be-promoted nodes, it is possible).
11072       PromOpHandles.emplace_front(PromOp);
11073       continue;
11074     }
11075
11076     // For SELECT and SELECT_CC nodes, we do a similar check for any
11077     // to-be-promoted comparison inputs.
11078     if (PromOp.getOpcode() == ISD::SELECT ||
11079         PromOp.getOpcode() == ISD::SELECT_CC) {
11080       if ((SelectTruncOp[0].count(PromOp.getNode()) &&
11081            PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
11082           (SelectTruncOp[1].count(PromOp.getNode()) &&
11083            PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
11084         PromOpHandles.emplace_front(PromOp);
11085         continue;
11086       }
11087     }
11088
11089     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
11090                                 PromOp.getNode()->op_end());
11091
11092     // If this node has constant inputs, then they'll need to be promoted here.
11093     for (unsigned i = 0; i < 2; ++i) {
11094       if (!isa<ConstantSDNode>(Ops[C+i]))
11095         continue;
11096       if (Ops[C+i].getValueType() == N->getValueType(0))
11097         continue;
11098
11099       if (N->getOpcode() == ISD::SIGN_EXTEND)
11100         Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
11101       else if (N->getOpcode() == ISD::ZERO_EXTEND)
11102         Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
11103       else
11104         Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
11105     }
11106
11107     // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
11108     // truncate them again to the original value type.
11109     if (PromOp.getOpcode() == ISD::SELECT ||
11110         PromOp.getOpcode() == ISD::SELECT_CC) {
11111       auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
11112       if (SI0 != SelectTruncOp[0].end())
11113         Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
11114       auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
11115       if (SI1 != SelectTruncOp[1].end())
11116         Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
11117     }
11118
11119     DAG.ReplaceAllUsesOfValueWith(PromOp,
11120       DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
11121   }
11122
11123   // Now we're left with the initial extension itself.
11124   if (!ReallyNeedsExt)
11125     return N->getOperand(0);
11126
11127   // To zero extend, just mask off everything except for the first bit (in the
11128   // i1 case).
11129   if (N->getOpcode() == ISD::ZERO_EXTEND)
11130     return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
11131                        DAG.getConstant(APInt::getLowBitsSet(
11132                                          N->getValueSizeInBits(0), PromBits),
11133                                        dl, N->getValueType(0)));
11134
11135   assert(N->getOpcode() == ISD::SIGN_EXTEND &&
11136          "Invalid extension type");
11137   EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
11138   SDValue ShiftCst =
11139       DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
11140   return DAG.getNode(
11141       ISD::SRA, dl, N->getValueType(0),
11142       DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
11143       ShiftCst);
11144 }
11145
11146 /// \brief Reduces the number of fp-to-int conversion when building a vector.
11147 ///
11148 /// If this vector is built out of floating to integer conversions,
11149 /// transform it to a vector built out of floating point values followed by a
11150 /// single floating to integer conversion of the vector.
11151 /// Namely  (build_vector (fptosi $A), (fptosi $B), ...)
11152 /// becomes (fptosi (build_vector ($A, $B, ...)))
11153 SDValue PPCTargetLowering::
11154 combineElementTruncationToVectorTruncation(SDNode *N,
11155                                            DAGCombinerInfo &DCI) const {
11156   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
11157          "Should be called with a BUILD_VECTOR node");
11158
11159   SelectionDAG &DAG = DCI.DAG;
11160   SDLoc dl(N);
11161
11162   SDValue FirstInput = N->getOperand(0);
11163   assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
11164          "The input operand must be an fp-to-int conversion.");
11165
11166   // This combine happens after legalization so the fp_to_[su]i nodes are
11167   // already converted to PPCSISD nodes.
11168   unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
11169   if (FirstConversion == PPCISD::FCTIDZ ||
11170       FirstConversion == PPCISD::FCTIDUZ ||
11171       FirstConversion == PPCISD::FCTIWZ ||
11172       FirstConversion == PPCISD::FCTIWUZ) {
11173     bool IsSplat = true;
11174     bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
11175       FirstConversion == PPCISD::FCTIWUZ;
11176     EVT SrcVT = FirstInput.getOperand(0).getValueType();
11177     SmallVector<SDValue, 4> Ops;
11178     EVT TargetVT = N->getValueType(0);
11179     for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
11180       if (N->getOperand(i).getOpcode() != PPCISD::MFVSR)
11181         return SDValue();
11182       unsigned NextConversion = N->getOperand(i).getOperand(0).getOpcode();
11183       if (NextConversion != FirstConversion)
11184         return SDValue();
11185       if (N->getOperand(i) != FirstInput)
11186         IsSplat = false;
11187     }
11188
11189     // If this is a splat, we leave it as-is since there will be only a single
11190     // fp-to-int conversion followed by a splat of the integer. This is better
11191     // for 32-bit and smaller ints and neutral for 64-bit ints.
11192     if (IsSplat)
11193       return SDValue();
11194
11195     // Now that we know we have the right type of node, get its operands
11196     for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
11197       SDValue In = N->getOperand(i).getOperand(0);
11198       // For 32-bit values, we need to add an FP_ROUND node.
11199       if (Is32Bit) {
11200         if (In.isUndef())
11201           Ops.push_back(DAG.getUNDEF(SrcVT));
11202         else {
11203           SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
11204                                       MVT::f32, In.getOperand(0),
11205                                       DAG.getIntPtrConstant(1, dl));
11206           Ops.push_back(Trunc);
11207         }
11208       } else
11209         Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
11210     }
11211
11212     unsigned Opcode;
11213     if (FirstConversion == PPCISD::FCTIDZ ||
11214         FirstConversion == PPCISD::FCTIWZ)
11215       Opcode = ISD::FP_TO_SINT;
11216     else
11217       Opcode = ISD::FP_TO_UINT;
11218
11219     EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
11220     SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
11221     return DAG.getNode(Opcode, dl, TargetVT, BV);
11222   }
11223   return SDValue();
11224 }
11225
11226 /// \brief Reduce the number of loads when building a vector.
11227 ///
11228 /// Building a vector out of multiple loads can be converted to a load
11229 /// of the vector type if the loads are consecutive. If the loads are
11230 /// consecutive but in descending order, a shuffle is added at the end
11231 /// to reorder the vector.
11232 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
11233   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
11234          "Should be called with a BUILD_VECTOR node");
11235
11236   SDLoc dl(N);
11237   bool InputsAreConsecutiveLoads = true;
11238   bool InputsAreReverseConsecutive = true;
11239   unsigned ElemSize = N->getValueType(0).getScalarSizeInBits() / 8;
11240   SDValue FirstInput = N->getOperand(0);
11241   bool IsRoundOfExtLoad = false;
11242
11243   if (FirstInput.getOpcode() == ISD::FP_ROUND &&
11244       FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
11245     LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
11246     IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
11247   }
11248   // Not a build vector of (possibly fp_rounded) loads.
11249   if (!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD)
11250     return SDValue();
11251
11252   for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
11253     // If any inputs are fp_round(extload), they all must be.
11254     if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
11255       return SDValue();
11256
11257     SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
11258       N->getOperand(i);
11259     if (NextInput.getOpcode() != ISD::LOAD)
11260       return SDValue();
11261
11262     SDValue PreviousInput =
11263       IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
11264     LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
11265     LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
11266
11267     // If any inputs are fp_round(extload), they all must be.
11268     if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
11269       return SDValue();
11270
11271     if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
11272       InputsAreConsecutiveLoads = false;
11273     if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
11274       InputsAreReverseConsecutive = false;
11275
11276     // Exit early if the loads are neither consecutive nor reverse consecutive.
11277     if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
11278       return SDValue();
11279   }
11280
11281   assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
11282          "The loads cannot be both consecutive and reverse consecutive.");
11283
11284   SDValue FirstLoadOp =
11285     IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
11286   SDValue LastLoadOp =
11287     IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
11288                        N->getOperand(N->getNumOperands()-1);
11289
11290   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
11291   LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
11292   if (InputsAreConsecutiveLoads) {
11293     assert(LD1 && "Input needs to be a LoadSDNode.");
11294     return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
11295                        LD1->getBasePtr(), LD1->getPointerInfo(),
11296                        LD1->getAlignment());
11297   }
11298   if (InputsAreReverseConsecutive) {
11299     assert(LDL && "Input needs to be a LoadSDNode.");
11300     SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
11301                                LDL->getBasePtr(), LDL->getPointerInfo(),
11302                                LDL->getAlignment());
11303     SmallVector<int, 16> Ops;
11304     for (int i = N->getNumOperands() - 1; i >= 0; i--)
11305       Ops.push_back(i);
11306
11307     return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
11308                                 DAG.getUNDEF(N->getValueType(0)), Ops);
11309   }
11310   return SDValue();
11311 }
11312
11313 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
11314                                                  DAGCombinerInfo &DCI) const {
11315   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
11316          "Should be called with a BUILD_VECTOR node");
11317
11318   SelectionDAG &DAG = DCI.DAG;
11319   SDLoc dl(N);
11320
11321   if (!Subtarget.hasVSX())
11322     return SDValue();
11323
11324   // The target independent DAG combiner will leave a build_vector of
11325   // float-to-int conversions intact. We can generate MUCH better code for
11326   // a float-to-int conversion of a vector of floats.
11327   SDValue FirstInput = N->getOperand(0);
11328   if (FirstInput.getOpcode() == PPCISD::MFVSR) {
11329     SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
11330     if (Reduced)
11331       return Reduced;
11332   }
11333
11334   // If we're building a vector out of consecutive loads, just load that
11335   // vector type.
11336   SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
11337   if (Reduced)
11338     return Reduced;
11339
11340   if (N->getValueType(0) != MVT::v2f64)
11341     return SDValue();
11342
11343   // Looking for:
11344   // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
11345   if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
11346       FirstInput.getOpcode() != ISD::UINT_TO_FP)
11347     return SDValue();
11348   if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
11349       N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
11350     return SDValue();
11351   if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
11352     return SDValue();
11353
11354   SDValue Ext1 = FirstInput.getOperand(0);
11355   SDValue Ext2 = N->getOperand(1).getOperand(0);
11356   if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
11357      Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
11358     return SDValue();
11359
11360   ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
11361   ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
11362   if (!Ext1Op || !Ext2Op)
11363     return SDValue();
11364   if (Ext1.getValueType() != MVT::i32 ||
11365       Ext2.getValueType() != MVT::i32)
11366   if (Ext1.getOperand(0) != Ext2.getOperand(0))
11367     return SDValue();
11368
11369   int FirstElem = Ext1Op->getZExtValue();
11370   int SecondElem = Ext2Op->getZExtValue();
11371   int SubvecIdx;
11372   if (FirstElem == 0 && SecondElem == 1)
11373     SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
11374   else if (FirstElem == 2 && SecondElem == 3)
11375     SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
11376   else
11377     return SDValue();
11378
11379   SDValue SrcVec = Ext1.getOperand(0);
11380   auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
11381     PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
11382   return DAG.getNode(NodeType, dl, MVT::v2f64,
11383                      SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
11384 }
11385
11386 SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
11387                                               DAGCombinerInfo &DCI) const {
11388   assert((N->getOpcode() == ISD::SINT_TO_FP ||
11389           N->getOpcode() == ISD::UINT_TO_FP) &&
11390          "Need an int -> FP conversion node here");
11391
11392   if (useSoftFloat() || !Subtarget.has64BitSupport())
11393     return SDValue();
11394
11395   SelectionDAG &DAG = DCI.DAG;
11396   SDLoc dl(N);
11397   SDValue Op(N, 0);
11398
11399   SDValue FirstOperand(Op.getOperand(0));
11400   bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
11401     (FirstOperand.getValueType() == MVT::i8 ||
11402      FirstOperand.getValueType() == MVT::i16);
11403   if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
11404     bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
11405     bool DstDouble = Op.getValueType() == MVT::f64;
11406     unsigned ConvOp = Signed ?
11407       (DstDouble ? PPCISD::FCFID  : PPCISD::FCFIDS) :
11408       (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
11409     SDValue WidthConst =
11410       DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
11411                             dl, false);
11412     LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
11413     SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
11414     SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
11415                                          DAG.getVTList(MVT::f64, MVT::Other),
11416                                          Ops, MVT::i8, LDN->getMemOperand());
11417
11418     // For signed conversion, we need to sign-extend the value in the VSR
11419     if (Signed) {
11420       SDValue ExtOps[] = { Ld, WidthConst };
11421       SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
11422       return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
11423     } else
11424       return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
11425   }
11426
11427   // Don't handle ppc_fp128 here or i1 conversions.
11428   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
11429     return SDValue();
11430   if (Op.getOperand(0).getValueType() == MVT::i1)
11431     return SDValue();
11432
11433   // For i32 intermediate values, unfortunately, the conversion functions
11434   // leave the upper 32 bits of the value are undefined. Within the set of
11435   // scalar instructions, we have no method for zero- or sign-extending the
11436   // value. Thus, we cannot handle i32 intermediate values here.
11437   if (Op.getOperand(0).getValueType() == MVT::i32)
11438     return SDValue();
11439
11440   assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
11441          "UINT_TO_FP is supported only with FPCVT");
11442
11443   // If we have FCFIDS, then use it when converting to single-precision.
11444   // Otherwise, convert to double-precision and then round.
11445   unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
11446                        ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
11447                                                             : PPCISD::FCFIDS)
11448                        : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
11449                                                             : PPCISD::FCFID);
11450   MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
11451                   ? MVT::f32
11452                   : MVT::f64;
11453
11454   // If we're converting from a float, to an int, and back to a float again,
11455   // then we don't need the store/load pair at all.
11456   if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
11457        Subtarget.hasFPCVT()) ||
11458       (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
11459     SDValue Src = Op.getOperand(0).getOperand(0);
11460     if (Src.getValueType() == MVT::f32) {
11461       Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
11462       DCI.AddToWorklist(Src.getNode());
11463     } else if (Src.getValueType() != MVT::f64) {
11464       // Make sure that we don't pick up a ppc_fp128 source value.
11465       return SDValue();
11466     }
11467
11468     unsigned FCTOp =
11469       Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
11470                                                         PPCISD::FCTIDUZ;
11471
11472     SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
11473     SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
11474
11475     if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
11476       FP = DAG.getNode(ISD::FP_ROUND, dl,
11477                        MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
11478       DCI.AddToWorklist(FP.getNode());
11479     }
11480
11481     return FP;
11482   }
11483
11484   return SDValue();
11485 }
11486
11487 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
11488 // builtins) into loads with swaps.
11489 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
11490                                               DAGCombinerInfo &DCI) const {
11491   SelectionDAG &DAG = DCI.DAG;
11492   SDLoc dl(N);
11493   SDValue Chain;
11494   SDValue Base;
11495   MachineMemOperand *MMO;
11496
11497   switch (N->getOpcode()) {
11498   default:
11499     llvm_unreachable("Unexpected opcode for little endian VSX load");
11500   case ISD::LOAD: {
11501     LoadSDNode *LD = cast<LoadSDNode>(N);
11502     Chain = LD->getChain();
11503     Base = LD->getBasePtr();
11504     MMO = LD->getMemOperand();
11505     // If the MMO suggests this isn't a load of a full vector, leave
11506     // things alone.  For a built-in, we have to make the change for
11507     // correctness, so if there is a size problem that will be a bug.
11508     if (MMO->getSize() < 16)
11509       return SDValue();
11510     break;
11511   }
11512   case ISD::INTRINSIC_W_CHAIN: {
11513     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
11514     Chain = Intrin->getChain();
11515     // Similarly to the store case below, Intrin->getBasePtr() doesn't get
11516     // us what we want. Get operand 2 instead.
11517     Base = Intrin->getOperand(2);
11518     MMO = Intrin->getMemOperand();
11519     break;
11520   }
11521   }
11522
11523   MVT VecTy = N->getValueType(0).getSimpleVT();
11524
11525   // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
11526   // aligned and the type is a vector with elements up to 4 bytes
11527   if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16)
11528       && VecTy.getScalarSizeInBits() <= 32 ) {
11529     return SDValue();
11530   }
11531
11532   SDValue LoadOps[] = { Chain, Base };
11533   SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
11534                                          DAG.getVTList(MVT::v2f64, MVT::Other),
11535                                          LoadOps, MVT::v2f64, MMO);
11536
11537   DCI.AddToWorklist(Load.getNode());
11538   Chain = Load.getValue(1);
11539   SDValue Swap = DAG.getNode(
11540       PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
11541   DCI.AddToWorklist(Swap.getNode());
11542
11543   // Add a bitcast if the resulting load type doesn't match v2f64.
11544   if (VecTy != MVT::v2f64) {
11545     SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
11546     DCI.AddToWorklist(N.getNode());
11547     // Package {bitcast value, swap's chain} to match Load's shape.
11548     return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
11549                        N, Swap.getValue(1));
11550   }
11551
11552   return Swap;
11553 }
11554
11555 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
11556 // builtins) into stores with swaps.
11557 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
11558                                                DAGCombinerInfo &DCI) const {
11559   SelectionDAG &DAG = DCI.DAG;
11560   SDLoc dl(N);
11561   SDValue Chain;
11562   SDValue Base;
11563   unsigned SrcOpnd;
11564   MachineMemOperand *MMO;
11565
11566   switch (N->getOpcode()) {
11567   default:
11568     llvm_unreachable("Unexpected opcode for little endian VSX store");
11569   case ISD::STORE: {
11570     StoreSDNode *ST = cast<StoreSDNode>(N);
11571     Chain = ST->getChain();
11572     Base = ST->getBasePtr();
11573     MMO = ST->getMemOperand();
11574     SrcOpnd = 1;
11575     // If the MMO suggests this isn't a store of a full vector, leave
11576     // things alone.  For a built-in, we have to make the change for
11577     // correctness, so if there is a size problem that will be a bug.
11578     if (MMO->getSize() < 16)
11579       return SDValue();
11580     break;
11581   }
11582   case ISD::INTRINSIC_VOID: {
11583     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
11584     Chain = Intrin->getChain();
11585     // Intrin->getBasePtr() oddly does not get what we want.
11586     Base = Intrin->getOperand(3);
11587     MMO = Intrin->getMemOperand();
11588     SrcOpnd = 2;
11589     break;
11590   }
11591   }
11592
11593   SDValue Src = N->getOperand(SrcOpnd);
11594   MVT VecTy = Src.getValueType().getSimpleVT();
11595
11596   // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the load is
11597   // aligned and the type is a vector with elements up to 4 bytes
11598   if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16)
11599       && VecTy.getScalarSizeInBits() <= 32 ) {
11600     return SDValue();
11601   }
11602
11603   // All stores are done as v2f64 and possible bit cast.
11604   if (VecTy != MVT::v2f64) {
11605     Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
11606     DCI.AddToWorklist(Src.getNode());
11607   }
11608
11609   SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
11610                              DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
11611   DCI.AddToWorklist(Swap.getNode());
11612   Chain = Swap.getValue(1);
11613   SDValue StoreOps[] = { Chain, Swap, Base };
11614   SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
11615                                           DAG.getVTList(MVT::Other),
11616                                           StoreOps, VecTy, MMO);
11617   DCI.AddToWorklist(Store.getNode());
11618   return Store;
11619 }
11620
11621 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
11622                                              DAGCombinerInfo &DCI) const {
11623   SelectionDAG &DAG = DCI.DAG;
11624   SDLoc dl(N);
11625   switch (N->getOpcode()) {
11626   default: break;
11627   case ISD::SHL:
11628     return combineSHL(N, DCI);
11629   case ISD::SRA:
11630     return combineSRA(N, DCI);
11631   case ISD::SRL:
11632     return combineSRL(N, DCI);
11633   case PPCISD::SHL:
11634     if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
11635         return N->getOperand(0);
11636     break;
11637   case PPCISD::SRL:
11638     if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
11639         return N->getOperand(0);
11640     break;
11641   case PPCISD::SRA:
11642     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
11643       if (C->isNullValue() ||   //  0 >>s V -> 0.
11644           C->isAllOnesValue())    // -1 >>s V -> -1.
11645         return N->getOperand(0);
11646     }
11647     break;
11648   case ISD::SIGN_EXTEND:
11649   case ISD::ZERO_EXTEND:
11650   case ISD::ANY_EXTEND:
11651     return DAGCombineExtBoolTrunc(N, DCI);
11652   case ISD::TRUNCATE:
11653   case ISD::SETCC:
11654   case ISD::SELECT_CC:
11655     return DAGCombineTruncBoolExt(N, DCI);
11656   case ISD::SINT_TO_FP:
11657   case ISD::UINT_TO_FP:
11658     return combineFPToIntToFP(N, DCI);
11659   case ISD::STORE: {
11660     EVT Op1VT = N->getOperand(1).getValueType();
11661     bool ValidTypeForStoreFltAsInt = (Op1VT == MVT::i32) ||
11662       (Subtarget.hasP9Vector() && (Op1VT == MVT::i8 || Op1VT == MVT::i16));
11663
11664     // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
11665     if (Subtarget.hasSTFIWX() && !cast<StoreSDNode>(N)->isTruncatingStore() &&
11666         N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
11667         ValidTypeForStoreFltAsInt &&
11668         N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
11669       SDValue Val = N->getOperand(1).getOperand(0);
11670       if (Val.getValueType() == MVT::f32) {
11671         Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
11672         DCI.AddToWorklist(Val.getNode());
11673       }
11674       Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
11675       DCI.AddToWorklist(Val.getNode());
11676
11677       if (Op1VT == MVT::i32) {
11678         SDValue Ops[] = {
11679           N->getOperand(0), Val, N->getOperand(2),
11680           DAG.getValueType(N->getOperand(1).getValueType())
11681         };
11682
11683         Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
11684                 DAG.getVTList(MVT::Other), Ops,
11685                 cast<StoreSDNode>(N)->getMemoryVT(),
11686                 cast<StoreSDNode>(N)->getMemOperand());
11687       } else {
11688         unsigned WidthInBytes =
11689           N->getOperand(1).getValueType() == MVT::i8 ? 1 : 2;
11690         SDValue WidthConst = DAG.getIntPtrConstant(WidthInBytes, dl, false);
11691
11692         SDValue Ops[] = {
11693           N->getOperand(0), Val, N->getOperand(2), WidthConst,
11694           DAG.getValueType(N->getOperand(1).getValueType())
11695         };
11696         Val = DAG.getMemIntrinsicNode(PPCISD::STXSIX, dl,
11697                                       DAG.getVTList(MVT::Other), Ops,
11698                                       cast<StoreSDNode>(N)->getMemoryVT(),
11699                                       cast<StoreSDNode>(N)->getMemOperand());
11700       }
11701
11702       DCI.AddToWorklist(Val.getNode());
11703       return Val;
11704     }
11705
11706     // Turn STORE (BSWAP) -> sthbrx/stwbrx.
11707     if (cast<StoreSDNode>(N)->isUnindexed() &&
11708         N->getOperand(1).getOpcode() == ISD::BSWAP &&
11709         N->getOperand(1).getNode()->hasOneUse() &&
11710         (N->getOperand(1).getValueType() == MVT::i32 ||
11711          N->getOperand(1).getValueType() == MVT::i16 ||
11712          (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
11713           N->getOperand(1).getValueType() == MVT::i64))) {
11714       SDValue BSwapOp = N->getOperand(1).getOperand(0);
11715       // Do an any-extend to 32-bits if this is a half-word input.
11716       if (BSwapOp.getValueType() == MVT::i16)
11717         BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
11718
11719       // If the type of BSWAP operand is wider than stored memory width
11720       // it need to be shifted to the right side before STBRX.
11721       EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
11722       if (Op1VT.bitsGT(mVT)) {
11723         int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
11724         BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
11725                               DAG.getConstant(Shift, dl, MVT::i32));
11726         // Need to truncate if this is a bswap of i64 stored as i32/i16.
11727         if (Op1VT == MVT::i64)
11728           BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
11729       }
11730
11731       SDValue Ops[] = {
11732         N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
11733       };
11734       return
11735         DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
11736                                 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
11737                                 cast<StoreSDNode>(N)->getMemOperand());
11738     }
11739
11740     // For little endian, VSX stores require generating xxswapd/lxvd2x.
11741     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
11742     EVT VT = N->getOperand(1).getValueType();
11743     if (VT.isSimple()) {
11744       MVT StoreVT = VT.getSimpleVT();
11745       if (Subtarget.needsSwapsForVSXMemOps() &&
11746           (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
11747            StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
11748         return expandVSXStoreForLE(N, DCI);
11749     }
11750     break;
11751   }
11752   case ISD::LOAD: {
11753     LoadSDNode *LD = cast<LoadSDNode>(N);
11754     EVT VT = LD->getValueType(0);
11755
11756     // For little endian, VSX loads require generating lxvd2x/xxswapd.
11757     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
11758     if (VT.isSimple()) {
11759       MVT LoadVT = VT.getSimpleVT();
11760       if (Subtarget.needsSwapsForVSXMemOps() &&
11761           (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
11762            LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
11763         return expandVSXLoadForLE(N, DCI);
11764     }
11765
11766     // We sometimes end up with a 64-bit integer load, from which we extract
11767     // two single-precision floating-point numbers. This happens with
11768     // std::complex<float>, and other similar structures, because of the way we
11769     // canonicalize structure copies. However, if we lack direct moves,
11770     // then the final bitcasts from the extracted integer values to the
11771     // floating-point numbers turn into store/load pairs. Even with direct moves,
11772     // just loading the two floating-point numbers is likely better.
11773     auto ReplaceTwoFloatLoad = [&]() {
11774       if (VT != MVT::i64)
11775         return false;
11776
11777       if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
11778           LD->isVolatile())
11779         return false;
11780
11781       //  We're looking for a sequence like this:
11782       //  t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
11783       //      t16: i64 = srl t13, Constant:i32<32>
11784       //    t17: i32 = truncate t16
11785       //  t18: f32 = bitcast t17
11786       //    t19: i32 = truncate t13
11787       //  t20: f32 = bitcast t19
11788
11789       if (!LD->hasNUsesOfValue(2, 0))
11790         return false;
11791
11792       auto UI = LD->use_begin();
11793       while (UI.getUse().getResNo() != 0) ++UI;
11794       SDNode *Trunc = *UI++;
11795       while (UI.getUse().getResNo() != 0) ++UI;
11796       SDNode *RightShift = *UI;
11797       if (Trunc->getOpcode() != ISD::TRUNCATE)
11798         std::swap(Trunc, RightShift);
11799
11800       if (Trunc->getOpcode() != ISD::TRUNCATE ||
11801           Trunc->getValueType(0) != MVT::i32 ||
11802           !Trunc->hasOneUse())
11803         return false;
11804       if (RightShift->getOpcode() != ISD::SRL ||
11805           !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
11806           RightShift->getConstantOperandVal(1) != 32 ||
11807           !RightShift->hasOneUse())
11808         return false;
11809
11810       SDNode *Trunc2 = *RightShift->use_begin();
11811       if (Trunc2->getOpcode() != ISD::TRUNCATE ||
11812           Trunc2->getValueType(0) != MVT::i32 ||
11813           !Trunc2->hasOneUse())
11814         return false;
11815
11816       SDNode *Bitcast = *Trunc->use_begin();
11817       SDNode *Bitcast2 = *Trunc2->use_begin();
11818
11819       if (Bitcast->getOpcode() != ISD::BITCAST ||
11820           Bitcast->getValueType(0) != MVT::f32)
11821         return false;
11822       if (Bitcast2->getOpcode() != ISD::BITCAST ||
11823           Bitcast2->getValueType(0) != MVT::f32)
11824         return false;
11825
11826       if (Subtarget.isLittleEndian())
11827         std::swap(Bitcast, Bitcast2);
11828
11829       // Bitcast has the second float (in memory-layout order) and Bitcast2
11830       // has the first one.
11831
11832       SDValue BasePtr = LD->getBasePtr();
11833       if (LD->isIndexed()) {
11834         assert(LD->getAddressingMode() == ISD::PRE_INC &&
11835                "Non-pre-inc AM on PPC?");
11836         BasePtr =
11837           DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11838                       LD->getOffset());
11839       }
11840
11841       auto MMOFlags =
11842           LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
11843       SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
11844                                       LD->getPointerInfo(), LD->getAlignment(),
11845                                       MMOFlags, LD->getAAInfo());
11846       SDValue AddPtr =
11847         DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
11848                     BasePtr, DAG.getIntPtrConstant(4, dl));
11849       SDValue FloatLoad2 = DAG.getLoad(
11850           MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
11851           LD->getPointerInfo().getWithOffset(4),
11852           MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());
11853
11854       if (LD->isIndexed()) {
11855         // Note that DAGCombine should re-form any pre-increment load(s) from
11856         // what is produced here if that makes sense.
11857         DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
11858       }
11859
11860       DCI.CombineTo(Bitcast2, FloatLoad);
11861       DCI.CombineTo(Bitcast, FloatLoad2);
11862
11863       DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
11864                                     SDValue(FloatLoad2.getNode(), 1));
11865       return true;
11866     };
11867
11868     if (ReplaceTwoFloatLoad())
11869       return SDValue(N, 0);
11870
11871     EVT MemVT = LD->getMemoryVT();
11872     Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
11873     unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
11874     Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
11875     unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy);
11876     if (LD->isUnindexed() && VT.isVector() &&
11877         ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
11878           // P8 and later hardware should just use LOAD.
11879           !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
11880                                        VT == MVT::v4i32 || VT == MVT::v4f32)) ||
11881          (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
11882           LD->getAlignment() >= ScalarABIAlignment)) &&
11883         LD->getAlignment() < ABIAlignment) {
11884       // This is a type-legal unaligned Altivec or QPX load.
11885       SDValue Chain = LD->getChain();
11886       SDValue Ptr = LD->getBasePtr();
11887       bool isLittleEndian = Subtarget.isLittleEndian();
11888
11889       // This implements the loading of unaligned vectors as described in
11890       // the venerable Apple Velocity Engine overview. Specifically:
11891       // https://developer.apple.com/hardwaredrivers/ve/alignment.html
11892       // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
11893       //
11894       // The general idea is to expand a sequence of one or more unaligned
11895       // loads into an alignment-based permutation-control instruction (lvsl
11896       // or lvsr), a series of regular vector loads (which always truncate
11897       // their input address to an aligned address), and a series of
11898       // permutations.  The results of these permutations are the requested
11899       // loaded values.  The trick is that the last "extra" load is not taken
11900       // from the address you might suspect (sizeof(vector) bytes after the
11901       // last requested load), but rather sizeof(vector) - 1 bytes after the
11902       // last requested vector. The point of this is to avoid a page fault if
11903       // the base address happened to be aligned. This works because if the
11904       // base address is aligned, then adding less than a full vector length
11905       // will cause the last vector in the sequence to be (re)loaded.
11906       // Otherwise, the next vector will be fetched as you might suspect was
11907       // necessary.
11908
11909       // We might be able to reuse the permutation generation from
11910       // a different base address offset from this one by an aligned amount.
11911       // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
11912       // optimization later.
11913       Intrinsic::ID Intr, IntrLD, IntrPerm;
11914       MVT PermCntlTy, PermTy, LDTy;
11915       if (Subtarget.hasAltivec()) {
11916         Intr = isLittleEndian ?  Intrinsic::ppc_altivec_lvsr :
11917                                  Intrinsic::ppc_altivec_lvsl;
11918         IntrLD = Intrinsic::ppc_altivec_lvx;
11919         IntrPerm = Intrinsic::ppc_altivec_vperm;
11920         PermCntlTy = MVT::v16i8;
11921         PermTy = MVT::v4i32;
11922         LDTy = MVT::v4i32;
11923       } else {
11924         Intr =   MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld :
11925                                        Intrinsic::ppc_qpx_qvlpcls;
11926         IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd :
11927                                        Intrinsic::ppc_qpx_qvlfs;
11928         IntrPerm = Intrinsic::ppc_qpx_qvfperm;
11929         PermCntlTy = MVT::v4f64;
11930         PermTy = MVT::v4f64;
11931         LDTy = MemVT.getSimpleVT();
11932       }
11933
11934       SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
11935
11936       // Create the new MMO for the new base load. It is like the original MMO,
11937       // but represents an area in memory almost twice the vector size centered
11938       // on the original address. If the address is unaligned, we might start
11939       // reading up to (sizeof(vector)-1) bytes below the address of the
11940       // original unaligned load.
11941       MachineFunction &MF = DAG.getMachineFunction();
11942       MachineMemOperand *BaseMMO =
11943         MF.getMachineMemOperand(LD->getMemOperand(),
11944                                 -(long)MemVT.getStoreSize()+1,
11945                                 2*MemVT.getStoreSize()-1);
11946
11947       // Create the new base load.
11948       SDValue LDXIntID =
11949           DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
11950       SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
11951       SDValue BaseLoad =
11952         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
11953                                 DAG.getVTList(PermTy, MVT::Other),
11954                                 BaseLoadOps, LDTy, BaseMMO);
11955
11956       // Note that the value of IncOffset (which is provided to the next
11957       // load's pointer info offset value, and thus used to calculate the
11958       // alignment), and the value of IncValue (which is actually used to
11959       // increment the pointer value) are different! This is because we
11960       // require the next load to appear to be aligned, even though it
11961       // is actually offset from the base pointer by a lesser amount.
11962       int IncOffset = VT.getSizeInBits() / 8;
11963       int IncValue = IncOffset;
11964
11965       // Walk (both up and down) the chain looking for another load at the real
11966       // (aligned) offset (the alignment of the other load does not matter in
11967       // this case). If found, then do not use the offset reduction trick, as
11968       // that will prevent the loads from being later combined (as they would
11969       // otherwise be duplicates).
11970       if (!findConsecutiveLoad(LD, DAG))
11971         --IncValue;
11972
11973       SDValue Increment =
11974           DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
11975       Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
11976
11977       MachineMemOperand *ExtraMMO =
11978         MF.getMachineMemOperand(LD->getMemOperand(),
11979                                 1, 2*MemVT.getStoreSize()-1);
11980       SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
11981       SDValue ExtraLoad =
11982         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
11983                                 DAG.getVTList(PermTy, MVT::Other),
11984                                 ExtraLoadOps, LDTy, ExtraMMO);
11985
11986       SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
11987         BaseLoad.getValue(1), ExtraLoad.getValue(1));
11988
11989       // Because vperm has a big-endian bias, we must reverse the order
11990       // of the input vectors and complement the permute control vector
11991       // when generating little endian code.  We have already handled the
11992       // latter by using lvsr instead of lvsl, so just reverse BaseLoad
11993       // and ExtraLoad here.
11994       SDValue Perm;
11995       if (isLittleEndian)
11996         Perm = BuildIntrinsicOp(IntrPerm,
11997                                 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
11998       else
11999         Perm = BuildIntrinsicOp(IntrPerm,
12000                                 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
12001
12002       if (VT != PermTy)
12003         Perm = Subtarget.hasAltivec() ?
12004                  DAG.getNode(ISD::BITCAST, dl, VT, Perm) :
12005                  DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX
12006                                DAG.getTargetConstant(1, dl, MVT::i64));
12007                                // second argument is 1 because this rounding
12008                                // is always exact.
12009
12010       // The output of the permutation is our loaded result, the TokenFactor is
12011       // our new chain.
12012       DCI.CombineTo(N, Perm, TF);
12013       return SDValue(N, 0);
12014     }
12015     }
12016     break;
12017     case ISD::INTRINSIC_WO_CHAIN: {
12018       bool isLittleEndian = Subtarget.isLittleEndian();
12019       unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
12020       Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
12021                                            : Intrinsic::ppc_altivec_lvsl);
12022       if ((IID == Intr ||
12023            IID == Intrinsic::ppc_qpx_qvlpcld  ||
12024            IID == Intrinsic::ppc_qpx_qvlpcls) &&
12025         N->getOperand(1)->getOpcode() == ISD::ADD) {
12026         SDValue Add = N->getOperand(1);
12027
12028         int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
12029                    5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;
12030
12031         if (DAG.MaskedValueIsZero(Add->getOperand(1),
12032                                   APInt::getAllOnesValue(Bits /* alignment */)
12033                                       .zext(Add.getScalarValueSizeInBits()))) {
12034           SDNode *BasePtr = Add->getOperand(0).getNode();
12035           for (SDNode::use_iterator UI = BasePtr->use_begin(),
12036                                     UE = BasePtr->use_end();
12037                UI != UE; ++UI) {
12038             if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
12039                 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) {
12040               // We've found another LVSL/LVSR, and this address is an aligned
12041               // multiple of that one. The results will be the same, so use the
12042               // one we've just found instead.
12043
12044               return SDValue(*UI, 0);
12045             }
12046           }
12047         }
12048
12049         if (isa<ConstantSDNode>(Add->getOperand(1))) {
12050           SDNode *BasePtr = Add->getOperand(0).getNode();
12051           for (SDNode::use_iterator UI = BasePtr->use_begin(),
12052                UE = BasePtr->use_end(); UI != UE; ++UI) {
12053             if (UI->getOpcode() == ISD::ADD &&
12054                 isa<ConstantSDNode>(UI->getOperand(1)) &&
12055                 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
12056                  cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
12057                 (1ULL << Bits) == 0) {
12058               SDNode *OtherAdd = *UI;
12059               for (SDNode::use_iterator VI = OtherAdd->use_begin(),
12060                    VE = OtherAdd->use_end(); VI != VE; ++VI) {
12061                 if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
12062                     cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
12063                   return SDValue(*VI, 0);
12064                 }
12065               }
12066             }
12067           }
12068         }
12069       }
12070     }
12071
12072     break;
12073   case ISD::INTRINSIC_W_CHAIN:
12074     // For little endian, VSX loads require generating lxvd2x/xxswapd.
12075     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
12076     if (Subtarget.needsSwapsForVSXMemOps()) {
12077       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12078       default:
12079         break;
12080       case Intrinsic::ppc_vsx_lxvw4x:
12081       case Intrinsic::ppc_vsx_lxvd2x:
12082         return expandVSXLoadForLE(N, DCI);
12083       }
12084     }
12085     break;
12086   case ISD::INTRINSIC_VOID:
12087     // For little endian, VSX stores require generating xxswapd/stxvd2x.
12088     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
12089     if (Subtarget.needsSwapsForVSXMemOps()) {
12090       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12091       default:
12092         break;
12093       case Intrinsic::ppc_vsx_stxvw4x:
12094       case Intrinsic::ppc_vsx_stxvd2x:
12095         return expandVSXStoreForLE(N, DCI);
12096       }
12097     }
12098     break;
12099   case ISD::BSWAP:
12100     // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
12101     if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
12102         N->getOperand(0).hasOneUse() &&
12103         (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
12104          (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
12105           N->getValueType(0) == MVT::i64))) {
12106       SDValue Load = N->getOperand(0);
12107       LoadSDNode *LD = cast<LoadSDNode>(Load);
12108       // Create the byte-swapping load.
12109       SDValue Ops[] = {
12110         LD->getChain(),    // Chain
12111         LD->getBasePtr(),  // Ptr
12112         DAG.getValueType(N->getValueType(0)) // VT
12113       };
12114       SDValue BSLoad =
12115         DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
12116                                 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
12117                                               MVT::i64 : MVT::i32, MVT::Other),
12118                                 Ops, LD->getMemoryVT(), LD->getMemOperand());
12119
12120       // If this is an i16 load, insert the truncate.
12121       SDValue ResVal = BSLoad;
12122       if (N->getValueType(0) == MVT::i16)
12123         ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
12124
12125       // First, combine the bswap away.  This makes the value produced by the
12126       // load dead.
12127       DCI.CombineTo(N, ResVal);
12128
12129       // Next, combine the load away, we give it a bogus result value but a real
12130       // chain result.  The result value is dead because the bswap is dead.
12131       DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
12132
12133       // Return N so it doesn't get rechecked!
12134       return SDValue(N, 0);
12135     }
12136     break;
12137   case PPCISD::VCMP:
12138     // If a VCMPo node already exists with exactly the same operands as this
12139     // node, use its result instead of this node (VCMPo computes both a CR6 and
12140     // a normal output).
12141     //
12142     if (!N->getOperand(0).hasOneUse() &&
12143         !N->getOperand(1).hasOneUse() &&
12144         !N->getOperand(2).hasOneUse()) {
12145
12146       // Scan all of the users of the LHS, looking for VCMPo's that match.
12147       SDNode *VCMPoNode = nullptr;
12148
12149       SDNode *LHSN = N->getOperand(0).getNode();
12150       for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
12151            UI != E; ++UI)
12152         if (UI->getOpcode() == PPCISD::VCMPo &&
12153             UI->getOperand(1) == N->getOperand(1) &&
12154             UI->getOperand(2) == N->getOperand(2) &&
12155             UI->getOperand(0) == N->getOperand(0)) {
12156           VCMPoNode = *UI;
12157           break;
12158         }
12159
12160       // If there is no VCMPo node, or if the flag value has a single use, don't
12161       // transform this.
12162       if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
12163         break;
12164
12165       // Look at the (necessarily single) use of the flag value.  If it has a
12166       // chain, this transformation is more complex.  Note that multiple things
12167       // could use the value result, which we should ignore.
12168       SDNode *FlagUser = nullptr;
12169       for (SDNode::use_iterator UI = VCMPoNode->use_begin();
12170            FlagUser == nullptr; ++UI) {
12171         assert(UI != VCMPoNode->use_end() && "Didn't find user!");
12172         SDNode *User = *UI;
12173         for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
12174           if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
12175             FlagUser = User;
12176             break;
12177           }
12178         }
12179       }
12180
12181       // If the user is a MFOCRF instruction, we know this is safe.
12182       // Otherwise we give up for right now.
12183       if (FlagUser->getOpcode() == PPCISD::MFOCRF)
12184         return SDValue(VCMPoNode, 0);
12185     }
12186     break;
12187   case ISD::BRCOND: {
12188     SDValue Cond = N->getOperand(1);
12189     SDValue Target = N->getOperand(2);
12190
12191     if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
12192         cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
12193           Intrinsic::ppc_is_decremented_ctr_nonzero) {
12194
12195       // We now need to make the intrinsic dead (it cannot be instruction
12196       // selected).
12197       DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
12198       assert(Cond.getNode()->hasOneUse() &&
12199              "Counter decrement has more than one use");
12200
12201       return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
12202                          N->getOperand(0), Target);
12203     }
12204   }
12205   break;
12206   case ISD::BR_CC: {
12207     // If this is a branch on an altivec predicate comparison, lower this so
12208     // that we don't have to do a MFOCRF: instead, branch directly on CR6.  This
12209     // lowering is done pre-legalize, because the legalizer lowers the predicate
12210     // compare down to code that is difficult to reassemble.
12211     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
12212     SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
12213
12214     // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
12215     // value. If so, pass-through the AND to get to the intrinsic.
12216     if (LHS.getOpcode() == ISD::AND &&
12217         LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
12218         cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
12219           Intrinsic::ppc_is_decremented_ctr_nonzero &&
12220         isa<ConstantSDNode>(LHS.getOperand(1)) &&
12221         !isNullConstant(LHS.getOperand(1)))
12222       LHS = LHS.getOperand(0);
12223
12224     if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
12225         cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
12226           Intrinsic::ppc_is_decremented_ctr_nonzero &&
12227         isa<ConstantSDNode>(RHS)) {
12228       assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
12229              "Counter decrement comparison is not EQ or NE");
12230
12231       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
12232       bool isBDNZ = (CC == ISD::SETEQ && Val) ||
12233                     (CC == ISD::SETNE && !Val);
12234
12235       // We now need to make the intrinsic dead (it cannot be instruction
12236       // selected).
12237       DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
12238       assert(LHS.getNode()->hasOneUse() &&
12239              "Counter decrement has more than one use");
12240
12241       return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
12242                          N->getOperand(0), N->getOperand(4));
12243     }
12244
12245     int CompareOpc;
12246     bool isDot;
12247
12248     if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
12249         isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
12250         getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
12251       assert(isDot && "Can't compare against a vector result!");
12252
12253       // If this is a comparison against something other than 0/1, then we know
12254       // that the condition is never/always true.
12255       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
12256       if (Val != 0 && Val != 1) {
12257         if (CC == ISD::SETEQ)      // Cond never true, remove branch.
12258           return N->getOperand(0);
12259         // Always !=, turn it into an unconditional branch.
12260         return DAG.getNode(ISD::BR, dl, MVT::Other,
12261                            N->getOperand(0), N->getOperand(4));
12262       }
12263
12264       bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
12265
12266       // Create the PPCISD altivec 'dot' comparison node.
12267       SDValue Ops[] = {
12268         LHS.getOperand(2),  // LHS of compare
12269         LHS.getOperand(3),  // RHS of compare
12270         DAG.getConstant(CompareOpc, dl, MVT::i32)
12271       };
12272       EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
12273       SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
12274
12275       // Unpack the result based on how the target uses it.
12276       PPC::Predicate CompOpc;
12277       switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
12278       default:  // Can't happen, don't crash on invalid number though.
12279       case 0:   // Branch on the value of the EQ bit of CR6.
12280         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
12281         break;
12282       case 1:   // Branch on the inverted value of the EQ bit of CR6.
12283         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
12284         break;
12285       case 2:   // Branch on the value of the LT bit of CR6.
12286         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
12287         break;
12288       case 3:   // Branch on the inverted value of the LT bit of CR6.
12289         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
12290         break;
12291       }
12292
12293       return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
12294                          DAG.getConstant(CompOpc, dl, MVT::i32),
12295                          DAG.getRegister(PPC::CR6, MVT::i32),
12296                          N->getOperand(4), CompNode.getValue(1));
12297     }
12298     break;
12299   }
12300   case ISD::BUILD_VECTOR:
12301     return DAGCombineBuildVector(N, DCI);
12302   }
12303
12304   return SDValue();
12305 }
12306
12307 SDValue
12308 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
12309                                   SelectionDAG &DAG,
12310                                   std::vector<SDNode *> *Created) const {
12311   // fold (sdiv X, pow2)
12312   EVT VT = N->getValueType(0);
12313   if (VT == MVT::i64 && !Subtarget.isPPC64())
12314     return SDValue();
12315   if ((VT != MVT::i32 && VT != MVT::i64) ||
12316       !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
12317     return SDValue();
12318
12319   SDLoc DL(N);
12320   SDValue N0 = N->getOperand(0);
12321
12322   bool IsNegPow2 = (-Divisor).isPowerOf2();
12323   unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
12324   SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
12325
12326   SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
12327   if (Created)
12328     Created->push_back(Op.getNode());
12329
12330   if (IsNegPow2) {
12331     Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
12332     if (Created)
12333       Created->push_back(Op.getNode());
12334   }
12335
12336   return Op;
12337 }
12338
12339 //===----------------------------------------------------------------------===//
12340 // Inline Assembly Support
12341 //===----------------------------------------------------------------------===//
12342
12343 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
12344                                                       KnownBits &Known,
12345                                                       const APInt &DemandedElts,
12346                                                       const SelectionDAG &DAG,
12347                                                       unsigned Depth) const {
12348   Known.resetAll();
12349   switch (Op.getOpcode()) {
12350   default: break;
12351   case PPCISD::LBRX: {
12352     // lhbrx is known to have the top bits cleared out.
12353     if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
12354       Known.Zero = 0xFFFF0000;
12355     break;
12356   }
12357   case ISD::INTRINSIC_WO_CHAIN: {
12358     switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
12359     default: break;
12360     case Intrinsic::ppc_altivec_vcmpbfp_p:
12361     case Intrinsic::ppc_altivec_vcmpeqfp_p:
12362     case Intrinsic::ppc_altivec_vcmpequb_p:
12363     case Intrinsic::ppc_altivec_vcmpequh_p:
12364     case Intrinsic::ppc_altivec_vcmpequw_p:
12365     case Intrinsic::ppc_altivec_vcmpequd_p:
12366     case Intrinsic::ppc_altivec_vcmpgefp_p:
12367     case Intrinsic::ppc_altivec_vcmpgtfp_p:
12368     case Intrinsic::ppc_altivec_vcmpgtsb_p:
12369     case Intrinsic::ppc_altivec_vcmpgtsh_p:
12370     case Intrinsic::ppc_altivec_vcmpgtsw_p:
12371     case Intrinsic::ppc_altivec_vcmpgtsd_p:
12372     case Intrinsic::ppc_altivec_vcmpgtub_p:
12373     case Intrinsic::ppc_altivec_vcmpgtuh_p:
12374     case Intrinsic::ppc_altivec_vcmpgtuw_p:
12375     case Intrinsic::ppc_altivec_vcmpgtud_p:
12376       Known.Zero = ~1U;  // All bits but the low one are known to be zero.
12377       break;
12378     }
12379   }
12380   }
12381 }
12382
12383 unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
12384   switch (Subtarget.getDarwinDirective()) {
12385   default: break;
12386   case PPC::DIR_970:
12387   case PPC::DIR_PWR4:
12388   case PPC::DIR_PWR5:
12389   case PPC::DIR_PWR5X:
12390   case PPC::DIR_PWR6:
12391   case PPC::DIR_PWR6X:
12392   case PPC::DIR_PWR7:
12393   case PPC::DIR_PWR8:
12394   case PPC::DIR_PWR9: {
12395     if (!ML)
12396       break;
12397
12398     const PPCInstrInfo *TII = Subtarget.getInstrInfo();
12399
12400     // For small loops (between 5 and 8 instructions), align to a 32-byte
12401     // boundary so that the entire loop fits in one instruction-cache line.
12402     uint64_t LoopSize = 0;
12403     for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
12404       for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
12405         LoopSize += TII->getInstSizeInBytes(*J);
12406         if (LoopSize > 32)
12407           break;
12408       }
12409
12410     if (LoopSize > 16 && LoopSize <= 32)
12411       return 5;
12412
12413     break;
12414   }
12415   }
12416
12417   return TargetLowering::getPrefLoopAlignment(ML);
12418 }
12419
12420 /// getConstraintType - Given a constraint, return the type of
12421 /// constraint it is for this target.
12422 PPCTargetLowering::ConstraintType
12423 PPCTargetLowering::getConstraintType(StringRef Constraint) const {
12424   if (Constraint.size() == 1) {
12425     switch (Constraint[0]) {
12426     default: break;
12427     case 'b':
12428     case 'r':
12429     case 'f':
12430     case 'd':
12431     case 'v':
12432     case 'y':
12433       return C_RegisterClass;
12434     case 'Z':
12435       // FIXME: While Z does indicate a memory constraint, it specifically
12436       // indicates an r+r address (used in conjunction with the 'y' modifier
12437       // in the replacement string). Currently, we're forcing the base
12438       // register to be r0 in the asm printer (which is interpreted as zero)
12439       // and forming the complete address in the second register. This is
12440       // suboptimal.
12441       return C_Memory;
12442     }
12443   } else if (Constraint == "wc") { // individual CR bits.
12444     return C_RegisterClass;
12445   } else if (Constraint == "wa" || Constraint == "wd" ||
12446              Constraint == "wf" || Constraint == "ws") {
12447     return C_RegisterClass; // VSX registers.
12448   }
12449   return TargetLowering::getConstraintType(Constraint);
12450 }
12451
12452 /// Examine constraint type and operand type and determine a weight value.
12453 /// This object must already have been set up with the operand type
12454 /// and the current alternative constraint selected.
12455 TargetLowering::ConstraintWeight
12456 PPCTargetLowering::getSingleConstraintMatchWeight(
12457     AsmOperandInfo &info, const char *constraint) const {
12458   ConstraintWeight weight = CW_Invalid;
12459   Value *CallOperandVal = info.CallOperandVal;
12460     // If we don't have a value, we can't do a match,
12461     // but allow it at the lowest weight.
12462   if (!CallOperandVal)
12463     return CW_Default;
12464   Type *type = CallOperandVal->getType();
12465
12466   // Look at the constraint type.
12467   if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
12468     return CW_Register; // an individual CR bit.
12469   else if ((StringRef(constraint) == "wa" ||
12470             StringRef(constraint) == "wd" ||
12471             StringRef(constraint) == "wf") &&
12472            type->isVectorTy())
12473     return CW_Register;
12474   else if (StringRef(constraint) == "ws" && type->isDoubleTy())
12475     return CW_Register;
12476
12477   switch (*constraint) {
12478   default:
12479     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
12480     break;
12481   case 'b':
12482     if (type->isIntegerTy())
12483       weight = CW_Register;
12484     break;
12485   case 'f':
12486     if (type->isFloatTy())
12487       weight = CW_Register;
12488     break;
12489   case 'd':
12490     if (type->isDoubleTy())
12491       weight = CW_Register;
12492     break;
12493   case 'v':
12494     if (type->isVectorTy())
12495       weight = CW_Register;
12496     break;
12497   case 'y':
12498     weight = CW_Register;
12499     break;
12500   case 'Z':
12501     weight = CW_Memory;
12502     break;
12503   }
12504   return weight;
12505 }
12506
12507 std::pair<unsigned, const TargetRegisterClass *>
12508 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
12509                                                 StringRef Constraint,
12510                                                 MVT VT) const {
12511   if (Constraint.size() == 1) {
12512     // GCC RS6000 Constraint Letters
12513     switch (Constraint[0]) {
12514     case 'b':   // R1-R31
12515       if (VT == MVT::i64 && Subtarget.isPPC64())
12516         return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
12517       return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
12518     case 'r':   // R0-R31
12519       if (VT == MVT::i64 && Subtarget.isPPC64())
12520         return std::make_pair(0U, &PPC::G8RCRegClass);
12521       return std::make_pair(0U, &PPC::GPRCRegClass);
12522     // 'd' and 'f' constraints are both defined to be "the floating point
12523     // registers", where one is for 32-bit and the other for 64-bit. We don't
12524     // really care overly much here so just give them all the same reg classes.
12525     case 'd':
12526     case 'f':
12527       if (VT == MVT::f32 || VT == MVT::i32)
12528         return std::make_pair(0U, &PPC::F4RCRegClass);
12529       if (VT == MVT::f64 || VT == MVT::i64)
12530         return std::make_pair(0U, &PPC::F8RCRegClass);
12531       if (VT == MVT::v4f64 && Subtarget.hasQPX())
12532         return std::make_pair(0U, &PPC::QFRCRegClass);
12533       if (VT == MVT::v4f32 && Subtarget.hasQPX())
12534         return std::make_pair(0U, &PPC::QSRCRegClass);
12535       break;
12536     case 'v':
12537       if (VT == MVT::v4f64 && Subtarget.hasQPX())
12538         return std::make_pair(0U, &PPC::QFRCRegClass);
12539       if (VT == MVT::v4f32 && Subtarget.hasQPX())
12540         return std::make_pair(0U, &PPC::QSRCRegClass);
12541       if (Subtarget.hasAltivec())
12542         return std::make_pair(0U, &PPC::VRRCRegClass);
12543     case 'y':   // crrc
12544       return std::make_pair(0U, &PPC::CRRCRegClass);
12545     }
12546   } else if (Constraint == "wc" && Subtarget.useCRBits()) {
12547     // An individual CR bit.
12548     return std::make_pair(0U, &PPC::CRBITRCRegClass);
12549   } else if ((Constraint == "wa" || Constraint == "wd" ||
12550              Constraint == "wf") && Subtarget.hasVSX()) {
12551     return std::make_pair(0U, &PPC::VSRCRegClass);
12552   } else if (Constraint == "ws" && Subtarget.hasVSX()) {
12553     if (VT == MVT::f32 && Subtarget.hasP8Vector())
12554       return std::make_pair(0U, &PPC::VSSRCRegClass);
12555     else
12556       return std::make_pair(0U, &PPC::VSFRCRegClass);
12557   }
12558
12559   std::pair<unsigned, const TargetRegisterClass *> R =
12560       TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
12561
12562   // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
12563   // (which we call X[0-9]+). If a 64-bit value has been requested, and a
12564   // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
12565   // register.
12566   // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
12567   // the AsmName field from *RegisterInfo.td, then this would not be necessary.
12568   if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
12569       PPC::GPRCRegClass.contains(R.first))
12570     return std::make_pair(TRI->getMatchingSuperReg(R.first,
12571                             PPC::sub_32, &PPC::G8RCRegClass),
12572                           &PPC::G8RCRegClass);
12573
12574   // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
12575   if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
12576     R.first = PPC::CR0;
12577     R.second = &PPC::CRRCRegClass;
12578   }
12579
12580   return R;
12581 }
12582
12583 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
12584 /// vector.  If it is invalid, don't add anything to Ops.
12585 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
12586                                                      std::string &Constraint,
12587                                                      std::vector<SDValue>&Ops,
12588                                                      SelectionDAG &DAG) const {
12589   SDValue Result;
12590
12591   // Only support length 1 constraints.
12592   if (Constraint.length() > 1) return;
12593
12594   char Letter = Constraint[0];
12595   switch (Letter) {
12596   default: break;
12597   case 'I':
12598   case 'J':
12599   case 'K':
12600   case 'L':
12601   case 'M':
12602   case 'N':
12603   case 'O':
12604   case 'P': {
12605     ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
12606     if (!CST) return; // Must be an immediate to match.
12607     SDLoc dl(Op);
12608     int64_t Value = CST->getSExtValue();
12609     EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
12610                          // numbers are printed as such.
12611     switch (Letter) {
12612     default: llvm_unreachable("Unknown constraint letter!");
12613     case 'I':  // "I" is a signed 16-bit constant.
12614       if (isInt<16>(Value))
12615         Result = DAG.getTargetConstant(Value, dl, TCVT);
12616       break;
12617     case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
12618       if (isShiftedUInt<16, 16>(Value))
12619         Result = DAG.getTargetConstant(Value, dl, TCVT);
12620       break;
12621     case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
12622       if (isShiftedInt<16, 16>(Value))
12623         Result = DAG.getTargetConstant(Value, dl, TCVT);
12624       break;
12625     case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
12626       if (isUInt<16>(Value))
12627         Result = DAG.getTargetConstant(Value, dl, TCVT);
12628       break;
12629     case 'M':  // "M" is a constant that is greater than 31.
12630       if (Value > 31)
12631         Result = DAG.getTargetConstant(Value, dl, TCVT);
12632       break;
12633     case 'N':  // "N" is a positive constant that is an exact power of two.
12634       if (Value > 0 && isPowerOf2_64(Value))
12635         Result = DAG.getTargetConstant(Value, dl, TCVT);
12636       break;
12637     case 'O':  // "O" is the constant zero.
12638       if (Value == 0)
12639         Result = DAG.getTargetConstant(Value, dl, TCVT);
12640       break;
12641     case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
12642       if (isInt<16>(-Value))
12643         Result = DAG.getTargetConstant(Value, dl, TCVT);
12644       break;
12645     }
12646     break;
12647   }
12648   }
12649
12650   if (Result.getNode()) {
12651     Ops.push_back(Result);
12652     return;
12653   }
12654
12655   // Handle standard constraint letters.
12656   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
12657 }
12658
12659 // isLegalAddressingMode - Return true if the addressing mode represented
12660 // by AM is legal for this target, for a load/store of the specified type.
12661 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
12662                                               const AddrMode &AM, Type *Ty,
12663                                               unsigned AS) const {
12664   // PPC does not allow r+i addressing modes for vectors!
12665   if (Ty->isVectorTy() && AM.BaseOffs != 0)
12666     return false;
12667
12668   // PPC allows a sign-extended 16-bit immediate field.
12669   if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
12670     return false;
12671
12672   // No global is ever allowed as a base.
12673   if (AM.BaseGV)
12674     return false;
12675
12676   // PPC only support r+r,
12677   switch (AM.Scale) {
12678   case 0:  // "r+i" or just "i", depending on HasBaseReg.
12679     break;
12680   case 1:
12681     if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
12682       return false;
12683     // Otherwise we have r+r or r+i.
12684     break;
12685   case 2:
12686     if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
12687       return false;
12688     // Allow 2*r as r+r.
12689     break;
12690   default:
12691     // No other scales are supported.
12692     return false;
12693   }
12694
12695   return true;
12696 }
12697
12698 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
12699                                            SelectionDAG &DAG) const {
12700   MachineFunction &MF = DAG.getMachineFunction();
12701   MachineFrameInfo &MFI = MF.getFrameInfo();
12702   MFI.setReturnAddressIsTaken(true);
12703
12704   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
12705     return SDValue();
12706
12707   SDLoc dl(Op);
12708   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
12709
12710   // Make sure the function does not optimize away the store of the RA to
12711   // the stack.
12712   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
12713   FuncInfo->setLRStoreRequired();
12714   bool isPPC64 = Subtarget.isPPC64();
12715   auto PtrVT = getPointerTy(MF.getDataLayout());
12716
12717   if (Depth > 0) {
12718     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
12719     SDValue Offset =
12720         DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
12721                         isPPC64 ? MVT::i64 : MVT::i32);
12722     return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
12723                        DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
12724                        MachinePointerInfo());
12725   }
12726
12727   // Just load the return address off the stack.
12728   SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
12729   return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
12730                      MachinePointerInfo());
12731 }
12732
12733 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
12734                                           SelectionDAG &DAG) const {
12735   SDLoc dl(Op);
12736   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
12737
12738   MachineFunction &MF = DAG.getMachineFunction();
12739   MachineFrameInfo &MFI = MF.getFrameInfo();
12740   MFI.setFrameAddressIsTaken(true);
12741
12742   EVT PtrVT = getPointerTy(MF.getDataLayout());
12743   bool isPPC64 = PtrVT == MVT::i64;
12744
12745   // Naked functions never have a frame pointer, and so we use r1. For all
12746   // other functions, this decision must be delayed until during PEI.
12747   unsigned FrameReg;
12748   if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
12749     FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
12750   else
12751     FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
12752
12753   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
12754                                          PtrVT);
12755   while (Depth--)
12756     FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
12757                             FrameAddr, MachinePointerInfo());
12758   return FrameAddr;
12759 }
12760
12761 // FIXME? Maybe this could be a TableGen attribute on some registers and
12762 // this table could be generated automatically from RegInfo.
12763 unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
12764                                               SelectionDAG &DAG) const {
12765   bool isPPC64 = Subtarget.isPPC64();
12766   bool isDarwinABI = Subtarget.isDarwinABI();
12767
12768   if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
12769       (!isPPC64 && VT != MVT::i32))
12770     report_fatal_error("Invalid register global variable type");
12771
12772   bool is64Bit = isPPC64 && VT == MVT::i64;
12773   unsigned Reg = StringSwitch<unsigned>(RegName)
12774                    .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
12775                    .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2)
12776                    .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
12777                                   (is64Bit ? PPC::X13 : PPC::R13))
12778                    .Default(0);
12779
12780   if (Reg)
12781     return Reg;
12782   report_fatal_error("Invalid register name global variable");
12783 }
12784
12785 bool
12786 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
12787   // The PowerPC target isn't yet aware of offsets.
12788   return false;
12789 }
12790
12791 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
12792                                            const CallInst &I,
12793                                            unsigned Intrinsic) const {
12794   switch (Intrinsic) {
12795   case Intrinsic::ppc_qpx_qvlfd:
12796   case Intrinsic::ppc_qpx_qvlfs:
12797   case Intrinsic::ppc_qpx_qvlfcd:
12798   case Intrinsic::ppc_qpx_qvlfcs:
12799   case Intrinsic::ppc_qpx_qvlfiwa:
12800   case Intrinsic::ppc_qpx_qvlfiwz:
12801   case Intrinsic::ppc_altivec_lvx:
12802   case Intrinsic::ppc_altivec_lvxl:
12803   case Intrinsic::ppc_altivec_lvebx:
12804   case Intrinsic::ppc_altivec_lvehx:
12805   case Intrinsic::ppc_altivec_lvewx:
12806   case Intrinsic::ppc_vsx_lxvd2x:
12807   case Intrinsic::ppc_vsx_lxvw4x: {
12808     EVT VT;
12809     switch (Intrinsic) {
12810     case Intrinsic::ppc_altivec_lvebx:
12811       VT = MVT::i8;
12812       break;
12813     case Intrinsic::ppc_altivec_lvehx:
12814       VT = MVT::i16;
12815       break;
12816     case Intrinsic::ppc_altivec_lvewx:
12817       VT = MVT::i32;
12818       break;
12819     case Intrinsic::ppc_vsx_lxvd2x:
12820       VT = MVT::v2f64;
12821       break;
12822     case Intrinsic::ppc_qpx_qvlfd:
12823       VT = MVT::v4f64;
12824       break;
12825     case Intrinsic::ppc_qpx_qvlfs:
12826       VT = MVT::v4f32;
12827       break;
12828     case Intrinsic::ppc_qpx_qvlfcd:
12829       VT = MVT::v2f64;
12830       break;
12831     case Intrinsic::ppc_qpx_qvlfcs:
12832       VT = MVT::v2f32;
12833       break;
12834     default:
12835       VT = MVT::v4i32;
12836       break;
12837     }
12838
12839     Info.opc = ISD::INTRINSIC_W_CHAIN;
12840     Info.memVT = VT;
12841     Info.ptrVal = I.getArgOperand(0);
12842     Info.offset = -VT.getStoreSize()+1;
12843     Info.size = 2*VT.getStoreSize()-1;
12844     Info.align = 1;
12845     Info.vol = false;
12846     Info.readMem = true;
12847     Info.writeMem = false;
12848     return true;
12849   }
12850   case Intrinsic::ppc_qpx_qvlfda:
12851   case Intrinsic::ppc_qpx_qvlfsa:
12852   case Intrinsic::ppc_qpx_qvlfcda:
12853   case Intrinsic::ppc_qpx_qvlfcsa:
12854   case Intrinsic::ppc_qpx_qvlfiwaa:
12855   case Intrinsic::ppc_qpx_qvlfiwza: {
12856     EVT VT;
12857     switch (Intrinsic) {
12858     case Intrinsic::ppc_qpx_qvlfda:
12859       VT = MVT::v4f64;
12860       break;
12861     case Intrinsic::ppc_qpx_qvlfsa:
12862       VT = MVT::v4f32;
12863       break;
12864     case Intrinsic::ppc_qpx_qvlfcda:
12865       VT = MVT::v2f64;
12866       break;
12867     case Intrinsic::ppc_qpx_qvlfcsa:
12868       VT = MVT::v2f32;
12869       break;
12870     default:
12871       VT = MVT::v4i32;
12872       break;
12873     }
12874
12875     Info.opc = ISD::INTRINSIC_W_CHAIN;
12876     Info.memVT = VT;
12877     Info.ptrVal = I.getArgOperand(0);
12878     Info.offset = 0;
12879     Info.size = VT.getStoreSize();
12880     Info.align = 1;
12881     Info.vol = false;
12882     Info.readMem = true;
12883     Info.writeMem = false;
12884     return true;
12885   }
12886   case Intrinsic::ppc_qpx_qvstfd:
12887   case Intrinsic::ppc_qpx_qvstfs:
12888   case Intrinsic::ppc_qpx_qvstfcd:
12889   case Intrinsic::ppc_qpx_qvstfcs:
12890   case Intrinsic::ppc_qpx_qvstfiw:
12891   case Intrinsic::ppc_altivec_stvx:
12892   case Intrinsic::ppc_altivec_stvxl:
12893   case Intrinsic::ppc_altivec_stvebx:
12894   case Intrinsic::ppc_altivec_stvehx:
12895   case Intrinsic::ppc_altivec_stvewx:
12896   case Intrinsic::ppc_vsx_stxvd2x:
12897   case Intrinsic::ppc_vsx_stxvw4x: {
12898     EVT VT;
12899     switch (Intrinsic) {
12900     case Intrinsic::ppc_altivec_stvebx:
12901       VT = MVT::i8;
12902       break;
12903     case Intrinsic::ppc_altivec_stvehx:
12904       VT = MVT::i16;
12905       break;
12906     case Intrinsic::ppc_altivec_stvewx:
12907       VT = MVT::i32;
12908       break;
12909     case Intrinsic::ppc_vsx_stxvd2x:
12910       VT = MVT::v2f64;
12911       break;
12912     case Intrinsic::ppc_qpx_qvstfd:
12913       VT = MVT::v4f64;
12914       break;
12915     case Intrinsic::ppc_qpx_qvstfs:
12916       VT = MVT::v4f32;
12917       break;
12918     case Intrinsic::ppc_qpx_qvstfcd:
12919       VT = MVT::v2f64;
12920       break;
12921     case Intrinsic::ppc_qpx_qvstfcs:
12922       VT = MVT::v2f32;
12923       break;
12924     default:
12925       VT = MVT::v4i32;
12926       break;
12927     }
12928
12929     Info.opc = ISD::INTRINSIC_VOID;
12930     Info.memVT = VT;
12931     Info.ptrVal = I.getArgOperand(1);
12932     Info.offset = -VT.getStoreSize()+1;
12933     Info.size = 2*VT.getStoreSize()-1;
12934     Info.align = 1;
12935     Info.vol = false;
12936     Info.readMem = false;
12937     Info.writeMem = true;
12938     return true;
12939   }
12940   case Intrinsic::ppc_qpx_qvstfda:
12941   case Intrinsic::ppc_qpx_qvstfsa:
12942   case Intrinsic::ppc_qpx_qvstfcda:
12943   case Intrinsic::ppc_qpx_qvstfcsa:
12944   case Intrinsic::ppc_qpx_qvstfiwa: {
12945     EVT VT;
12946     switch (Intrinsic) {
12947     case Intrinsic::ppc_qpx_qvstfda:
12948       VT = MVT::v4f64;
12949       break;
12950     case Intrinsic::ppc_qpx_qvstfsa:
12951       VT = MVT::v4f32;
12952       break;
12953     case Intrinsic::ppc_qpx_qvstfcda:
12954       VT = MVT::v2f64;
12955       break;
12956     case Intrinsic::ppc_qpx_qvstfcsa:
12957       VT = MVT::v2f32;
12958       break;
12959     default:
12960       VT = MVT::v4i32;
12961       break;
12962     }
12963
12964     Info.opc = ISD::INTRINSIC_VOID;
12965     Info.memVT = VT;
12966     Info.ptrVal = I.getArgOperand(1);
12967     Info.offset = 0;
12968     Info.size = VT.getStoreSize();
12969     Info.align = 1;
12970     Info.vol = false;
12971     Info.readMem = false;
12972     Info.writeMem = true;
12973     return true;
12974   }
12975   default:
12976     break;
12977   }
12978
12979   return false;
12980 }
12981
12982 /// getOptimalMemOpType - Returns the target specific optimal type for load
12983 /// and store operations as a result of memset, memcpy, and memmove
12984 /// lowering. If DstAlign is zero that means it's safe to destination
12985 /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
12986 /// means there isn't a need to check it against alignment requirement,
12987 /// probably because the source does not need to be loaded. If 'IsMemset' is
12988 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
12989 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
12990 /// source is constant so it does not need to be loaded.
12991 /// It returns EVT::Other if the type should be determined using generic
12992 /// target-independent logic.
12993 EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
12994                                            unsigned DstAlign, unsigned SrcAlign,
12995                                            bool IsMemset, bool ZeroMemset,
12996                                            bool MemcpyStrSrc,
12997                                            MachineFunction &MF) const {
12998   if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
12999     const Function *F = MF.getFunction();
13000     // When expanding a memset, require at least two QPX instructions to cover
13001     // the cost of loading the value to be stored from the constant pool.
13002     if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
13003        (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
13004         !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
13005       return MVT::v4f64;
13006     }
13007
13008     // We should use Altivec/VSX loads and stores when available. For unaligned
13009     // addresses, unaligned VSX loads are only fast starting with the P8.
13010     if (Subtarget.hasAltivec() && Size >= 16 &&
13011         (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
13012          ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
13013       return MVT::v4i32;
13014   }
13015
13016   if (Subtarget.isPPC64()) {
13017     return MVT::i64;
13018   }
13019
13020   return MVT::i32;
13021 }
13022
13023 /// \brief Returns true if it is beneficial to convert a load of a constant
13024 /// to just the constant itself.
13025 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
13026                                                           Type *Ty) const {
13027   assert(Ty->isIntegerTy());
13028
13029   unsigned BitSize = Ty->getPrimitiveSizeInBits();
13030   return !(BitSize == 0 || BitSize > 64);
13031 }
13032
13033 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
13034   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
13035     return false;
13036   unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
13037   unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
13038   return NumBits1 == 64 && NumBits2 == 32;
13039 }
13040
13041 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
13042   if (!VT1.isInteger() || !VT2.isInteger())
13043     return false;
13044   unsigned NumBits1 = VT1.getSizeInBits();
13045   unsigned NumBits2 = VT2.getSizeInBits();
13046   return NumBits1 == 64 && NumBits2 == 32;
13047 }
13048
13049 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
13050   // Generally speaking, zexts are not free, but they are free when they can be
13051   // folded with other operations.
13052   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
13053     EVT MemVT = LD->getMemoryVT();
13054     if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
13055          (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
13056         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
13057          LD->getExtensionType() == ISD::ZEXTLOAD))
13058       return true;
13059   }
13060
13061   // FIXME: Add other cases...
13062   //  - 32-bit shifts with a zext to i64
13063   //  - zext after ctlz, bswap, etc.
13064   //  - zext after and by a constant mask
13065
13066   return TargetLowering::isZExtFree(Val, VT2);
13067 }
13068
13069 bool PPCTargetLowering::isFPExtFree(EVT VT) const {
13070   assert(VT.isFloatingPoint());
13071   return true;
13072 }
13073
13074 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
13075   return isInt<16>(Imm) || isUInt<16>(Imm);
13076 }
13077
13078 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
13079   return isInt<16>(Imm) || isUInt<16>(Imm);
13080 }
13081
13082 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
13083                                                        unsigned,
13084                                                        unsigned,
13085                                                        bool *Fast) const {
13086   if (DisablePPCUnaligned)
13087     return false;
13088
13089   // PowerPC supports unaligned memory access for simple non-vector types.
13090   // Although accessing unaligned addresses is not as efficient as accessing
13091   // aligned addresses, it is generally more efficient than manual expansion,
13092   // and generally only traps for software emulation when crossing page
13093   // boundaries.
13094
13095   if (!VT.isSimple())
13096     return false;
13097
13098   if (VT.getSimpleVT().isVector()) {
13099     if (Subtarget.hasVSX()) {
13100       if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
13101           VT != MVT::v4f32 && VT != MVT::v4i32)
13102         return false;
13103     } else {
13104       return false;
13105     }
13106   }
13107
13108   if (VT == MVT::ppcf128)
13109     return false;
13110
13111   if (Fast)
13112     *Fast = true;
13113
13114   return true;
13115 }
13116
13117 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
13118   VT = VT.getScalarType();
13119
13120   if (!VT.isSimple())
13121     return false;
13122
13123   switch (VT.getSimpleVT().SimpleTy) {
13124   case MVT::f32:
13125   case MVT::f64:
13126     return true;
13127   default:
13128     break;
13129   }
13130
13131   return false;
13132 }
13133
13134 const MCPhysReg *
13135 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
13136   // LR is a callee-save register, but we must treat it as clobbered by any call
13137   // site. Hence we include LR in the scratch registers, which are in turn added
13138   // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
13139   // to CTR, which is used by any indirect call.
13140   static const MCPhysReg ScratchRegs[] = {
13141     PPC::X12, PPC::LR8, PPC::CTR8, 0
13142   };
13143
13144   return ScratchRegs;
13145 }
13146
13147 unsigned PPCTargetLowering::getExceptionPointerRegister(
13148     const Constant *PersonalityFn) const {
13149   return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
13150 }
13151
13152 unsigned PPCTargetLowering::getExceptionSelectorRegister(
13153     const Constant *PersonalityFn) const {
13154   return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
13155 }
13156
13157 bool
13158 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
13159                      EVT VT , unsigned DefinedValues) const {
13160   if (VT == MVT::v2i64)
13161     return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
13162
13163   if (Subtarget.hasVSX() || Subtarget.hasQPX())
13164     return true;
13165
13166   return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
13167 }
13168
13169 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
13170   if (DisableILPPref || Subtarget.enableMachineScheduler())
13171     return TargetLowering::getSchedulingPreference(N);
13172
13173   return Sched::ILP;
13174 }
13175
13176 // Create a fast isel object.
13177 FastISel *
13178 PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
13179                                   const TargetLibraryInfo *LibInfo) const {
13180   return PPC::createFastISel(FuncInfo, LibInfo);
13181 }
13182
13183 void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
13184   if (Subtarget.isDarwinABI()) return;
13185   if (!Subtarget.isPPC64()) return;
13186
13187   // Update IsSplitCSR in PPCFunctionInfo
13188   PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>();
13189   PFI->setIsSplitCSR(true);
13190 }
13191
13192 void PPCTargetLowering::insertCopiesSplitCSR(
13193   MachineBasicBlock *Entry,
13194   const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
13195   const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
13196   const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
13197   if (!IStart)
13198     return;
13199
13200   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13201   MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
13202   MachineBasicBlock::iterator MBBI = Entry->begin();
13203   for (const MCPhysReg *I = IStart; *I; ++I) {
13204     const TargetRegisterClass *RC = nullptr;
13205     if (PPC::G8RCRegClass.contains(*I))
13206       RC = &PPC::G8RCRegClass;
13207     else if (PPC::F8RCRegClass.contains(*I))
13208       RC = &PPC::F8RCRegClass;
13209     else if (PPC::CRRCRegClass.contains(*I))
13210       RC = &PPC::CRRCRegClass;
13211     else if (PPC::VRRCRegClass.contains(*I))
13212       RC = &PPC::VRRCRegClass;
13213     else
13214       llvm_unreachable("Unexpected register class in CSRsViaCopy!");
13215
13216     unsigned NewVR = MRI->createVirtualRegister(RC);
13217     // Create copy from CSR to a virtual register.
13218     // FIXME: this currently does not emit CFI pseudo-instructions, it works
13219     // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
13220     // nounwind. If we want to generalize this later, we may need to emit
13221     // CFI pseudo-instructions.
13222     assert(Entry->getParent()->getFunction()->hasFnAttribute(
13223              Attribute::NoUnwind) &&
13224            "Function should be nounwind in insertCopiesSplitCSR!");
13225     Entry->addLiveIn(*I);
13226     BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
13227       .addReg(*I);
13228
13229     // Insert the copy-back instructions right before the terminator
13230     for (auto *Exit : Exits)
13231       BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
13232               TII->get(TargetOpcode::COPY), *I)
13233         .addReg(NewVR);
13234   }
13235 }
13236
13237 // Override to enable LOAD_STACK_GUARD lowering on Linux.
13238 bool PPCTargetLowering::useLoadStackGuardNode() const {
13239   if (!Subtarget.isTargetLinux())
13240     return TargetLowering::useLoadStackGuardNode();
13241   return true;
13242 }
13243
13244 // Override to disable global variable loading on Linux.
13245 void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
13246   if (!Subtarget.isTargetLinux())
13247     return TargetLowering::insertSSPDeclarations(M);
13248 }
13249
13250 bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
13251   if (!VT.isSimple() || !Subtarget.hasVSX())
13252     return false;
13253
13254   switch(VT.getSimpleVT().SimpleTy) {
13255   default:
13256     // For FP types that are currently not supported by PPC backend, return
13257     // false. Examples: f16, f80.
13258     return false;
13259   case MVT::f32:
13260   case MVT::f64:
13261   case MVT::ppcf128:
13262     return Imm.isPosZero();
13263   }
13264 }
13265
13266 // For vector shift operation op, fold
13267 // (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
13268 static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
13269                                   SelectionDAG &DAG) {
13270   SDValue N0 = N->getOperand(0);
13271   SDValue N1 = N->getOperand(1);
13272   EVT VT = N0.getValueType();
13273   unsigned OpSizeInBits = VT.getScalarSizeInBits();
13274   unsigned Opcode = N->getOpcode();
13275   unsigned TargetOpcode;
13276
13277   switch (Opcode) {
13278   default:
13279     llvm_unreachable("Unexpected shift operation");
13280   case ISD::SHL:
13281     TargetOpcode = PPCISD::SHL;
13282     break;
13283   case ISD::SRL:
13284     TargetOpcode = PPCISD::SRL;
13285     break;
13286   case ISD::SRA:
13287     TargetOpcode = PPCISD::SRA;
13288     break;
13289   }
13290
13291   if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
13292       N1->getOpcode() == ISD::AND)
13293     if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
13294       if (Mask->getZExtValue() == OpSizeInBits - 1)
13295         return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
13296
13297   return SDValue();
13298 }
13299
13300 SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
13301   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
13302     return Value;
13303
13304   return SDValue();
13305 }
13306
13307 SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
13308   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
13309     return Value;
13310
13311   return SDValue();
13312 }
13313
13314 SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
13315   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
13316     return Value;
13317
13318   return SDValue();
13319 }