contrib/llvm/lib/Target/X86/X86ISelLowering.h

   1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the interfaces that X86 uses to lower LLVM code into a
  11 // selection DAG.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
  16 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
  17
  18 #include "llvm/CodeGen/CallingConvLower.h"
  19 #include "llvm/CodeGen/SelectionDAG.h"
  20 #include "llvm/CodeGen/TargetLowering.h"
  21 #include "llvm/Target/TargetOptions.h"
  22
  23 namespace llvm {
  24   class X86Subtarget;
  25   class X86TargetMachine;
  26
  27   namespace X86ISD {
  28     // X86 Specific DAG Nodes
  29     enum NodeType : unsigned {
  30       // Start the numbering where the builtin ops leave off.
  31       FIRST_NUMBER = ISD::BUILTIN_OP_END,
  32
  33       /// Bit scan forward.
  34       BSF,
  35       /// Bit scan reverse.
  36       BSR,
  37
  38       /// Double shift instructions. These correspond to
  39       /// X86::SHLDxx and X86::SHRDxx instructions.
  40       SHLD,
  41       SHRD,
  42
  43       /// Bitwise logical AND of floating point values. This corresponds
  44       /// to X86::ANDPS or X86::ANDPD.
  45       FAND,
  46
  47       /// Bitwise logical OR of floating point values. This corresponds
  48       /// to X86::ORPS or X86::ORPD.
  49       FOR,
  50
  51       /// Bitwise logical XOR of floating point values. This corresponds
  52       /// to X86::XORPS or X86::XORPD.
  53       FXOR,
  54
  55       ///  Bitwise logical ANDNOT of floating point values. This
  56       /// corresponds to X86::ANDNPS or X86::ANDNPD.
  57       FANDN,
  58
  59       /// These operations represent an abstract X86 call
  60       /// instruction, which includes a bunch of information.  In particular the
  61       /// operands of these node are:
  62       ///
  63       ///     #0 - The incoming token chain
  64       ///     #1 - The callee
  65       ///     #2 - The number of arg bytes the caller pushes on the stack.
  66       ///     #3 - The number of arg bytes the callee pops off the stack.
  67       ///     #4 - The value to pass in AL/AX/EAX (optional)
  68       ///     #5 - The value to pass in DL/DX/EDX (optional)
  69       ///
  70       /// The result values of these nodes are:
  71       ///
  72       ///     #0 - The outgoing token chain
  73       ///     #1 - The first register result value (optional)
  74       ///     #2 - The second register result value (optional)
  75       ///
  76       CALL,
  77
  78       /// Same as call except it adds the NoTrack prefix.
  79       NT_CALL,
  80
  81       /// This operation implements the lowering for readcyclecounter.
  82       RDTSC_DAG,
  83
  84       /// X86 Read Time-Stamp Counter and Processor ID.
  85       RDTSCP_DAG,
  86
  87       /// X86 Read Performance Monitoring Counters.
  88       RDPMC_DAG,
  89
  90       /// X86 compare and logical compare instructions.
  91       CMP, COMI, UCOMI,
  92
  93       /// X86 bit-test instructions.
  94       BT,
  95
  96       /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
  97       /// operand, usually produced by a CMP instruction.
  98       SETCC,
  99
 100       /// X86 Select
 101       SELECT, SELECTS,
 102
 103       // Same as SETCC except it's materialized with a sbb and the value is all
 104       // one's or all zero's.
 105       SETCC_CARRY,  // R = carry_bit ? ~0 : 0
 106
 107       /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
 108       /// Operands are two FP values to compare; result is a mask of
 109       /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
 110       FSETCC,
 111
 112       /// X86 FP SETCC, similar to above, but with output as an i1 mask and
 113       /// with optional rounding mode.
 114       FSETCCM, FSETCCM_RND,
 115
 116       /// X86 conditional moves. Operand 0 and operand 1 are the two values
 117       /// to select from. Operand 2 is the condition code, and operand 3 is the
 118       /// flag operand produced by a CMP or TEST instruction. It also writes a
 119       /// flag result.
 120       CMOV,
 121
 122       /// X86 conditional branches. Operand 0 is the chain operand, operand 1
 123       /// is the block to branch if condition is true, operand 2 is the
 124       /// condition code, and operand 3 is the flag operand produced by a CMP
 125       /// or TEST instruction.
 126       BRCOND,
 127
 128       /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
 129       /// operand 1 is the target address.
 130       NT_BRIND,
 131
 132       /// Return with a flag operand. Operand 0 is the chain operand, operand
 133       /// 1 is the number of bytes of stack to pop.
 134       RET_FLAG,
 135
 136       /// Return from interrupt. Operand 0 is the number of bytes to pop.
 137       IRET,
 138
 139       /// Repeat fill, corresponds to X86::REP_STOSx.
 140       REP_STOS,
 141
 142       /// Repeat move, corresponds to X86::REP_MOVSx.
 143       REP_MOVS,
 144
 145       /// On Darwin, this node represents the result of the popl
 146       /// at function entry, used for PIC code.
 147       GlobalBaseReg,
 148
 149       /// A wrapper node for TargetConstantPool, TargetJumpTable,
 150       /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
 151       /// MCSymbol and TargetBlockAddress.
 152       Wrapper,
 153
 154       /// Special wrapper used under X86-64 PIC mode for RIP
 155       /// relative displacements.
 156       WrapperRIP,
 157
 158       /// Copies a 64-bit value from the low word of an XMM vector
 159       /// to an MMX vector.
 160       MOVDQ2Q,
 161
 162       /// Copies a 32-bit value from the low word of a MMX
 163       /// vector to a GPR.
 164       MMX_MOVD2W,
 165
 166       /// Copies a GPR into the low 32-bit word of a MMX vector
 167       /// and zero out the high word.
 168       MMX_MOVW2D,
 169
 170       /// Extract an 8-bit value from a vector and zero extend it to
 171       /// i32, corresponds to X86::PEXTRB.
 172       PEXTRB,
 173
 174       /// Extract a 16-bit value from a vector and zero extend it to
 175       /// i32, corresponds to X86::PEXTRW.
 176       PEXTRW,
 177
 178       /// Insert any element of a 4 x float vector into any element
 179       /// of a destination 4 x floatvector.
 180       INSERTPS,
 181
 182       /// Insert the lower 8-bits of a 32-bit value to a vector,
 183       /// corresponds to X86::PINSRB.
 184       PINSRB,
 185
 186       /// Insert the lower 16-bits of a 32-bit value to a vector,
 187       /// corresponds to X86::PINSRW.
 188       PINSRW,
 189
 190       /// Shuffle 16 8-bit values within a vector.
 191       PSHUFB,
 192
 193       /// Compute Sum of Absolute Differences.
 194       PSADBW,
 195       /// Compute Double Block Packed Sum-Absolute-Differences
 196       DBPSADBW,
 197
 198       /// Bitwise Logical AND NOT of Packed FP values.
 199       ANDNP,
 200
 201       /// Blend where the selector is an immediate.
 202       BLENDI,
 203
 204       /// Dynamic (non-constant condition) vector blend where only the sign bits
 205       /// of the condition elements are used. This is used to enforce that the
 206       /// condition mask is not valid for generic VSELECT optimizations.
 207       SHRUNKBLEND,
 208
 209       /// Combined add and sub on an FP vector.
 210       ADDSUB,
 211
 212       //  FP vector ops with rounding mode.
 213       FADD_RND, FADDS_RND,
 214       FSUB_RND, FSUBS_RND,
 215       FMUL_RND, FMULS_RND,
 216       FDIV_RND, FDIVS_RND,
 217       FMAX_RND, FMAXS_RND,
 218       FMIN_RND, FMINS_RND,
 219       FSQRT_RND, FSQRTS_RND,
 220
 221       // FP vector get exponent.
 222       FGETEXP_RND, FGETEXPS_RND,
 223       // Extract Normalized Mantissas.
 224       VGETMANT, VGETMANT_RND, VGETMANTS, VGETMANTS_RND,
 225       // FP Scale.
 226       SCALEF,
 227       SCALEFS,
 228
 229       // Integer add/sub with unsigned saturation.
 230       ADDUS,
 231       SUBUS,
 232
 233       // Integer add/sub with signed saturation.
 234       ADDS,
 235       SUBS,
 236
 237       // Unsigned Integer average.
 238       AVG,
 239
 240       /// Integer horizontal add/sub.
 241       HADD,
 242       HSUB,
 243
 244       /// Floating point horizontal add/sub.
 245       FHADD,
 246       FHSUB,
 247
 248       // Detect Conflicts Within a Vector
 249       CONFLICT,
 250
 251       /// Floating point max and min.
 252       FMAX, FMIN,
 253
 254       /// Commutative FMIN and FMAX.
 255       FMAXC, FMINC,
 256
 257       /// Scalar intrinsic floating point max and min.
 258       FMAXS, FMINS,
 259
 260       /// Floating point reciprocal-sqrt and reciprocal approximation.
 261       /// Note that these typically require refinement
 262       /// in order to obtain suitable precision.
 263       FRSQRT, FRCP,
 264
 265       // AVX-512 reciprocal approximations with a little more precision.
 266       RSQRT14, RSQRT14S, RCP14, RCP14S,
 267
 268       // Thread Local Storage.
 269       TLSADDR,
 270
 271       // Thread Local Storage. A call to get the start address
 272       // of the TLS block for the current module.
 273       TLSBASEADDR,
 274
 275       // Thread Local Storage.  When calling to an OS provided
 276       // thunk at the address from an earlier relocation.
 277       TLSCALL,
 278
 279       // Exception Handling helpers.
 280       EH_RETURN,
 281
 282       // SjLj exception handling setjmp.
 283       EH_SJLJ_SETJMP,
 284
 285       // SjLj exception handling longjmp.
 286       EH_SJLJ_LONGJMP,
 287
 288       // SjLj exception handling dispatch.
 289       EH_SJLJ_SETUP_DISPATCH,
 290
 291       /// Tail call return. See X86TargetLowering::LowerCall for
 292       /// the list of operands.
 293       TC_RETURN,
 294
 295       // Vector move to low scalar and zero higher vector elements.
 296       VZEXT_MOVL,
 297
 298       // Vector integer zero-extend.
 299       VZEXT,
 300       // Vector integer signed-extend.
 301       VSEXT,
 302
 303       // Vector integer truncate.
 304       VTRUNC,
 305       // Vector integer truncate with unsigned/signed saturation.
 306       VTRUNCUS, VTRUNCS,
 307
 308       // Vector FP extend.
 309       VFPEXT, VFPEXT_RND, VFPEXTS_RND,
 310
 311       // Vector FP round.
 312       VFPROUND, VFPROUND_RND, VFPROUNDS_RND,
 313
 314       // 128-bit vector logical left / right shift
 315       VSHLDQ, VSRLDQ,
 316
 317       // Vector shift elements
 318       VSHL, VSRL, VSRA,
 319
 320       // Vector variable shift right arithmetic.
 321       // Unlike ISD::SRA, in case shift count greater then element size
 322       // use sign bit to fill destination data element.
 323       VSRAV,
 324
 325       // Vector shift elements by immediate
 326       VSHLI, VSRLI, VSRAI,
 327
 328       // Shifts of mask registers.
 329       KSHIFTL, KSHIFTR,
 330
 331       // Bit rotate by immediate
 332       VROTLI, VROTRI,
 333
 334       // Vector packed double/float comparison.
 335       CMPP,
 336
 337       // Vector integer comparisons.
 338       PCMPEQ, PCMPGT,
 339
 340       // v8i16 Horizontal minimum and position.
 341       PHMINPOS,
 342
 343       MULTISHIFT,
 344
 345       /// Vector comparison generating mask bits for fp and
 346       /// integer signed and unsigned data types.
 347       CMPM,
 348       // Vector comparison with rounding mode for FP values
 349       CMPM_RND,
 350
 351       // Arithmetic operations with FLAGS results.
 352       ADD, SUB, ADC, SBB, SMUL,
 353       INC, DEC, OR, XOR, AND,
 354
 355       // Bit field extract.
 356       BEXTR,
 357
 358       // LOW, HI, FLAGS = umul LHS, RHS.
 359       UMUL,
 360
 361       // 8-bit SMUL/UMUL - AX, FLAGS = smul8/umul8 AL, RHS.
 362       SMUL8, UMUL8,
 363
 364       // 8-bit divrem that zero-extend the high result (AH).
 365       UDIVREM8_ZEXT_HREG,
 366       SDIVREM8_SEXT_HREG,
 367
 368       // X86-specific multiply by immediate.
 369       MUL_IMM,
 370
 371       // Vector sign bit extraction.
 372       MOVMSK,
 373
 374       // Vector bitwise comparisons.
 375       PTEST,
 376
 377       // Vector packed fp sign bitwise comparisons.
 378       TESTP,
 379
 380       // OR/AND test for masks.
 381       KORTEST,
 382       KTEST,
 383
 384       // ADD for masks.
 385       KADD,
 386
 387       // Several flavors of instructions with vector shuffle behaviors.
 388       // Saturated signed/unnsigned packing.
 389       PACKSS,
 390       PACKUS,
 391       // Intra-lane alignr.
 392       PALIGNR,
 393       // AVX512 inter-lane alignr.
 394       VALIGN,
 395       PSHUFD,
 396       PSHUFHW,
 397       PSHUFLW,
 398       SHUFP,
 399       // VBMI2 Concat & Shift.
 400       VSHLD,
 401       VSHRD,
 402       VSHLDV,
 403       VSHRDV,
 404       //Shuffle Packed Values at 128-bit granularity.
 405       SHUF128,
 406       MOVDDUP,
 407       MOVSHDUP,
 408       MOVSLDUP,
 409       MOVLHPS,
 410       MOVHLPS,
 411       MOVSD,
 412       MOVSS,
 413       UNPCKL,
 414       UNPCKH,
 415       VPERMILPV,
 416       VPERMILPI,
 417       VPERMI,
 418       VPERM2X128,
 419
 420       // Variable Permute (VPERM).
 421       // Res = VPERMV MaskV, V0
 422       VPERMV,
 423
 424       // 3-op Variable Permute (VPERMT2).
 425       // Res = VPERMV3 V0, MaskV, V1
 426       VPERMV3,
 427
 428       // Bitwise ternary logic.
 429       VPTERNLOG,
 430       // Fix Up Special Packed Float32/64 values.
 431       VFIXUPIMM,
 432       VFIXUPIMMS,
 433       // Range Restriction Calculation For Packed Pairs of Float32/64 values.
 434       VRANGE, VRANGE_RND, VRANGES, VRANGES_RND,
 435       // Reduce - Perform Reduction Transformation on scalar\packed FP.
 436       VREDUCE, VREDUCE_RND, VREDUCES, VREDUCES_RND,
 437       // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
 438       // Also used by the legacy (V)ROUND intrinsics where we mask out the
 439       // scaling part of the immediate.
 440       VRNDSCALE, VRNDSCALE_RND, VRNDSCALES, VRNDSCALES_RND,
 441       // Tests Types Of a FP Values for packed types.
 442       VFPCLASS,
 443       // Tests Types Of a FP Values for scalar types.
 444       VFPCLASSS,
 445
 446       // Broadcast scalar to vector.
 447       VBROADCAST,
 448       // Broadcast mask to vector.
 449       VBROADCASTM,
 450       // Broadcast subvector to vector.
 451       SUBV_BROADCAST,
 452
 453       /// SSE4A Extraction and Insertion.
 454       EXTRQI, INSERTQI,
 455
 456       // XOP arithmetic/logical shifts.
 457       VPSHA, VPSHL,
 458       // XOP signed/unsigned integer comparisons.
 459       VPCOM, VPCOMU,
 460       // XOP packed permute bytes.
 461       VPPERM,
 462       // XOP two source permutation.
 463       VPERMIL2,
 464
 465       // Vector multiply packed unsigned doubleword integers.
 466       PMULUDQ,
 467       // Vector multiply packed signed doubleword integers.
 468       PMULDQ,
 469       // Vector Multiply Packed UnsignedIntegers with Round and Scale.
 470       MULHRS,
 471
 472       // Multiply and Add Packed Integers.
 473       VPMADDUBSW, VPMADDWD,
 474
 475       // AVX512IFMA multiply and add.
 476       // NOTE: These are different than the instruction and perform
 477       // op0 x op1 + op2.
 478       VPMADD52L, VPMADD52H,
 479
 480       // VNNI
 481       VPDPBUSD,
 482       VPDPBUSDS,
 483       VPDPWSSD,
 484       VPDPWSSDS,
 485
 486       // FMA nodes.
 487       // We use the target independent ISD::FMA for the non-inverted case.
 488       FNMADD,
 489       FMSUB,
 490       FNMSUB,
 491       FMADDSUB,
 492       FMSUBADD,
 493
 494       // FMA with rounding mode.
 495       FMADD_RND,
 496       FNMADD_RND,
 497       FMSUB_RND,
 498       FNMSUB_RND,
 499       FMADDSUB_RND,
 500       FMSUBADD_RND,
 501
 502       // Compress and expand.
 503       COMPRESS,
 504       EXPAND,
 505
 506       // Bits shuffle
 507       VPSHUFBITQMB,
 508
 509       // Convert Unsigned/Integer to Floating-Point Value with rounding mode.
 510       SINT_TO_FP_RND, UINT_TO_FP_RND,
 511       SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,
 512
 513       // Vector float/double to signed/unsigned integer.
 514       CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
 515       // Scalar float/double to signed/unsigned integer.
 516       CVTS2SI_RND, CVTS2UI_RND,
 517
 518       // Vector float/double to signed/unsigned integer with truncation.
 519       CVTTP2SI, CVTTP2UI, CVTTP2SI_RND, CVTTP2UI_RND,
 520       // Scalar float/double to signed/unsigned integer with truncation.
 521       CVTTS2SI_RND, CVTTS2UI_RND,
 522
 523       // Vector signed/unsigned integer to float/double.
 524       CVTSI2P, CVTUI2P,
 525
 526       // Save xmm argument registers to the stack, according to %al. An operator
 527       // is needed so that this can be expanded with control flow.
 528       VASTART_SAVE_XMM_REGS,
 529
 530       // Windows's _chkstk call to do stack probing.
 531       WIN_ALLOCA,
 532
 533       // For allocating variable amounts of stack space when using
 534       // segmented stacks. Check if the current stacklet has enough space, and
 535       // falls back to heap allocation if not.
 536       SEG_ALLOCA,
 537
 538       // Memory barriers.
 539       MEMBARRIER,
 540       MFENCE,
 541
 542       // Store FP status word into i16 register.
 543       FNSTSW16r,
 544
 545       // Store contents of %ah into %eflags.
 546       SAHF,
 547
 548       // Get a random integer and indicate whether it is valid in CF.
 549       RDRAND,
 550
 551       // Get a NIST SP800-90B & C compliant random integer and
 552       // indicate whether it is valid in CF.
 553       RDSEED,
 554
 555       // SSE42 string comparisons.
 556       // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
 557       // will emit one or two instructions based on which results are used. If
 558       // flags and index/mask this allows us to use a single instruction since
 559       // we won't have to pick and opcode for flags. Instead we can rely on the
 560       // DAG to CSE everything and decide at isel.
 561       PCMPISTR,
 562       PCMPESTR,
 563
 564       // Test if in transactional execution.
 565       XTEST,
 566
 567       // ERI instructions.
 568       RSQRT28, RSQRT28S, RCP28, RCP28S, EXP2,
 569
 570       // Conversions between float and half-float.
 571       CVTPS2PH, CVTPH2PS, CVTPH2PS_RND,
 572
 573       // Galois Field Arithmetic Instructions
 574       GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,
 575
 576       // LWP insert record.
 577       LWPINS,
 578
 579       // User level wait
 580       UMWAIT, TPAUSE,
 581
 582       // Compare and swap.
 583       LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
 584       LCMPXCHG8_DAG,
 585       LCMPXCHG16_DAG,
 586       LCMPXCHG8_SAVE_EBX_DAG,
 587       LCMPXCHG16_SAVE_RBX_DAG,
 588
 589       /// LOCK-prefixed arithmetic read-modify-write instructions.
 590       /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
 591       LADD, LSUB, LOR, LXOR, LAND, LINC, LDEC,
 592
 593       // Load, scalar_to_vector, and zero extend.
 594       VZEXT_LOAD,
 595
 596       // Store FP control world into i16 memory.
 597       FNSTCW16m,
 598
 599       /// This instruction implements FP_TO_SINT with the
 600       /// integer destination in memory and a FP reg source.  This corresponds
 601       /// to the X86::FIST*m instructions and the rounding mode change stuff. It
 602       /// has two inputs (token chain and address) and two outputs (int value
 603       /// and token chain).
 604       FP_TO_INT16_IN_MEM,
 605       FP_TO_INT32_IN_MEM,
 606       FP_TO_INT64_IN_MEM,
 607
 608       /// This instruction implements SINT_TO_FP with the
 609       /// integer source in memory and FP reg result.  This corresponds to the
 610       /// X86::FILD*m instructions. It has three inputs (token chain, address,
 611       /// and source type) and two outputs (FP value and token chain). FILD_FLAG
 612       /// also produces a flag).
 613       FILD,
 614       FILD_FLAG,
 615
 616       /// This instruction implements an extending load to FP stack slots.
 617       /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
 618       /// operand, ptr to load from, and a ValueType node indicating the type
 619       /// to load to.
 620       FLD,
 621
 622       /// This instruction implements a truncating store to FP stack
 623       /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
 624       /// chain operand, value to store, address, and a ValueType to store it
 625       /// as.
 626       FST,
 627
 628       /// This instruction grabs the address of the next argument
 629       /// from a va_list. (reads and modifies the va_list in memory)
 630       VAARG_64,
 631
 632       // Vector truncating store with unsigned/signed saturation
 633       VTRUNCSTOREUS, VTRUNCSTORES,
 634       // Vector truncating masked store with unsigned/signed saturation
 635       VMTRUNCSTOREUS, VMTRUNCSTORES,
 636
 637       // X86 specific gather and scatter
 638       MGATHER, MSCATTER,
 639
 640       // WARNING: Do not add anything in the end unless you want the node to
 641       // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
 642       // opcodes will be thought as target memory ops!
 643     };
 644   } // end namespace X86ISD
 645
 646   /// Define some predicates that are used for node matching.
 647   namespace X86 {
 648     /// Returns true if Elt is a constant zero or floating point constant +0.0.
 649     bool isZeroNode(SDValue Elt);
 650
 651     /// Returns true of the given offset can be
 652     /// fit into displacement field of the instruction.
 653     bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
 654                                       bool hasSymbolicDisplacement = true);
 655
 656     /// Determines whether the callee is required to pop its
 657     /// own arguments. Callee pop is necessary to support tail calls.
 658     bool isCalleePop(CallingConv::ID CallingConv,
 659                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
 660
 661   } // end namespace X86
 662
 663   //===--------------------------------------------------------------------===//
 664   //  X86 Implementation of the TargetLowering interface
 665   class X86TargetLowering final : public TargetLowering {
 666   public:
 667     explicit X86TargetLowering(const X86TargetMachine &TM,
 668                                const X86Subtarget &STI);
 669
 670     unsigned getJumpTableEncoding() const override;
 671     bool useSoftFloat() const override;
 672
 673     void markLibCallAttributes(MachineFunction *MF, unsigned CC,
 674                                ArgListTy &Args) const override;
 675
 676     MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
 677       return MVT::i8;
 678     }
 679
 680     const MCExpr *
 681     LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
 682                               const MachineBasicBlock *MBB, unsigned uid,
 683                               MCContext &Ctx) const override;
 684
 685     /// Returns relocation base for the given PIC jumptable.
 686     SDValue getPICJumpTableRelocBase(SDValue Table,
 687                                      SelectionDAG &DAG) const override;
 688     const MCExpr *
 689     getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
 690                                  unsigned JTI, MCContext &Ctx) const override;
 691
 692     /// Return the desired alignment for ByVal aggregate
 693     /// function arguments in the caller parameter area. For X86, aggregates
 694     /// that contains are placed at 16-byte boundaries while the rest are at
 695     /// 4-byte boundaries.
 696     unsigned getByValTypeAlignment(Type *Ty,
 697                                    const DataLayout &DL) const override;
 698
 699     /// Returns the target specific optimal type for load
 700     /// and store operations as a result of memset, memcpy, and memmove
 701     /// lowering. If DstAlign is zero that means it's safe to destination
 702     /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
 703     /// means there isn't a need to check it against alignment requirement,
 704     /// probably because the source does not need to be loaded. If 'IsMemset' is
 705     /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
 706     /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
 707     /// source is constant so it does not need to be loaded.
 708     /// It returns EVT::Other if the type should be determined using generic
 709     /// target-independent logic.
 710     EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
 711                             bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
 712                             MachineFunction &MF) const override;
 713
 714     /// Returns true if it's safe to use load / store of the
 715     /// specified type to expand memcpy / memset inline. This is mostly true
 716     /// for all types except for some special cases. For example, on X86
 717     /// targets without SSE2 f64 load / store are done with fldl / fstpl which
 718     /// also does type conversion. Note the specified type doesn't have to be
 719     /// legal as the hook is used before type legalization.
 720     bool isSafeMemOpType(MVT VT) const override;
 721
 722     /// Returns true if the target allows unaligned memory accesses of the
 723     /// specified type. Returns whether it is "fast" in the last argument.
 724     bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
 725                                        bool *Fast) const override;
 726
 727     /// Provide custom lowering hooks for some operations.
 728     ///
 729     SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
 730
 731     /// Places new result values for the node in Results (their number
 732     /// and types must exactly match those of the original return values of
 733     /// the node), or leaves Results empty, which indicates that the node is not
 734     /// to be custom lowered after all.
 735     void LowerOperationWrapper(SDNode *N,
 736                                SmallVectorImpl<SDValue> &Results,
 737                                SelectionDAG &DAG) const override;
 738
 739     /// Replace the results of node with an illegal result
 740     /// type with new values built out of custom code.
 741     ///
 742     void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
 743                             SelectionDAG &DAG) const override;
 744
 745     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 746
 747     // Return true if it is profitable to combine a BUILD_VECTOR with a
 748     // stride-pattern to a shuffle and a truncate.
 749     // Example of such a combine:
 750     // v4i32 build_vector((extract_elt V, 1),
 751     //                    (extract_elt V, 3),
 752     //                    (extract_elt V, 5),
 753     //                    (extract_elt V, 7))
 754     //  -->
 755     // v4i32 truncate (bitcast (shuffle<1,u,3,u,4,u,5,u,6,u,7,u> V, u) to
 756     // v4i64)
 757     bool isDesirableToCombineBuildVectorToShuffleTruncate(
 758         ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const override;
 759
 760     /// Return true if the target has native support for
 761     /// the specified value type and it is 'desirable' to use the type for the
 762     /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
 763     /// instruction encodings are longer and some i16 instructions are slow.
 764     bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
 765
 766     /// Return true if the target has native support for the
 767     /// specified value type and it is 'desirable' to use the type. e.g. On x86
 768     /// i16 is legal, but undesirable since i16 instruction encodings are longer
 769     /// and some i16 instructions are slow.
 770     bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
 771
 772     MachineBasicBlock *
 773     EmitInstrWithCustomInserter(MachineInstr &MI,
 774                                 MachineBasicBlock *MBB) const override;
 775
 776     /// This method returns the name of a target specific DAG node.
 777     const char *getTargetNodeName(unsigned Opcode) const override;
 778
 779     bool mergeStoresAfterLegalization() const override { return true; }
 780
 781     bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
 782                           const SelectionDAG &DAG) const override;
 783
 784     bool isCheapToSpeculateCttz() const override;
 785
 786     bool isCheapToSpeculateCtlz() const override;
 787
 788     bool isCtlzFast() const override;
 789
 790     bool hasBitPreservingFPLogic(EVT VT) const override {
 791       return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
 792     }
 793
 794     bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
 795       // If the pair to store is a mixture of float and int values, we will
 796       // save two bitwise instructions and one float-to-int instruction and
 797       // increase one store instruction. There is potentially a more
 798       // significant benefit because it avoids the float->int domain switch
 799       // for input value. So It is more likely a win.
 800       if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
 801           (LTy.isInteger() && HTy.isFloatingPoint()))
 802         return true;
 803       // If the pair only contains int values, we will save two bitwise
 804       // instructions and increase one store instruction (costing one more
 805       // store buffer). Since the benefit is more blurred so we leave
 806       // such pair out until we get testcase to prove it is a win.
 807       return false;
 808     }
 809
 810     bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
 811
 812     bool hasAndNotCompare(SDValue Y) const override;
 813
 814     bool hasAndNot(SDValue Y) const override;
 815
 816     bool preferShiftsToClearExtremeBits(SDValue Y) const override;
 817
 818     bool
 819     shouldTransformSignedTruncationCheck(EVT XVT,
 820                                          unsigned KeptBits) const override {
 821       // For vectors, we don't have a preference..
 822       if (XVT.isVector())
 823         return false;
 824
 825       auto VTIsOk = [](EVT VT) -> bool {
 826         return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
 827                VT == MVT::i64;
 828       };
 829
 830       // We are ok with KeptBitsVT being byte/word/dword, what MOVS supports.
 831       // XVT will be larger than KeptBitsVT.
 832       MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
 833       return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
 834     }
 835
 836     bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
 837       return VT.isScalarInteger();
 838     }
 839
 840     /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
 841     MVT hasFastEqualityCompare(unsigned NumBits) const override;
 842
 843     /// Allow multiple load pairs per block for smaller and faster code.
 844     unsigned getMemcmpEqZeroLoadsPerBlock() const override {
 845       return 2;
 846     }
 847
 848     /// Return the value type to use for ISD::SETCC.
 849     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
 850                            EVT VT) const override;
 851
 852     bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
 853                                       TargetLoweringOpt &TLO) const override;
 854
 855     /// Determine which of the bits specified in Mask are known to be either
 856     /// zero or one and return them in the KnownZero/KnownOne bitsets.
 857     void computeKnownBitsForTargetNode(const SDValue Op,
 858                                        KnownBits &Known,
 859                                        const APInt &DemandedElts,
 860                                        const SelectionDAG &DAG,
 861                                        unsigned Depth = 0) const override;
 862
 863     /// Determine the number of bits in the operation that are sign bits.
 864     unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
 865                                              const APInt &DemandedElts,
 866                                              const SelectionDAG &DAG,
 867                                              unsigned Depth) const override;
 868
 869     SDValue unwrapAddress(SDValue N) const override;
 870
 871     bool isGAPlusOffset(SDNode *N, const GlobalValue* &GA,
 872                         int64_t &Offset) const override;
 873
 874     SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
 875
 876     bool ExpandInlineAsm(CallInst *CI) const override;
 877
 878     ConstraintType getConstraintType(StringRef Constraint) const override;
 879
 880     /// Examine constraint string and operand type and determine a weight value.
 881     /// The operand object must already have been set up with the operand type.
 882     ConstraintWeight
 883       getSingleConstraintMatchWeight(AsmOperandInfo &info,
 884                                      const char *constraint) const override;
 885
 886     const char *LowerXConstraint(EVT ConstraintVT) const override;
 887
 888     /// Lower the specified operand into the Ops vector. If it is invalid, don't
 889     /// add anything to Ops. If hasMemory is true it means one of the asm
 890     /// constraint of the inline asm instruction being processed is 'm'.
 891     void LowerAsmOperandForConstraint(SDValue Op,
 892                                       std::string &Constraint,
 893                                       std::vector<SDValue> &Ops,
 894                                       SelectionDAG &DAG) const override;
 895
 896     unsigned
 897     getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
 898       if (ConstraintCode == "i")
 899         return InlineAsm::Constraint_i;
 900       else if (ConstraintCode == "o")
 901         return InlineAsm::Constraint_o;
 902       else if (ConstraintCode == "v")
 903         return InlineAsm::Constraint_v;
 904       else if (ConstraintCode == "X")
 905         return InlineAsm::Constraint_X;
 906       return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
 907     }
 908
 909     /// Given a physical register constraint
 910     /// (e.g. {edx}), return the register number and the register class for the
 911     /// register.  This should only be used for C_Register constraints.  On
 912     /// error, this returns a register number of 0.
 913     std::pair<unsigned, const TargetRegisterClass *>
 914     getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
 915                                  StringRef Constraint, MVT VT) const override;
 916
 917     /// Return true if the addressing mode represented
 918     /// by AM is legal for this target, for a load/store of the specified type.
 919     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
 920                                Type *Ty, unsigned AS,
 921                                Instruction *I = nullptr) const override;
 922
 923     /// Return true if the specified immediate is legal
 924     /// icmp immediate, that is the target has icmp instructions which can
 925     /// compare a register against the immediate without having to materialize
 926     /// the immediate into a register.
 927     bool isLegalICmpImmediate(int64_t Imm) const override;
 928
 929     /// Return true if the specified immediate is legal
 930     /// add immediate, that is the target has add instructions which can
 931     /// add a register and the immediate without having to materialize
 932     /// the immediate into a register.
 933     bool isLegalAddImmediate(int64_t Imm) const override;
 934
 935     /// Return the cost of the scaling factor used in the addressing
 936     /// mode represented by AM for this target, for a load/store
 937     /// of the specified type.
 938     /// If the AM is supported, the return value must be >= 0.
 939     /// If the AM is not supported, it returns a negative value.
 940     int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
 941                              unsigned AS) const override;
 942
 943     bool isVectorShiftByScalarCheap(Type *Ty) const override;
 944
 945     /// Return true if it's free to truncate a value of
 946     /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
 947     /// register EAX to i16 by referencing its sub-register AX.
 948     bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
 949     bool isTruncateFree(EVT VT1, EVT VT2) const override;
 950
 951     bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
 952
 953     /// Return true if any actual instruction that defines a
 954     /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
 955     /// register. This does not necessarily include registers defined in
 956     /// unknown ways, such as incoming arguments, or copies from unknown
 957     /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
 958     /// does not necessarily apply to truncate instructions. e.g. on x86-64,
 959     /// all instructions that define 32-bit values implicit zero-extend the
 960     /// result out to 64 bits.
 961     bool isZExtFree(Type *Ty1, Type *Ty2) const override;
 962     bool isZExtFree(EVT VT1, EVT VT2) const override;
 963     bool isZExtFree(SDValue Val, EVT VT2) const override;
 964
 965     /// Return true if folding a vector load into ExtVal (a sign, zero, or any
 966     /// extend node) is profitable.
 967     bool isVectorLoadExtDesirable(SDValue) const override;
 968
 969     /// Return true if an FMA operation is faster than a pair of fmul and fadd
 970     /// instructions. fmuladd intrinsics will be expanded to FMAs when this
 971     /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
 972     bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
 973
 974     /// Return true if it's profitable to narrow
 975     /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
 976     /// from i32 to i8 but not from i32 to i16.
 977     bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
 978
 979     /// Given an intrinsic, checks if on the target the intrinsic will need to map
 980     /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
 981     /// true and stores the intrinsic information into the IntrinsicInfo that was
 982     /// passed to the function.
 983     bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
 984                             MachineFunction &MF,
 985                             unsigned Intrinsic) const override;
 986
 987     /// Returns true if the target can instruction select the
 988     /// specified FP immediate natively. If false, the legalizer will
 989     /// materialize the FP immediate as a load from a constant pool.
 990     bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
 991
 992     /// Targets can use this to indicate that they only support *some*
 993     /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
 994     /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
 995     /// be legal.
 996     bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
 997
 998     /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
 999     /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1000     /// constant pool entry.
1001     bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1002
1003     /// Returns true if lowering to a jump table is allowed.
1004     bool areJTsAllowed(const Function *Fn) const override;
1005
1006     /// If true, then instruction selection should
1007     /// seek to shrink the FP constant of the specified type to a smaller type
1008     /// in order to save space and / or reduce runtime.
1009     bool ShouldShrinkFPConstant(EVT VT) const override {
1010       // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
1011       // expensive than a straight movsd. On the other hand, it's important to
1012       // shrink long double fp constant since fldt is very slow.
1013       return !X86ScalarSSEf64 || VT == MVT::f80;
1014     }
1015
1016     /// Return true if we believe it is correct and profitable to reduce the
1017     /// load node to a smaller type.
1018     bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1019                                EVT NewVT) const override;
1020
1021     /// Return true if the specified scalar FP type is computed in an SSE
1022     /// register, not on the X87 floating point stack.
1023     bool isScalarFPTypeInSSEReg(EVT VT) const {
1024       return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
1025              (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
1026     }
1027
1028     /// Returns true if it is beneficial to convert a load of a constant
1029     /// to just the constant itself.
1030     bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1031                                            Type *Ty) const override;
1032
1033     bool convertSelectOfConstantsToMath(EVT VT) const override;
1034
1035     /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1036     /// with this index.
1037     bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1038                                  unsigned Index) const override;
1039
1040     bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
1041                                       unsigned AddrSpace) const override {
1042       // If we can replace more than 2 scalar stores, there will be a reduction
1043       // in instructions even after we add a vector constant load.
1044       return NumElem > 2;
1045     }
1046
1047     bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT) const override;
1048
1049     /// Intel processors have a unified instruction and data cache
1050     const char * getClearCacheBuiltinName() const override {
1051       return nullptr; // nothing to do, move along.
1052     }
1053
1054     unsigned getRegisterByName(const char* RegName, EVT VT,
1055                                SelectionDAG &DAG) const override;
1056
1057     /// If a physical register, this returns the register that receives the
1058     /// exception address on entry to an EH pad.
1059     unsigned
1060     getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1061
1062     /// If a physical register, this returns the register that receives the
1063     /// exception typeid on entry to a landing pad.
1064     unsigned
1065     getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1066
1067     virtual bool needsFixedCatchObjects() const override;
1068
1069     /// This method returns a target specific FastISel object,
1070     /// or null if the target does not support "fast" ISel.
1071     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1072                              const TargetLibraryInfo *libInfo) const override;
1073
1074     /// If the target has a standard location for the stack protector cookie,
1075     /// returns the address of that location. Otherwise, returns nullptr.
1076     Value *getIRStackGuard(IRBuilder<> &IRB) const override;
1077
1078     bool useLoadStackGuardNode() const override;
1079     bool useStackGuardXorFP() const override;
1080     void insertSSPDeclarations(Module &M) const override;
1081     Value *getSDagStackGuard(const Module &M) const override;
1082     Value *getSSPStackGuardCheck(const Module &M) const override;
1083     SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1084                                 const SDLoc &DL) const override;
1085
1086
1087     /// Return true if the target stores SafeStack pointer at a fixed offset in
1088     /// some non-standard address space, and populates the address space and
1089     /// offset as appropriate.
1090     Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
1091
1092     SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
1093                       SelectionDAG &DAG) const;
1094
1095     bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
1096
1097     /// Customize the preferred legalization strategy for certain types.
1098     LegalizeTypeAction getPreferredVectorAction(EVT VT) const override;
1099
1100     MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1101                                       EVT VT) const override;
1102
1103     unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1104                                            CallingConv::ID CC,
1105                                            EVT VT) const override;
1106
1107     bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1108
1109     bool supportSwiftError() const override;
1110
1111     StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
1112
1113     bool hasVectorBlend() const override { return true; }
1114
1115     unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1116
1117     /// Lower interleaved load(s) into target specific
1118     /// instructions/intrinsics.
1119     bool lowerInterleavedLoad(LoadInst *LI,
1120                               ArrayRef<ShuffleVectorInst *> Shuffles,
1121                               ArrayRef<unsigned> Indices,
1122                               unsigned Factor) const override;
1123
1124     /// Lower interleaved store(s) into target specific
1125     /// instructions/intrinsics.
1126     bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1127                                unsigned Factor) const override;
1128
1129     SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
1130                                    SDValue Addr, SelectionDAG &DAG)
1131                                    const override;
1132
1133   protected:
1134     std::pair<const TargetRegisterClass *, uint8_t>
1135     findRepresentativeClass(const TargetRegisterInfo *TRI,
1136                             MVT VT) const override;
1137
1138   private:
1139     /// Keep a reference to the X86Subtarget around so that we can
1140     /// make the right decision when generating code for different targets.
1141     const X86Subtarget &Subtarget;
1142
1143     /// Select between SSE or x87 floating point ops.
1144     /// When SSE is available, use it for f32 operations.
1145     /// When SSE2 is available, use it for f64 operations.
1146     bool X86ScalarSSEf32;
1147     bool X86ScalarSSEf64;
1148
1149     /// A list of legal FP immediates.
1150     std::vector<APFloat> LegalFPImmediates;
1151
1152     /// Indicate that this x86 target can instruction
1153     /// select the specified FP immediate natively.
1154     void addLegalFPImmediate(const APFloat& Imm) {
1155       LegalFPImmediates.push_back(Imm);
1156     }
1157
1158     SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
1159                             CallingConv::ID CallConv, bool isVarArg,
1160                             const SmallVectorImpl<ISD::InputArg> &Ins,
1161                             const SDLoc &dl, SelectionDAG &DAG,
1162                             SmallVectorImpl<SDValue> &InVals,
1163                             uint32_t *RegMask) const;
1164     SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1165                              const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1166                              const SDLoc &dl, SelectionDAG &DAG,
1167                              const CCValAssign &VA, MachineFrameInfo &MFI,
1168                              unsigned i) const;
1169     SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1170                              const SDLoc &dl, SelectionDAG &DAG,
1171                              const CCValAssign &VA,
1172                              ISD::ArgFlagsTy Flags) const;
1173
1174     // Call lowering helpers.
1175
1176     /// Check whether the call is eligible for tail call optimization. Targets
1177     /// that want to do tail call optimization should implement this function.
1178     bool IsEligibleForTailCallOptimization(SDValue Callee,
1179                                            CallingConv::ID CalleeCC,
1180                                            bool isVarArg,
1181                                            bool isCalleeStructRet,
1182                                            bool isCallerStructRet,
1183                                            Type *RetTy,
1184                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
1185                                     const SmallVectorImpl<SDValue> &OutVals,
1186                                     const SmallVectorImpl<ISD::InputArg> &Ins,
1187                                            SelectionDAG& DAG) const;
1188     SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1189                                     SDValue Chain, bool IsTailCall,
1190                                     bool Is64Bit, int FPDiff,
1191                                     const SDLoc &dl) const;
1192
1193     unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1194                                          SelectionDAG &DAG) const;
1195
1196     unsigned getAddressSpace(void) const;
1197
1198     std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
1199                                                bool isSigned,
1200                                                bool isReplace) const;
1201
1202     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1203     SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1204     SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1205     SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1206
1207     unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
1208                                   const unsigned char OpFlags = 0) const;
1209     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1210     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1211     SDValue LowerGlobalAddress(const GlobalValue *GV, const SDLoc &dl,
1212                                int64_t Offset, SelectionDAG &DAG) const;
1213     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1214     SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1215     SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1216
1217     SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1218     SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1219     SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1220     SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1221     SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1222     SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1223     SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1224     SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
1225     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1226     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1227     SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1228     SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1229     SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1230     SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1231     SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1232     SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1233     SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
1234     SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1235     SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1236     SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
1237     SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1238     SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
1239     SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
1240     SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
1241     SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
1242     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1243
1244     SDValue
1245     LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1246                          const SmallVectorImpl<ISD::InputArg> &Ins,
1247                          const SDLoc &dl, SelectionDAG &DAG,
1248                          SmallVectorImpl<SDValue> &InVals) const override;
1249     SDValue LowerCall(CallLoweringInfo &CLI,
1250                       SmallVectorImpl<SDValue> &InVals) const override;
1251
1252     SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1253                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1254                         const SmallVectorImpl<SDValue> &OutVals,
1255                         const SDLoc &dl, SelectionDAG &DAG) const override;
1256
1257     bool supportSplitCSR(MachineFunction *MF) const override {
1258       return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
1259           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
1260     }
1261     void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1262     void insertCopiesSplitCSR(
1263       MachineBasicBlock *Entry,
1264       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1265
1266     bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1267
1268     bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1269
1270     EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1271                             ISD::NodeType ExtendKind) const override;
1272
1273     bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1274                         bool isVarArg,
1275                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1276                         LLVMContext &Context) const override;
1277
1278     const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1279
1280     TargetLoweringBase::AtomicExpansionKind
1281     shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
1282     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1283     TargetLoweringBase::AtomicExpansionKind
1284     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1285
1286     LoadInst *
1287     lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1288
1289     bool needsCmpXchgNb(Type *MemType) const;
1290
1291     void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1292                                 MachineBasicBlock *DispatchBB, int FI) const;
1293
1294     // Utility function to emit the low-level va_arg code for X86-64.
1295     MachineBasicBlock *
1296     EmitVAARG64WithCustomInserter(MachineInstr &MI,
1297                                   MachineBasicBlock *MBB) const;
1298
1299     /// Utility function to emit the xmm reg save portion of va_start.
1300     MachineBasicBlock *
1301     EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
1302                                              MachineBasicBlock *BB) const;
1303
1304     MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
1305                                                  MachineInstr &MI2,
1306                                                  MachineBasicBlock *BB) const;
1307
1308     MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1309                                          MachineBasicBlock *BB) const;
1310
1311     MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I,
1312                                            MachineBasicBlock *BB) const;
1313
1314     MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1315                                            MachineBasicBlock *BB) const;
1316
1317     MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
1318                                            MachineBasicBlock *BB) const;
1319
1320     MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1321                                             MachineBasicBlock *BB) const;
1322
1323     MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
1324                                           MachineBasicBlock *BB) const;
1325
1326     MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1327                                           MachineBasicBlock *BB) const;
1328
1329     MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
1330                                             MachineBasicBlock *BB) const;
1331
1332     MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1333                                         MachineBasicBlock *MBB) const;
1334
1335     void emitSetJmpShadowStackFix(MachineInstr &MI,
1336                                   MachineBasicBlock *MBB) const;
1337
1338     MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1339                                          MachineBasicBlock *MBB) const;
1340
1341     MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
1342                                                  MachineBasicBlock *MBB) const;
1343
1344     MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
1345                                      MachineBasicBlock *MBB) const;
1346
1347     MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
1348                                              MachineBasicBlock *MBB) const;
1349
1350     /// Emit nodes that will be selected as "test Op0,Op0", or something
1351     /// equivalent, for use with the given x86 condition code.
1352     SDValue EmitTest(SDValue Op0, unsigned X86CC, const SDLoc &dl,
1353                      SelectionDAG &DAG) const;
1354
1355     /// Emit nodes that will be selected as "cmp Op0,Op1", or something
1356     /// equivalent, for use with the given x86 condition code.
1357     SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl,
1358                     SelectionDAG &DAG) const;
1359
1360     /// Convert a comparison if required by the subtarget.
1361     SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
1362
1363     /// Check if replacement of SQRT with RSQRT should be disabled.
1364     bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;
1365
1366     /// Use rsqrt* to speed up sqrt calculations.
1367     SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1368                             int &RefinementSteps, bool &UseOneConstNR,
1369                             bool Reciprocal) const override;
1370
1371     /// Use rcp* to speed up fdiv calculations.
1372     SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1373                              int &RefinementSteps) const override;
1374
1375     /// Reassociate floating point divisions into multiply by reciprocal.
1376     unsigned combineRepeatedFPDivisors() const override;
1377   };
1378
1379   namespace X86 {
1380     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1381                              const TargetLibraryInfo *libInfo);
1382   } // end namespace X86
1383
1384   // Base class for all X86 non-masked store operations.
1385   class X86StoreSDNode : public MemSDNode {
1386   public:
1387     X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
1388                    SDVTList VTs, EVT MemVT,
1389                    MachineMemOperand *MMO)
1390       :MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1391     const SDValue &getValue() const { return getOperand(1); }
1392     const SDValue &getBasePtr() const { return getOperand(2); }
1393
1394     static bool classof(const SDNode *N) {
1395       return N->getOpcode() == X86ISD::VTRUNCSTORES ||
1396         N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1397     }
1398   };
1399
1400   // Base class for all X86 masked store operations.
1401   // The class has the same order of operands as MaskedStoreSDNode for
1402   // convenience.
1403   class X86MaskedStoreSDNode : public MemSDNode {
1404   public:
1405     X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
1406                          const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1407                          MachineMemOperand *MMO)
1408       : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1409
1410     const SDValue &getBasePtr() const { return getOperand(1); }
1411     const SDValue &getMask()    const { return getOperand(2); }
1412     const SDValue &getValue()   const { return getOperand(3); }
1413
1414     static bool classof(const SDNode *N) {
1415       return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
1416         N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1417     }
1418   };
1419
1420   // X86 Truncating Store with Signed saturation.
1421   class TruncSStoreSDNode : public X86StoreSDNode {
1422   public:
1423     TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
1424                         SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1425       : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1426
1427     static bool classof(const SDNode *N) {
1428       return N->getOpcode() == X86ISD::VTRUNCSTORES;
1429     }
1430   };
1431
1432   // X86 Truncating Store with Unsigned saturation.
1433   class TruncUSStoreSDNode : public X86StoreSDNode {
1434   public:
1435     TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
1436                       SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1437       : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1438
1439     static bool classof(const SDNode *N) {
1440       return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1441     }
1442   };
1443
1444   // X86 Truncating Masked Store with Signed saturation.
1445   class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
1446   public:
1447     MaskedTruncSStoreSDNode(unsigned Order,
1448                          const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1449                          MachineMemOperand *MMO)
1450       : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1451
1452     static bool classof(const SDNode *N) {
1453       return N->getOpcode() == X86ISD::VMTRUNCSTORES;
1454     }
1455   };
1456
1457   // X86 Truncating Masked Store with Unsigned saturation.
1458   class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
1459   public:
1460     MaskedTruncUSStoreSDNode(unsigned Order,
1461                             const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1462                             MachineMemOperand *MMO)
1463       : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1464
1465     static bool classof(const SDNode *N) {
1466       return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1467     }
1468   };
1469
1470   // X86 specific Gather/Scatter nodes.
1471   // The class has the same order of operands as MaskedGatherScatterSDNode for
1472   // convenience.
1473   class X86MaskedGatherScatterSDNode : public MemSDNode {
1474   public:
1475     X86MaskedGatherScatterSDNode(unsigned Opc, unsigned Order,
1476                                  const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1477                                  MachineMemOperand *MMO)
1478         : MemSDNode(Opc, Order, dl, VTs, MemVT, MMO) {}
1479
1480     const SDValue &getBasePtr() const { return getOperand(3); }
1481     const SDValue &getIndex()   const { return getOperand(4); }
1482     const SDValue &getMask()    const { return getOperand(2); }
1483     const SDValue &getValue()   const { return getOperand(1); }
1484     const SDValue &getScale()   const { return getOperand(5); }
1485
1486     static bool classof(const SDNode *N) {
1487       return N->getOpcode() == X86ISD::MGATHER ||
1488              N->getOpcode() == X86ISD::MSCATTER;
1489     }
1490   };
1491
1492   class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1493   public:
1494     X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1495                           EVT MemVT, MachineMemOperand *MMO)
1496         : X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT,
1497                                        MMO) {}
1498
1499     static bool classof(const SDNode *N) {
1500       return N->getOpcode() == X86ISD::MGATHER;
1501     }
1502   };
1503
1504   class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1505   public:
1506     X86MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1507                            EVT MemVT, MachineMemOperand *MMO)
1508         : X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs, MemVT,
1509                                        MMO) {}
1510
1511     static bool classof(const SDNode *N) {
1512       return N->getOpcode() == X86ISD::MSCATTER;
1513     }
1514   };
1515
1516   /// Generate unpacklo/unpackhi shuffle mask.
1517   template <typename T = int>
1518   void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
1519                                bool Unary) {
1520     assert(Mask.empty() && "Expected an empty shuffle mask vector");
1521     int NumElts = VT.getVectorNumElements();
1522     int NumEltsInLane = 128 / VT.getScalarSizeInBits();
1523     for (int i = 0; i < NumElts; ++i) {
1524       unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
1525       int Pos = (i % NumEltsInLane) / 2 + LaneStart;
1526       Pos += (Unary ? 0 : NumElts * (i % 2));
1527       Pos += (Lo ? 0 : NumEltsInLane / 2);
1528       Mask.push_back(Pos);
1529     }
1530   }
1531
1532   /// Helper function to scale a shuffle or target shuffle mask, replacing each
1533   /// mask index with the scaled sequential indices for an equivalent narrowed
1534   /// mask. This is the reverse process to canWidenShuffleElements, but can
1535   /// always succeed.
1536   template <typename T>
1537   void scaleShuffleMask(int Scale, ArrayRef<T> Mask,
1538                         SmallVectorImpl<T> &ScaledMask) {
1539     assert(0 < Scale && "Unexpected scaling factor");
1540     int NumElts = Mask.size();
1541     ScaledMask.assign(static_cast<size_t>(NumElts * Scale), -1);
1542
1543     for (int i = 0; i != NumElts; ++i) {
1544       int M = Mask[i];
1545
1546       // Repeat sentinel values in every mask element.
1547       if (M < 0) {
1548         for (int s = 0; s != Scale; ++s)
1549           ScaledMask[(Scale * i) + s] = M;
1550         continue;
1551       }
1552
1553       // Scale mask element and increment across each mask element.
1554       for (int s = 0; s != Scale; ++s)
1555         ScaledMask[(Scale * i) + s] = (Scale * M) + s;
1556     }
1557   }
1558 } // end namespace llvm
1559
1560 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H