contrib/llvm/lib/Target/X86/X86ISelLowering.h

   1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the interfaces that X86 uses to lower LLVM code into a
  11 // selection DAG.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
  16 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
  17
  18 #include "llvm/CodeGen/CallingConvLower.h"
  19 #include "llvm/CodeGen/SelectionDAG.h"
  20 #include "llvm/CodeGen/TargetLowering.h"
  21 #include "llvm/Target/TargetOptions.h"
  22
  23 namespace llvm {
  24   class X86Subtarget;
  25   class X86TargetMachine;
  26
  27   namespace X86ISD {
  28     // X86 Specific DAG Nodes
  29     enum NodeType : unsigned {
  30       // Start the numbering where the builtin ops leave off.
  31       FIRST_NUMBER = ISD::BUILTIN_OP_END,
  32
  33       /// Bit scan forward.
  34       BSF,
  35       /// Bit scan reverse.
  36       BSR,
  37
  38       /// Double shift instructions. These correspond to
  39       /// X86::SHLDxx and X86::SHRDxx instructions.
  40       SHLD,
  41       SHRD,
  42
  43       /// Bitwise logical AND of floating point values. This corresponds
  44       /// to X86::ANDPS or X86::ANDPD.
  45       FAND,
  46
  47       /// Bitwise logical OR of floating point values. This corresponds
  48       /// to X86::ORPS or X86::ORPD.
  49       FOR,
  50
  51       /// Bitwise logical XOR of floating point values. This corresponds
  52       /// to X86::XORPS or X86::XORPD.
  53       FXOR,
  54
  55       /// Bitwise logical ANDNOT of floating point values. This
  56       /// corresponds to X86::ANDNPS or X86::ANDNPD.
  57       FANDN,
  58
  59       /// These operations represent an abstract X86 call
  60       /// instruction, which includes a bunch of information.  In particular the
  61       /// operands of these nodes are:
  62       ///
  63       ///     #0 - The incoming token chain
  64       ///     #1 - The callee
  65       ///     #2 - The number of arg bytes the caller pushes on the stack.
  66       ///     #3 - The number of arg bytes the callee pops off the stack.
  67       ///     #4 - The value to pass in AL/AX/EAX (optional)
  68       ///     #5 - The value to pass in DL/DX/EDX (optional)
  69       ///
  70       /// The result values of these nodes are:
  71       ///
  72       ///     #0 - The outgoing token chain
  73       ///     #1 - The first register result value (optional)
  74       ///     #2 - The second register result value (optional)
  75       ///
  76       CALL,
  77
  78       /// This operation implements the lowering for readcyclecounter.
  79       RDTSC_DAG,
  80
  81       /// X86 Read Time-Stamp Counter and Processor ID.
  82       RDTSCP_DAG,
  83
  84       /// X86 Read Performance Monitoring Counters.
  85       RDPMC_DAG,
  86
  87       /// X86 compare and logical compare instructions.
  88       CMP, COMI, UCOMI,
  89
  90       /// X86 bit-test instructions.
  91       BT,
  92
  93       /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
  94       /// operand, usually produced by a CMP instruction.
  95       SETCC,
  96
  97       /// X86 Select
  98       SELECT, SELECTS,
  99
 100       // Same as SETCC except it's materialized with a sbb and the value is all
 101       // one's or all zero's.
 102       SETCC_CARRY,  // R = carry_bit ? ~0 : 0
 103
 104       /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
 105       /// Operands are two FP values to compare; result is a mask of
 106       /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
 107       FSETCC,
 108
 109       /// X86 FP SETCC, similar to above, but with output as an i1 mask and
 110       /// with optional rounding mode.
 111       FSETCCM, FSETCCM_RND,
 112
 113       /// X86 conditional moves. Operand 0 and operand 1 are the two values
 114       /// to select from. Operand 2 is the condition code, and operand 3 is the
 115       /// flag operand produced by a CMP or TEST instruction. It also writes a
 116       /// flag result.
 117       CMOV,
 118
 119       /// X86 conditional branches. Operand 0 is the chain operand, operand 1
 120       /// is the block to branch if condition is true, operand 2 is the
 121       /// condition code, and operand 3 is the flag operand produced by a CMP
 122       /// or TEST instruction.
 123       BRCOND,
 124
 125       /// Return with a flag operand. Operand 0 is the chain operand, operand
 126       /// 1 is the number of bytes of stack to pop.
 127       RET_FLAG,
 128
 129       /// Return from interrupt. Operand 0 is the number of bytes to pop.
 130       IRET,
 131
 132       /// Repeat fill, corresponds to X86::REP_STOSx.
 133       REP_STOS,
 134
 135       /// Repeat move, corresponds to X86::REP_MOVSx.
 136       REP_MOVS,
 137
 138       /// On Darwin, this node represents the result of the popl
 139       /// at function entry, used for PIC code.
 140       GlobalBaseReg,
 141
 142       /// A wrapper node for TargetConstantPool, TargetJumpTable,
 143       /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
 144       /// MCSymbol and TargetBlockAddress.
 145       Wrapper,
 146
 147       /// Special wrapper used under X86-64 PIC mode for RIP
 148       /// relative displacements.
 149       WrapperRIP,
 150
 151       /// Copies a 64-bit value from the low word of an XMM vector
 152       /// to an MMX vector.
 153       MOVDQ2Q,
 154
 155       /// Copies a 32-bit value from the low word of a MMX
 156       /// vector to a GPR.
 157       MMX_MOVD2W,
 158
 159       /// Copies a GPR into the low 32-bit word of a MMX vector
 160       /// and zero out the high word.
 161       MMX_MOVW2D,
 162
 163       /// Extract an 8-bit value from a vector and zero extend it to
 164       /// i32, corresponds to X86::PEXTRB.
 165       PEXTRB,
 166
 167       /// Extract a 16-bit value from a vector and zero extend it to
 168       /// i32, corresponds to X86::PEXTRW.
 169       PEXTRW,
 170
 171       /// Insert any element of a 4 x float vector into any element
 172       /// of a destination 4 x float vector.
 173       INSERTPS,
 174
 175       /// Insert the lower 8-bits of a 32-bit value to a vector,
 176       /// corresponds to X86::PINSRB.
 177       PINSRB,
 178
 179       /// Insert the lower 16-bits of a 32-bit value to a vector,
 180       /// corresponds to X86::PINSRW.
 181       PINSRW,
 182
 183       /// Shuffle 16 8-bit values within a vector.
 184       PSHUFB,
 185
 186       /// Compute Sum of Absolute Differences.
 187       PSADBW,
 188       /// Compute Double Block Packed Sum-Absolute-Differences.
 189       DBPSADBW,
 190
 191       /// Bitwise Logical AND NOT of Packed FP values.
 192       ANDNP,
 193
 194       /// Blend where the selector is an immediate.
 195       BLENDI,
 196
 197       /// Dynamic (non-constant condition) vector blend where only the sign bits
 198       /// of the condition elements are used. This is used to enforce that the
 199       /// condition mask is not valid for generic VSELECT optimizations.
 200       SHRUNKBLEND,
 201
 202       /// Combined add and sub on an FP vector.
 203       ADDSUB,
 204
 205       // FP vector ops with rounding mode.
 206       FADD_RND, FADDS_RND,
 207       FSUB_RND, FSUBS_RND,
 208       FMUL_RND, FMULS_RND,
 209       FDIV_RND, FDIVS_RND,
 210       FMAX_RND, FMAXS_RND,
 211       FMIN_RND, FMINS_RND,
 212       FSQRT_RND, FSQRTS_RND,
 213
 214       // FP vector get exponent.
 215       FGETEXP_RND, FGETEXPS_RND,
 216       // Extract Normalized Mantissas.
 217       VGETMANT, VGETMANT_RND, VGETMANTS, VGETMANTS_RND,
 218       // FP Scale.
 219       SCALEF,
 220       SCALEFS,
 221
 222       // Integer add/sub with unsigned saturation.
 223       ADDUS,
 224       SUBUS,
 225
 226       // Integer add/sub with signed saturation.
 227       ADDS,
 228       SUBS,
 229
 230       // Unsigned Integer average.
 231       AVG,
 232
 233       /// Integer horizontal add/sub.
 234       HADD,
 235       HSUB,
 236
 237       /// Floating point horizontal add/sub.
 238       FHADD,
 239       FHSUB,
 240
 241       // Detect Conflicts Within a Vector
 242       CONFLICT,
 243
 244       /// Floating point max and min.
 245       FMAX, FMIN,
 246
 247       /// Commutative FMIN and FMAX.
 248       FMAXC, FMINC,
 249
 250       /// Scalar intrinsic floating point max and min.
 251       FMAXS, FMINS,
 252
 253       /// Floating point reciprocal-sqrt and reciprocal approximation.
 254       /// Note that these typically require refinement
 255       /// in order to obtain suitable precision.
 256       FRSQRT, FRCP,
 257
 258       // AVX-512 reciprocal approximations with a little more precision.
 259       RSQRT14, RSQRT14S, RCP14, RCP14S,
 260
 261       // Thread Local Storage.
 262       TLSADDR,
 263
 264       // Thread Local Storage. A call to get the start address
 265       // of the TLS block for the current module.
 266       TLSBASEADDR,
 267
 268       // Thread Local Storage.  When calling to an OS provided
 269       // thunk at the address from an earlier relocation.
 270       TLSCALL,
 271
 272       // Exception Handling helpers.
 273       EH_RETURN,
 274
 275       // SjLj exception handling setjmp.
 276       EH_SJLJ_SETJMP,
 277
 278       // SjLj exception handling longjmp.
 279       EH_SJLJ_LONGJMP,
 280
 281       // SjLj exception handling dispatch.
 282       EH_SJLJ_SETUP_DISPATCH,
 283
 284       /// Tail call return. See X86TargetLowering::LowerCall for
 285       /// the list of operands.
 286       TC_RETURN,
 287
 288       // Vector move to low scalar and zero higher vector elements.
 289       VZEXT_MOVL,
 290
 291       // Vector integer zero-extend.
 292       VZEXT,
 293       // Vector integer signed-extend.
 294       VSEXT,
 295
 296       // Vector integer truncate.
 297       VTRUNC,
 298       // Vector integer truncate with unsigned/signed saturation.
 299       VTRUNCUS, VTRUNCS,
 300
 301       // Vector FP extend.
 302       VFPEXT, VFPEXT_RND, VFPEXTS_RND,
 303
 304       // Vector FP round.
 305       VFPROUND, VFPROUND_RND, VFPROUNDS_RND,
 306
 307       // Convert a vector to mask, set bits based on MSB.
 308       CVT2MASK,
 309
 310       // 128-bit vector logical left / right shift
 311       VSHLDQ, VSRLDQ,
 312
 313       // Vector shift elements
 314       VSHL, VSRL, VSRA,
 315
 316       // Vector variable shift right arithmetic.
 317       // Unlike ISD::SRA, in case shift count greater than element size
 318       // use sign bit to fill destination data element.
 319       VSRAV,
 320
 321       // Vector shift elements by immediate
 322       VSHLI, VSRLI, VSRAI,
 323
 324       // Shifts of mask registers.
 325       KSHIFTL, KSHIFTR,
 326
 327       // Bit rotate by immediate
 328       VROTLI, VROTRI,
 329
 330       // Vector packed double/float comparison.
 331       CMPP,
 332
 333       // Vector integer comparisons.
 334       PCMPEQ, PCMPGT,
 335       // Vector integer comparisons, the result is in a mask vector.
 336       PCMPEQM, PCMPGTM,
 337
 338       // v8i16 Horizontal minimum and position.
 339       PHMINPOS,
 340
 341       MULTISHIFT,
 342
 343       /// Vector comparison generating mask bits for fp and
 344       /// integer signed and unsigned data types.
 345       CMPM,
 346       CMPMU,
 347       // Vector comparison with rounding mode for FP values
 348       CMPM_RND,
 349
 350       // Arithmetic operations with FLAGS results.
 351       ADD, SUB, ADC, SBB, SMUL,
 352       INC, DEC, OR, XOR, AND,
 353
 354       // LOW, HI, FLAGS = umul LHS, RHS.
 355       UMUL,
 356
 357       // 8-bit SMUL/UMUL - AX, FLAGS = smul8/umul8 AL, RHS.
 358       SMUL8, UMUL8,
 359
 360       // 8-bit divrem that zero-/sign-extend the high result (AH).
 361       UDIVREM8_ZEXT_HREG,
 362       SDIVREM8_SEXT_HREG,
 363
 364       // X86-specific multiply by immediate.
 365       MUL_IMM,
 366
 367       // Vector sign bit extraction.
 368       MOVMSK,
 369
 370       // Vector bitwise comparisons.
 371       PTEST,
 372
 373       // Vector packed fp sign bitwise comparisons.
 374       TESTP,
 375
 376       // Vector "test" in AVX-512, the result is in a mask vector.
 377       TESTM,
 378       TESTNM,
 379
 380       // OR/AND test for masks.
 381       KORTEST,
 382       KTEST,
 383
 384       // Several flavors of instructions with vector shuffle behaviors.
 385       // Saturated signed/unsigned packing.
 386       PACKSS,
 387       PACKUS,
 388       // Intra-lane alignr.
 389       PALIGNR,
 390       // AVX512 inter-lane alignr.
 391       VALIGN,
 392       PSHUFD,
 393       PSHUFHW,
 394       PSHUFLW,
 395       SHUFP,
 396       // VBMI2 Concat & Shift.
 397       VSHLD,
 398       VSHRD,
 399       VSHLDV,
 400       VSHRDV,
 401       // Shuffle Packed Values at 128-bit granularity.
 402       SHUF128,
 403       MOVDDUP,
 404       MOVSHDUP,
 405       MOVSLDUP,
 406       MOVLHPS,
 407       MOVHLPS,
 408       MOVLPS,
 409       MOVLPD,
 410       MOVSD,
 411       MOVSS,
 412       UNPCKL,
 413       UNPCKH,
 414       VPERMILPV,
 415       VPERMILPI,
 416       VPERMI,
 417       VPERM2X128,
 418
 419       // Variable Permute (VPERM).
 420       // Res = VPERMV MaskV, V0
 421       VPERMV,
 422
 423       // 3-op Variable Permute (VPERMT2).
 424       // Res = VPERMV3 V0, MaskV, V1
 425       VPERMV3,
 426
 427       // 3-op Variable Permute overwriting the index (VPERMI2).
 428       // Res = VPERMIV3 V0, MaskV, V1
 429       VPERMIV3,
 430
 431       // Bitwise ternary logic.
 432       VPTERNLOG,
 433       // Fix Up Special Packed Float32/64 values.
 434       VFIXUPIMM,
 435       VFIXUPIMMS,
 436       // Range Restriction Calculation For Packed Pairs of Float32/64 values.
 437       VRANGE, VRANGE_RND, VRANGES, VRANGES_RND,
 438       // Reduce - Perform Reduction Transformation on scalar/packed FP.
 439       VREDUCE, VREDUCE_RND, VREDUCES, VREDUCES_RND,
 440       // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
 441       // Also used by the legacy (V)ROUND intrinsics where we mask out the
 442       // scaling part of the immediate.
 443       VRNDSCALE, VRNDSCALE_RND, VRNDSCALES, VRNDSCALES_RND,
 444       // Tests types of FP values for packed types.
 445       VFPCLASS,
 446       // Tests types of FP values for scalar types.
 447       VFPCLASSS,
 448
 449       // Broadcast scalar to vector.
 450       VBROADCAST,
 451       // Broadcast mask to vector.
 452       VBROADCASTM,
 453       // Broadcast subvector to vector.
 454       SUBV_BROADCAST,
 455
 456       /// SSE4A Extraction and Insertion.
 457       EXTRQI, INSERTQI,
 458
 459       // XOP arithmetic/logical shifts.
 460       VPSHA, VPSHL,
 461       // XOP signed/unsigned integer comparisons.
 462       VPCOM, VPCOMU,
 463       // XOP packed permute bytes.
 464       VPPERM,
 465       // XOP two source permutation.
 466       VPERMIL2,
 467
 468       // Vector multiply packed unsigned doubleword integers.
 469       PMULUDQ,
 470       // Vector multiply packed signed doubleword integers.
 471       PMULDQ,
 472       // Vector Multiply Packed Unsigned Integers with Round and Scale.
 473       MULHRS,
 474
 475       // Multiply and Add Packed Integers.
 476       VPMADDUBSW, VPMADDWD,
 477
 478       // AVX512IFMA multiply and add.
 479       // NOTE: These are different than the instruction and perform
 480       // op0 x op1 + op2.
 481       VPMADD52L, VPMADD52H,
 482
 483       // VNNI
 484       VPDPBUSD,
 485       VPDPBUSDS,
 486       VPDPWSSD,
 487       VPDPWSSDS,
 488
 489       // FMA nodes.
 490       // We use the target independent ISD::FMA for the non-inverted case.
 491       FNMADD,
 492       FMSUB,
 493       FNMSUB,
 494       FMADDSUB,
 495       FMSUBADD,
 496
 497       // FMA with rounding mode.
 498       FMADD_RND,
 499       FNMADD_RND,
 500       FMSUB_RND,
 501       FNMSUB_RND,
 502       FMADDSUB_RND,
 503       FMSUBADD_RND,
 504
 505       // FMA4 specific scalar intrinsics bits that zero the non-scalar bits.
 506       FMADD4S, FNMADD4S, FMSUB4S, FNMSUB4S,
 507
 508       // Scalar intrinsic FMA.
 509       FMADDS1, FMADDS3,
 510       FNMADDS1, FNMADDS3,
 511       FMSUBS1, FMSUBS3,
 512       FNMSUBS1, FNMSUBS3,
 513
 514       // Scalar intrinsic FMA with rounding mode.
 515       // Two versions, passthru bits on op1 or op3.
 516       FMADDS1_RND, FMADDS3_RND,
 517       FNMADDS1_RND, FNMADDS3_RND,
 518       FMSUBS1_RND, FMSUBS3_RND,
 519       FNMSUBS1_RND, FNMSUBS3_RND,
 520
 521       // Compress and expand.
 522       COMPRESS,
 523       EXPAND,
 524
 525       // Bits shuffle
 526       VPSHUFBITQMB,
 527
 528       // Convert Signed/Unsigned Integer to Floating-Point Value with rounding mode.
 529       SINT_TO_FP_RND, UINT_TO_FP_RND,
 530       SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,
 531
 532       // Vector float/double to signed/unsigned integer.
 533       CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
 534       // Scalar float/double to signed/unsigned integer.
 535       CVTS2SI_RND, CVTS2UI_RND,
 536
 537       // Vector float/double to signed/unsigned integer with truncation.
 538       CVTTP2SI, CVTTP2UI, CVTTP2SI_RND, CVTTP2UI_RND,
 539       // Scalar float/double to signed/unsigned integer with truncation.
 540       CVTTS2SI_RND, CVTTS2UI_RND,
 541
 542       // Vector signed/unsigned integer to float/double.
 543       CVTSI2P, CVTUI2P,
 544
 545       // Save xmm argument registers to the stack, according to %al. An operator
 546       // is needed so that this can be expanded with control flow.
 547       VASTART_SAVE_XMM_REGS,
 548
 549       // Windows's _chkstk call to do stack probing.
 550       WIN_ALLOCA,
 551
 552       // For allocating variable amounts of stack space when using
 553       // segmented stacks. Check if the current stacklet has enough space, and
 554       // falls back to heap allocation if not.
 555       SEG_ALLOCA,
 556
 557       // Memory barriers.
 558       MEMBARRIER,
 559       MFENCE,
 560
 561       // Store FP status word into i16 register.
 562       FNSTSW16r,
 563
 564       // Store contents of %ah into %eflags.
 565       SAHF,
 566
 567       // Get a random integer and indicate whether it is valid in CF.
 568       RDRAND,
 569
 570       // Get a NIST SP800-90B & C compliant random integer and
 571       // indicate whether it is valid in CF.
 572       RDSEED,
 573
 574       // SSE42 string comparisons.
 575       PCMPISTRI,
 576       PCMPESTRI,
 577
 578       // Test if in transactional execution.
 579       XTEST,
 580
 581       // ERI instructions.
 582       RSQRT28, RSQRT28S, RCP28, RCP28S, EXP2,
 583
 584       // Conversions between float and half-float.
 585       CVTPS2PH, CVTPH2PS, CVTPH2PS_RND,
 586
 587       // Galois Field Arithmetic Instructions
 588       GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,
 589
 590       // LWP insert record.
 591       LWPINS,
 592
 593       // Compare and swap.
 594       LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
 595       LCMPXCHG8_DAG,
 596       LCMPXCHG16_DAG,
 597       LCMPXCHG8_SAVE_EBX_DAG,
 598       LCMPXCHG16_SAVE_RBX_DAG,
 599
 600       /// LOCK-prefixed arithmetic read-modify-write instructions.
 601       /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
 602       LADD, LSUB, LOR, LXOR, LAND, LINC, LDEC,
 603
 604       // Load, scalar_to_vector, and zero extend.
 605       VZEXT_LOAD,
 606
 607       // Store FP control word into i16 memory.
 608       FNSTCW16m,
 609
 610       /// This instruction implements FP_TO_SINT with the
 611       /// integer destination in memory and a FP reg source.  This corresponds
 612       /// to the X86::FIST*m instructions and the rounding mode change stuff. It
 613       /// has two inputs (token chain and address) and two outputs (int value
 614       /// and token chain).
 615       FP_TO_INT16_IN_MEM,
 616       FP_TO_INT32_IN_MEM,
 617       FP_TO_INT64_IN_MEM,
 618
 619       /// This instruction implements SINT_TO_FP with the
 620       /// integer source in memory and FP reg result.  This corresponds to the
 621       /// X86::FILD*m instructions. It has three inputs (token chain, address,
 622       /// and source type) and two outputs (FP value and token chain). FILD_FLAG
 623       /// also produces a flag.
 624       FILD,
 625       FILD_FLAG,
 626
 627       /// This instruction implements an extending load to FP stack slots.
 628       /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
 629       /// operand, ptr to load from, and a ValueType node indicating the type
 630       /// to load to.
 631       FLD,
 632
 633       /// This instruction implements a truncating store to FP stack
 634       /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
 635       /// chain operand, value to store, address, and a ValueType to store it
 636       /// as.
 637       FST,
 638
 639       /// This instruction grabs the address of the next argument
 640       /// from a va_list. (reads and modifies the va_list in memory)
 641       VAARG_64,
 642
 643       // Vector truncating store with unsigned/signed saturation
 644       VTRUNCSTOREUS, VTRUNCSTORES,
 645       // Vector truncating masked store with unsigned/signed saturation
 646       VMTRUNCSTOREUS, VMTRUNCSTORES,
 647
 648       // X86 specific gather and scatter
 649       MGATHER, MSCATTER,
 650
 651       // WARNING: Do not add anything at the end unless you want the node to
 652       // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
 653       // opcodes will be treated as target memory ops!
 654     };
 655   } // end namespace X86ISD
 656
 657   /// Define some predicates that are used for node matching.
 658   namespace X86 {
 659     /// Returns true if Elt is a constant zero or floating point constant +0.0.
 660     bool isZeroNode(SDValue Elt);
 661
 662     /// Returns true if the given offset can
 663     /// fit into the displacement field of the instruction.
 664     bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
 665                                       bool hasSymbolicDisplacement = true);
 666
 667     /// Determines whether the callee is required to pop its
 668     /// own arguments. Callee pop is necessary to support tail calls.
 669     bool isCalleePop(CallingConv::ID CallingConv,
 670                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
 671
 672   } // end namespace X86
 673
 674   //===--------------------------------------------------------------------===//
 675   //  X86 Implementation of the TargetLowering interface
 676   class X86TargetLowering final : public TargetLowering {
 677   public:
 678     explicit X86TargetLowering(const X86TargetMachine &TM,
 679                                const X86Subtarget &STI);
 680
 681     unsigned getJumpTableEncoding() const override;
 682     bool useSoftFloat() const override;
 683
 684     void markLibCallAttributes(MachineFunction *MF, unsigned CC,
 685                                ArgListTy &Args) const override;
 686
 687     MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
 688       return MVT::i8; // Always i8; neither the DataLayout nor VT is consulted.
 689     }
 690
 691     const MCExpr *
 692     LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
 693                               const MachineBasicBlock *MBB, unsigned uid,
 694                               MCContext &Ctx) const override;
 695
 696     /// Returns relocation base for the given PIC jumptable.
 697     SDValue getPICJumpTableRelocBase(SDValue Table,
 698                                      SelectionDAG &DAG) const override;
 699     const MCExpr *
 700     getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
 701                                  unsigned JTI, MCContext &Ctx) const override;
 702
 703     /// Return the desired alignment for ByVal aggregate
 704     /// function arguments in the caller parameter area. For X86, aggregates
 705     /// that contain SSE vectors are placed at 16-byte boundaries while the rest are at
 706     /// 4-byte boundaries.
 707     unsigned getByValTypeAlignment(Type *Ty,
 708                                    const DataLayout &DL) const override;
 709
 710     /// Returns the target specific optimal type for load
 711     /// and store operations as a result of memset, memcpy, and memmove
 712     /// lowering. If DstAlign is zero that means the destination
 713     /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
 714     /// means there isn't a need to check it against alignment requirement,
 715     /// probably because the source does not need to be loaded. If 'IsMemset' is
 716     /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
 717     /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
 718     /// source is constant so it does not need to be loaded.
 719     /// It returns EVT::Other if the type should be determined using generic
 720     /// target-independent logic.
 721     EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
 722                             bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
 723                             MachineFunction &MF) const override;
 724
 725     /// Returns true if it's safe to use load / store of the
 726     /// specified type to expand memcpy / memset inline. This is mostly true
 727     /// for all types except for some special cases. For example, on X86
 728     /// targets without SSE2 f64 load / store are done with fldl / fstpl which
 729     /// also does type conversion. Note the specified type doesn't have to be
 730     /// legal as the hook is used before type legalization.
 731     bool isSafeMemOpType(MVT VT) const override;
 732
 733     /// Returns true if the target allows unaligned memory accesses of the
 734     /// specified type. Returns whether it is "fast" in the last argument.
 735     bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
 736                                        bool *Fast) const override;
 737
 738     /// Provide custom lowering hooks for some operations.
 739     ///
 740     SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
 741
 742     /// Places new result values for the node in Results (their number
 743     /// and types must exactly match those of the original return values of
 744     /// the node), or leaves Results empty, which indicates that the node is not
 745     /// to be custom lowered after all.
 746     void LowerOperationWrapper(SDNode *N,
 747                                SmallVectorImpl<SDValue> &Results,
 748                                SelectionDAG &DAG) const override;
 749
 750     /// Replace the results of node with an illegal result
 751     /// type with new values built out of custom code.
 752     ///
 753     void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
 754                             SelectionDAG &DAG) const override;
 755
 756     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 757
 758     // Return true if it is profitable to combine a BUILD_VECTOR with a
 759     // stride-pattern to a shuffle and a truncate.
 760     // Example of such a combine:
 761     // v4i32 build_vector((extract_elt V, 1),
 762     //                    (extract_elt V, 3),
 763     //                    (extract_elt V, 5),
 764     //                    (extract_elt V, 7))
 765     //  -->
 766     // v4i32 truncate (bitcast (shuffle<1,u,3,u,4,u,5,u,6,u,7,u> V, u) to
 767     // v4i64)
 768     bool isDesirableToCombineBuildVectorToShuffleTruncate(
 769         ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const override;
 770
 771     /// Return true if the target has native support for
 772     /// the specified value type and it is 'desirable' to use the type for the
 773     /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
 774     /// instruction encodings are longer and some i16 instructions are slow.
 775     bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
 776
 777     /// Return true if the target has native support for the
 778     /// specified value type and it is 'desirable' to use the type. e.g. On x86
 779     /// i16 is legal, but undesirable since i16 instruction encodings are longer
 780     /// and some i16 instructions are slow.
 781     bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
 782
 783     MachineBasicBlock *
 784     EmitInstrWithCustomInserter(MachineInstr &MI,
 785                                 MachineBasicBlock *MBB) const override;
 786
 787     /// This method returns the name of a target specific DAG node.
 788     const char *getTargetNodeName(unsigned Opcode) const override;
 789
 790     bool mergeStoresAfterLegalization() const override { return true; } // Unconditionally true for this target.
 791
 792     bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
 793                           const SelectionDAG &DAG) const override;
 794
 795     bool isCheapToSpeculateCttz() const override;
 796
 797     bool isCheapToSpeculateCtlz() const override;
 798
 799     bool isCtlzFast() const override;
 800
 801     bool hasBitPreservingFPLogic(EVT VT) const override {
 802       return VT.isVector() || VT == MVT::f64 || VT == MVT::f32; // any vector, or scalar f32/f64
 803     }
 804
 805     bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
 806       // If the pair to store is a mixture of float and int values, we will
 807       // save two bitwise instructions and one float-to-int instruction and
 808       // increase one store instruction. There is potentially a more
 809       // significant benefit because it avoids the float->int domain switch
 810       // for input value. So It is more likely a win.
 811       if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
 812           (LTy.isInteger() && HTy.isFloatingPoint()))
 813         return true;
 814       // If the pair only contains int values, we will save two bitwise
 815       // instructions and increase one store instruction (costing one more
 816       // store buffer). Since the benefit is more blurred so we leave
 817       // such pair out until we get testcase to prove it is a win.
 818       return false;
 819     }
 820
 821     bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
 822
 823     bool hasAndNotCompare(SDValue Y) const override;
 824
 825     bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
 826       return VT.isScalarInteger(); // True only for scalar integer value types.
 827     }
 828
 829     /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
 830     MVT hasFastEqualityCompare(unsigned NumBits) const override;
 831
 832     /// Return the value type to use for ISD::SETCC.
 833     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
 834                            EVT VT) const override;
 835
 836     /// Determine which bits of Op are known to be either zero or one and
 837     /// return them in Known.
 838     void computeKnownBitsForTargetNode(const SDValue Op,
 839                                        KnownBits &Known,
 840                                        const APInt &DemandedElts,
 841                                        const SelectionDAG &DAG,
 842                                        unsigned Depth = 0) const override;
 843
 844     /// Determine the number of bits in the operation that are sign bits.
 845     unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
 846                                              const APInt &DemandedElts,
 847                                              const SelectionDAG &DAG,
 848                                              unsigned Depth) const override;
 849
    /// Target hook taking a node value \p N and returning an SDValue.
    /// NOTE(review): presumably strips a target wrapper node from an address;
    /// confirm against the definition.
    SDValue unwrapAddress(SDValue N) const override;

    /// On success the global is reported through \p GA and the constant
    /// displacement through \p Offset (both are out-parameters).
    /// NOTE(review): the exact node shapes accepted are defined elsewhere.
    bool isGAPlusOffset(SDNode *N, const GlobalValue* &GA,
                        int64_t &Offset) const override;

    /// X86-specific helper (not an override) that yields an SDValue for the
    /// return-address frame index.
    SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;

    /// Inline-asm hook operating on the call \p CI; returns a bool.
    bool ExpandInlineAsm(CallInst *CI) const override;

    /// Classify the inline-asm constraint string \p Constraint.
    ConstraintType getConstraintType(StringRef Constraint) const override;

    /// Examine constraint string and operand type and determine a weight value.
    /// The operand object must already have been set up with the operand type.
    ConstraintWeight
      getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                     const char *constraint) const override;

    /// Inline-asm hook for the 'X' constraint; takes the constraint's value
    /// type and returns a C string.
    const char *LowerXConstraint(EVT ConstraintVT) const override;

    /// Lower the specified operand into the Ops vector. If it is invalid, don't
    /// add anything to Ops.
    /// NOTE(review): earlier documentation referred to a "hasMemory" flag
    /// (true when one of the asm constraints is 'm'); this signature has no
    /// such parameter.
    void LowerAsmOperandForConstraint(SDValue Op,
                                      std::string &Constraint,
                                      std::vector<SDValue> &Ops,
                                      SelectionDAG &DAG) const override;
 876
 877     unsigned
 878     getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
 879       if (ConstraintCode == "i")
 880         return InlineAsm::Constraint_i;
 881       else if (ConstraintCode == "o")
 882         return InlineAsm::Constraint_o;
 883       else if (ConstraintCode == "v")
 884         return InlineAsm::Constraint_v;
 885       else if (ConstraintCode == "X")
 886         return InlineAsm::Constraint_X;
 887       return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
 888     }
 889
    /// Given a physical register constraint
    /// (e.g. {edx}), return the register number and the register class for the
    /// register.  This should only be used for C_Register constraints.  On
    /// error, this returns a register number of 0.
    std::pair<unsigned, const TargetRegisterClass *>
    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                 StringRef Constraint, MVT VT) const override;

    /// Return true if the addressing mode represented
    /// by AM is legal for this target, for a load/store of the specified type.
    /// \p I is optional and defaults to nullptr.
    bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                               Type *Ty, unsigned AS,
                               Instruction *I = nullptr) const override;

    /// Return true if the specified immediate is a legal
    /// icmp immediate, that is the target has icmp instructions which can
    /// compare a register against the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalICmpImmediate(int64_t Imm) const override;

    /// Return true if the specified immediate is a legal
    /// add immediate, that is the target has add instructions which can
    /// add a register and the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalAddImmediate(int64_t Imm) const override;

    /// \brief Return the cost of the scaling factor used in the addressing
    /// mode represented by AM for this target, for a load/store
    /// of the specified type.
    /// If the AM is supported, the return value must be >= 0.
    /// If the AM is not supported, it returns a negative value.
    int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS) const override;

    /// Cost query for the vector type \p Ty.
    /// NOTE(review): presumably true when shifting every lane by one scalar
    /// amount is cheap; confirm against the definition.
    bool isVectorShiftByScalarCheap(Type *Ty) const override;

    /// Return true if it's free to truncate a value of
    /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
    /// register EAX to i16 by referencing its sub-register AX.
    /// Two overloads: one on IR types, one on EVTs.
    bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
    bool isTruncateFree(EVT VT1, EVT VT2) const override;

    /// Tail-call related truncation query on the IR types \p Ty1 and \p Ty2.
    bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
 933
    /// Return true if any actual instruction that defines a
    /// value of type Ty1 implicitly zero-extends the value to Ty2 in the
    /// result register. This does not necessarily include registers defined in
    /// unknown ways, such as incoming arguments, or copies from unknown
    /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
    /// does not necessarily apply to truncate instructions. e.g. on x86-64,
    /// all instructions that define 32-bit values implicitly zero-extend the
    /// result out to 64 bits.
    /// Three overloads: on IR types, on EVTs, and on a concrete value.
    bool isZExtFree(Type *Ty1, Type *Ty2) const override;
    bool isZExtFree(EVT VT1, EVT VT2) const override;
    bool isZExtFree(SDValue Val, EVT VT2) const override;

    /// Return true if folding a vector load into ExtVal (a sign, zero, or any
    /// extend node) is profitable. ExtVal is the unnamed SDValue parameter.
    bool isVectorLoadExtDesirable(SDValue) const override;

    /// Return true if an FMA operation is faster than a pair of fmul and fadd
    /// instructions. fmuladd intrinsics will be expanded to FMAs when this
    /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
    bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;

    /// Return true if it's profitable to narrow
    /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
    /// from i32 to i8 but not from i32 to i16.
    bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
 959
    /// Given an intrinsic, checks if on the target the intrinsic will need to map
    /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
    /// true and stores the intrinsic information into the IntrinsicInfo that was
    /// passed to the function.
    bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                            MachineFunction &MF,
                            unsigned Intrinsic) const override;

    /// Returns true if the target can instruction select the
    /// specified FP immediate natively. If false, the legalizer will
    /// materialize the FP immediate as a load from a constant pool.
    bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;

    /// Targets can use this to indicate that they only support *some*
    /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
    /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
    /// be legal.
    bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

    /// Similar to isShuffleMaskLegal. Targets can use this to indicate
    /// whether there is a suitable VECTOR_SHUFFLE that can be used to
    /// replace a VAND with a constant pool entry.
    bool isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
                                EVT VT) const override;

    /// Returns true if lowering to a jump table is allowed.
    bool areJTsAllowed(const Function *Fn) const override;
 987
 988     /// If true, then instruction selection should
 989     /// seek to shrink the FP constant of the specified type to a smaller type
 990     /// in order to save space and / or reduce runtime.
 991     bool ShouldShrinkFPConstant(EVT VT) const override {
 992       // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
 993       // expensive than a straight movsd. On the other hand, it's important to
 994       // shrink long double fp constant since fldt is very slow.
 995       return !X86ScalarSSEf64 || VT == MVT::f80;
 996     }
 997
    /// Return true if we believe it is correct and profitable to reduce the
    /// load node to a smaller type.
    /// \param Load   the load node under consideration.
    /// \param ExtTy  the extension kind of the load.
    /// \param NewVT  the narrower type being proposed.
    bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                               EVT NewVT) const override;
1002
1003     /// Return true if the specified scalar FP type is computed in an SSE
1004     /// register, not on the X87 floating point stack.
1005     bool isScalarFPTypeInSSEReg(EVT VT) const {
1006       return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
1007              (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
1008     }
1009
    /// \brief Returns true if it is beneficial to convert a load of a constant
    /// to just the constant itself.
    bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                           Type *Ty) const override;

    /// Query on the value type \p VT.
    /// NOTE(review): presumably controls whether a select between constants
    /// is rewritten as arithmetic; confirm against the definition.
    bool convertSelectOfConstantsToMath(EVT VT) const override;

    /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
    /// with this index.
    bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                 unsigned Index) const override;
1021
1022     bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
1023                                       unsigned AddrSpace) const override {
1024       // If we can replace more than 2 scalar stores, there will be a reduction
1025       // in instructions even after we add a vector constant load.
1026       return NumElem > 2;
1027     }
1028
    /// Query on a load of type \p LoadVT that is bitcast to \p BitcastVT.
    /// NOTE(review): presumably true when performing the load directly in
    /// BitcastVT is preferable; confirm against the definition.
    bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT) const override;
1030
1031     /// Intel processors have a unified instruction and data cache
1032     const char * getClearCacheBuiltinName() const override {
1033       return nullptr; // nothing to do, move along.
1034     }
1035
    /// Resolve the register named \p RegName and return it as an unsigned
    /// register number.
    /// NOTE(review): behaviour for unknown names is defined elsewhere.
    unsigned getRegisterByName(const char* RegName, EVT VT,
                               SelectionDAG &DAG) const override;

    /// If a physical register, this returns the register that receives the
    /// exception address on entry to an EH pad.
    unsigned
    getExceptionPointerRegister(const Constant *PersonalityFn) const override;

    /// If a physical register, this returns the register that receives the
    /// exception typeid on entry to a landing pad.
    unsigned
    getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1048
1049     virtual bool needsFixedCatchObjects() const override;
1050
    /// This method returns a target specific FastISel object,
    /// or null if the target does not support "fast" ISel.
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo) const override;

    /// If the target has a standard location for the stack protector cookie,
    /// returns the address of that location. Otherwise, returns nullptr.
    Value *getIRStackGuard(IRBuilder<> &IRB) const override;

    // Stack-protector related hooks. Their definitions are not in this
    // header; see the implementation for the exact policy of each.
    bool useLoadStackGuardNode() const override;
    bool useStackGuardXorFP() const override;
    void insertSSPDeclarations(Module &M) const override;
    Value *getSDagStackGuard(const Module &M) const override;
    Value *getSSPStackGuardCheck(const Module &M) const override;
    SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
                                const SDLoc &DL) const override;


    /// Returns a Value* built with \p IRB.
    /// NOTE(review): the earlier wording said "Return true if the target
    /// stores SafeStack pointer at a fixed offset in some non-standard
    /// address space, and populates the address space and offset as
    /// appropriate", which does not match a Value* return; presumably the
    /// result is the location of the SafeStack pointer - confirm.
    Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;

    /// X86-specific helper (not an override) producing an SDValue from the
    /// operand, its source type, a chain and a stack slot.
    SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
                      SelectionDAG &DAG) const;

    /// Query on a cast from address space \p SrcAS to \p DestAS.
    bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;

    /// \brief Customize the preferred legalization strategy for certain types.
    LegalizeTypeAction getPreferredVectorAction(EVT VT) const override;

    /// Cost query for integer division on \p VT given the attributes \p Attr.
    bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

    bool supportSwiftError() const override;

    /// Returns the stack-probe symbol name to use for \p MF as a StringRef.
    StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
1087
1088     unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1089
    /// \brief Lower interleaved load(s) into target specific
    /// instructions/intrinsics.
    /// \param LI        the wide load.
    /// \param Shuffles  the shufflevector instructions involved.
    /// \param Indices   one unsigned index per entry (meaning defined by the
    ///                  caller; not visible here).
    /// \param Factor    the interleave factor.
    bool lowerInterleavedLoad(LoadInst *LI,
                              ArrayRef<ShuffleVectorInst *> Shuffles,
                              ArrayRef<unsigned> Indices,
                              unsigned Factor) const override;

    /// \brief Lower interleaved store(s) into target specific
    /// instructions/intrinsics.
    bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                               unsigned Factor) const override;


    /// Hook operating on the machine function \p MF; declared const.
    void finalizeLowering(MachineFunction &MF) const override;
1104
1105   protected:
    /// Returns a (register class, uint8_t) pair for the value type \p VT.
    /// NOTE(review): the uint8_t is presumably a cost associated with the
    /// class; confirm against the base-class documentation.
    std::pair<const TargetRegisterClass *, uint8_t>
    findRepresentativeClass(const TargetRegisterInfo *TRI,
                            MVT VT) const override;
1109
1110   private:
    /// Keep a reference to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget &Subtarget;

    /// Select between SSE or x87 floating point ops.
    /// When SSE is available, use it for f32 operations.
    /// When SSE2 is available, use it for f64 operations.
    /// Both flags are read by isScalarFPTypeInSSEReg(); X86ScalarSSEf64 is
    /// also read by ShouldShrinkFPConstant().
    bool X86ScalarSSEf32;
    bool X86ScalarSSEf64;

    /// A list of legal FP immediates. Entries are appended by
    /// addLegalFPImmediate().
    std::vector<APFloat> LegalFPImmediates;
1123
1124     /// Indicate that this x86 target can instruction
1125     /// select the specified FP immediate natively.
1126     void addLegalFPImmediate(const APFloat& Imm) {
1127       LegalFPImmediates.push_back(Imm);
1128     }
1129
    // Call/argument lowering helpers. All three return an SDValue and are
    // declared const; their definitions are not in this header.

    /// Takes the incoming-value descriptions \p Ins and appends to \p InVals.
    /// NOTE(review): \p RegMask is presumably the call's register mask and
    /// may be updated; confirm against the definition.
    SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                            CallingConv::ID CallConv, bool isVarArg,
                            const SmallVectorImpl<ISD::InputArg> &Ins,
                            const SDLoc &dl, SelectionDAG &DAG,
                            SmallVectorImpl<SDValue> &InVals,
                            uint32_t *RegMask) const;
    /// Handles the argument at index \p i of \p ArgInfo, described by the
    /// assignment \p VA.
    SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
                             const SmallVectorImpl<ISD::InputArg> &ArgInfo,
                             const SDLoc &dl, SelectionDAG &DAG,
                             const CCValAssign &VA, MachineFrameInfo &MFI,
                             unsigned i) const;
    /// Handles the outgoing argument \p Arg relative to \p StackPtr.
    SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
                             const SDLoc &dl, SelectionDAG &DAG,
                             const CCValAssign &VA,
                             ISD::ArgFlagsTy Flags) const;
1145
    // Call lowering helpers.

    /// Check whether the call is eligible for tail call optimization. Targets
    /// that want to do tail call optimization should implement this function.
    bool IsEligibleForTailCallOptimization(SDValue Callee,
                                           CallingConv::ID CalleeCC,
                                           bool isVarArg,
                                           bool isCalleeStructRet,
                                           bool isCallerStructRet,
                                           Type *RetTy,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<SDValue> &OutVals,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                           SelectionDAG& DAG) const;
    /// Tail-call helper returning an SDValue; \p OutRetAddr is an
    /// out-parameter.
    /// NOTE(review): presumably loads the return address and hands it back
    /// through OutRetAddr; confirm against the definition.
    SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
                                    SDValue Chain, bool IsTailCall,
                                    bool Is64Bit, int FPDiff,
                                    const SDLoc &dl) const;

    /// Maps \p StackSize to an unsigned result.
    /// NOTE(review): the alignment rule applied is defined elsewhere.
    unsigned GetAlignedArgumentStackSize(unsigned StackSize,
                                         SelectionDAG &DAG) const;
1167
1168     unsigned getAddressSpace(void) const;
1169
    /// Returns a pair of SDValues for the FP-to-integer node \p Op.
    /// NOTE(review): the meaning of each pair member and of \p isReplace is
    /// defined by the implementation; confirm there.
    std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
                                               bool isSigned,
                                               bool isReplace) const;

    // Per-operation lowering helpers. Unless noted, each takes the node being
    // lowered (Op) and the DAG, and returns an SDValue. The definitions are
    // not in this header.
    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;

    /// Returns an unsigned wrapper kind; \p GV is optional (defaults to
    /// nullptr).
    unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr) const;
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
    // Two overloads: one takes the global, debug location and offset
    // directly, the other takes the node.
    SDValue LowerGlobalAddress(const GlobalValue *GV, const SDLoc &dl,
                               int64_t Offset, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;

    SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1214
    // Overrides of the base-class entry points for lowering incoming
    // arguments, calls and returns. Each returns an SDValue.

    /// Lowers the incoming arguments described by \p Ins; results are
    /// appended to \p InVals.
    SDValue
    LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
                         const SDLoc &dl, SelectionDAG &DAG,
                         SmallVectorImpl<SDValue> &InVals) const override;
    /// Lowers the call described by \p CLI; results are appended to
    /// \p InVals.
    SDValue LowerCall(CallLoweringInfo &CLI,
                      SmallVectorImpl<SDValue> &InVals) const override;

    /// Lowers a return of the values \p OutVals described by \p Outs.
    SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        const SDLoc &dl, SelectionDAG &DAG) const override;
1227
1228     bool supportSplitCSR(MachineFunction *MF) const override {
1229       return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
1230           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
1231     }
    // Split-CSR hooks; see supportSplitCSR() for when split CSR applies.
    void initializeSplitCSR(MachineBasicBlock *Entry) const override;
    void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

    /// \p Chain is passed by reference and may be updated by the hook.
    bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;

    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;

    /// Returns the EVT to use when a return value of type \p VT is extended
    /// with \p ExtendKind.
    EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
                            ISD::NodeType ExtendKind) const override;

    /// Query on whether the return values described by \p Outs can be
    /// lowered for calling convention \p CallConv.
    bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                        bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        LLVMContext &Context) const override;

    /// Returns a pointer to MCPhysReg entries for calling convention \p CC.
    /// NOTE(review): how the list is terminated is not visible here.
    const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

    // Atomic expansion hooks.
    // NOTE(review): the LoadInst parameter below is named SI; LI would be
    // the conventional name.
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
    bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

    /// Takes an atomic RMW instruction and returns a LoadInst pointer.
    LoadInst *
    lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

    /// X86-specific helper (not an override) queried with a memory type.
    bool needsCmpXchgNb(Type *MemType) const;
1261
    /// SjLj helper taking the instruction, its block, the dispatch block and
    /// a frame index \p FI.
    void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
                                MachineBasicBlock *DispatchBB, int FI) const;

    // The helpers below all share one shape: they take a MachineInstr and
    // the MachineBasicBlock it belongs to and return a MachineBasicBlock
    // pointer. NOTE(review): presumably the returned block is where
    // emission continues; confirm against the definitions.

    // Utility function to emit the low-level va_arg code for X86-64.
    MachineBasicBlock *
    EmitVAARG64WithCustomInserter(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const;

    /// Utility function to emit the xmm reg save portion of va_start.
    MachineBasicBlock *
    EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
                                             MachineBasicBlock *BB) const;

    /// Variant of EmitLoweredSelect that takes two instructions.
    MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
                                                 MachineInstr &MI2,
                                                 MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
                                         MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const;

    MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
                                             MachineBasicBlock *MBB) const;
1314
    /// Emit nodes that will be selected as "test Op0,Op0", or something
    /// equivalent, for use with the given x86 condition code.
    SDValue EmitTest(SDValue Op0, unsigned X86CC, const SDLoc &dl,
                     SelectionDAG &DAG) const;

    /// Emit nodes that will be selected as "cmp Op0,Op1", or something
    /// equivalent, for use with the given x86 condition code.
    SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl,
                    SelectionDAG &DAG) const;

    /// Convert a comparison if required by the subtarget.
    SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;

    /// Check if replacement of SQRT with RSQRT should be disabled.
    bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;

    /// Use rsqrt* to speed up sqrt calculations.
    /// \p RefinementSteps and \p UseOneConstNR are passed by reference and
    /// may be updated by the hook.
    SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                            int &RefinementSteps, bool &UseOneConstNR,
                            bool Reciprocal) const override;

    /// Use rcp* to speed up fdiv calculations.
    /// \p RefinementSteps is passed by reference and may be updated.
    SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                             int &RefinementSteps) const override;

    /// Reassociate floating point divisions into multiply by reciprocal.
    /// NOTE(review): the unsigned result is presumably a threshold on the
    /// number of repeated divisors; confirm against the definition.
    unsigned combineRepeatedFPDivisors() const override;
1342   };
1343
  namespace X86 {
    /// Free-function factory that returns a FastISel pointer. It takes the
    /// same two parameters as the createFastISel member hook declared in the
    /// lowering class; the definition is not in this header.
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);
  } // end namespace X86
1348
1349   // Base class for all X86 non-masked store operations.
1350   class X86StoreSDNode : public MemSDNode {
1351   public:
1352     X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
1353                    SDVTList VTs, EVT MemVT,
1354                    MachineMemOperand *MMO)
1355       :MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1356     const SDValue &getValue() const { return getOperand(1); }
1357     const SDValue &getBasePtr() const { return getOperand(2); }
1358
1359     static bool classof(const SDNode *N) {
1360       return N->getOpcode() == X86ISD::VTRUNCSTORES ||
1361         N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1362     }
1363   };
1364
1365   // Base class for all X86 masked store operations.
1366   // The class has the same order of operands as MaskedStoreSDNode for
1367   // convenience.
1368   class X86MaskedStoreSDNode : public MemSDNode {
1369   public:
1370     X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
1371                          const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1372                          MachineMemOperand *MMO)
1373       : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1374
1375     const SDValue &getBasePtr() const { return getOperand(1); }
1376     const SDValue &getMask()    const { return getOperand(2); }
1377     const SDValue &getValue()   const { return getOperand(3); }
1378
1379     static bool classof(const SDNode *N) {
1380       return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
1381         N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1382     }
1383   };
1384
1385   // X86 Truncating Store with Signed saturation.
1386   class TruncSStoreSDNode : public X86StoreSDNode {
1387   public:
1388     TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
1389                         SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1390       : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1391
1392     static bool classof(const SDNode *N) {
1393       return N->getOpcode() == X86ISD::VTRUNCSTORES;
1394     }
1395   };
1396
1397   // X86 Truncating Store with Unsigned saturation.
1398   class TruncUSStoreSDNode : public X86StoreSDNode {
1399   public:
1400     TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
1401                       SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1402       : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1403
1404     static bool classof(const SDNode *N) {
1405       return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1406     }
1407   };
1408
1409   // X86 Truncating Masked Store with Signed saturation.
1410   class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
1411   public:
1412     MaskedTruncSStoreSDNode(unsigned Order,
1413                          const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1414                          MachineMemOperand *MMO)
1415       : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1416
1417     static bool classof(const SDNode *N) {
1418       return N->getOpcode() == X86ISD::VMTRUNCSTORES;
1419     }
1420   };
1421
1422   // X86 Truncating Masked Store with Unsigned saturation.
1423   class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
1424   public:
1425     MaskedTruncUSStoreSDNode(unsigned Order,
1426                             const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1427                             MachineMemOperand *MMO)
1428       : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1429
1430     static bool classof(const SDNode *N) {
1431       return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1432     }
1433   };
1434
1435   // X86 specific Gather/Scatter nodes.
1436   // The class has the same order of operands as MaskedGatherScatterSDNode for
1437   // convenience.
1438   class X86MaskedGatherScatterSDNode : public MemSDNode {
1439   public:
1440     X86MaskedGatherScatterSDNode(unsigned Opc, unsigned Order,
1441                                  const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1442                                  MachineMemOperand *MMO)
1443         : MemSDNode(Opc, Order, dl, VTs, MemVT, MMO) {}
1444
1445     const SDValue &getBasePtr() const { return getOperand(3); }
1446     const SDValue &getIndex()   const { return getOperand(4); }
1447     const SDValue &getMask()    const { return getOperand(2); }
1448     const SDValue &getValue()   const { return getOperand(1); }
1449
1450     static bool classof(const SDNode *N) {
1451       return N->getOpcode() == X86ISD::MGATHER ||
1452              N->getOpcode() == X86ISD::MSCATTER;
1453     }
1454   };
1455
1456   class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1457   public:
1458     X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1459                           EVT MemVT, MachineMemOperand *MMO)
1460         : X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT,
1461                                        MMO) {}
1462
1463     static bool classof(const SDNode *N) {
1464       return N->getOpcode() == X86ISD::MGATHER;
1465     }
1466   };
1467
1468   class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1469   public:
1470     X86MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1471                            EVT MemVT, MachineMemOperand *MMO)
1472         : X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs, MemVT,
1473                                        MMO) {}
1474
1475     static bool classof(const SDNode *N) {
1476       return N->getOpcode() == X86ISD::MSCATTER;
1477     }
1478   };
1479
1480   /// Generate unpacklo/unpackhi shuffle mask.
1481   template <typename T = int>
1482   void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
1483                                bool Unary) {
1484     assert(Mask.empty() && "Expected an empty shuffle mask vector");
1485     int NumElts = VT.getVectorNumElements();
1486     int NumEltsInLane = 128 / VT.getScalarSizeInBits();
1487     for (int i = 0; i < NumElts; ++i) {
1488       unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
1489       int Pos = (i % NumEltsInLane) / 2 + LaneStart;
1490       Pos += (Unary ? 0 : NumElts * (i % 2));
1491       Pos += (Lo ? 0 : NumEltsInLane / 2);
1492       Mask.push_back(Pos);
1493     }
1494   }
1495
1496   /// Helper function to scale a shuffle or target shuffle mask, replacing each
1497   /// mask index with the scaled sequential indices for an equivalent narrowed
1498   /// mask. This is the reverse process to canWidenShuffleElements, but can
1499   /// always succeed.
1500   template <typename T>
1501   void scaleShuffleMask(int Scale, ArrayRef<T> Mask,
1502                         SmallVectorImpl<T> &ScaledMask) {
1503     assert(0 < Scale && "Unexpected scaling factor");
1504     int NumElts = Mask.size();
1505     ScaledMask.assign(static_cast<size_t>(NumElts * Scale), -1);
1506
1507     for (int i = 0; i != NumElts; ++i) {
1508       int M = Mask[i];
1509
1510       // Repeat sentinel values in every mask element.
1511       if (M < 0) {
1512         for (int s = 0; s != Scale; ++s)
1513           ScaledMask[(Scale * i) + s] = M;
1514         continue;
1515       }
1516
1517       // Scale mask element and increment across each mask element.
1518       for (int s = 0; s != Scale; ++s)
1519         ScaledMask[(Scale * i) + s] = (Scale * M) + s;
1520     }
1521   }
1522 } // end namespace llvm
1523
1524 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H