contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h

   1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file defines the interfaces that X86 uses to lower LLVM code into a
  10 // selection DAG.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
  15 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
  16
  17 #include "llvm/CodeGen/CallingConvLower.h"
  18 #include "llvm/CodeGen/SelectionDAG.h"
  19 #include "llvm/CodeGen/TargetLowering.h"
  20
  21 namespace llvm {
  22   class X86Subtarget;
  23   class X86TargetMachine;
  24
  25   namespace X86ISD {
  26     // X86 Specific DAG Nodes
  27     enum NodeType : unsigned {
  28       // Start the numbering where the builtin ops leave off.
  29       FIRST_NUMBER = ISD::BUILTIN_OP_END,
  30
  31       /// Bit scan forward.
  32       BSF,
  33       /// Bit scan reverse.
  34       BSR,
  35
  36       /// Double shift instructions. These correspond to
  37       /// X86::SHLDxx and X86::SHRDxx instructions.
  38       SHLD,
  39       SHRD,
  40
  41       /// Bitwise logical AND of floating point values. This corresponds
  42       /// to X86::ANDPS or X86::ANDPD.
  43       FAND,
  44
  45       /// Bitwise logical OR of floating point values. This corresponds
  46       /// to X86::ORPS or X86::ORPD.
  47       FOR,
  48
  49       /// Bitwise logical XOR of floating point values. This corresponds
  50       /// to X86::XORPS or X86::XORPD.
  51       FXOR,
  52
  53       ///  Bitwise logical ANDNOT of floating point values. This
  54       /// corresponds to X86::ANDNPS or X86::ANDNPD.
  55       FANDN,
  56
  57       /// These operations represent an abstract X86 call
  58       /// instruction, which includes a bunch of information.  In particular the
  59       /// operands of these node are:
  60       ///
  61       ///     #0 - The incoming token chain
  62       ///     #1 - The callee
  63       ///     #2 - The number of arg bytes the caller pushes on the stack.
  64       ///     #3 - The number of arg bytes the callee pops off the stack.
  65       ///     #4 - The value to pass in AL/AX/EAX (optional)
  66       ///     #5 - The value to pass in DL/DX/EDX (optional)
  67       ///
  68       /// The result values of these nodes are:
  69       ///
  70       ///     #0 - The outgoing token chain
  71       ///     #1 - The first register result value (optional)
  72       ///     #2 - The second register result value (optional)
  73       ///
  74       CALL,
  75
  76       /// Same as call except it adds the NoTrack prefix.
  77       NT_CALL,
  78
  79       /// X86 compare and logical compare instructions.
  80       CMP, COMI, UCOMI,
  81
  82       /// X86 bit-test instructions.
  83       BT,
  84
  85       /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
  86       /// operand, usually produced by a CMP instruction.
  87       SETCC,
  88
  89       /// X86 Select
  90       SELECTS,
  91
  92       // Same as SETCC except it's materialized with a sbb and the value is all
  93       // one's or all zero's.
  94       SETCC_CARRY,  // R = carry_bit ? ~0 : 0
  95
  96       /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
  97       /// Operands are two FP values to compare; result is a mask of
  98       /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
  99       FSETCC,
 100
 101       /// X86 FP SETCC, similar to above, but with output as an i1 mask and
 102       /// and a version with SAE.
 103       FSETCCM, FSETCCM_SAE,
 104
 105       /// X86 conditional moves. Operand 0 and operand 1 are the two values
 106       /// to select from. Operand 2 is the condition code, and operand 3 is the
 107       /// flag operand produced by a CMP or TEST instruction.
 108       CMOV,
 109
 110       /// X86 conditional branches. Operand 0 is the chain operand, operand 1
 111       /// is the block to branch if condition is true, operand 2 is the
 112       /// condition code, and operand 3 is the flag operand produced by a CMP
 113       /// or TEST instruction.
 114       BRCOND,
 115
 116       /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
 117       /// operand 1 is the target address.
 118       NT_BRIND,
 119
 120       /// Return with a flag operand. Operand 0 is the chain operand, operand
 121       /// 1 is the number of bytes of stack to pop.
 122       RET_FLAG,
 123
 124       /// Return from interrupt. Operand 0 is the number of bytes to pop.
 125       IRET,
 126
 127       /// Repeat fill, corresponds to X86::REP_STOSx.
 128       REP_STOS,
 129
 130       /// Repeat move, corresponds to X86::REP_MOVSx.
 131       REP_MOVS,
 132
 133       /// On Darwin, this node represents the result of the popl
 134       /// at function entry, used for PIC code.
 135       GlobalBaseReg,
 136
 137       /// A wrapper node for TargetConstantPool, TargetJumpTable,
 138       /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
 139       /// MCSymbol and TargetBlockAddress.
 140       Wrapper,
 141
 142       /// Special wrapper used under X86-64 PIC mode for RIP
 143       /// relative displacements.
 144       WrapperRIP,
 145
 146       /// Copies a 64-bit value from an MMX vector to the low word
 147       /// of an XMM vector, with the high word zero filled.
 148       MOVQ2DQ,
 149
 150       /// Copies a 64-bit value from the low word of an XMM vector
 151       /// to an MMX vector.
 152       MOVDQ2Q,
 153
 154       /// Copies a 32-bit value from the low word of a MMX
 155       /// vector to a GPR.
 156       MMX_MOVD2W,
 157
 158       /// Copies a GPR into the low 32-bit word of a MMX vector
 159       /// and zero out the high word.
 160       MMX_MOVW2D,
 161
 162       /// Extract an 8-bit value from a vector and zero extend it to
 163       /// i32, corresponds to X86::PEXTRB.
 164       PEXTRB,
 165
 166       /// Extract a 16-bit value from a vector and zero extend it to
 167       /// i32, corresponds to X86::PEXTRW.
 168       PEXTRW,
 169
 170       /// Insert any element of a 4 x float vector into any element
 171       /// of a destination 4 x floatvector.
 172       INSERTPS,
 173
 174       /// Insert the lower 8-bits of a 32-bit value to a vector,
 175       /// corresponds to X86::PINSRB.
 176       PINSRB,
 177
 178       /// Insert the lower 16-bits of a 32-bit value to a vector,
 179       /// corresponds to X86::PINSRW.
 180       PINSRW,
 181
 182       /// Shuffle 16 8-bit values within a vector.
 183       PSHUFB,
 184
 185       /// Compute Sum of Absolute Differences.
 186       PSADBW,
 187       /// Compute Double Block Packed Sum-Absolute-Differences
 188       DBPSADBW,
 189
 190       /// Bitwise Logical AND NOT of Packed FP values.
 191       ANDNP,
 192
 193       /// Blend where the selector is an immediate.
 194       BLENDI,
 195
 196       /// Dynamic (non-constant condition) vector blend where only the sign bits
 197       /// of the condition elements are used. This is used to enforce that the
 198       /// condition mask is not valid for generic VSELECT optimizations. This
 199       /// is also used to implement the intrinsics.
 200       /// Operands are in VSELECT order: MASK, TRUE, FALSE
 201       BLENDV,
 202
 203       /// Combined add and sub on an FP vector.
 204       ADDSUB,
 205
 206       //  FP vector ops with rounding mode.
 207       FADD_RND, FADDS, FADDS_RND,
 208       FSUB_RND, FSUBS, FSUBS_RND,
 209       FMUL_RND, FMULS, FMULS_RND,
 210       FDIV_RND, FDIVS, FDIVS_RND,
 211       FMAX_SAE, FMAXS_SAE,
 212       FMIN_SAE, FMINS_SAE,
 213       FSQRT_RND, FSQRTS, FSQRTS_RND,
 214
 215       // FP vector get exponent.
 216       FGETEXP, FGETEXP_SAE, FGETEXPS, FGETEXPS_SAE,
 217       // Extract Normalized Mantissas.
 218       VGETMANT, VGETMANT_SAE, VGETMANTS, VGETMANTS_SAE,
 219       // FP Scale.
 220       SCALEF, SCALEF_RND,
 221       SCALEFS, SCALEFS_RND,
 222
 223       // Unsigned Integer average.
 224       AVG,
 225
 226       /// Integer horizontal add/sub.
 227       HADD,
 228       HSUB,
 229
 230       /// Floating point horizontal add/sub.
 231       FHADD,
 232       FHSUB,
 233
 234       // Detect Conflicts Within a Vector
 235       CONFLICT,
 236
 237       /// Floating point max and min.
 238       FMAX, FMIN,
 239
 240       /// Commutative FMIN and FMAX.
 241       FMAXC, FMINC,
 242
 243       /// Scalar intrinsic floating point max and min.
 244       FMAXS, FMINS,
 245
 246       /// Floating point reciprocal-sqrt and reciprocal approximation.
 247       /// Note that these typically require refinement
 248       /// in order to obtain suitable precision.
 249       FRSQRT, FRCP,
 250
 251       // AVX-512 reciprocal approximations with a little more precision.
 252       RSQRT14, RSQRT14S, RCP14, RCP14S,
 253
 254       // Thread Local Storage.
 255       TLSADDR,
 256
 257       // Thread Local Storage. A call to get the start address
 258       // of the TLS block for the current module.
 259       TLSBASEADDR,
 260
 261       // Thread Local Storage.  When calling to an OS provided
 262       // thunk at the address from an earlier relocation.
 263       TLSCALL,
 264
 265       // Exception Handling helpers.
 266       EH_RETURN,
 267
 268       // SjLj exception handling setjmp.
 269       EH_SJLJ_SETJMP,
 270
 271       // SjLj exception handling longjmp.
 272       EH_SJLJ_LONGJMP,
 273
 274       // SjLj exception handling dispatch.
 275       EH_SJLJ_SETUP_DISPATCH,
 276
 277       /// Tail call return. See X86TargetLowering::LowerCall for
 278       /// the list of operands.
 279       TC_RETURN,
 280
 281       // Vector move to low scalar and zero higher vector elements.
 282       VZEXT_MOVL,
 283
 284       // Vector integer truncate.
 285       VTRUNC,
 286       // Vector integer truncate with unsigned/signed saturation.
 287       VTRUNCUS, VTRUNCS,
 288
 289       // Masked version of the above. Used when less than a 128-bit result is
 290       // produced since the mask only applies to the lower elements and can't
 291       // be represented by a select.
 292       // SRC, PASSTHRU, MASK
 293       VMTRUNC, VMTRUNCUS, VMTRUNCS,
 294
 295       // Vector FP extend.
 296       VFPEXT, VFPEXT_SAE, VFPEXTS, VFPEXTS_SAE,
 297
 298       // Vector FP round.
 299       VFPROUND, VFPROUND_RND, VFPROUNDS, VFPROUNDS_RND,
 300
 301       // Masked version of above. Used for v2f64->v4f32.
 302       // SRC, PASSTHRU, MASK
 303       VMFPROUND,
 304
 305       // 128-bit vector logical left / right shift
 306       VSHLDQ, VSRLDQ,
 307
 308       // Vector shift elements
 309       VSHL, VSRL, VSRA,
 310
 311       // Vector variable shift
 312       VSHLV, VSRLV, VSRAV,
 313
 314       // Vector shift elements by immediate
 315       VSHLI, VSRLI, VSRAI,
 316
 317       // Shifts of mask registers.
 318       KSHIFTL, KSHIFTR,
 319
 320       // Bit rotate by immediate
 321       VROTLI, VROTRI,
 322
 323       // Vector packed double/float comparison.
 324       CMPP,
 325
 326       // Vector integer comparisons.
 327       PCMPEQ, PCMPGT,
 328
 329       // v8i16 Horizontal minimum and position.
 330       PHMINPOS,
 331
 332       MULTISHIFT,
 333
 334       /// Vector comparison generating mask bits for fp and
 335       /// integer signed and unsigned data types.
 336       CMPM,
 337       // Vector comparison with SAE for FP values
 338       CMPM_SAE,
 339
 340       // Arithmetic operations with FLAGS results.
 341       ADD, SUB, ADC, SBB, SMUL, UMUL,
 342       OR, XOR, AND,
 343
 344       // Bit field extract.
 345       BEXTR,
 346
 347       // Zero High Bits Starting with Specified Bit Position.
 348       BZHI,
 349
 350       // X86-specific multiply by immediate.
 351       MUL_IMM,
 352
 353       // Vector sign bit extraction.
 354       MOVMSK,
 355
 356       // Vector bitwise comparisons.
 357       PTEST,
 358
 359       // Vector packed fp sign bitwise comparisons.
 360       TESTP,
 361
 362       // OR/AND test for masks.
 363       KORTEST,
 364       KTEST,
 365
 366       // ADD for masks.
 367       KADD,
 368
 369       // Several flavors of instructions with vector shuffle behaviors.
 370       // Saturated signed/unnsigned packing.
 371       PACKSS,
 372       PACKUS,
 373       // Intra-lane alignr.
 374       PALIGNR,
 375       // AVX512 inter-lane alignr.
 376       VALIGN,
 377       PSHUFD,
 378       PSHUFHW,
 379       PSHUFLW,
 380       SHUFP,
 381       // VBMI2 Concat & Shift.
 382       VSHLD,
 383       VSHRD,
 384       VSHLDV,
 385       VSHRDV,
 386       //Shuffle Packed Values at 128-bit granularity.
 387       SHUF128,
 388       MOVDDUP,
 389       MOVSHDUP,
 390       MOVSLDUP,
 391       MOVLHPS,
 392       MOVHLPS,
 393       MOVSD,
 394       MOVSS,
 395       UNPCKL,
 396       UNPCKH,
 397       VPERMILPV,
 398       VPERMILPI,
 399       VPERMI,
 400       VPERM2X128,
 401
 402       // Variable Permute (VPERM).
 403       // Res = VPERMV MaskV, V0
 404       VPERMV,
 405
 406       // 3-op Variable Permute (VPERMT2).
 407       // Res = VPERMV3 V0, MaskV, V1
 408       VPERMV3,
 409
 410       // Bitwise ternary logic.
 411       VPTERNLOG,
 412       // Fix Up Special Packed Float32/64 values.
 413       VFIXUPIMM, VFIXUPIMM_SAE,
 414       VFIXUPIMMS, VFIXUPIMMS_SAE,
 415       // Range Restriction Calculation For Packed Pairs of Float32/64 values.
 416       VRANGE, VRANGE_SAE, VRANGES, VRANGES_SAE,
 417       // Reduce - Perform Reduction Transformation on scalar\packed FP.
 418       VREDUCE, VREDUCE_SAE, VREDUCES, VREDUCES_SAE,
 419       // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
 420       // Also used by the legacy (V)ROUND intrinsics where we mask out the
 421       // scaling part of the immediate.
 422       VRNDSCALE, VRNDSCALE_SAE, VRNDSCALES, VRNDSCALES_SAE,
 423       // Tests Types Of a FP Values for packed types.
 424       VFPCLASS,
 425       // Tests Types Of a FP Values for scalar types.
 426       VFPCLASSS,
 427
 428       // Broadcast (splat) scalar or element 0 of a vector. If the operand is
 429       // a vector, this node may change the vector length as part of the splat.
 430       VBROADCAST,
 431       // Broadcast mask to vector.
 432       VBROADCASTM,
 433       // Broadcast subvector to vector.
 434       SUBV_BROADCAST,
 435
 436       /// SSE4A Extraction and Insertion.
 437       EXTRQI, INSERTQI,
 438
 439       // XOP arithmetic/logical shifts.
 440       VPSHA, VPSHL,
 441       // XOP signed/unsigned integer comparisons.
 442       VPCOM, VPCOMU,
 443       // XOP packed permute bytes.
 444       VPPERM,
 445       // XOP two source permutation.
 446       VPERMIL2,
 447
 448       // Vector multiply packed unsigned doubleword integers.
 449       PMULUDQ,
 450       // Vector multiply packed signed doubleword integers.
 451       PMULDQ,
 452       // Vector Multiply Packed UnsignedIntegers with Round and Scale.
 453       MULHRS,
 454
 455       // Multiply and Add Packed Integers.
 456       VPMADDUBSW, VPMADDWD,
 457
 458       // AVX512IFMA multiply and add.
 459       // NOTE: These are different than the instruction and perform
 460       // op0 x op1 + op2.
 461       VPMADD52L, VPMADD52H,
 462
 463       // VNNI
 464       VPDPBUSD,
 465       VPDPBUSDS,
 466       VPDPWSSD,
 467       VPDPWSSDS,
 468
 469       // FMA nodes.
 470       // We use the target independent ISD::FMA for the non-inverted case.
 471       FNMADD,
 472       FMSUB,
 473       FNMSUB,
 474       FMADDSUB,
 475       FMSUBADD,
 476
 477       // FMA with rounding mode.
 478       FMADD_RND,
 479       FNMADD_RND,
 480       FMSUB_RND,
 481       FNMSUB_RND,
 482       FMADDSUB_RND,
 483       FMSUBADD_RND,
 484
 485       // Compress and expand.
 486       COMPRESS,
 487       EXPAND,
 488
 489       // Bits shuffle
 490       VPSHUFBITQMB,
 491
 492       // Convert Unsigned/Integer to Floating-Point Value with rounding mode.
 493       SINT_TO_FP_RND, UINT_TO_FP_RND,
 494       SCALAR_SINT_TO_FP, SCALAR_UINT_TO_FP,
 495       SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,
 496
 497       // Vector float/double to signed/unsigned integer.
 498       CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
 499       // Scalar float/double to signed/unsigned integer.
 500       CVTS2SI, CVTS2UI, CVTS2SI_RND, CVTS2UI_RND,
 501
 502       // Vector float/double to signed/unsigned integer with truncation.
 503       CVTTP2SI, CVTTP2UI, CVTTP2SI_SAE, CVTTP2UI_SAE,
 504       // Scalar float/double to signed/unsigned integer with truncation.
 505       CVTTS2SI, CVTTS2UI, CVTTS2SI_SAE, CVTTS2UI_SAE,
 506
 507       // Vector signed/unsigned integer to float/double.
 508       CVTSI2P, CVTUI2P,
 509
 510       // Masked versions of above. Used for v2f64->v4f32.
 511       // SRC, PASSTHRU, MASK
 512       MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI,
 513       MCVTSI2P, MCVTUI2P,
 514
 515       // Vector float to bfloat16.
 516       // Convert TWO packed single data to one packed BF16 data
 517       CVTNE2PS2BF16,
 518       // Convert packed single data to packed BF16 data
 519       CVTNEPS2BF16,
 520       // Masked version of above.
 521       // SRC, PASSTHRU, MASK
 522       MCVTNEPS2BF16,
 523
 524       // Dot product of BF16 pairs to accumulated into
 525       // packed single precision.
 526       DPBF16PS,
 527
 528       // Save xmm argument registers to the stack, according to %al. An operator
 529       // is needed so that this can be expanded with control flow.
 530       VASTART_SAVE_XMM_REGS,
 531
 532       // Windows's _chkstk call to do stack probing.
 533       WIN_ALLOCA,
 534
 535       // For allocating variable amounts of stack space when using
 536       // segmented stacks. Check if the current stacklet has enough space, and
 537       // falls back to heap allocation if not.
 538       SEG_ALLOCA,
 539
 540       // Memory barriers.
 541       MEMBARRIER,
 542       MFENCE,
 543
 544       // Store FP status word into i16 register.
 545       FNSTSW16r,
 546
 547       // Store contents of %ah into %eflags.
 548       SAHF,
 549
 550       // Get a random integer and indicate whether it is valid in CF.
 551       RDRAND,
 552
 553       // Get a NIST SP800-90B & C compliant random integer and
 554       // indicate whether it is valid in CF.
 555       RDSEED,
 556
 557       // Protection keys
 558       // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
 559       // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
 560       // value for ECX.
 561       RDPKRU, WRPKRU,
 562
 563       // SSE42 string comparisons.
 564       // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
 565       // will emit one or two instructions based on which results are used. If
 566       // flags and index/mask this allows us to use a single instruction since
 567       // we won't have to pick and opcode for flags. Instead we can rely on the
 568       // DAG to CSE everything and decide at isel.
 569       PCMPISTR,
 570       PCMPESTR,
 571
 572       // Test if in transactional execution.
 573       XTEST,
 574
 575       // ERI instructions.
 576       RSQRT28, RSQRT28_SAE, RSQRT28S, RSQRT28S_SAE,
 577       RCP28, RCP28_SAE, RCP28S, RCP28S_SAE, EXP2, EXP2_SAE,
 578
 579       // Conversions between float and half-float.
 580       CVTPS2PH, CVTPH2PS, CVTPH2PS_SAE,
 581
 582       // Masked version of above.
 583       // SRC, RND, PASSTHRU, MASK
 584       MCVTPS2PH,
 585
 586       // Galois Field Arithmetic Instructions
 587       GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,
 588
 589       // LWP insert record.
 590       LWPINS,
 591
 592       // User level wait
 593       UMWAIT, TPAUSE,
 594
 595       // Enqueue Stores Instructions
 596       ENQCMD, ENQCMDS,
 597
 598       // For avx512-vp2intersect
 599       VP2INTERSECT,
 600
 601       /// X86 strict FP compare instructions.
 602       STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
 603       STRICT_FCMPS,
 604
 605       // Vector packed double/float comparison.
 606       STRICT_CMPP,
 607
 608       /// Vector comparison generating mask bits for fp and
 609       /// integer signed and unsigned data types.
 610       STRICT_CMPM,
 611
 612       // Vector float/double to signed/unsigned integer with truncation.
 613       STRICT_CVTTP2SI, STRICT_CVTTP2UI,
 614
 615       // Vector FP extend.
 616       STRICT_VFPEXT,
 617
 618       // Vector FP round.
 619       STRICT_VFPROUND,
 620
 621       // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
 622       // Also used by the legacy (V)ROUND intrinsics where we mask out the
 623       // scaling part of the immediate.
 624       STRICT_VRNDSCALE,
 625
 626       // Vector signed/unsigned integer to float/double.
 627       STRICT_CVTSI2P, STRICT_CVTUI2P,
 628
 629       // Compare and swap.
 630       LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
 631       LCMPXCHG8_DAG,
 632       LCMPXCHG16_DAG,
 633       LCMPXCHG8_SAVE_EBX_DAG,
 634       LCMPXCHG16_SAVE_RBX_DAG,
 635
 636       /// LOCK-prefixed arithmetic read-modify-write instructions.
 637       /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
 638       LADD, LSUB, LOR, LXOR, LAND,
 639
 640       // Load, scalar_to_vector, and zero extend.
 641       VZEXT_LOAD,
 642
 643       // extract_vector_elt, store.
 644       VEXTRACT_STORE,
 645
 646       // scalar broadcast from memory
 647       VBROADCAST_LOAD,
 648
 649       // Store FP control world into i16 memory.
 650       FNSTCW16m,
 651
 652       /// This instruction implements FP_TO_SINT with the
 653       /// integer destination in memory and a FP reg source.  This corresponds
 654       /// to the X86::FIST*m instructions and the rounding mode change stuff. It
 655       /// has two inputs (token chain and address) and two outputs (int value
 656       /// and token chain). Memory VT specifies the type to store to.
 657       FP_TO_INT_IN_MEM,
 658
 659       /// This instruction implements SINT_TO_FP with the
 660       /// integer source in memory and FP reg result.  This corresponds to the
 661       /// X86::FILD*m instructions. It has two inputs (token chain and address)
 662       /// and two outputs (FP value and token chain). FILD_FLAG also produces a
 663       /// flag). The integer source type is specified by the memory VT.
 664       FILD,
 665       FILD_FLAG,
 666
 667       /// This instruction implements a fp->int store from FP stack
 668       /// slots. This corresponds to the fist instruction. It takes a
 669       /// chain operand, value to store, address, and glue. The memory VT
 670       /// specifies the type to store as.
 671       FIST,
 672
 673       /// This instruction implements an extending load to FP stack slots.
 674       /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
 675       /// operand, and ptr to load from. The memory VT specifies the type to
 676       /// load from.
 677       FLD,
 678
 679       /// This instruction implements a truncating store from FP stack
 680       /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
 681       /// chain operand, value to store, address, and glue. The memory VT
 682       /// specifies the type to store as.
 683       FST,
 684
 685       /// This instruction grabs the address of the next argument
 686       /// from a va_list. (reads and modifies the va_list in memory)
 687       VAARG_64,
 688
 689       // Vector truncating store with unsigned/signed saturation
 690       VTRUNCSTOREUS, VTRUNCSTORES,
 691       // Vector truncating masked store with unsigned/signed saturation
 692       VMTRUNCSTOREUS, VMTRUNCSTORES,
 693
 694       // X86 specific gather and scatter
 695       MGATHER, MSCATTER,
 696
 697       // WARNING: Do not add anything in the end unless you want the node to
 698       // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
 699       // opcodes will be thought as target memory ops!
 700     };
 701   } // end namespace X86ISD
 702
 703   /// Define some predicates that are used for node matching.
 704   namespace X86 {
 705     /// Returns true if Elt is a constant zero or floating point constant +0.0.
 706     bool isZeroNode(SDValue Elt);
 707
 708     /// Returns true of the given offset can be
 709     /// fit into displacement field of the instruction.
 710     bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
 711                                       bool hasSymbolicDisplacement = true);
 712
 713     /// Determines whether the callee is required to pop its
 714     /// own arguments. Callee pop is necessary to support tail calls.
 715     bool isCalleePop(CallingConv::ID CallingConv,
 716                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
 717
 718     /// If Op is a constant whose elements are all the same constant or
 719     /// undefined, return true and return the constant value in \p SplatVal.
 720     bool isConstantSplat(SDValue Op, APInt &SplatVal);
 721   } // end namespace X86
 722
 723   //===--------------------------------------------------------------------===//
 724   //  X86 Implementation of the TargetLowering interface
 725   class X86TargetLowering final : public TargetLowering {
 726   public:
 727     explicit X86TargetLowering(const X86TargetMachine &TM,
 728                                const X86Subtarget &STI);
 729
 730     unsigned getJumpTableEncoding() const override;
 731     bool useSoftFloat() const override;
 732
 733     void markLibCallAttributes(MachineFunction *MF, unsigned CC,
 734                                ArgListTy &Args) const override;
 735
 736     MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
 737       return MVT::i8;
 738     }
 739
 740     const MCExpr *
 741     LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
 742                               const MachineBasicBlock *MBB, unsigned uid,
 743                               MCContext &Ctx) const override;
 744
 745     /// Returns relocation base for the given PIC jumptable.
 746     SDValue getPICJumpTableRelocBase(SDValue Table,
 747                                      SelectionDAG &DAG) const override;
 748     const MCExpr *
 749     getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
 750                                  unsigned JTI, MCContext &Ctx) const override;
 751
 752     /// Return the desired alignment for ByVal aggregate
 753     /// function arguments in the caller parameter area. For X86, aggregates
 754     /// that contains are placed at 16-byte boundaries while the rest are at
 755     /// 4-byte boundaries.
 756     unsigned getByValTypeAlignment(Type *Ty,
 757                                    const DataLayout &DL) const override;
 758
 759     /// Returns the target specific optimal type for load
 760     /// and store operations as a result of memset, memcpy, and memmove
 761     /// lowering. If DstAlign is zero that means it's safe to destination
 762     /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
 763     /// means there isn't a need to check it against alignment requirement,
 764     /// probably because the source does not need to be loaded. If 'IsMemset' is
 765     /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
 766     /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
 767     /// source is constant so it does not need to be loaded.
 768     /// It returns EVT::Other if the type should be determined using generic
 769     /// target-independent logic.
 770     EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
 771                             bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
 772                             const AttributeList &FuncAttributes) const override;
 773
 774     /// Returns true if it's safe to use load / store of the
 775     /// specified type to expand memcpy / memset inline. This is mostly true
 776     /// for all types except for some special cases. For example, on X86
 777     /// targets without SSE2 f64 load / store are done with fldl / fstpl which
 778     /// also does type conversion. Note the specified type doesn't have to be
 779     /// legal as the hook is used before type legalization.
 780     bool isSafeMemOpType(MVT VT) const override;
 781
 782     /// Returns true if the target allows unaligned memory accesses of the
 783     /// specified type. Returns whether it is "fast" in the last argument.
 784     bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
 785                                         MachineMemOperand::Flags Flags,
 786                                         bool *Fast) const override;
 787
 788     /// Provide custom lowering hooks for some operations.
 789     ///
 790     SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
 791
 792     /// Places new result values for the node in Results (their number
 793     /// and types must exactly match those of the original return values of
 794     /// the node), or leaves Results empty, which indicates that the node is not
 795     /// to be custom lowered after all.
 796     void LowerOperationWrapper(SDNode *N,
 797                                SmallVectorImpl<SDValue> &Results,
 798                                SelectionDAG &DAG) const override;
 799
 800     /// Replace the results of node with an illegal result
 801     /// type with new values built out of custom code.
 802     ///
 803     void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
 804                             SelectionDAG &DAG) const override;
 805
 806     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 807
 808     // Return true if it is profitable to combine a BUILD_VECTOR with a
 809     // stride-pattern to a shuffle and a truncate.
 810     // Example of such a combine:
 811     // v4i32 build_vector((extract_elt V, 1),
 812     //                    (extract_elt V, 3),
 813     //                    (extract_elt V, 5),
 814     //                    (extract_elt V, 7))
 815     //  -->
 816     // v4i32 truncate (bitcast (shuffle<1,u,3,u,4,u,5,u,6,u,7,u> V, u) to
 817     // v4i64)
 818     bool isDesirableToCombineBuildVectorToShuffleTruncate(
 819         ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const override;
 820
 821     /// Return true if the target has native support for
 822     /// the specified value type and it is 'desirable' to use the type for the
 823     /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
 824     /// instruction encodings are longer and some i16 instructions are slow.
 825     bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
 826
 827     /// Return true if the target has native support for the
 828     /// specified value type and it is 'desirable' to use the type. e.g. On x86
 829     /// i16 is legal, but undesirable since i16 instruction encodings are longer
 830     /// and some i16 instructions are slow.
 831     bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
 832
 833     /// Return 1 if we can compute the negated form of the specified expression
 834     /// for the same cost as the expression itself, or 2 if we can compute the
 835     /// negated form more cheaply than the expression itself. Else return 0.
 836     char isNegatibleForFree(SDValue Op, SelectionDAG &DAG, bool LegalOperations,
 837                             bool ForCodeSize, unsigned Depth) const override;
 838
 839     /// If isNegatibleForFree returns true, return the newly negated expression.
 840     SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
 841                                  bool LegalOperations, bool ForCodeSize,
 842                                  unsigned Depth) const override;
 843
 844     MachineBasicBlock *
 845     EmitInstrWithCustomInserter(MachineInstr &MI,
 846                                 MachineBasicBlock *MBB) const override;
 847
 848     /// This method returns the name of a target specific DAG node.
 849     const char *getTargetNodeName(unsigned Opcode) const override;
 850
 851     /// Do not merge vector stores after legalization because that may conflict
 852     /// with x86-specific store splitting optimizations.
 853     bool mergeStoresAfterLegalization(EVT MemVT) const override {
 854       return !MemVT.isVector();
 855     }
 856
 857     bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
 858                           const SelectionDAG &DAG) const override;
 859
 860     bool isCheapToSpeculateCttz() const override;
 861
 862     bool isCheapToSpeculateCtlz() const override;
 863
 864     bool isCtlzFast() const override;
 865
 866     bool hasBitPreservingFPLogic(EVT VT) const override {
 867       return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
 868     }
 869
 870     bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
 871       // If the pair to store is a mixture of float and int values, we will
 872       // save two bitwise instructions and one float-to-int instruction and
 873       // increase one store instruction. There is potentially a more
 874       // significant benefit because it avoids the float->int domain switch
 875       // for input value. So It is more likely a win.
 876       if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
 877           (LTy.isInteger() && HTy.isFloatingPoint()))
 878         return true;
 879       // If the pair only contains int values, we will save two bitwise
 880       // instructions and increase one store instruction (costing one more
 881       // store buffer). Since the benefit is more blurred so we leave
 882       // such pair out until we get testcase to prove it is a win.
 883       return false;
 884     }
 885
 886     bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
 887
 888     bool hasAndNotCompare(SDValue Y) const override;
 889
 890     bool hasAndNot(SDValue Y) const override;
 891
 892     bool hasBitTest(SDValue X, SDValue Y) const override;
 893
 894     bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
 895         SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
 896         unsigned OldShiftOpcode, unsigned NewShiftOpcode,
 897         SelectionDAG &DAG) const override;
 898
 899     bool shouldFoldConstantShiftPairToMask(const SDNode *N,
 900                                            CombineLevel Level) const override;
 901
 902     bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
 903
 904     bool
 905     shouldTransformSignedTruncationCheck(EVT XVT,
 906                                          unsigned KeptBits) const override {
 907       // For vectors, we don't have a preference..
 908       if (XVT.isVector())
 909         return false;
 910
 911       auto VTIsOk = [](EVT VT) -> bool {
 912         return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
 913                VT == MVT::i64;
 914       };
 915
 916       // We are ok with KeptBitsVT being byte/word/dword, what MOVS supports.
 917       // XVT will be larger than KeptBitsVT.
 918       MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
 919       return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
 920     }
 921
 922     bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
 923
 924     bool shouldSplatInsEltVarIndex(EVT VT) const override;
 925
 926     bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
 927       return VT.isScalarInteger();
 928     }
 929
 930     /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
 931     MVT hasFastEqualityCompare(unsigned NumBits) const override;
 932
 933     /// Return the value type to use for ISD::SETCC.
 934     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
 935                            EVT VT) const override;
 936
 937     bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
 938                                       TargetLoweringOpt &TLO) const override;
 939
 940     /// Determine which of the bits specified in Mask are known to be either
 941     /// zero or one and return them in the KnownZero/KnownOne bitsets.
 942     void computeKnownBitsForTargetNode(const SDValue Op,
 943                                        KnownBits &Known,
 944                                        const APInt &DemandedElts,
 945                                        const SelectionDAG &DAG,
 946                                        unsigned Depth = 0) const override;
 947
 948     /// Determine the number of bits in the operation that are sign bits.
 949     unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
 950                                              const APInt &DemandedElts,
 951                                              const SelectionDAG &DAG,
 952                                              unsigned Depth) const override;
 953
 954     bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
 955                                                  const APInt &DemandedElts,
 956                                                  APInt &KnownUndef,
 957                                                  APInt &KnownZero,
 958                                                  TargetLoweringOpt &TLO,
 959                                                  unsigned Depth) const override;
 960
 961     bool SimplifyDemandedBitsForTargetNode(SDValue Op,
 962                                            const APInt &DemandedBits,
 963                                            const APInt &DemandedElts,
 964                                            KnownBits &Known,
 965                                            TargetLoweringOpt &TLO,
 966                                            unsigned Depth) const override;
 967
 968     SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
 969         SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
 970         SelectionDAG &DAG, unsigned Depth) const override;
 971
 972     const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
 973
 974     SDValue unwrapAddress(SDValue N) const override;
 975
 976     SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
 977
 978     bool ExpandInlineAsm(CallInst *CI) const override;
 979
 980     ConstraintType getConstraintType(StringRef Constraint) const override;
 981
 982     /// Examine constraint string and operand type and determine a weight value.
 983     /// The operand object must already have been set up with the operand type.
 984     ConstraintWeight
 985       getSingleConstraintMatchWeight(AsmOperandInfo &info,
 986                                      const char *constraint) const override;
 987
 988     const char *LowerXConstraint(EVT ConstraintVT) const override;
 989
 990     /// Lower the specified operand into the Ops vector. If it is invalid, don't
 991     /// add anything to Ops. If hasMemory is true it means one of the asm
 992     /// constraint of the inline asm instruction being processed is 'm'.
 993     void LowerAsmOperandForConstraint(SDValue Op,
 994                                       std::string &Constraint,
 995                                       std::vector<SDValue> &Ops,
 996                                       SelectionDAG &DAG) const override;
 997
 998     unsigned
 999     getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
1000       if (ConstraintCode == "o")
1001         return InlineAsm::Constraint_o;
1002       else if (ConstraintCode == "v")
1003         return InlineAsm::Constraint_v;
1004       else if (ConstraintCode == "X")
1005         return InlineAsm::Constraint_X;
1006       return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
1007     }
1008
1009     /// Handle Lowering flag assembly outputs.
1010     SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, SDLoc DL,
1011                                         const AsmOperandInfo &Constraint,
1012                                         SelectionDAG &DAG) const override;
1013
1014     /// Given a physical register constraint
1015     /// (e.g. {edx}), return the register number and the register class for the
1016     /// register.  This should only be used for C_Register constraints.  On
1017     /// error, this returns a register number of 0.
1018     std::pair<unsigned, const TargetRegisterClass *>
1019     getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
1020                                  StringRef Constraint, MVT VT) const override;
1021
1022     /// Return true if the addressing mode represented
1023     /// by AM is legal for this target, for a load/store of the specified type.
1024     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
1025                                Type *Ty, unsigned AS,
1026                                Instruction *I = nullptr) const override;
1027
1028     /// Return true if the specified immediate is legal
1029     /// icmp immediate, that is the target has icmp instructions which can
1030     /// compare a register against the immediate without having to materialize
1031     /// the immediate into a register.
1032     bool isLegalICmpImmediate(int64_t Imm) const override;
1033
1034     /// Return true if the specified immediate is legal
1035     /// add immediate, that is the target has add instructions which can
1036     /// add a register and the immediate without having to materialize
1037     /// the immediate into a register.
1038     bool isLegalAddImmediate(int64_t Imm) const override;
1039
1040     bool isLegalStoreImmediate(int64_t Imm) const override;
1041
1042     /// Return the cost of the scaling factor used in the addressing
1043     /// mode represented by AM for this target, for a load/store
1044     /// of the specified type.
1045     /// If the AM is supported, the return value must be >= 0.
1046     /// If the AM is not supported, it returns a negative value.
1047     int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
1048                              unsigned AS) const override;
1049
1050     bool isVectorShiftByScalarCheap(Type *Ty) const override;
1051
1052     /// Add x86-specific opcodes to the default list.
1053     bool isBinOp(unsigned Opcode) const override;
1054
1055     /// Returns true if the opcode is a commutative binary operation.
1056     bool isCommutativeBinOp(unsigned Opcode) const override;
1057
1058     /// Return true if it's free to truncate a value of
1059     /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
1060     /// register EAX to i16 by referencing its sub-register AX.
1061     bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
1062     bool isTruncateFree(EVT VT1, EVT VT2) const override;
1063
1064     bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
1065
1066     /// Return true if any actual instruction that defines a
1067     /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
1068     /// register. This does not necessarily include registers defined in
1069     /// unknown ways, such as incoming arguments, or copies from unknown
1070     /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
1071     /// does not necessarily apply to truncate instructions. e.g. on x86-64,
1072     /// all instructions that define 32-bit values implicit zero-extend the
1073     /// result out to 64 bits.
1074     bool isZExtFree(Type *Ty1, Type *Ty2) const override;
1075     bool isZExtFree(EVT VT1, EVT VT2) const override;
1076     bool isZExtFree(SDValue Val, EVT VT2) const override;
1077
1078     /// Return true if folding a vector load into ExtVal (a sign, zero, or any
1079     /// extend node) is profitable.
1080     bool isVectorLoadExtDesirable(SDValue) const override;
1081
1082     /// Return true if an FMA operation is faster than a pair of fmul and fadd
1083     /// instructions. fmuladd intrinsics will be expanded to FMAs when this
1084     /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
1085     bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
1086                                     EVT VT) const override;
1087
1088     /// Return true if it's profitable to narrow
1089     /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
1090     /// from i32 to i8 but not from i32 to i16.
1091     bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
1092
1093     /// Given an intrinsic, checks if on the target the intrinsic will need to map
1094     /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1095     /// true and stores the intrinsic information into the IntrinsicInfo that was
1096     /// passed to the function.
1097     bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
1098                             MachineFunction &MF,
1099                             unsigned Intrinsic) const override;
1100
1101     /// Returns true if the target can instruction select the
1102     /// specified FP immediate natively. If false, the legalizer will
1103     /// materialize the FP immediate as a load from a constant pool.
1104     bool isFPImmLegal(const APFloat &Imm, EVT VT,
1105                       bool ForCodeSize) const override;
1106
1107     /// Targets can use this to indicate that they only support *some*
1108     /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1109     /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1110     /// be legal.
1111     bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1112
1113     /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1114     /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1115     /// constant pool entry.
1116     bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1117
1118     /// Returns true if lowering to a jump table is allowed.
1119     bool areJTsAllowed(const Function *Fn) const override;
1120
1121     /// If true, then instruction selection should
1122     /// seek to shrink the FP constant of the specified type to a smaller type
1123     /// in order to save space and / or reduce runtime.
1124     bool ShouldShrinkFPConstant(EVT VT) const override {
1125       // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
1126       // expensive than a straight movsd. On the other hand, it's important to
1127       // shrink long double fp constant since fldt is very slow.
1128       return !X86ScalarSSEf64 || VT == MVT::f80;
1129     }
1130
1131     /// Return true if we believe it is correct and profitable to reduce the
1132     /// load node to a smaller type.
1133     bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1134                                EVT NewVT) const override;
1135
1136     /// Return true if the specified scalar FP type is computed in an SSE
1137     /// register, not on the X87 floating point stack.
1138     bool isScalarFPTypeInSSEReg(EVT VT) const {
1139       return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
1140              (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
1141     }
1142
1143     /// Returns true if it is beneficial to convert a load of a constant
1144     /// to just the constant itself.
1145     bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1146                                            Type *Ty) const override;
1147
1148     bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;
1149
1150     bool convertSelectOfConstantsToMath(EVT VT) const override;
1151
1152     bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
1153                                 SDValue C) const override;
1154
1155     /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1156     /// with this index.
1157     bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1158                                  unsigned Index) const override;
1159
1160     /// Scalar ops always have equal or better analysis/performance/power than
1161     /// the vector equivalent, so this always makes sense if the scalar op is
1162     /// supported.
1163     bool shouldScalarizeBinop(SDValue) const override;
1164
1165     /// Extract of a scalar FP value from index 0 of a vector is free.
1166     bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1167       EVT EltVT = VT.getScalarType();
1168       return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
1169     }
1170
1171     /// Overflow nodes should get combined/lowered to optimal instructions
1172     /// (they should allow eliminating explicit compares by getting flags from
1173     /// math ops).
1174     bool shouldFormOverflowOp(unsigned Opcode, EVT VT) const override;
1175
1176     bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
1177                                       unsigned AddrSpace) const override {
1178       // If we can replace more than 2 scalar stores, there will be a reduction
1179       // in instructions even after we add a vector constant load.
1180       return NumElem > 2;
1181     }
1182
1183     bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1184                                  const SelectionDAG &DAG,
1185                                  const MachineMemOperand &MMO) const override;
1186
1187     /// Intel processors have a unified instruction and data cache
1188     const char * getClearCacheBuiltinName() const override {
1189       return nullptr; // nothing to do, move along.
1190     }
1191
1192     Register getRegisterByName(const char* RegName, LLT VT,
1193                                const MachineFunction &MF) const override;
1194
1195     /// If a physical register, this returns the register that receives the
1196     /// exception address on entry to an EH pad.
1197     unsigned
1198     getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1199
1200     /// If a physical register, this returns the register that receives the
1201     /// exception typeid on entry to a landing pad.
1202     unsigned
1203     getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1204
1205     virtual bool needsFixedCatchObjects() const override;
1206
1207     /// This method returns a target specific FastISel object,
1208     /// or null if the target does not support "fast" ISel.
1209     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1210                              const TargetLibraryInfo *libInfo) const override;
1211
1212     /// If the target has a standard location for the stack protector cookie,
1213     /// returns the address of that location. Otherwise, returns nullptr.
1214     Value *getIRStackGuard(IRBuilder<> &IRB) const override;
1215
1216     bool useLoadStackGuardNode() const override;
1217     bool useStackGuardXorFP() const override;
1218     void insertSSPDeclarations(Module &M) const override;
1219     Value *getSDagStackGuard(const Module &M) const override;
1220     Function *getSSPStackGuardCheck(const Module &M) const override;
1221     SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1222                                 const SDLoc &DL) const override;
1223
1224
1225     /// Return true if the target stores SafeStack pointer at a fixed offset in
1226     /// some non-standard address space, and populates the address space and
1227     /// offset as appropriate.
1228     Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
1229
1230     std::pair<SDValue, SDValue> BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
1231                                           SDValue StackSlot,
1232                                           SelectionDAG &DAG) const;
1233
1234     bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
1235
1236     /// Customize the preferred legalization strategy for certain types.
1237     LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1238
1239     MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1240                                       EVT VT) const override;
1241
1242     unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1243                                            CallingConv::ID CC,
1244                                            EVT VT) const override;
1245
1246     unsigned getVectorTypeBreakdownForCallingConv(
1247         LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
1248         unsigned &NumIntermediates, MVT &RegisterVT) const override;
1249
1250     bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1251
1252     bool supportSwiftError() const override;
1253
1254     StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
1255
1256     unsigned getStackProbeSize(MachineFunction &MF) const;
1257
1258     bool hasVectorBlend() const override { return true; }
1259
1260     unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1261
1262     /// Lower interleaved load(s) into target specific
1263     /// instructions/intrinsics.
1264     bool lowerInterleavedLoad(LoadInst *LI,
1265                               ArrayRef<ShuffleVectorInst *> Shuffles,
1266                               ArrayRef<unsigned> Indices,
1267                               unsigned Factor) const override;
1268
1269     /// Lower interleaved store(s) into target specific
1270     /// instructions/intrinsics.
1271     bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1272                                unsigned Factor) const override;
1273
1274     SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
1275                                    SDValue Addr, SelectionDAG &DAG)
1276                                    const override;
1277
1278   protected:
1279     std::pair<const TargetRegisterClass *, uint8_t>
1280     findRepresentativeClass(const TargetRegisterInfo *TRI,
1281                             MVT VT) const override;
1282
1283   private:
1284     /// Keep a reference to the X86Subtarget around so that we can
1285     /// make the right decision when generating code for different targets.
1286     const X86Subtarget &Subtarget;
1287
1288     /// Select between SSE or x87 floating point ops.
1289     /// When SSE is available, use it for f32 operations.
1290     /// When SSE2 is available, use it for f64 operations.
1291     bool X86ScalarSSEf32;
1292     bool X86ScalarSSEf64;
1293
1294     /// A list of legal FP immediates.
1295     std::vector<APFloat> LegalFPImmediates;
1296
1297     /// Indicate that this x86 target can instruction
1298     /// select the specified FP immediate natively.
1299     void addLegalFPImmediate(const APFloat& Imm) {
1300       LegalFPImmediates.push_back(Imm);
1301     }
1302
1303     SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
1304                             CallingConv::ID CallConv, bool isVarArg,
1305                             const SmallVectorImpl<ISD::InputArg> &Ins,
1306                             const SDLoc &dl, SelectionDAG &DAG,
1307                             SmallVectorImpl<SDValue> &InVals,
1308                             uint32_t *RegMask) const;
1309     SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1310                              const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1311                              const SDLoc &dl, SelectionDAG &DAG,
1312                              const CCValAssign &VA, MachineFrameInfo &MFI,
1313                              unsigned i) const;
1314     SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1315                              const SDLoc &dl, SelectionDAG &DAG,
1316                              const CCValAssign &VA,
1317                              ISD::ArgFlagsTy Flags) const;
1318
1319     // Call lowering helpers.
1320
1321     /// Check whether the call is eligible for tail call optimization. Targets
1322     /// that want to do tail call optimization should implement this function.
1323     bool IsEligibleForTailCallOptimization(SDValue Callee,
1324                                            CallingConv::ID CalleeCC,
1325                                            bool isVarArg,
1326                                            bool isCalleeStructRet,
1327                                            bool isCallerStructRet,
1328                                            Type *RetTy,
1329                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
1330                                     const SmallVectorImpl<SDValue> &OutVals,
1331                                     const SmallVectorImpl<ISD::InputArg> &Ins,
1332                                            SelectionDAG& DAG) const;
1333     SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1334                                     SDValue Chain, bool IsTailCall,
1335                                     bool Is64Bit, int FPDiff,
1336                                     const SDLoc &dl) const;
1337
1338     unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1339                                          SelectionDAG &DAG) const;
1340
1341     unsigned getAddressSpace(void) const;
1342
1343     SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool isSigned,
1344                             SDValue &Chain) const;
1345
1346     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1347     SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1348     SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1349     SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1350
1351     unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
1352                                   const unsigned char OpFlags = 0) const;
1353     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1354     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1355     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1356     SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1357     SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1358
1359     /// Creates target global address or external symbol nodes for calls or
1360     /// other uses.
1361     SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1362                                   bool ForCall) const;
1363
1364     SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1365     SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1366     SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1367     SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1368     SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1369     SDValue LowerSTRICT_FSETCC(SDValue Op, SelectionDAG &DAG) const;
1370     SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1371     SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1372     SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
1373     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1374     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1375     SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1376     SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1377     SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1378     SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1379     SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1380     SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1381     SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
1382     SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1383     SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1384     SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
1385     SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1386     SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
1387     SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
1388     SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
1389     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1390     SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
1391     SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
1392     SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
1393
1394     SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
1395                           RTLIB::Libcall Call) const;
1396
1397     SDValue
1398     LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1399                          const SmallVectorImpl<ISD::InputArg> &Ins,
1400                          const SDLoc &dl, SelectionDAG &DAG,
1401                          SmallVectorImpl<SDValue> &InVals) const override;
1402     SDValue LowerCall(CallLoweringInfo &CLI,
1403                       SmallVectorImpl<SDValue> &InVals) const override;
1404
1405     SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1406                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1407                         const SmallVectorImpl<SDValue> &OutVals,
1408                         const SDLoc &dl, SelectionDAG &DAG) const override;
1409
1410     bool supportSplitCSR(MachineFunction *MF) const override {
1411       return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
1412           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
1413     }
1414     void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1415     void insertCopiesSplitCSR(
1416       MachineBasicBlock *Entry,
1417       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1418
1419     bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1420
1421     bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1422
1423     EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1424                             ISD::NodeType ExtendKind) const override;
1425
1426     bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1427                         bool isVarArg,
1428                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1429                         LLVMContext &Context) const override;
1430
1431     const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1432
1433     TargetLoweringBase::AtomicExpansionKind
1434     shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
1435     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1436     TargetLoweringBase::AtomicExpansionKind
1437     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1438
1439     LoadInst *
1440     lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1441
1442     bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override;
1443     bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override;
1444
1445     bool needsCmpXchgNb(Type *MemType) const;
1446
1447     void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1448                                 MachineBasicBlock *DispatchBB, int FI) const;
1449
1450     // Utility function to emit the low-level va_arg code for X86-64.
1451     MachineBasicBlock *
1452     EmitVAARG64WithCustomInserter(MachineInstr &MI,
1453                                   MachineBasicBlock *MBB) const;
1454
1455     /// Utility function to emit the xmm reg save portion of va_start.
1456     MachineBasicBlock *
1457     EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
1458                                              MachineBasicBlock *BB) const;
1459
1460     MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
1461                                                  MachineInstr &MI2,
1462                                                  MachineBasicBlock *BB) const;
1463
1464     MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1465                                          MachineBasicBlock *BB) const;
1466
1467     MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I,
1468                                            MachineBasicBlock *BB) const;
1469
1470     MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1471                                            MachineBasicBlock *BB) const;
1472
1473     MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
1474                                            MachineBasicBlock *BB) const;
1475
1476     MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1477                                             MachineBasicBlock *BB) const;
1478
1479     MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
1480                                           MachineBasicBlock *BB) const;
1481
1482     MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1483                                           MachineBasicBlock *BB) const;
1484
1485     MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
1486                                                 MachineBasicBlock *BB) const;
1487
1488     MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1489                                         MachineBasicBlock *MBB) const;
1490
1491     void emitSetJmpShadowStackFix(MachineInstr &MI,
1492                                   MachineBasicBlock *MBB) const;
1493
1494     MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1495                                          MachineBasicBlock *MBB) const;
1496
1497     MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
1498                                                  MachineBasicBlock *MBB) const;
1499
1500     MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
1501                                      MachineBasicBlock *MBB) const;
1502
1503     MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
1504                                              MachineBasicBlock *MBB) const;
1505
1506     /// Convert a comparison if required by the subtarget.
1507     SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
1508
1509     /// Emit flags for the given setcc condition and operands. Also returns the
1510     /// corresponding X86 condition code constant in X86CC.
1511     SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
1512                               const SDLoc &dl, SelectionDAG &DAG,
1513                               SDValue &X86CC, SDValue &Chain,
1514                               bool IsSignaling) const;
1515
1516     /// Check if replacement of SQRT with RSQRT should be disabled.
1517     bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;
1518
1519     /// Use rsqrt* to speed up sqrt calculations.
1520     SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1521                             int &RefinementSteps, bool &UseOneConstNR,
1522                             bool Reciprocal) const override;
1523
1524     /// Use rcp* to speed up fdiv calculations.
1525     SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1526                              int &RefinementSteps) const override;
1527
1528     /// Reassociate floating point divisions into multiply by reciprocal.
1529     unsigned combineRepeatedFPDivisors() const override;
1530
1531     SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1532                           SmallVectorImpl<SDNode *> &Created) const override;
1533   };
1534
1535   namespace X86 {
1536     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1537                              const TargetLibraryInfo *libInfo);
1538   } // end namespace X86
1539
1540   // Base class for all X86 non-masked store operations.
1541   class X86StoreSDNode : public MemSDNode {
1542   public:
1543     X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
1544                    SDVTList VTs, EVT MemVT,
1545                    MachineMemOperand *MMO)
1546       :MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1547     const SDValue &getValue() const { return getOperand(1); }
1548     const SDValue &getBasePtr() const { return getOperand(2); }
1549
1550     static bool classof(const SDNode *N) {
1551       return N->getOpcode() == X86ISD::VTRUNCSTORES ||
1552         N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1553     }
1554   };
1555
1556   // Base class for all X86 masked store operations.
1557   // The class has the same order of operands as MaskedStoreSDNode for
1558   // convenience.
1559   class X86MaskedStoreSDNode : public MemSDNode {
1560   public:
1561     X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
1562                          const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1563                          MachineMemOperand *MMO)
1564       : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1565
1566     const SDValue &getValue()   const { return getOperand(1); }
1567     const SDValue &getBasePtr() const { return getOperand(2); }
1568     const SDValue &getMask()    const { return getOperand(3); }
1569
1570     static bool classof(const SDNode *N) {
1571       return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
1572         N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1573     }
1574   };
1575
1576   // X86 Truncating Store with Signed saturation.
1577   class TruncSStoreSDNode : public X86StoreSDNode {
1578   public:
1579     TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
1580                         SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1581       : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1582
1583     static bool classof(const SDNode *N) {
1584       return N->getOpcode() == X86ISD::VTRUNCSTORES;
1585     }
1586   };
1587
1588   // X86 Truncating Store with Unsigned saturation.
1589   class TruncUSStoreSDNode : public X86StoreSDNode {
1590   public:
1591     TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
1592                       SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1593       : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1594
1595     static bool classof(const SDNode *N) {
1596       return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1597     }
1598   };
1599
1600   // X86 Truncating Masked Store with Signed saturation.
1601   class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
1602   public:
1603     MaskedTruncSStoreSDNode(unsigned Order,
1604                          const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1605                          MachineMemOperand *MMO)
1606       : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1607
1608     static bool classof(const SDNode *N) {
1609       return N->getOpcode() == X86ISD::VMTRUNCSTORES;
1610     }
1611   };
1612
1613   // X86 Truncating Masked Store with Unsigned saturation.
1614   class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
1615   public:
1616     MaskedTruncUSStoreSDNode(unsigned Order,
1617                             const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1618                             MachineMemOperand *MMO)
1619       : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1620
1621     static bool classof(const SDNode *N) {
1622       return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1623     }
1624   };
1625
1626   // X86 specific Gather/Scatter nodes.
1627   // The class has the same order of operands as MaskedGatherScatterSDNode for
1628   // convenience.
1629   class X86MaskedGatherScatterSDNode : public MemSDNode {
1630   public:
1631     X86MaskedGatherScatterSDNode(unsigned Opc, unsigned Order,
1632                                  const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1633                                  MachineMemOperand *MMO)
1634         : MemSDNode(Opc, Order, dl, VTs, MemVT, MMO) {}
1635
1636     const SDValue &getBasePtr() const { return getOperand(3); }
1637     const SDValue &getIndex()   const { return getOperand(4); }
1638     const SDValue &getMask()    const { return getOperand(2); }
1639     const SDValue &getScale()   const { return getOperand(5); }
1640
1641     static bool classof(const SDNode *N) {
1642       return N->getOpcode() == X86ISD::MGATHER ||
1643              N->getOpcode() == X86ISD::MSCATTER;
1644     }
1645   };
1646
1647   class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1648   public:
1649     X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1650                           EVT MemVT, MachineMemOperand *MMO)
1651         : X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT,
1652                                        MMO) {}
1653
1654     const SDValue &getPassThru() const { return getOperand(1); }
1655
1656     static bool classof(const SDNode *N) {
1657       return N->getOpcode() == X86ISD::MGATHER;
1658     }
1659   };
1660
1661   class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1662   public:
1663     X86MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1664                            EVT MemVT, MachineMemOperand *MMO)
1665         : X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs, MemVT,
1666                                        MMO) {}
1667
1668     const SDValue &getValue() const { return getOperand(1); }
1669
1670     static bool classof(const SDNode *N) {
1671       return N->getOpcode() == X86ISD::MSCATTER;
1672     }
1673   };
1674
1675   /// Generate unpacklo/unpackhi shuffle mask.
1676   template <typename T = int>
1677   void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
1678                                bool Unary) {
1679     assert(Mask.empty() && "Expected an empty shuffle mask vector");
1680     int NumElts = VT.getVectorNumElements();
1681     int NumEltsInLane = 128 / VT.getScalarSizeInBits();
1682     for (int i = 0; i < NumElts; ++i) {
1683       unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
1684       int Pos = (i % NumEltsInLane) / 2 + LaneStart;
1685       Pos += (Unary ? 0 : NumElts * (i % 2));
1686       Pos += (Lo ? 0 : NumEltsInLane / 2);
1687       Mask.push_back(Pos);
1688     }
1689   }
1690
1691   /// Helper function to scale a shuffle or target shuffle mask, replacing each
1692   /// mask index with the scaled sequential indices for an equivalent narrowed
1693   /// mask. This is the reverse process to canWidenShuffleElements, but can
1694   /// always succeed.
1695   template <typename T>
1696   void scaleShuffleMask(size_t Scale, ArrayRef<T> Mask,
1697                         SmallVectorImpl<T> &ScaledMask) {
1698     assert(0 < Scale && "Unexpected scaling factor");
1699     size_t NumElts = Mask.size();
1700     ScaledMask.assign(NumElts * Scale, -1);
1701
1702     for (size_t i = 0; i != NumElts; ++i) {
1703       int M = Mask[i];
1704
1705       // Repeat sentinel values in every mask element.
1706       if (M < 0) {
1707         for (size_t s = 0; s != Scale; ++s)
1708           ScaledMask[(Scale * i) + s] = M;
1709         continue;
1710       }
1711
1712       // Scale mask element and increment across each mask element.
1713       for (size_t s = 0; s != Scale; ++s)
1714         ScaledMask[(Scale * i) + s] = (Scale * M) + s;
1715     }
1716   }
1717 } // end namespace llvm
1718
1719 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H