contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h

   1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file defines the interfaces that X86 uses to lower LLVM code into a
  10 // selection DAG.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
  15 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
  16
  17 #include "llvm/CodeGen/TargetLowering.h"
  18
  19 namespace llvm {
  20   class X86Subtarget;
  21   class X86TargetMachine;
  22
  23   namespace X86ISD {
  24     // X86-specific DAG node opcodes.
  25   enum NodeType : unsigned {
  26     // Start the numbering where the builtin ops leave off.
  27     FIRST_NUMBER = ISD::BUILTIN_OP_END,
  28
  29     /// Bit scan forward.
  30     BSF,
  31     /// Bit scan reverse.
  32     BSR,
  33
  34     /// X86 funnel/double shift i16 instructions. These correspond to
  35     /// X86::SHLDW and X86::SHRDW instructions which have different amt
  36     /// modulo rules to generic funnel shifts.
  37     /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD.
  38     FSHL,
  39     FSHR,
  40
  41     /// Bitwise logical AND of floating point values. This corresponds
  42     /// to X86::ANDPS or X86::ANDPD.
  43     FAND,
  44
  45     /// Bitwise logical OR of floating point values. This corresponds
  46     /// to X86::ORPS or X86::ORPD.
  47     FOR,
  48
  49     /// Bitwise logical XOR of floating point values. This corresponds
  50     /// to X86::XORPS or X86::XORPD.
  51     FXOR,
  52
  53     /// Bitwise logical ANDNOT of floating point values. This
  54     /// corresponds to X86::ANDNPS or X86::ANDNPD.
  55     FANDN,
  56
  57     /// These operations represent an abstract X86 call
  58     /// instruction, which includes a bunch of information.  In particular the
  59     /// operands of these nodes are:
  60     ///
  61     ///     #0 - The incoming token chain
  62     ///     #1 - The callee
  63     ///     #2 - The number of arg bytes the caller pushes on the stack.
  64     ///     #3 - The number of arg bytes the callee pops off the stack.
  65     ///     #4 - The value to pass in AL/AX/EAX (optional)
  66     ///     #5 - The value to pass in DL/DX/EDX (optional)
  67     ///
  68     /// The result values of these nodes are:
  69     ///
  70     ///     #0 - The outgoing token chain
  71     ///     #1 - The first register result value (optional)
  72     ///     #2 - The second register result value (optional)
  73     ///
  74     CALL,
  75
  76     /// Same as call except it adds the NoTrack prefix.
  77     NT_CALL,
  78
  79     // Pseudo for an ObjC call that gets emitted together with a special
  80     // marker instruction.
  81     CALL_RVMARKER,
  82
  83     /// X86 compare and logical compare instructions.
  84     CMP,
  85     FCMP,
  86     COMI,
  87     UCOMI,
  88
  89     /// X86 bit-test instructions.
  90     BT,
  91
  92     /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
  93     /// operand, usually produced by a CMP instruction.
  94     SETCC,
  95
  96     /// X86 Select
  97     SELECTS,
  98
  99     // Same as SETCC except it's materialized with an sbb and the value is all
 100     // ones or all zeros.
 101     SETCC_CARRY, // R = carry_bit ? ~0 : 0
 102
 103     /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
 104     /// Operands are two FP values to compare; result is a mask of
 105     /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
 106     FSETCC,
 107
 108     /// X86 FP SETCC, similar to above, but with output as an i1 mask, and
 109     /// a version with SAE.
 110     FSETCCM,
 111     FSETCCM_SAE,
 112
 113     /// X86 conditional moves. Operand 0 and operand 1 are the two values
 114     /// to select from. Operand 2 is the condition code, and operand 3 is the
 115     /// flag operand produced by a CMP or TEST instruction.
 116     CMOV,
 117
 118     /// X86 conditional branches. Operand 0 is the chain operand, operand 1
 119     /// is the block to branch if condition is true, operand 2 is the
 120     /// condition code, and operand 3 is the flag operand produced by a CMP
 121     /// or TEST instruction.
 122     BRCOND,
 123
 124     /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
 125     /// operand 1 is the target address.
 126     NT_BRIND,
 127
 128     /// Return with a flag operand. Operand 0 is the chain operand, operand
 129     /// 1 is the number of bytes of stack to pop.
 130     RET_FLAG,
 131
 132     /// Return from interrupt. Operand 0 is the number of bytes to pop.
 133     IRET,
 134
 135     /// Repeat fill, corresponds to X86::REP_STOSx.
 136     REP_STOS,
 137
 138     /// Repeat move, corresponds to X86::REP_MOVSx.
 139     REP_MOVS,
 140
 141     /// On Darwin, this node represents the result of the popl
 142     /// at function entry, used for PIC code.
 143     GlobalBaseReg,
 144
 145     /// A wrapper node for TargetConstantPool, TargetJumpTable,
 146     /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
 147     /// MCSymbol and TargetBlockAddress.
 148     Wrapper,
 149
 150     /// Special wrapper used under X86-64 PIC mode for RIP
 151     /// relative displacements.
 152     WrapperRIP,
 153
 154     /// Copies a 64-bit value from an MMX vector to the low word
 155     /// of an XMM vector, with the high word zero filled.
 156     MOVQ2DQ,
 157
 158     /// Copies a 64-bit value from the low word of an XMM vector
 159     /// to an MMX vector.
 160     MOVDQ2Q,
 161
 162     /// Copies a 32-bit value from the low word of a MMX
 163     /// vector to a GPR.
 164     MMX_MOVD2W,
 165
 166     /// Copies a GPR into the low 32-bit word of a MMX vector
 167     /// and zero out the high word.
 168     MMX_MOVW2D,
 169
 170     /// Extract an 8-bit value from a vector and zero extend it to
 171     /// i32, corresponds to X86::PEXTRB.
 172     PEXTRB,
 173
 174     /// Extract a 16-bit value from a vector and zero extend it to
 175     /// i32, corresponds to X86::PEXTRW.
 176     PEXTRW,
 177
 178     /// Insert any element of a 4 x float vector into any element
 179     /// of a destination 4 x float vector.
 180     INSERTPS,
 181
 182     /// Insert the lower 8-bits of a 32-bit value to a vector,
 183     /// corresponds to X86::PINSRB.
 184     PINSRB,
 185
 186     /// Insert the lower 16-bits of a 32-bit value to a vector,
 187     /// corresponds to X86::PINSRW.
 188     PINSRW,
 189
 190     /// Shuffle 16 8-bit values within a vector.
 191     PSHUFB,
 192
 193     /// Compute Sum of Absolute Differences.
 194     PSADBW,
 195     /// Compute Double Block Packed Sum-Absolute-Differences
 196     DBPSADBW,
 197
 198     /// Bitwise Logical AND NOT of Packed FP values.
 199     ANDNP,
 200
 201     /// Blend where the selector is an immediate.
 202     BLENDI,
 203
 204     /// Dynamic (non-constant condition) vector blend where only the sign bits
 205     /// of the condition elements are used. This is used to enforce that the
 206     /// condition mask is not valid for generic VSELECT optimizations. This
 207     /// is also used to implement the intrinsics.
 208     /// Operands are in VSELECT order: MASK, TRUE, FALSE
 209     BLENDV,
 210
 211     /// Combined add and sub on an FP vector.
 212     ADDSUB,
 213
 214     // FP vector ops with rounding mode.
 215     FADD_RND,
 216     FADDS,
 217     FADDS_RND,
 218     FSUB_RND,
 219     FSUBS,
 220     FSUBS_RND,
 221     FMUL_RND,
 222     FMULS,
 223     FMULS_RND,
 224     FDIV_RND,
 225     FDIVS,
 226     FDIVS_RND,
 227     FMAX_SAE,
 228     FMAXS_SAE,
 229     FMIN_SAE,
 230     FMINS_SAE,
 231     FSQRT_RND,
 232     FSQRTS,
 233     FSQRTS_RND,
 234
 235     // FP vector get exponent.
 236     FGETEXP,
 237     FGETEXP_SAE,
 238     FGETEXPS,
 239     FGETEXPS_SAE,
 240     // Extract Normalized Mantissas.
 241     VGETMANT,
 242     VGETMANT_SAE,
 243     VGETMANTS,
 244     VGETMANTS_SAE,
 245     // FP Scale.
 246     SCALEF,
 247     SCALEF_RND,
 248     SCALEFS,
 249     SCALEFS_RND,
 250
 251     // Unsigned Integer average.
 252     AVG,
 253
 254     /// Integer horizontal add/sub.
 255     HADD,
 256     HSUB,
 257
 258     /// Floating point horizontal add/sub.
 259     FHADD,
 260     FHSUB,
 261
 262     // Detect Conflicts Within a Vector
 263     CONFLICT,
 264
 265     /// Floating point max and min.
 266     FMAX,
 267     FMIN,
 268
 269     /// Commutative FMIN and FMAX.
 270     FMAXC,
 271     FMINC,
 272
 273     /// Scalar intrinsic floating point max and min.
 274     FMAXS,
 275     FMINS,
 276
 277     /// Floating point reciprocal-sqrt and reciprocal approximation.
 278     /// Note that these typically require refinement
 279     /// in order to obtain suitable precision.
 280     FRSQRT,
 281     FRCP,
 282
 283     // AVX-512 reciprocal approximations with a little more precision.
 284     RSQRT14,
 285     RSQRT14S,
 286     RCP14,
 287     RCP14S,
 288
 289     // Thread Local Storage.
 290     TLSADDR,
 291
 292     // Thread Local Storage. A call to get the start address
 293     // of the TLS block for the current module.
 294     TLSBASEADDR,
 295
 296     // Thread Local Storage.  When calling to an OS provided
 297     // thunk at the address from an earlier relocation.
 298     TLSCALL,
 299
 300     // Exception Handling helpers.
 301     EH_RETURN,
 302
 303     // SjLj exception handling setjmp.
 304     EH_SJLJ_SETJMP,
 305
 306     // SjLj exception handling longjmp.
 307     EH_SJLJ_LONGJMP,
 308
 309     // SjLj exception handling dispatch.
 310     EH_SJLJ_SETUP_DISPATCH,
 311
 312     /// Tail call return. See X86TargetLowering::LowerCall for
 313     /// the list of operands.
 314     TC_RETURN,
 315
 316     // Vector move to low scalar and zero higher vector elements.
 317     VZEXT_MOVL,
 318
 319     // Vector integer truncate.
 320     VTRUNC,
 321     // Vector integer truncate with unsigned/signed saturation.
 322     VTRUNCUS,
 323     VTRUNCS,
 324
 325     // Masked version of the above. Used when less than a 128-bit result is
 326     // produced since the mask only applies to the lower elements and can't
 327     // be represented by a select.
 328     // SRC, PASSTHRU, MASK
 329     VMTRUNC,
 330     VMTRUNCUS,
 331     VMTRUNCS,
 332
 333     // Vector FP extend.
 334     VFPEXT,
 335     VFPEXT_SAE,
 336     VFPEXTS,
 337     VFPEXTS_SAE,
 338
 339     // Vector FP round.
 340     VFPROUND,
 341     VFPROUND_RND,
 342     VFPROUNDS,
 343     VFPROUNDS_RND,
 344
 345     // Masked version of above. Used for v2f64->v4f32.
 346     // SRC, PASSTHRU, MASK
 347     VMFPROUND,
 348
 349     // 128-bit vector logical left / right shift
 350     VSHLDQ,
 351     VSRLDQ,
 352
 353     // Vector shift elements
 354     VSHL,
 355     VSRL,
 356     VSRA,
 357
 358     // Vector variable shift
 359     VSHLV,
 360     VSRLV,
 361     VSRAV,
 362
 363     // Vector shift elements by immediate
 364     VSHLI,
 365     VSRLI,
 366     VSRAI,
 367
 368     // Shifts of mask registers.
 369     KSHIFTL,
 370     KSHIFTR,
 371
 372     // Bit rotate by immediate
 373     VROTLI,
 374     VROTRI,
 375
 376     // Vector packed double/float comparison.
 377     CMPP,
 378
 379     // Vector integer comparisons.
 380     PCMPEQ,
 381     PCMPGT,
 382
 383     // v8i16 Horizontal minimum and position.
 384     PHMINPOS,
 385
 386     MULTISHIFT,
 387
 388     /// Vector comparison generating mask bits for fp and
 389     /// integer signed and unsigned data types.
 390     CMPM,
 391     // Vector mask comparison generating mask bits for FP values.
 392     CMPMM,
 393     // Vector mask comparison with SAE for FP values.
 394     CMPMM_SAE,
 395
 396     // Arithmetic operations with FLAGS results.
 397     ADD,
 398     SUB,
 399     ADC,
 400     SBB,
 401     SMUL,
 402     UMUL,
 403     OR,
 404     XOR,
 405     AND,
 406
 407     // Bit field extract.
 408     BEXTR,
 409     BEXTRI,
 410
 411     // Zero High Bits Starting with Specified Bit Position.
 412     BZHI,
 413
 414     // Parallel extract and deposit.
 415     PDEP,
 416     PEXT,
 417
 418     // X86-specific multiply by immediate.
 419     MUL_IMM,
 420
 421     // Vector sign bit extraction.
 422     MOVMSK,
 423
 424     // Vector bitwise comparisons.
 425     PTEST,
 426
 427     // Vector packed fp sign bitwise comparisons.
 428     TESTP,
 429
 430     // OR/AND test for masks.
 431     KORTEST,
 432     KTEST,
 433
 434     // ADD for masks.
 435     KADD,
 436
 437     // Several flavors of instructions with vector shuffle behaviors.
 438     // Saturated signed/unsigned packing.
 439     PACKSS,
 440     PACKUS,
 441     // Intra-lane alignr.
 442     PALIGNR,
 443     // AVX512 inter-lane alignr.
 444     VALIGN,
 445     PSHUFD,
 446     PSHUFHW,
 447     PSHUFLW,
 448     SHUFP,
 449     // VBMI2 Concat & Shift.
 450     VSHLD,
 451     VSHRD,
 452     VSHLDV,
 453     VSHRDV,
 454     // Shuffle Packed Values at 128-bit granularity.
 455     SHUF128,
 456     MOVDDUP,
 457     MOVSHDUP,
 458     MOVSLDUP,
 459     MOVLHPS,
 460     MOVHLPS,
 461     MOVSD,
 462     MOVSS,
 463     UNPCKL,
 464     UNPCKH,
 465     VPERMILPV,
 466     VPERMILPI,
 467     VPERMI,
 468     VPERM2X128,
 469
 470     // Variable Permute (VPERM).
 471     // Res = VPERMV MaskV, V0
 472     VPERMV,
 473
 474     // 3-op Variable Permute (VPERMT2).
 475     // Res = VPERMV3 V0, MaskV, V1
 476     VPERMV3,
 477
 478     // Bitwise ternary logic.
 479     VPTERNLOG,
 480     // Fix Up Special Packed Float32/64 values.
 481     VFIXUPIMM,
 482     VFIXUPIMM_SAE,
 483     VFIXUPIMMS,
 484     VFIXUPIMMS_SAE,
 485     // Range Restriction Calculation For Packed Pairs of Float32/64 values.
 486     VRANGE,
 487     VRANGE_SAE,
 488     VRANGES,
 489     VRANGES_SAE,
 490     // Reduce - Perform Reduction Transformation on scalar/packed FP.
 491     VREDUCE,
 492     VREDUCE_SAE,
 493     VREDUCES,
 494     VREDUCES_SAE,
 495     // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
 496     // Also used by the legacy (V)ROUND intrinsics where we mask out the
 497     // scaling part of the immediate.
 498     VRNDSCALE,
 499     VRNDSCALE_SAE,
 500     VRNDSCALES,
 501     VRNDSCALES_SAE,
 502     // Tests types of FP values for packed types.
 503     VFPCLASS,
 504     // Tests types of FP values for scalar types.
 505     VFPCLASSS,
 506
 507     // Broadcast (splat) scalar or element 0 of a vector. If the operand is
 508     // a vector, this node may change the vector length as part of the splat.
 509     VBROADCAST,
 510     // Broadcast mask to vector.
 511     VBROADCASTM,
 512
 513     /// SSE4A Extraction and Insertion.
 514     EXTRQI,
 515     INSERTQI,
 516
 517     // XOP arithmetic/logical shifts.
 518     VPSHA,
 519     VPSHL,
 520     // XOP signed/unsigned integer comparisons.
 521     VPCOM,
 522     VPCOMU,
 523     // XOP packed permute bytes.
 524     VPPERM,
 525     // XOP two source permutation.
 526     VPERMIL2,
 527
 528     // Vector multiply packed unsigned doubleword integers.
 529     PMULUDQ,
 530     // Vector multiply packed signed doubleword integers.
 531     PMULDQ,
 532     // Vector Multiply Packed Unsigned Integers with Round and Scale.
 533     MULHRS,
 534
 535     // Multiply and Add Packed Integers.
 536     VPMADDUBSW,
 537     VPMADDWD,
 538
 539     // AVX512IFMA multiply and add.
 540     // NOTE: These are different than the instruction and perform
 541     // op0 x op1 + op2.
 542     VPMADD52L,
 543     VPMADD52H,
 544
 545     // VNNI
 546     VPDPBUSD,
 547     VPDPBUSDS,
 548     VPDPWSSD,
 549     VPDPWSSDS,
 550
 551     // FMA nodes.
 552     // We use the target independent ISD::FMA for the non-inverted case.
 553     FNMADD,
 554     FMSUB,
 555     FNMSUB,
 556     FMADDSUB,
 557     FMSUBADD,
 558
 559     // FMA with rounding mode.
 560     FMADD_RND,
 561     FNMADD_RND,
 562     FMSUB_RND,
 563     FNMSUB_RND,
 564     FMADDSUB_RND,
 565     FMSUBADD_RND,
 566
 567     // Compress and expand.
 568     COMPRESS,
 569     EXPAND,
 570
 571     // Bits shuffle
 572     VPSHUFBITQMB,
 573
 574     // Convert Signed/Unsigned Integer to Floating-Point Value with rounding mode.
 575     SINT_TO_FP_RND,
 576     UINT_TO_FP_RND,
 577     SCALAR_SINT_TO_FP,
 578     SCALAR_UINT_TO_FP,
 579     SCALAR_SINT_TO_FP_RND,
 580     SCALAR_UINT_TO_FP_RND,
 581
 582     // Vector float/double to signed/unsigned integer.
 583     CVTP2SI,
 584     CVTP2UI,
 585     CVTP2SI_RND,
 586     CVTP2UI_RND,
 587     // Scalar float/double to signed/unsigned integer.
 588     CVTS2SI,
 589     CVTS2UI,
 590     CVTS2SI_RND,
 591     CVTS2UI_RND,
 592
 593     // Vector float/double to signed/unsigned integer with truncation.
 594     CVTTP2SI,
 595     CVTTP2UI,
 596     CVTTP2SI_SAE,
 597     CVTTP2UI_SAE,
 598     // Scalar float/double to signed/unsigned integer with truncation.
 599     CVTTS2SI,
 600     CVTTS2UI,
 601     CVTTS2SI_SAE,
 602     CVTTS2UI_SAE,
 603
 604     // Vector signed/unsigned integer to float/double.
 605     CVTSI2P,
 606     CVTUI2P,
 607
 608     // Masked versions of above. Used for v2f64->v4f32.
 609     // SRC, PASSTHRU, MASK
 610     MCVTP2SI,
 611     MCVTP2UI,
 612     MCVTTP2SI,
 613     MCVTTP2UI,
 614     MCVTSI2P,
 615     MCVTUI2P,
 616
 617     // Vector float to bfloat16.
 618     // Convert TWO packed single data to one packed BF16 data
 619     CVTNE2PS2BF16,
 620     // Convert packed single data to packed BF16 data
 621     CVTNEPS2BF16,
 622     // Masked version of above.
 623     // SRC, PASSTHRU, MASK
 624     MCVTNEPS2BF16,
 625
 626     // Dot product of BF16 pairs accumulated into
 627     // packed single precision.
 628     DPBF16PS,
 629
 630     // Save xmm argument registers to the stack, according to %al. An operator
 631     // is needed so that this can be expanded with control flow.
 632     VASTART_SAVE_XMM_REGS,
 633
 634     // Windows's _chkstk call to do stack probing.
 635     WIN_ALLOCA,
 636
 637     // For allocating variable amounts of stack space when using
 638     // segmented stacks. Check if the current stacklet has enough space, and
 639     // falls back to heap allocation if not.
 640     SEG_ALLOCA,
 641
 642     // For allocating stack space when using stack clash protector.
 643     // Allocation is performed by block, and each block is probed.
 644     PROBED_ALLOCA,
 645
 646     // Memory barriers.
 647     MEMBARRIER,
 648     MFENCE,
 649
 650     // Get a random integer and indicate whether it is valid in CF.
 651     RDRAND,
 652
 653     // Get a NIST SP800-90B & C compliant random integer and
 654     // indicate whether it is valid in CF.
 655     RDSEED,
 656
 657     // Protection keys
 658     // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
 659     // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
 660     // value for ECX.
 661     RDPKRU,
 662     WRPKRU,
 663
 664     // SSE42 string comparisons.
 665     // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
 666     // will emit one or two instructions based on which results are used. If
 667     // flags and index/mask are both used, this allows us to use a single
 668     // instruction since we won't have to pick an opcode for flags. Instead we
 669     // can rely on the DAG to CSE everything and decide at isel.
 670     PCMPISTR,
 671     PCMPESTR,
 672
 673     // Test if in transactional execution.
 674     XTEST,
 675
 676     // ERI instructions.
 677     RSQRT28,
 678     RSQRT28_SAE,
 679     RSQRT28S,
 680     RSQRT28S_SAE,
 681     RCP28,
 682     RCP28_SAE,
 683     RCP28S,
 684     RCP28S_SAE,
 685     EXP2,
 686     EXP2_SAE,
 687
 688     // Conversions between float and half-float.
 689     CVTPS2PH,
 690     CVTPH2PS,
 691     CVTPH2PS_SAE,
 692
 693     // Masked version of above.
 694     // SRC, RND, PASSTHRU, MASK
 695     MCVTPS2PH,
 696
 697     // Galois Field Arithmetic Instructions
 698     GF2P8AFFINEINVQB,
 699     GF2P8AFFINEQB,
 700     GF2P8MULB,
 701
 702     // LWP insert record.
 703     LWPINS,
 704
 705     // User level wait
 706     UMWAIT,
 707     TPAUSE,
 708
 709     // Enqueue Stores Instructions
 710     ENQCMD,
 711     ENQCMDS,
 712
 713     // For avx512-vp2intersect
 714     VP2INTERSECT,
 715
 716     // User level interrupts - testui
 717     TESTUI,
 718
 719     /// X86 strict FP compare instructions.
 720     STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
 721     STRICT_FCMPS,
 722
 723     // Vector packed double/float comparison.
 724     STRICT_CMPP,
 725
 726     /// Vector comparison generating mask bits for fp and
 727     /// integer signed and unsigned data types.
 728     STRICT_CMPM,
 729
 730     // Vector float/double to signed/unsigned integer with truncation.
 731     STRICT_CVTTP2SI,
 732     STRICT_CVTTP2UI,
 733
 734     // Vector FP extend.
 735     STRICT_VFPEXT,
 736
 737     // Vector FP round.
 738     STRICT_VFPROUND,
 739
 740     // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
 741     // Also used by the legacy (V)ROUND intrinsics where we mask out the
 742     // scaling part of the immediate.
 743     STRICT_VRNDSCALE,
 744
 745     // Vector signed/unsigned integer to float/double.
 746     STRICT_CVTSI2P,
 747     STRICT_CVTUI2P,
 748
 749     // Strict FMA nodes.
 750     STRICT_FNMADD,
 751     STRICT_FMSUB,
 752     STRICT_FNMSUB,
 753
 754     // Conversions between float and half-float.
 755     STRICT_CVTPS2PH,
 756     STRICT_CVTPH2PS,
 757
 758     // WARNING: Only add nodes here if they are strict FP nodes. Non-memory and
 759     // non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE.
 760
 761     // Compare and swap.
 762     LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
 763     LCMPXCHG8_DAG,
 764     LCMPXCHG16_DAG,
 765     LCMPXCHG16_SAVE_RBX_DAG,
 766
 767     /// LOCK-prefixed arithmetic read-modify-write instructions.
 768     /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
 769     LADD,
 770     LSUB,
 771     LOR,
 772     LXOR,
 773     LAND,
 774
 775     // Load, scalar_to_vector, and zero extend.
 776     VZEXT_LOAD,
 777
 778     // extract_vector_elt, store.
 779     VEXTRACT_STORE,
 780
 781     // scalar broadcast from memory.
 782     VBROADCAST_LOAD,
 783
 784     // subvector broadcast from memory.
 785     SUBV_BROADCAST_LOAD,
 786
 787     // Store FP control word into i16 memory.
 788     FNSTCW16m,
 789
 790     // Load FP control word from i16 memory.
 791     FLDCW16m,
 792
 793     /// This instruction implements FP_TO_SINT with the
 794     /// integer destination in memory and a FP reg source.  This corresponds
 795     /// to the X86::FIST*m instructions and the rounding mode change stuff. It
 796     /// has two inputs (token chain and address) and two outputs (int value
 797     /// and token chain). Memory VT specifies the type to store to.
 798     FP_TO_INT_IN_MEM,
 799
 800     /// This instruction implements SINT_TO_FP with the
 801     /// integer source in memory and FP reg result.  This corresponds to the
 802     /// X86::FILD*m instructions. It has two inputs (token chain and address)
 803     /// and two outputs (FP value and token chain). The integer source type is
 804     /// specified by the memory VT.
 805     FILD,
 806
 807     /// This instruction implements a fp->int store from FP stack
 808     /// slots. This corresponds to the fist instruction. It takes a
 809     /// chain operand, value to store, address, and glue. The memory VT
 810     /// specifies the type to store as.
 811     FIST,
 812
 813     /// This instruction implements an extending load to FP stack slots.
 814     /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
 815     /// operand, and ptr to load from. The memory VT specifies the type to
 816     /// load from.
 817     FLD,
 818
 819     /// This instruction implements a truncating store from FP stack
 820     /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
 821     /// chain operand, value to store, address, and glue. The memory VT
 822     /// specifies the type to store as.
 823     FST,
 824
 825     /// These instructions grab the address of the next argument
 826     /// from a va_list. (reads and modifies the va_list in memory)
 827     VAARG_64,
 828     VAARG_X32,
 829
 830     // Vector truncating store with unsigned/signed saturation
 831     VTRUNCSTOREUS,
 832     VTRUNCSTORES,
 833     // Vector truncating masked store with unsigned/signed saturation
 834     VMTRUNCSTOREUS,
 835     VMTRUNCSTORES,
 836
 837     // X86 specific gather and scatter
 838     MGATHER,
 839     MSCATTER,
 840
 841     // Key locker nodes that produce flags.
 842     AESENC128KL,
 843     AESDEC128KL,
 844     AESENC256KL,
 845     AESDEC256KL,
 846     AESENCWIDE128KL,
 847     AESDECWIDE128KL,
 848     AESENCWIDE256KL,
 849     AESDECWIDE256KL,
 850
 851     // WARNING: Do not add anything in the end unless you want the node to
 852     // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
 853     // opcodes will be thought of as target memory ops!
 854   };
 855   } // end namespace X86ISD
 856
 857   namespace X86 {
 858     /// Current rounding mode is represented in bits 11:10 of FPSR. These
 859     /// values are the same as the corresponding rounding mode constants used
 860     /// in glibc.
 861     enum RoundingMode {
 862       rmToNearest   = 0,        // FE_TONEAREST
 863       rmDownward    = 1 << 10,  // FE_DOWNWARD
 864       rmUpward      = 2 << 10,  // FE_UPWARD
 865       rmTowardZero  = 3 << 10,  // FE_TOWARDZERO
 866       rmMask        = 3 << 10   // Bit mask selecting rounding mode (bits 11:10)
 867     };
 868   } // end namespace X86
 869
 870   /// Define some predicates that are used for node matching.
 871   namespace X86 {
 872     /// Returns true if Elt is a constant zero or floating point constant +0.0.
 873     bool isZeroNode(SDValue Elt);
 874
 875     /// Returns true if the given offset can
 876     /// fit into the displacement field of the instruction.
 877     bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
 878                                       bool hasSymbolicDisplacement);
 879
 880     /// Determines whether the callee is required to pop its
 881     /// own arguments. Callee pop is necessary to support tail calls.
 882     bool isCalleePop(CallingConv::ID CallingConv,
 883                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
 884
 885     /// If Op is a constant whose elements are all the same constant or
 886     /// undefined, return true and return the constant value in \p SplatVal.
 887     /// If we have undef bits that don't cover an entire element, we treat these
 888     /// as zero if AllowPartialUndefs is set, else we fail and return false.
 889     bool isConstantSplat(SDValue Op, APInt &SplatVal,
 890                          bool AllowPartialUndefs = true);
 891   } // end namespace X86
 892
 893   //===--------------------------------------------------------------------===//
 894   //  X86 Implementation of the TargetLowering interface
 895   class X86TargetLowering final : public TargetLowering {
 896   public:
 897     explicit X86TargetLowering(const X86TargetMachine &TM,
 898                                const X86Subtarget &STI);
 899
 900     unsigned getJumpTableEncoding() const override;
 901     bool useSoftFloat() const override;
 902
 903     void markLibCallAttributes(MachineFunction *MF, unsigned CC,
 904                                ArgListTy &Args) const override;
 905
 906     MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
 907       return MVT::i8;
 908     }
 909
 910     const MCExpr *
 911     LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
 912                               const MachineBasicBlock *MBB, unsigned uid,
 913                               MCContext &Ctx) const override;
 914
 915     /// Returns relocation base for the given PIC jumptable.
 916     SDValue getPICJumpTableRelocBase(SDValue Table,
 917                                      SelectionDAG &DAG) const override;
 918     const MCExpr *
 919     getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
 920                                  unsigned JTI, MCContext &Ctx) const override;
 921
 922     /// Return the desired alignment for ByVal aggregate
 923     /// function arguments in the caller parameter area. For X86, aggregates
 924     /// that contain SSE vectors are placed at 16-byte boundaries while the
 925     /// rest are at 4-byte boundaries.
 926     unsigned getByValTypeAlignment(Type *Ty,
 927                                    const DataLayout &DL) const override;
 928
 929     EVT getOptimalMemOpType(const MemOp &Op,
 930                             const AttributeList &FuncAttributes) const override;
 931
 932     /// Returns true if it's safe to use load / store of the
 933     /// specified type to expand memcpy / memset inline. This is mostly true
 934     /// for all types except for some special cases. For example, on X86
 935     /// targets without SSE2 f64 load / store are done with fldl / fstpl which
 936     /// also does type conversion. Note the specified type doesn't have to be
 937     /// legal as the hook is used before type legalization.
 938     bool isSafeMemOpType(MVT VT) const override;
 939
 940     /// Returns true if the target allows unaligned memory accesses of the
 941     /// specified type. Returns whether it is "fast" in the last argument.
 942     bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
 943                                         MachineMemOperand::Flags Flags,
 944                                         bool *Fast) const override;
 945
 946     /// Provide custom lowering hooks for some operations.
 947     ///
 948     SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
 949
 950     /// Replace the results of node with an illegal result
 951     /// type with new values built out of custom code.
 952     ///
 953     void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
 954                             SelectionDAG &DAG) const override;
 955
 956     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 957
 958     /// Return true if the target has native support for
 959     /// the specified value type and it is 'desirable' to use the type for the
 960     /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
 961     /// instruction encodings are longer and some i16 instructions are slow.
 962     bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
 963
 964     /// Return true if the target has native support for the
 965     /// specified value type and it is 'desirable' to use the type. e.g. On x86
 966     /// i16 is legal, but undesirable since i16 instruction encodings are longer
 967     /// and some i16 instructions are slow.
 968     bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
 969
 970     /// Return the newly negated expression if the cost is not expensive and
 971     /// set the cost in \p Cost to indicate whether it is cheaper or neutral to
 972     /// do the negation.
 973     SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
 974                                  bool LegalOperations, bool ForCodeSize,
 975                                  NegatibleCost &Cost,
 976                                  unsigned Depth) const override;
 977
 978     MachineBasicBlock *
 979     EmitInstrWithCustomInserter(MachineInstr &MI,
 980                                 MachineBasicBlock *MBB) const override;
 981
 982     /// This method returns the name of a target specific DAG node.
 983     const char *getTargetNodeName(unsigned Opcode) const override;
 984
 985     /// Do not merge vector stores after legalization because that may conflict
 986     /// with x86-specific store splitting optimizations.
 987     bool mergeStoresAfterLegalization(EVT MemVT) const override {
 988       return !MemVT.isVector();
 989     }
 990
 991     bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
 992                           const SelectionDAG &DAG) const override;
 993
 994     bool isCheapToSpeculateCttz() const override;
 995
 996     bool isCheapToSpeculateCtlz() const override;
 997
 998     bool isCtlzFast() const override;
 999
1000     bool hasBitPreservingFPLogic(EVT VT) const override {
1001       return VT.isVector() || VT == MVT::f64 || VT == MVT::f32;
1002     }
1003
1004     bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
1005       // If the pair to store is a mixture of float and int values, we will
1006       // save two bitwise instructions and one float-to-int instruction and
1007       // increase one store instruction. There is potentially a more
1008       // significant benefit because it avoids the float->int domain switch
1009       // for input value. So It is more likely a win.
1010       if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
1011           (LTy.isInteger() && HTy.isFloatingPoint()))
1012         return true;
1013       // If the pair only contains int values, we will save two bitwise
1014       // instructions and increase one store instruction (costing one more
1015       // store buffer). Since the benefit is more blurred so we leave
1016       // such pair out until we get testcase to prove it is a win.
1017       return false;
1018     }
1019
1020     bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
1021
1022     bool hasAndNotCompare(SDValue Y) const override;
1023
1024     bool hasAndNot(SDValue Y) const override;
1025
1026     bool hasBitTest(SDValue X, SDValue Y) const override;
1027
1028     bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
1029         SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
1030         unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1031         SelectionDAG &DAG) const override;
1032
1033     bool shouldFoldConstantShiftPairToMask(const SDNode *N,
1034                                            CombineLevel Level) const override;
1035
1036     bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
1037
1038     bool
1039     shouldTransformSignedTruncationCheck(EVT XVT,
1040                                          unsigned KeptBits) const override {
1041       // For vectors, we don't have a preference..
1042       if (XVT.isVector())
1043         return false;
1044
1045       auto VTIsOk = [](EVT VT) -> bool {
1046         return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
1047                VT == MVT::i64;
1048       };
1049
1050       // We are ok with KeptBitsVT being byte/word/dword, what MOVS supports.
1051       // XVT will be larger than KeptBitsVT.
1052       MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
1053       return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
1054     }
1055
1056     bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
1057
1058     bool shouldSplatInsEltVarIndex(EVT VT) const override;
1059
1060     bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
1061       return VT.isScalarInteger();
1062     }
1063
1064     /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
1065     MVT hasFastEqualityCompare(unsigned NumBits) const override;
1066
1067     /// Return the value type to use for ISD::SETCC.
1068     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
1069                            EVT VT) const override;
1070
1071     bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
1072                                       const APInt &DemandedElts,
1073                                       TargetLoweringOpt &TLO) const override;
1074
1075     /// Determine which bits of \p Op are known to be either zero or one and
1076     /// return them in the \p Known bitsets.
1077     void computeKnownBitsForTargetNode(const SDValue Op,
1078                                        KnownBits &Known,
1079                                        const APInt &DemandedElts,
1080                                        const SelectionDAG &DAG,
1081                                        unsigned Depth = 0) const override;
1082
1083     /// Determine the number of bits in the operation that are sign bits.
1084     unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
1085                                              const APInt &DemandedElts,
1086                                              const SelectionDAG &DAG,
1087                                              unsigned Depth) const override;
1088
1089     bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
1090                                                  const APInt &DemandedElts,
1091                                                  APInt &KnownUndef,
1092                                                  APInt &KnownZero,
1093                                                  TargetLoweringOpt &TLO,
1094                                                  unsigned Depth) const override;
1095
1096     bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,
1097                                                     const APInt &DemandedElts,
1098                                                     unsigned MaskIndex,
1099                                                     TargetLoweringOpt &TLO,
1100                                                     unsigned Depth) const;
1101
1102     bool SimplifyDemandedBitsForTargetNode(SDValue Op,
1103                                            const APInt &DemandedBits,
1104                                            const APInt &DemandedElts,
1105                                            KnownBits &Known,
1106                                            TargetLoweringOpt &TLO,
1107                                            unsigned Depth) const override;
1108
1109     SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
1110         SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
1111         SelectionDAG &DAG, unsigned Depth) const override;
1112
1113     const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
1114
1115     SDValue unwrapAddress(SDValue N) const override;
1116
1117     SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
1118
1119     bool ExpandInlineAsm(CallInst *CI) const override;
1120
1121     ConstraintType getConstraintType(StringRef Constraint) const override;
1122
1123     /// Examine the constraint string and the operand type to determine a
1124     /// weight value. The operand object must already carry its operand type.
1125     ConstraintWeight
1126     getSingleConstraintMatchWeight(AsmOperandInfo &info,
1127                                    const char *constraint) const override;
1128
1129     const char *LowerXConstraint(EVT ConstraintVT) const override;
1130
1131     /// Lower the operand \p Op into the \p Ops vector according to
1132     /// \p Constraint. If the operand is invalid for the constraint, nothing
1133     /// is added to \p Ops.
1134     void LowerAsmOperandForConstraint(SDValue Op,
1135                                       std::string &Constraint,
1136                                       std::vector<SDValue> &Ops,
1137                                       SelectionDAG &DAG) const override;
1138
1139     unsigned
1140     getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
1141       return ConstraintCode != "v"
1142                  ? TargetLowering::getInlineAsmMemConstraint(ConstraintCode)
1143                  : InlineAsm::Constraint_v;
1144     }
1145
1146     /// Handle Lowering flag assembly outputs.
1147     SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
1148                                         const SDLoc &DL,
1149                                         const AsmOperandInfo &Constraint,
1150                                         SelectionDAG &DAG) const override;
1151
1152     /// Given a physical register constraint
1153     /// (e.g. {edx}), return the register number and the register class for the
1154     /// register.  This should only be used for C_Register constraints.  On
1155     /// error, this returns a register number of 0.
1156     std::pair<unsigned, const TargetRegisterClass *>
1157     getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
1158                                  StringRef Constraint, MVT VT) const override;
1159
1160     /// Return true if the addressing mode represented
1161     /// by AM is legal for this target, for a load/store of the specified type.
1162     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
1163                                Type *Ty, unsigned AS,
1164                                Instruction *I = nullptr) const override;
1165
1166     /// Return true if the specified immediate is legal
1167     /// icmp immediate, that is the target has icmp instructions which can
1168     /// compare a register against the immediate without having to materialize
1169     /// the immediate into a register.
1170     bool isLegalICmpImmediate(int64_t Imm) const override;
1171
1172     /// Return true if the specified immediate is legal
1173     /// add immediate, that is the target has add instructions which can
1174     /// add a register and the immediate without having to materialize
1175     /// the immediate into a register.
1176     bool isLegalAddImmediate(int64_t Imm) const override;
1177
1178     bool isLegalStoreImmediate(int64_t Imm) const override;
1179
1180     /// Return the cost of the scaling factor used in the addressing
1181     /// mode represented by AM for this target, for a load/store
1182     /// of the specified type.
1183     /// If the AM is supported, the return value must be >= 0.
1184     /// If the AM is not supported, it returns a negative value.
1185     InstructionCost getScalingFactorCost(const DataLayout &DL,
1186                                          const AddrMode &AM, Type *Ty,
1187                                          unsigned AS) const override;
1188
1189     /// This is used to enable splatted operand transforms for vector shifts
1190     /// and vector funnel shifts.
1191     bool isVectorShiftByScalarCheap(Type *Ty) const override;
1192
1193     /// Add x86-specific opcodes to the default list.
1194     bool isBinOp(unsigned Opcode) const override;
1195
1196     /// Returns true if the opcode is a commutative binary operation.
1197     bool isCommutativeBinOp(unsigned Opcode) const override;
1198
1199     /// Return true if it's free to truncate a value of
1200     /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
1201     /// register EAX to i16 by referencing its sub-register AX.
1202     bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
1203     bool isTruncateFree(EVT VT1, EVT VT2) const override;
1204
1205     bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
1206
1207     /// Return true if any actual instruction that defines a
1208     /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
1209     /// register. This does not necessarily include registers defined in
1210     /// unknown ways, such as incoming arguments, or copies from unknown
1211     /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
1212     /// does not necessarily apply to truncate instructions. e.g. on x86-64,
1213     /// all instructions that define 32-bit values implicit zero-extend the
1214     /// result out to 64 bits.
1215     bool isZExtFree(Type *Ty1, Type *Ty2) const override;
1216     bool isZExtFree(EVT VT1, EVT VT2) const override;
1217     bool isZExtFree(SDValue Val, EVT VT2) const override;
1218
1219     bool shouldSinkOperands(Instruction *I,
1220                             SmallVectorImpl<Use *> &Ops) const override;
1221     bool shouldConvertPhiType(Type *From, Type *To) const override;
1222
1223     /// Return true if folding a vector load into \p ExtVal (a sign, zero, or
1224     /// any extend node) is profitable.
1225     bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
1226
1227     /// Return true if an FMA operation is faster than a pair of fmul and fadd
1228     /// instructions. fmuladd intrinsics will be expanded to FMAs when this
1229     /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
1230     bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
1231                                     EVT VT) const override;
1232
1233     /// Return true if it's profitable to narrow
1234     /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
1235     /// from i32 to i8 but not from i32 to i16.
1236     bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
1237
1238     /// Given an intrinsic, checks if on the target the intrinsic will need to map
1239     /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1240     /// true and stores the intrinsic information into the IntrinsicInfo that was
1241     /// passed to the function.
1242     bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
1243                             MachineFunction &MF,
1244                             unsigned Intrinsic) const override;
1245
1246     /// Returns true if the target can instruction select the
1247     /// specified FP immediate natively. If false, the legalizer will
1248     /// materialize the FP immediate as a load from a constant pool.
1249     bool isFPImmLegal(const APFloat &Imm, EVT VT,
1250                       bool ForCodeSize) const override;
1251
1252     /// Targets can use this to indicate that they only support *some*
1253     /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1254     /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1255     /// be legal.
1256     bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1257
1258     /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1259     /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1260     /// constant pool entry.
1261     bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1262
1263     /// Returns true if lowering to a jump table is allowed.
1264     bool areJTsAllowed(const Function *Fn) const override;
1265
1266     /// Return true if instruction selection should try to shrink an FP
1267     /// constant of type \p VT to a smaller type in order to save space
1268     /// and/or reduce runtime.
1269     bool ShouldShrinkFPConstant(EVT VT) const override {
1270       // Long double constants are always worth shrinking because fldt is very
1271       // slow. Otherwise shrink only without SSE2, since with SSE2 a cvtss2sd
1272       // is more expensive than a straight movsd.
1273       return VT == MVT::f80 || !X86ScalarSSEf64;
1274     }
1275
1276     /// Return true if we believe it is correct and profitable to reduce the
1277     /// load node to a smaller type.
1278     bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1279                                EVT NewVT) const override;
1280
1281     /// Return true if scalar FP values of type \p VT are computed in an SSE
1282     /// register rather than on the X87 floating point stack.
1283     bool isScalarFPTypeInSSEReg(EVT VT) const {
1284       return (X86ScalarSSEf32 && VT == MVT::f32) || // f32 needs SSE1
1285              (X86ScalarSSEf64 && VT == MVT::f64);   // f64 needs SSE2
1286     }
1287
1288     /// Returns true if it is beneficial to convert a load of a constant
1289     /// to just the constant itself.
1290     bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1291                                            Type *Ty) const override;
1292
1293     bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;
1294
1295     bool convertSelectOfConstantsToMath(EVT VT) const override;
1296
1297     bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
1298                                 SDValue C) const override;
1299
1300     /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1301     /// with this index.
1302     bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1303                                  unsigned Index) const override;
1304
1305     /// Scalar ops always have equal or better analysis/performance/power than
1306     /// the vector equivalent, so this always makes sense if the scalar op is
1307     /// supported.
1308     bool shouldScalarizeBinop(SDValue) const override;
1309
1310     /// Extracting an f32/f64 scalar from element 0 of a vector is free.
1311     bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1312       const EVT ScalarVT = VT.getScalarType();
1313       return Index == 0 && (ScalarVT == MVT::f32 || ScalarVT == MVT::f64);
1314     }
1315
1316     /// Overflow nodes should get combined/lowered to optimal instructions
1317     /// (they should allow eliminating explicit compares by getting flags from
1318     /// math ops).
1319     bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
1320                               bool MathUsed) const override;
1321
1322     bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
1323                                       unsigned AddrSpace) const override {
1324       // Replacing at least 3 scalar stores still reduces the instruction
1325       // count, even after accounting for the added vector constant load.
1326       return NumElem >= 3;
1327     }
1328
1329     bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1330                                  const SelectionDAG &DAG,
1331                                  const MachineMemOperand &MMO) const override;
1332
1333     /// Intel processors have a unified instruction and data cache.
1334     const char *getClearCacheBuiltinName() const override {
1335       return nullptr; // No cache-clearing builtin is needed.
1336     }
1337
1338     Register getRegisterByName(const char *RegName, LLT VT,
1339                                const MachineFunction &MF) const override;
1340
1341     /// If a physical register, this returns the register that receives the
1342     /// exception address on entry to an EH pad.
1343     Register
1344     getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1345
1346     /// If a physical register, this returns the register that receives the
1347     /// exception typeid on entry to a landing pad.
1348     Register
1349     getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1350
1351     bool needsFixedCatchObjects() const override;
1352
1353     /// This method returns a target specific FastISel object,
1354     /// or null if the target does not support "fast" ISel.
1355     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1356                              const TargetLibraryInfo *libInfo) const override;
1357
1358     /// If the target has a standard location for the stack protector cookie,
1359     /// returns the address of that location. Otherwise, returns nullptr.
1360     Value *getIRStackGuard(IRBuilderBase &IRB) const override;
1361
1362     bool useLoadStackGuardNode() const override;
1363     bool useStackGuardXorFP() const override;
1364     void insertSSPDeclarations(Module &M) const override;
1365     Value *getSDagStackGuard(const Module &M) const override;
1366     Function *getSSPStackGuardCheck(const Module &M) const override;
1367     SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1368                                 const SDLoc &DL) const override;
1369
1370
1371     /// Return the location of the SafeStack pointer as a Value*. This covers
1372     /// targets that store the pointer at a fixed offset in some non-standard
1373     /// address space. NOTE(review): confirm null/fallback behavior in the .cpp.
1374     Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;
1375
1376     std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL,
1377                                           SDValue Chain, SDValue Pointer,
1378                                           MachinePointerInfo PtrInfo,
1379                                           Align Alignment,
1380                                           SelectionDAG &DAG) const;
1381
1382     /// Customize the preferred legalization strategy for certain types.
1383     LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1384
1385     bool softPromoteHalfType() const override { return true; }
1386
1387     MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1388                                       EVT VT) const override;
1389
1390     unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1391                                            CallingConv::ID CC,
1392                                            EVT VT) const override;
1393
1394     unsigned getVectorTypeBreakdownForCallingConv(
1395         LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
1396         unsigned &NumIntermediates, MVT &RegisterVT) const override;
1397
1398     bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1399
1400     bool supportSwiftError() const override;
1401
1402     bool hasStackProbeSymbol(MachineFunction &MF) const override;
1403     bool hasInlineStackProbe(MachineFunction &MF) const override;
1404     StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
1405
1406     unsigned getStackProbeSize(MachineFunction &MF) const;
1407
1408     bool hasVectorBlend() const override { return true; }
1409
1410     unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1411
1412     /// Lower interleaved load(s) into target specific
1413     /// instructions/intrinsics.
1414     bool lowerInterleavedLoad(LoadInst *LI,
1415                               ArrayRef<ShuffleVectorInst *> Shuffles,
1416                               ArrayRef<unsigned> Indices,
1417                               unsigned Factor) const override;
1418
1419     /// Lower interleaved store(s) into target specific
1420     /// instructions/intrinsics.
1421     bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1422                                unsigned Factor) const override;
1423
1424     SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
1425                                    SDValue Addr,
1426                                    SelectionDAG &DAG) const override;
1427
1428     Align getPrefLoopAlignment(MachineLoop *ML) const override;
1429
1430   protected:
1431     std::pair<const TargetRegisterClass *, uint8_t>
1432     findRepresentativeClass(const TargetRegisterInfo *TRI,
1433                             MVT VT) const override;
1434
1435   private:
1436     /// Keep a reference to the X86Subtarget around so that we can
1437     /// make the right decision when generating code for different targets.
1438     const X86Subtarget &Subtarget;
1439
1440     /// Select between SSE or x87 floating point ops.
1441     /// When SSE is available, use it for f32 operations.
1442     /// When SSE2 is available, use it for f64 operations.
1443     bool X86ScalarSSEf32;
1444     bool X86ScalarSSEf64;
1445
1446     /// A list of legal FP immediates.
1447     std::vector<APFloat> LegalFPImmediates;
1448
1449     /// Record \p Imm in LegalFPImmediates, i.e. mark it as an FP immediate
1450     /// that this x86 target can instruction select natively.
1451     void addLegalFPImmediate(const APFloat &Imm) {
1452       LegalFPImmediates.emplace_back(Imm);
1453     }
1454
1455     SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
1456                             CallingConv::ID CallConv, bool isVarArg,
1457                             const SmallVectorImpl<ISD::InputArg> &Ins,
1458                             const SDLoc &dl, SelectionDAG &DAG,
1459                             SmallVectorImpl<SDValue> &InVals,
1460                             uint32_t *RegMask) const;
1461     SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1462                              const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1463                              const SDLoc &dl, SelectionDAG &DAG,
1464                              const CCValAssign &VA, MachineFrameInfo &MFI,
1465                              unsigned i) const;
1466     SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1467                              const SDLoc &dl, SelectionDAG &DAG,
1468                              const CCValAssign &VA,
1469                              ISD::ArgFlagsTy Flags, bool isByval) const;
1470
1471     // Call lowering helpers.
1472
1473     /// Check whether the call is eligible for tail call optimization. Targets
1474     /// that want to do tail call optimization should implement this function.
1475     bool IsEligibleForTailCallOptimization(SDValue Callee,
1476                                            CallingConv::ID CalleeCC,
1477                                            bool isVarArg,
1478                                            bool isCalleeStructRet,
1479                                            bool isCallerStructRet,
1480                                            Type *RetTy,
1481                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
1482                                     const SmallVectorImpl<SDValue> &OutVals,
1483                                     const SmallVectorImpl<ISD::InputArg> &Ins,
1484                                            SelectionDAG& DAG) const;
1485     SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1486                                     SDValue Chain, bool IsTailCall,
1487                                     bool Is64Bit, int FPDiff,
1488                                     const SDLoc &dl) const;
1489
1490     unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1491                                          SelectionDAG &DAG) const;
1492
1493     unsigned getAddressSpace() const;
1494
1495     SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
1496                             SDValue &Chain) const;
1497     SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;
1498
1499     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1500     SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1501     SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1502     SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1503
1504     unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
1505                                   const unsigned char OpFlags = 0) const;
1506     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1507     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1508     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1509     SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1510     SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1511
1512     /// Creates target global address or external symbol nodes for calls or
1513     /// other uses.
1514     SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1515                                   bool ForCall) const;
1516
1517     SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1518     SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1519     SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1520     SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1521     SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
1522     SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
1523     SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1524     SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1525     SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1526     SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
1527     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1528     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1529     SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1530     SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1531     SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1532     SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1533     SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1534     SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1535     SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
1536     SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1537     SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1538     SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
1539     SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1540     SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
1541     SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
1542     SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
1543     SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
1544     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1545     SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
1546     SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
1547     SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
1548
1549     SDValue
1550     LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1551                          const SmallVectorImpl<ISD::InputArg> &Ins,
1552                          const SDLoc &dl, SelectionDAG &DAG,
1553                          SmallVectorImpl<SDValue> &InVals) const override;
1554     SDValue LowerCall(CallLoweringInfo &CLI,
1555                       SmallVectorImpl<SDValue> &InVals) const override;
1556
1557     SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1558                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1559                         const SmallVectorImpl<SDValue> &OutVals,
1560                         const SDLoc &dl, SelectionDAG &DAG) const override;
1561
1562     bool supportSplitCSR(MachineFunction *MF) const override {
1563       return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
1564              MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
1565     }
1566     void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1567     void insertCopiesSplitCSR(
1568       MachineBasicBlock *Entry,
1569       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1570
1571     bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1572
1573     bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1574
         /// Overrides a base-class hook that maps a value type plus an extension
         /// opcode to an EVT. NOTE(review): presumably the type a return value of
         /// type \p VT is widened to under \p ExtendKind -- confirm in
         /// TargetLowering.h.
1575     EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1576                             ISD::NodeType ExtendKind) const override;
1577
         /// Overrides a base-class predicate over the calling convention, the
         /// vararg flag and the outgoing-value descriptors in \p Outs.
         /// NOTE(review): presumably false means the return values cannot be
         /// returned directly under \p CallConv -- confirm in TargetLowering.h.
1578     bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1579                         bool isVarArg,
1580                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1581                         LLVMContext &Context) const override;
1582
         /// Overrides a base-class hook; returns a pointer to MCPhysReg for the
         /// calling convention \p CC. NOTE(review): presumably a zero-terminated
         /// array with static lifetime -- confirm in X86ISelLowering.cpp.
1583     const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1584
         /// IR-level atomic expansion queries (all overrides). The load and RMW
         /// variants return an AtomicExpansionKind; the store variant returns a
         /// bool. NOTE(review): presumably consulted by the AtomicExpand pass to
         /// pick an expansion strategy -- confirm in TargetLowering.h.
1585     TargetLoweringBase::AtomicExpansionKind
1586     shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
1587     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1588     TargetLoweringBase::AtomicExpansionKind
1589     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1590
         /// Overrides a base-class hook that takes an atomicrmw instruction and
         /// returns a LoadInst pointer. NOTE(review): presumably replaces an RMW
         /// that leaves memory unchanged with a fence plus a load, and may return
         /// null when no replacement is made -- confirm in X86ISelLowering.cpp.
1591     LoadInst *
1592     lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1593
         /// Overrides of two base-class predicates on atomic store / load
         /// instructions. NOTE(review): judging by the names, true requests that
         /// the atomic access be built as an ordinary StoreSDNode / LoadSDNode --
         /// confirm in TargetLowering.h.
1594     bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override;
1595     bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override;
1596
         /// Target-local predicate (not an override) on a memory type.
         /// NOTE(review): presumably true when an atomic access of \p MemType has
         /// to use cmpxchg8b/cmpxchg16b -- confirm in X86ISelLowering.cpp.
1597     bool needsCmpXchgNb(Type *MemType) const;
1598
         /// Target-local helper (not an override) for setjmp/longjmp handling; it
         /// returns nothing. NOTE(review): presumably \p FI is a frame index and
         /// \p DispatchBB the dispatch block recorded by the entry block --
         /// confirm in X86ISelLowering.cpp.
1599     void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1600                                 MachineBasicBlock *DispatchBB, int FI) const;
1601
1602     /// Utility function to emit the low-level va_arg code for X86-64.
         /// NOTE(review): the returned MachineBasicBlock is presumably the block
         /// in which emission should continue -- confirm in X86ISelLowering.cpp.
1603     MachineBasicBlock *
1604     EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;
1605
1606     /// Utility function to lower a pair of select pseudo-instructions together.
         /// NOTE(review): presumably \p MI2 is a select that consumes the result
         /// of \p MI1 (a "cascade"), and the returned block is where emission
         /// continues -- confirm in X86ISelLowering.cpp.
1607     MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
1608                                                  MachineInstr &MI2,
1609                                                  MachineBasicBlock *BB) const;
1610
         /// The EmitLowered* helpers in this run each take one MachineInstr and a
         /// MachineBasicBlock and return a MachineBasicBlock pointer; none is an
         /// override. NOTE(review): presumably each expands the pseudo-instruction
         /// named in its identifier and returns the block in which emission
         /// continues -- confirm in X86ISelLowering.cpp.
1611     MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1612                                          MachineBasicBlock *BB) const;
1613
1614     MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1615                                            MachineBasicBlock *BB) const;
1616
1617     MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1618                                             MachineBasicBlock *BB) const;
1619
1620     MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
1621                                                MachineBasicBlock *BB) const;
1622
1623     MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
1624                                           MachineBasicBlock *BB) const;
1625
1626     MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1627                                           MachineBasicBlock *BB) const;
1628
1629     MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
1630                                                 MachineBasicBlock *BB) const;
1631
         /// setjmp/longjmp emission helpers. All take the MachineInstr and its
         /// MachineBasicBlock; emitSetJmpShadowStackFix returns nothing, the
         /// others return a MachineBasicBlock pointer. NOTE(review): the
         /// *ShadowStackFix variants presumably adjust the shadow stack around
         /// setjmp/longjmp -- confirm in X86ISelLowering.cpp.
1632     MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1633                                         MachineBasicBlock *MBB) const;
1634
1635     void emitSetJmpShadowStackFix(MachineInstr &MI,
1636                                   MachineBasicBlock *MBB) const;
1637
1638     MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1639                                          MachineBasicBlock *MBB) const;
1640
1641     MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
1642                                                  MachineBasicBlock *MBB) const;
1643
1644     MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
1645                                              MachineBasicBlock *MBB) const;
1646
1647     /// Emit flags for the setcc condition \p CC applied to \p Op0 and \p Op1.
1648     /// The corresponding X86 condition code constant is handed back through
         /// the output parameter \p X86CC (a non-const reference).
1649     SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
1650                               const SDLoc &dl, SelectionDAG &DAG,
1651                               SDValue &X86CC) const;
1652
1653     /// Check if replacement of SQRT with RSQRT should be disabled.
         /// NOTE(review): presumably true means sqrt of \p Op is cheap enough
         /// that the estimate sequence should not be used -- confirm in
         /// X86ISelLowering.cpp.
1654     bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;
1655
1656     /// Use rsqrt* to speed up sqrt calculations.
         /// \p RefinementSteps and \p UseOneConstNR are taken by non-const
         /// reference. NOTE(review): presumably both may be updated by the call
         /// and an empty SDValue means no estimate is available -- confirm in
         /// TargetLowering.h.
1657     SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
1658                             int &RefinementSteps, bool &UseOneConstNR,
1659                             bool Reciprocal) const override;
1660
1661     /// Use rcp* to speed up fdiv calculations.
         /// \p RefinementSteps is taken by non-const reference.
         /// NOTE(review): presumably it may be updated by the call and an empty
         /// SDValue means no estimate is available -- confirm in TargetLowering.h.
1662     SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
1663                              int &RefinementSteps) const override;
1664
1665     /// Reassociate floating point divisions into multiply by reciprocal.
         /// This hook takes no node and only returns an unsigned.
         /// NOTE(review): presumably the minimum number of divisions sharing one
         /// divisor for which the rewrite is worthwhile (zero meaning never) --
         /// confirm in TargetLowering.h.
1666     unsigned combineRepeatedFPDivisors() const override;
1667
         /// Overrides a base-class hook taking a node, a constant \p Divisor and
         /// a mutable node list. NOTE(review): judging by the name, builds the
         /// target's expansion of a signed division by a power of two and records
         /// newly built nodes in \p Created -- confirm in TargetLowering.h.
1668     SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1669                           SmallVectorImpl<SDNode *> &Created) const override;
1670   };
1671
1672   namespace X86 {
         /// Factory returning a FastISel pointer for X86; declared here and
         /// defined elsewhere. NOTE(review): ownership of the returned object is
         /// not visible from this header -- confirm with the caller.
1673     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1674                              const TargetLibraryInfo *libInfo);
1675   } // end namespace X86
1676
1677   // X86 specific Gather/Scatter nodes.
1678   // The class has the same order of operands as MaskedGatherScatterSDNode for
1679   // convenience.
1680   class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {
1681   public:
1682     // This is intended as a utility and should never be directly created.
1683     X86MaskedGatherScatterSDNode() = delete;
1684     ~X86MaskedGatherScatterSDNode() = delete;
1685
         // Operand accessors: the mask is operand 2, the base pointer operand 3,
         // the index operand 4 and the scale operand 5.
1686     const SDValue &getBasePtr() const { return getOperand(3); }
1687     const SDValue &getIndex()   const { return getOperand(4); }
1688     const SDValue &getMask()    const { return getOperand(2); }
1689     const SDValue &getScale()   const { return getOperand(5); }
1690
         // Type test: accepts exactly the X86ISD::MGATHER and X86ISD::MSCATTER
         // opcodes.
1691     static bool classof(const SDNode *N) {
1692       return N->getOpcode() == X86ISD::MGATHER ||
1693              N->getOpcode() == X86ISD::MSCATTER;
1694     }
1695   };
1696
       // Gather-only view of X86MaskedGatherScatterSDNode: adds the pass-through
       // accessor and narrows the type test to X86ISD::MGATHER.
1697   class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1698   public:
         // The pass-through value is operand 1.
1699     const SDValue &getPassThru() const { return getOperand(1); }
1700
1701     static bool classof(const SDNode *N) {
1702       return N->getOpcode() == X86ISD::MGATHER;
1703     }
1704   };
1705
       // Scatter-only view of X86MaskedGatherScatterSDNode: adds the value
       // accessor and narrows the type test to X86ISD::MSCATTER.
1706   class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1707   public:
         // The value is operand 1. NOTE(review): presumably the data being
         // stored -- confirm where the node is built.
1708     const SDValue &getValue() const { return getOperand(1); }
1709
1710     static bool classof(const SDNode *N) {
1711       return N->getOpcode() == X86ISD::MSCATTER;
1712     }
1713   };
1714
1715   /// Generate unpacklo/unpackhi shuffle mask.
       /// The mask is delivered through \p Mask (a non-const reference).
       /// NOTE(review): presumably \p Lo selects unpacklo over unpackhi, \p Unary
       /// selects the single-input form, and the indices are appended to
       /// \p Mask -- confirm in X86ISelLowering.cpp.
1716   void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo,
1717                                bool Unary);
1718
1719   /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
1720   /// imposed by AVX and specific to the unary pattern. Example:
1721   /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
1722   /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
       /// \p Lo chooses between the "Lo" and "Hi" patterns shown above; the mask
       /// is delivered through \p Mask (a non-const reference).
1723   void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);
1724
1725 } // end namespace llvm
1726
1727 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H