contrib/llvm/lib/Target/X86/X86ISelLowering.h

   1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the interfaces that X86 uses to lower LLVM code into a
  11 // selection DAG.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
  16 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
  17
  18 #include "llvm/CodeGen/CallingConvLower.h"
  19 #include "llvm/CodeGen/SelectionDAG.h"
  20 #include "llvm/CodeGen/TargetLowering.h"
  21 #include "llvm/Target/TargetOptions.h"
  22
  23 namespace llvm {
  24   class X86Subtarget;
  25   class X86TargetMachine;
  26
  27   namespace X86ISD {
  28     // X86 Specific DAG Nodes
  29     enum NodeType : unsigned {
  30       // Start the numbering where the builtin ops leave off.
  31       FIRST_NUMBER = ISD::BUILTIN_OP_END,
  32
  33       /// Bit scan forward.
  34       BSF,
  35       /// Bit scan reverse.
  36       BSR,
  37
  38       /// Double shift instructions. These correspond to
  39       /// X86::SHLDxx and X86::SHRDxx instructions.
  40       SHLD,
  41       SHRD,
  42
  43       /// Bitwise logical AND of floating point values. This corresponds
  44       /// to X86::ANDPS or X86::ANDPD.
  45       FAND,
  46
  47       /// Bitwise logical OR of floating point values. This corresponds
  48       /// to X86::ORPS or X86::ORPD.
  49       FOR,
  50
  51       /// Bitwise logical XOR of floating point values. This corresponds
  52       /// to X86::XORPS or X86::XORPD.
  53       FXOR,
  54
  55       ///  Bitwise logical ANDNOT of floating point values. This
  56       /// corresponds to X86::ANDNPS or X86::ANDNPD.
  57       FANDN,
  58
  59       /// These operations represent an abstract X86 call
  60       /// instruction, which includes a bunch of information.  In particular the
  61       /// operands of these node are:
  62       ///
  63       ///     #0 - The incoming token chain
  64       ///     #1 - The callee
  65       ///     #2 - The number of arg bytes the caller pushes on the stack.
  66       ///     #3 - The number of arg bytes the callee pops off the stack.
  67       ///     #4 - The value to pass in AL/AX/EAX (optional)
  68       ///     #5 - The value to pass in DL/DX/EDX (optional)
  69       ///
  70       /// The result values of these nodes are:
  71       ///
  72       ///     #0 - The outgoing token chain
  73       ///     #1 - The first register result value (optional)
  74       ///     #2 - The second register result value (optional)
  75       ///
  76       CALL,
  77
  78       /// Same as call except it adds the NoTrack prefix.
  79       NT_CALL,
  80
  81       /// This operation implements the lowering for readcyclecounter.
  82       RDTSC_DAG,
  83
  84       /// X86 Read Time-Stamp Counter and Processor ID.
  85       RDTSCP_DAG,
  86
  87       /// X86 Read Performance Monitoring Counters.
  88       RDPMC_DAG,
  89
  90       /// X86 compare and logical compare instructions.
  91       CMP, COMI, UCOMI,
  92
  93       /// X86 bit-test instructions.
  94       BT,
  95
  96       /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
  97       /// operand, usually produced by a CMP instruction.
  98       SETCC,
  99
 100       /// X86 Select
 101       SELECTS,
 102
 103       // Same as SETCC except it's materialized with a sbb and the value is all
 104       // one's or all zero's.
 105       SETCC_CARRY,  // R = carry_bit ? ~0 : 0
 106
 107       /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
 108       /// Operands are two FP values to compare; result is a mask of
 109       /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
 110       FSETCC,
 111
 112       /// X86 FP SETCC, similar to above, but with output as an i1 mask and
 113       /// with optional rounding mode.
 114       FSETCCM, FSETCCM_RND,
 115
 116       /// X86 conditional moves. Operand 0 and operand 1 are the two values
 117       /// to select from. Operand 2 is the condition code, and operand 3 is the
 118       /// flag operand produced by a CMP or TEST instruction. It also writes a
 119       /// flag result.
 120       CMOV,
 121
 122       /// X86 conditional branches. Operand 0 is the chain operand, operand 1
 123       /// is the block to branch if condition is true, operand 2 is the
 124       /// condition code, and operand 3 is the flag operand produced by a CMP
 125       /// or TEST instruction.
 126       BRCOND,
 127
 128       /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
 129       /// operand 1 is the target address.
 130       NT_BRIND,
 131
 132       /// Return with a flag operand. Operand 0 is the chain operand, operand
 133       /// 1 is the number of bytes of stack to pop.
 134       RET_FLAG,
 135
 136       /// Return from interrupt. Operand 0 is the number of bytes to pop.
 137       IRET,
 138
 139       /// Repeat fill, corresponds to X86::REP_STOSx.
 140       REP_STOS,
 141
 142       /// Repeat move, corresponds to X86::REP_MOVSx.
 143       REP_MOVS,
 144
 145       /// On Darwin, this node represents the result of the popl
 146       /// at function entry, used for PIC code.
 147       GlobalBaseReg,
 148
 149       /// A wrapper node for TargetConstantPool, TargetJumpTable,
 150       /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
 151       /// MCSymbol and TargetBlockAddress.
 152       Wrapper,
 153
 154       /// Special wrapper used under X86-64 PIC mode for RIP
 155       /// relative displacements.
 156       WrapperRIP,
 157
 158       /// Copies a 64-bit value from the low word of an XMM vector
 159       /// to an MMX vector.
 160       MOVDQ2Q,
 161
 162       /// Copies a 32-bit value from the low word of a MMX
 163       /// vector to a GPR.
 164       MMX_MOVD2W,
 165
 166       /// Copies a GPR into the low 32-bit word of a MMX vector
 167       /// and zero out the high word.
 168       MMX_MOVW2D,
 169
 170       /// Extract an 8-bit value from a vector and zero extend it to
 171       /// i32, corresponds to X86::PEXTRB.
 172       PEXTRB,
 173
 174       /// Extract a 16-bit value from a vector and zero extend it to
 175       /// i32, corresponds to X86::PEXTRW.
 176       PEXTRW,
 177
 178       /// Insert any element of a 4 x float vector into any element
 179       /// of a destination 4 x floatvector.
 180       INSERTPS,
 181
 182       /// Insert the lower 8-bits of a 32-bit value to a vector,
 183       /// corresponds to X86::PINSRB.
 184       PINSRB,
 185
 186       /// Insert the lower 16-bits of a 32-bit value to a vector,
 187       /// corresponds to X86::PINSRW.
 188       PINSRW,
 189
 190       /// Shuffle 16 8-bit values within a vector.
 191       PSHUFB,
 192
 193       /// Compute Sum of Absolute Differences.
 194       PSADBW,
 195       /// Compute Double Block Packed Sum-Absolute-Differences
 196       DBPSADBW,
 197
 198       /// Bitwise Logical AND NOT of Packed FP values.
 199       ANDNP,
 200
 201       /// Blend where the selector is an immediate.
 202       BLENDI,
 203
 204       /// Dynamic (non-constant condition) vector blend where only the sign bits
 205       /// of the condition elements are used. This is used to enforce that the
 206       /// condition mask is not valid for generic VSELECT optimizations. This
 207       /// can also be used to implement the intrinsics.
 208       BLENDV,
 209
 210       /// Combined add and sub on an FP vector.
 211       ADDSUB,
 212
 213       //  FP vector ops with rounding mode.
 214       FADD_RND, FADDS_RND,
 215       FSUB_RND, FSUBS_RND,
 216       FMUL_RND, FMULS_RND,
 217       FDIV_RND, FDIVS_RND,
 218       FMAX_RND, FMAXS_RND,
 219       FMIN_RND, FMINS_RND,
 220       FSQRT_RND, FSQRTS_RND,
 221
 222       // FP vector get exponent.
 223       FGETEXP_RND, FGETEXPS_RND,
 224       // Extract Normalized Mantissas.
 225       VGETMANT, VGETMANT_RND, VGETMANTS, VGETMANTS_RND,
 226       // FP Scale.
 227       SCALEF,
 228       SCALEFS,
 229
 230       // Unsigned Integer average.
 231       AVG,
 232
 233       /// Integer horizontal add/sub.
 234       HADD,
 235       HSUB,
 236
 237       /// Floating point horizontal add/sub.
 238       FHADD,
 239       FHSUB,
 240
 241       // Detect Conflicts Within a Vector
 242       CONFLICT,
 243
 244       /// Floating point max and min.
 245       FMAX, FMIN,
 246
 247       /// Commutative FMIN and FMAX.
 248       FMAXC, FMINC,
 249
 250       /// Scalar intrinsic floating point max and min.
 251       FMAXS, FMINS,
 252
 253       /// Floating point reciprocal-sqrt and reciprocal approximation.
 254       /// Note that these typically require refinement
 255       /// in order to obtain suitable precision.
 256       FRSQRT, FRCP,
 257
 258       // AVX-512 reciprocal approximations with a little more precision.
 259       RSQRT14, RSQRT14S, RCP14, RCP14S,
 260
 261       // Thread Local Storage.
 262       TLSADDR,
 263
 264       // Thread Local Storage. A call to get the start address
 265       // of the TLS block for the current module.
 266       TLSBASEADDR,
 267
 268       // Thread Local Storage.  When calling to an OS provided
 269       // thunk at the address from an earlier relocation.
 270       TLSCALL,
 271
 272       // Exception Handling helpers.
 273       EH_RETURN,
 274
 275       // SjLj exception handling setjmp.
 276       EH_SJLJ_SETJMP,
 277
 278       // SjLj exception handling longjmp.
 279       EH_SJLJ_LONGJMP,
 280
 281       // SjLj exception handling dispatch.
 282       EH_SJLJ_SETUP_DISPATCH,
 283
 284       /// Tail call return. See X86TargetLowering::LowerCall for
 285       /// the list of operands.
 286       TC_RETURN,
 287
 288       // Vector move to low scalar and zero higher vector elements.
 289       VZEXT_MOVL,
 290
 291       // Vector integer truncate.
 292       VTRUNC,
 293       // Vector integer truncate with unsigned/signed saturation.
 294       VTRUNCUS, VTRUNCS,
 295
 296       // Masked version of the above. Used when less than a 128-bit result is
 297       // produced since the mask only applies to the lower elements and can't
 298       // be represented by a select.
 299       // SRC, PASSTHRU, MASK
 300       VMTRUNC, VMTRUNCUS, VMTRUNCS,
 301
 302       // Vector FP extend.
 303       VFPEXT, VFPEXT_RND, VFPEXTS_RND,
 304
 305       // Vector FP round.
 306       VFPROUND, VFPROUND_RND, VFPROUNDS_RND,
 307
 308       // Masked version of above. Used for v2f64->v4f32.
 309       // SRC, PASSTHRU, MASK
 310       VMFPROUND,
 311
 312       // 128-bit vector logical left / right shift
 313       VSHLDQ, VSRLDQ,
 314
 315       // Vector shift elements
 316       VSHL, VSRL, VSRA,
 317
 318       // Vector variable shift
 319       VSHLV, VSRLV, VSRAV,
 320
 321       // Vector shift elements by immediate
 322       VSHLI, VSRLI, VSRAI,
 323
 324       // Shifts of mask registers.
 325       KSHIFTL, KSHIFTR,
 326
 327       // Bit rotate by immediate
 328       VROTLI, VROTRI,
 329
 330       // Vector packed double/float comparison.
 331       CMPP,
 332
 333       // Vector integer comparisons.
 334       PCMPEQ, PCMPGT,
 335
 336       // v8i16 Horizontal minimum and position.
 337       PHMINPOS,
 338
 339       MULTISHIFT,
 340
 341       /// Vector comparison generating mask bits for fp and
 342       /// integer signed and unsigned data types.
 343       CMPM,
 344       // Vector comparison with rounding mode for FP values
 345       CMPM_RND,
 346
 347       // Arithmetic operations with FLAGS results.
 348       ADD, SUB, ADC, SBB, SMUL, UMUL,
 349       OR, XOR, AND,
 350
 351       // Bit field extract.
 352       BEXTR,
 353
 354       // Zero High Bits Starting with Specified Bit Position.
 355       BZHI,
 356
 357       // X86-specific multiply by immediate.
 358       MUL_IMM,
 359
 360       // Vector sign bit extraction.
 361       MOVMSK,
 362
 363       // Vector bitwise comparisons.
 364       PTEST,
 365
 366       // Vector packed fp sign bitwise comparisons.
 367       TESTP,
 368
 369       // OR/AND test for masks.
 370       KORTEST,
 371       KTEST,
 372
 373       // ADD for masks.
 374       KADD,
 375
 376       // Several flavors of instructions with vector shuffle behaviors.
 377       // Saturated signed/unnsigned packing.
 378       PACKSS,
 379       PACKUS,
 380       // Intra-lane alignr.
 381       PALIGNR,
 382       // AVX512 inter-lane alignr.
 383       VALIGN,
 384       PSHUFD,
 385       PSHUFHW,
 386       PSHUFLW,
 387       SHUFP,
 388       // VBMI2 Concat & Shift.
 389       VSHLD,
 390       VSHRD,
 391       VSHLDV,
 392       VSHRDV,
 393       //Shuffle Packed Values at 128-bit granularity.
 394       SHUF128,
 395       MOVDDUP,
 396       MOVSHDUP,
 397       MOVSLDUP,
 398       MOVLHPS,
 399       MOVHLPS,
 400       MOVSD,
 401       MOVSS,
 402       UNPCKL,
 403       UNPCKH,
 404       VPERMILPV,
 405       VPERMILPI,
 406       VPERMI,
 407       VPERM2X128,
 408
 409       // Variable Permute (VPERM).
 410       // Res = VPERMV MaskV, V0
 411       VPERMV,
 412
 413       // 3-op Variable Permute (VPERMT2).
 414       // Res = VPERMV3 V0, MaskV, V1
 415       VPERMV3,
 416
 417       // Bitwise ternary logic.
 418       VPTERNLOG,
 419       // Fix Up Special Packed Float32/64 values.
 420       VFIXUPIMM,
 421       VFIXUPIMMS,
 422       // Range Restriction Calculation For Packed Pairs of Float32/64 values.
 423       VRANGE, VRANGE_RND, VRANGES, VRANGES_RND,
 424       // Reduce - Perform Reduction Transformation on scalar\packed FP.
 425       VREDUCE, VREDUCE_RND, VREDUCES, VREDUCES_RND,
 426       // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
 427       // Also used by the legacy (V)ROUND intrinsics where we mask out the
 428       // scaling part of the immediate.
 429       VRNDSCALE, VRNDSCALE_RND, VRNDSCALES, VRNDSCALES_RND,
 430       // Tests Types Of a FP Values for packed types.
 431       VFPCLASS,
 432       // Tests Types Of a FP Values for scalar types.
 433       VFPCLASSS,
 434
 435       // Broadcast scalar to vector.
 436       VBROADCAST,
 437       // Broadcast mask to vector.
 438       VBROADCASTM,
 439       // Broadcast subvector to vector.
 440       SUBV_BROADCAST,
 441
 442       /// SSE4A Extraction and Insertion.
 443       EXTRQI, INSERTQI,
 444
 445       // XOP arithmetic/logical shifts.
 446       VPSHA, VPSHL,
 447       // XOP signed/unsigned integer comparisons.
 448       VPCOM, VPCOMU,
 449       // XOP packed permute bytes.
 450       VPPERM,
 451       // XOP two source permutation.
 452       VPERMIL2,
 453
 454       // Vector multiply packed unsigned doubleword integers.
 455       PMULUDQ,
 456       // Vector multiply packed signed doubleword integers.
 457       PMULDQ,
 458       // Vector Multiply Packed UnsignedIntegers with Round and Scale.
 459       MULHRS,
 460
 461       // Multiply and Add Packed Integers.
 462       VPMADDUBSW, VPMADDWD,
 463
 464       // AVX512IFMA multiply and add.
 465       // NOTE: These are different than the instruction and perform
 466       // op0 x op1 + op2.
 467       VPMADD52L, VPMADD52H,
 468
 469       // VNNI
 470       VPDPBUSD,
 471       VPDPBUSDS,
 472       VPDPWSSD,
 473       VPDPWSSDS,
 474
 475       // FMA nodes.
 476       // We use the target independent ISD::FMA for the non-inverted case.
 477       FNMADD,
 478       FMSUB,
 479       FNMSUB,
 480       FMADDSUB,
 481       FMSUBADD,
 482
 483       // FMA with rounding mode.
 484       FMADD_RND,
 485       FNMADD_RND,
 486       FMSUB_RND,
 487       FNMSUB_RND,
 488       FMADDSUB_RND,
 489       FMSUBADD_RND,
 490
 491       // Compress and expand.
 492       COMPRESS,
 493       EXPAND,
 494
 495       // Bits shuffle
 496       VPSHUFBITQMB,
 497
 498       // Convert Unsigned/Integer to Floating-Point Value with rounding mode.
 499       SINT_TO_FP_RND, UINT_TO_FP_RND,
 500       SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,
 501
 502       // Vector float/double to signed/unsigned integer.
 503       CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
 504       // Scalar float/double to signed/unsigned integer.
 505       CVTS2SI, CVTS2UI, CVTS2SI_RND, CVTS2UI_RND,
 506
 507       // Vector float/double to signed/unsigned integer with truncation.
 508       CVTTP2SI, CVTTP2UI, CVTTP2SI_RND, CVTTP2UI_RND,
 509       // Scalar float/double to signed/unsigned integer with truncation.
 510       CVTTS2SI, CVTTS2UI, CVTTS2SI_RND, CVTTS2UI_RND,
 511
 512       // Vector signed/unsigned integer to float/double.
 513       CVTSI2P, CVTUI2P,
 514
 515       // Masked versions of above. Used for v2f64->v4f32.
 516       // SRC, PASSTHRU, MASK
 517       MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI,
 518
 519       // Save xmm argument registers to the stack, according to %al. An operator
 520       // is needed so that this can be expanded with control flow.
 521       VASTART_SAVE_XMM_REGS,
 522
 523       // Windows's _chkstk call to do stack probing.
 524       WIN_ALLOCA,
 525
 526       // For allocating variable amounts of stack space when using
 527       // segmented stacks. Check if the current stacklet has enough space, and
 528       // falls back to heap allocation if not.
 529       SEG_ALLOCA,
 530
 531       // Memory barriers.
 532       MEMBARRIER,
 533       MFENCE,
 534
 535       // Store FP status word into i16 register.
 536       FNSTSW16r,
 537
 538       // Store contents of %ah into %eflags.
 539       SAHF,
 540
 541       // Get a random integer and indicate whether it is valid in CF.
 542       RDRAND,
 543
 544       // Get a NIST SP800-90B & C compliant random integer and
 545       // indicate whether it is valid in CF.
 546       RDSEED,
 547
 548       // SSE42 string comparisons.
 549       // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
 550       // will emit one or two instructions based on which results are used. If
 551       // flags and index/mask this allows us to use a single instruction since
 552       // we won't have to pick and opcode for flags. Instead we can rely on the
 553       // DAG to CSE everything and decide at isel.
 554       PCMPISTR,
 555       PCMPESTR,
 556
 557       // Test if in transactional execution.
 558       XTEST,
 559
 560       // ERI instructions.
 561       RSQRT28, RSQRT28S, RCP28, RCP28S, EXP2,
 562
 563       // Conversions between float and half-float.
 564       CVTPS2PH, CVTPH2PS, CVTPH2PS_RND,
 565
 566       // Masked version of above.
 567       // SRC, RND, PASSTHRU, MASK
 568       MCVTPS2PH,
 569
 570       // Galois Field Arithmetic Instructions
 571       GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,
 572
 573       // LWP insert record.
 574       LWPINS,
 575
 576       // User level wait
 577       UMWAIT, TPAUSE,
 578
 579       // Compare and swap.
 580       LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
 581       LCMPXCHG8_DAG,
 582       LCMPXCHG16_DAG,
 583       LCMPXCHG8_SAVE_EBX_DAG,
 584       LCMPXCHG16_SAVE_RBX_DAG,
 585
 586       /// LOCK-prefixed arithmetic read-modify-write instructions.
 587       /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
 588       LADD, LSUB, LOR, LXOR, LAND,
 589
 590       // Load, scalar_to_vector, and zero extend.
 591       VZEXT_LOAD,
 592
 593       // Store FP control world into i16 memory.
 594       FNSTCW16m,
 595
 596       /// This instruction implements FP_TO_SINT with the
 597       /// integer destination in memory and a FP reg source.  This corresponds
 598       /// to the X86::FIST*m instructions and the rounding mode change stuff. It
 599       /// has two inputs (token chain and address) and two outputs (int value
 600       /// and token chain).
 601       FP_TO_INT16_IN_MEM,
 602       FP_TO_INT32_IN_MEM,
 603       FP_TO_INT64_IN_MEM,
 604
 605       /// This instruction implements SINT_TO_FP with the
 606       /// integer source in memory and FP reg result.  This corresponds to the
 607       /// X86::FILD*m instructions. It has three inputs (token chain, address,
 608       /// and source type) and two outputs (FP value and token chain). FILD_FLAG
 609       /// also produces a flag).
 610       FILD,
 611       FILD_FLAG,
 612
 613       /// This instruction implements an extending load to FP stack slots.
 614       /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
 615       /// operand, ptr to load from, and a ValueType node indicating the type
 616       /// to load to.
 617       FLD,
 618
 619       /// This instruction implements a truncating store to FP stack
 620       /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
 621       /// chain operand, value to store, address, and a ValueType to store it
 622       /// as.
 623       FST,
 624
 625       /// This instruction grabs the address of the next argument
 626       /// from a va_list. (reads and modifies the va_list in memory)
 627       VAARG_64,
 628
 629       // Vector truncating store with unsigned/signed saturation
 630       VTRUNCSTOREUS, VTRUNCSTORES,
 631       // Vector truncating masked store with unsigned/signed saturation
 632       VMTRUNCSTOREUS, VMTRUNCSTORES,
 633
 634       // X86 specific gather and scatter
 635       MGATHER, MSCATTER,
 636
 637       // WARNING: Do not add anything in the end unless you want the node to
 638       // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
 639       // opcodes will be thought as target memory ops!
 640     };
 641   } // end namespace X86ISD
 642
 643   /// Define some predicates that are used for node matching.
 644   namespace X86 {
 645     /// Returns true if Elt is a constant zero or floating point constant +0.0.
 646     bool isZeroNode(SDValue Elt);
 647
 648     /// Returns true of the given offset can be
 649     /// fit into displacement field of the instruction.
 650     bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
 651                                       bool hasSymbolicDisplacement = true);
 652
 653     /// Determines whether the callee is required to pop its
 654     /// own arguments. Callee pop is necessary to support tail calls.
 655     bool isCalleePop(CallingConv::ID CallingConv,
 656                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
 657
 658   } // end namespace X86
 659
 660   //===--------------------------------------------------------------------===//
 661   //  X86 Implementation of the TargetLowering interface
 662   class X86TargetLowering final : public TargetLowering {
 663   public:
 664     explicit X86TargetLowering(const X86TargetMachine &TM,
 665                                const X86Subtarget &STI);
 666
 667     unsigned getJumpTableEncoding() const override;
 668     bool useSoftFloat() const override;
 669
 670     void markLibCallAttributes(MachineFunction *MF, unsigned CC,
 671                                ArgListTy &Args) const override;
 672
 673     MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
 674       return MVT::i8;
 675     }
 676
 677     const MCExpr *
 678     LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
 679                               const MachineBasicBlock *MBB, unsigned uid,
 680                               MCContext &Ctx) const override;
 681
 682     /// Returns relocation base for the given PIC jumptable.
 683     SDValue getPICJumpTableRelocBase(SDValue Table,
 684                                      SelectionDAG &DAG) const override;
 685     const MCExpr *
 686     getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
 687                                  unsigned JTI, MCContext &Ctx) const override;
 688
 689     /// Return the desired alignment for ByVal aggregate
 690     /// function arguments in the caller parameter area. For X86, aggregates
 691     /// that contains are placed at 16-byte boundaries while the rest are at
 692     /// 4-byte boundaries.
 693     unsigned getByValTypeAlignment(Type *Ty,
 694                                    const DataLayout &DL) const override;
 695
 696     /// Returns the target specific optimal type for load
 697     /// and store operations as a result of memset, memcpy, and memmove
 698     /// lowering. If DstAlign is zero that means it's safe to destination
 699     /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
 700     /// means there isn't a need to check it against alignment requirement,
 701     /// probably because the source does not need to be loaded. If 'IsMemset' is
 702     /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
 703     /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
 704     /// source is constant so it does not need to be loaded.
 705     /// It returns EVT::Other if the type should be determined using generic
 706     /// target-independent logic.
 707     EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
 708                             bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
 709                             MachineFunction &MF) const override;
 710
 711     /// Returns true if it's safe to use load / store of the
 712     /// specified type to expand memcpy / memset inline. This is mostly true
 713     /// for all types except for some special cases. For example, on X86
 714     /// targets without SSE2 f64 load / store are done with fldl / fstpl which
 715     /// also does type conversion. Note the specified type doesn't have to be
 716     /// legal as the hook is used before type legalization.
 717     bool isSafeMemOpType(MVT VT) const override;
 718
 719     /// Returns true if the target allows unaligned memory accesses of the
 720     /// specified type. Returns whether it is "fast" in the last argument.
 721     bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
 722                                        bool *Fast) const override;
 723
 724     /// Provide custom lowering hooks for some operations.
 725     ///
 726     SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
 727
 728     /// Places new result values for the node in Results (their number
 729     /// and types must exactly match those of the original return values of
 730     /// the node), or leaves Results empty, which indicates that the node is not
 731     /// to be custom lowered after all.
 732     void LowerOperationWrapper(SDNode *N,
 733                                SmallVectorImpl<SDValue> &Results,
 734                                SelectionDAG &DAG) const override;
 735
 736     /// Replace the results of node with an illegal result
 737     /// type with new values built out of custom code.
 738     ///
 739     void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
 740                             SelectionDAG &DAG) const override;
 741
 742     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 743
 744     // Return true if it is profitable to combine a BUILD_VECTOR with a
 745     // stride-pattern to a shuffle and a truncate.
 746     // Example of such a combine:
 747     // v4i32 build_vector((extract_elt V, 1),
 748     //                    (extract_elt V, 3),
 749     //                    (extract_elt V, 5),
 750     //                    (extract_elt V, 7))
 751     //  -->
 752     // v4i32 truncate (bitcast (shuffle<1,u,3,u,4,u,5,u,6,u,7,u> V, u) to
 753     // v4i64)
 754     bool isDesirableToCombineBuildVectorToShuffleTruncate(
 755         ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const override;
 756
 757     /// Return true if the target has native support for
 758     /// the specified value type and it is 'desirable' to use the type for the
 759     /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
 760     /// instruction encodings are longer and some i16 instructions are slow.
 761     bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
 762
 763     /// Return true if the target has native support for the
 764     /// specified value type and it is 'desirable' to use the type. e.g. On x86
 765     /// i16 is legal, but undesirable since i16 instruction encodings are longer
 766     /// and some i16 instructions are slow.
 767     bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
 768
 769     MachineBasicBlock *
 770     EmitInstrWithCustomInserter(MachineInstr &MI,
 771                                 MachineBasicBlock *MBB) const override;
 772
 773     /// This method returns the name of a target specific DAG node.
 774     const char *getTargetNodeName(unsigned Opcode) const override;
 775
 776     bool mergeStoresAfterLegalization() const override { return true; }
 777
 778     bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
 779                           const SelectionDAG &DAG) const override;
 780
 781     bool isCheapToSpeculateCttz() const override;
 782
 783     bool isCheapToSpeculateCtlz() const override;
 784
 785     bool isCtlzFast() const override;
 786
 787     bool hasBitPreservingFPLogic(EVT VT) const override {
 788       return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
 789     }
 790
 791     bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
 792       // If the pair to store is a mixture of float and int values, we will
 793       // save two bitwise instructions and one float-to-int instruction and
 794       // increase one store instruction. There is potentially a more
 795       // significant benefit because it avoids the float->int domain switch
 796       // for input value. So It is more likely a win.
 797       if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
 798           (LTy.isInteger() && HTy.isFloatingPoint()))
 799         return true;
 800       // If the pair only contains int values, we will save two bitwise
 801       // instructions and increase one store instruction (costing one more
 802       // store buffer). Since the benefit is more blurred so we leave
 803       // such pair out until we get testcase to prove it is a win.
 804       return false;
 805     }
 806
 807     bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
 808
 809     bool hasAndNotCompare(SDValue Y) const override;
 810
 811     bool hasAndNot(SDValue Y) const override;
 812
 813     bool preferShiftsToClearExtremeBits(SDValue Y) const override;
 814
 815     bool
 816     shouldTransformSignedTruncationCheck(EVT XVT,
 817                                          unsigned KeptBits) const override {
 818       // For vectors, we don't have a preference..
 819       if (XVT.isVector())
 820         return false;
 821
 822       auto VTIsOk = [](EVT VT) -> bool {
 823         return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
 824                VT == MVT::i64;
 825       };
 826
 827       // We are ok with KeptBitsVT being byte/word/dword, what MOVS supports.
 828       // XVT will be larger than KeptBitsVT.
 829       MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
 830       return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
 831     }
 832
 833     bool shouldSplatInsEltVarIndex(EVT VT) const override;
 834
 835     bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
 836       return VT.isScalarInteger();
 837     }
 838
 839     /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
 840     MVT hasFastEqualityCompare(unsigned NumBits) const override;
 841
 842     /// Allow multiple load pairs per block for smaller and faster code.
 843     unsigned getMemcmpEqZeroLoadsPerBlock() const override {
 844       return 2;
 845     }
 846
 847     /// Return the value type to use for ISD::SETCC.
 848     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
 849                            EVT VT) const override;
 850
 851     bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
 852                                       TargetLoweringOpt &TLO) const override;
 853
 854     /// Determine which of the bits specified in Mask are known to be either
 855     /// zero or one and return them in the KnownZero/KnownOne bitsets.
 856     void computeKnownBitsForTargetNode(const SDValue Op,
 857                                        KnownBits &Known,
 858                                        const APInt &DemandedElts,
 859                                        const SelectionDAG &DAG,
 860                                        unsigned Depth = 0) const override;
 861
 862     /// Determine the number of bits in the operation that are sign bits.
 863     unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
 864                                              const APInt &DemandedElts,
 865                                              const SelectionDAG &DAG,
 866                                              unsigned Depth) const override;
 867
 868     bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
 869                                                  const APInt &DemandedElts,
 870                                                  APInt &KnownUndef,
 871                                                  APInt &KnownZero,
 872                                                  TargetLoweringOpt &TLO,
 873                                                  unsigned Depth) const override;
 874
 875     bool SimplifyDemandedBitsForTargetNode(SDValue Op,
 876                                            const APInt &DemandedBits,
 877                                            const APInt &DemandedElts,
 878                                            KnownBits &Known,
 879                                            TargetLoweringOpt &TLO,
 880                                            unsigned Depth) const override;
 881
 882     SDValue unwrapAddress(SDValue N) const override;
 883
 884     SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
 885
 886     bool ExpandInlineAsm(CallInst *CI) const override;
 887
 888     ConstraintType getConstraintType(StringRef Constraint) const override;
 889
 890     /// Examine constraint string and operand type and determine a weight value.
 891     /// The operand object must already have been set up with the operand type.
 892     ConstraintWeight
 893       getSingleConstraintMatchWeight(AsmOperandInfo &info,
 894                                      const char *constraint) const override;
 895
 896     const char *LowerXConstraint(EVT ConstraintVT) const override;
 897
 898     /// Lower the specified operand into the Ops vector. If it is invalid, don't
 899     /// add anything to Ops. If hasMemory is true it means one of the asm
 900     /// constraint of the inline asm instruction being processed is 'm'.
 901     void LowerAsmOperandForConstraint(SDValue Op,
 902                                       std::string &Constraint,
 903                                       std::vector<SDValue> &Ops,
 904                                       SelectionDAG &DAG) const override;
 905
 906     unsigned
 907     getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
 908       if (ConstraintCode == "i")
 909         return InlineAsm::Constraint_i;
 910       else if (ConstraintCode == "o")
 911         return InlineAsm::Constraint_o;
 912       else if (ConstraintCode == "v")
 913         return InlineAsm::Constraint_v;
 914       else if (ConstraintCode == "X")
 915         return InlineAsm::Constraint_X;
 916       return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
 917     }
 918
 919     /// Given a physical register constraint
 920     /// (e.g. {edx}), return the register number and the register class for the
 921     /// register.  This should only be used for C_Register constraints.  On
 922     /// error, this returns a register number of 0.
 923     std::pair<unsigned, const TargetRegisterClass *>
 924     getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
 925                                  StringRef Constraint, MVT VT) const override;
 926
 927     /// Return true if the addressing mode represented
 928     /// by AM is legal for this target, for a load/store of the specified type.
 929     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
 930                                Type *Ty, unsigned AS,
 931                                Instruction *I = nullptr) const override;
 932
 933     /// Return true if the specified immediate is legal
 934     /// icmp immediate, that is the target has icmp instructions which can
 935     /// compare a register against the immediate without having to materialize
 936     /// the immediate into a register.
 937     bool isLegalICmpImmediate(int64_t Imm) const override;
 938
 939     /// Return true if the specified immediate is legal
 940     /// add immediate, that is the target has add instructions which can
 941     /// add a register and the immediate without having to materialize
 942     /// the immediate into a register.
 943     bool isLegalAddImmediate(int64_t Imm) const override;
 944
 945     bool isLegalStoreImmediate(int64_t Imm) const override;
 946
 947     /// Return the cost of the scaling factor used in the addressing
 948     /// mode represented by AM for this target, for a load/store
 949     /// of the specified type.
 950     /// If the AM is supported, the return value must be >= 0.
 951     /// If the AM is not supported, it returns a negative value.
 952     int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
 953                              unsigned AS) const override;
 954
 955     bool isVectorShiftByScalarCheap(Type *Ty) const override;
 956
 957     /// Return true if it's free to truncate a value of
 958     /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
 959     /// register EAX to i16 by referencing its sub-register AX.
 960     bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
 961     bool isTruncateFree(EVT VT1, EVT VT2) const override;
 962
 963     bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
 964
 965     /// Return true if any actual instruction that defines a
 966     /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
 967     /// register. This does not necessarily include registers defined in
 968     /// unknown ways, such as incoming arguments, or copies from unknown
 969     /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
 970     /// does not necessarily apply to truncate instructions. e.g. on x86-64,
 971     /// all instructions that define 32-bit values implicit zero-extend the
 972     /// result out to 64 bits.
 973     bool isZExtFree(Type *Ty1, Type *Ty2) const override;
 974     bool isZExtFree(EVT VT1, EVT VT2) const override;
 975     bool isZExtFree(SDValue Val, EVT VT2) const override;
 976
 977     /// Return true if folding a vector load into ExtVal (a sign, zero, or any
 978     /// extend node) is profitable.
 979     bool isVectorLoadExtDesirable(SDValue) const override;
 980
 981     /// Return true if an FMA operation is faster than a pair of fmul and fadd
 982     /// instructions. fmuladd intrinsics will be expanded to FMAs when this
 983     /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
 984     bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
 985
 986     /// Return true if it's profitable to narrow
 987     /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
 988     /// from i32 to i8 but not from i32 to i16.
 989     bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
 990
 991     /// Given an intrinsic, checks if on the target the intrinsic will need to map
 992     /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
 993     /// true and stores the intrinsic information into the IntrinsicInfo that was
 994     /// passed to the function.
 995     bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
 996                             MachineFunction &MF,
 997                             unsigned Intrinsic) const override;
 998
 999     /// Returns true if the target can instruction select the
1000     /// specified FP immediate natively. If false, the legalizer will
1001     /// materialize the FP immediate as a load from a constant pool.
1002     bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
1003
1004     /// Targets can use this to indicate that they only support *some*
1005     /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1006     /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1007     /// be legal.
1008     bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1009
1010     /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1011     /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1012     /// constant pool entry.
1013     bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1014
1015     /// Returns true if lowering to a jump table is allowed.
1016     bool areJTsAllowed(const Function *Fn) const override;
1017
1018     /// If true, then instruction selection should
1019     /// seek to shrink the FP constant of the specified type to a smaller type
1020     /// in order to save space and / or reduce runtime.
1021     bool ShouldShrinkFPConstant(EVT VT) const override {
1022       // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
1023       // expensive than a straight movsd. On the other hand, it's important to
1024       // shrink long double fp constant since fldt is very slow.
1025       return !X86ScalarSSEf64 || VT == MVT::f80;
1026     }
1027
1028     /// Return true if we believe it is correct and profitable to reduce the
1029     /// load node to a smaller type.
1030     bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1031                                EVT NewVT) const override;
1032
1033     /// Return true if the specified scalar FP type is computed in an SSE
1034     /// register, not on the X87 floating point stack.
1035     bool isScalarFPTypeInSSEReg(EVT VT) const {
1036       return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
1037              (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
1038     }
1039
1040     /// Returns true if it is beneficial to convert a load of a constant
1041     /// to just the constant itself.
1042     bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1043                                            Type *Ty) const override;
1044
1045     bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const override;
1046
1047     bool convertSelectOfConstantsToMath(EVT VT) const override;
1048
1049     bool decomposeMulByConstant(EVT VT, SDValue C) const override;
1050
1051     bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
1052                                   bool IsSigned) const override;
1053
1054     /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1055     /// with this index.
1056     bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1057                                  unsigned Index) const override;
1058
1059     /// Scalar ops always have equal or better analysis/performance/power than
1060     /// the vector equivalent, so this always makes sense if the scalar op is
1061     /// supported.
1062     bool shouldScalarizeBinop(SDValue) const override;
1063
1064     bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
1065                                       unsigned AddrSpace) const override {
1066       // If we can replace more than 2 scalar stores, there will be a reduction
1067       // in instructions even after we add a vector constant load.
1068       return NumElem > 2;
1069     }
1070
1071     bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT) const override;
1072
1073     /// Intel processors have a unified instruction and data cache
1074     const char * getClearCacheBuiltinName() const override {
1075       return nullptr; // nothing to do, move along.
1076     }
1077
1078     unsigned getRegisterByName(const char* RegName, EVT VT,
1079                                SelectionDAG &DAG) const override;
1080
1081     /// If a physical register, this returns the register that receives the
1082     /// exception address on entry to an EH pad.
1083     unsigned
1084     getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1085
1086     /// If a physical register, this returns the register that receives the
1087     /// exception typeid on entry to a landing pad.
1088     unsigned
1089     getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1090
1091     virtual bool needsFixedCatchObjects() const override;
1092
1093     /// This method returns a target specific FastISel object,
1094     /// or null if the target does not support "fast" ISel.
1095     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1096                              const TargetLibraryInfo *libInfo) const override;
1097
1098     /// If the target has a standard location for the stack protector cookie,
1099     /// returns the address of that location. Otherwise, returns nullptr.
1100     Value *getIRStackGuard(IRBuilder<> &IRB) const override;
1101
1102     bool useLoadStackGuardNode() const override;
1103     bool useStackGuardXorFP() const override;
1104     void insertSSPDeclarations(Module &M) const override;
1105     Value *getSDagStackGuard(const Module &M) const override;
1106     Value *getSSPStackGuardCheck(const Module &M) const override;
1107     SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1108                                 const SDLoc &DL) const override;
1109
1110
1111     /// Return true if the target stores SafeStack pointer at a fixed offset in
1112     /// some non-standard address space, and populates the address space and
1113     /// offset as appropriate.
1114     Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
1115
1116     SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
1117                       SelectionDAG &DAG) const;
1118
1119     bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
1120
1121     /// Customize the preferred legalization strategy for certain types.
1122     LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1123
1124     MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1125                                       EVT VT) const override;
1126
1127     unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1128                                            CallingConv::ID CC,
1129                                            EVT VT) const override;
1130
1131     bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1132
1133     bool supportSwiftError() const override;
1134
1135     StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
1136
1137     bool hasVectorBlend() const override { return true; }
1138
1139     unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1140
1141     /// Lower interleaved load(s) into target specific
1142     /// instructions/intrinsics.
1143     bool lowerInterleavedLoad(LoadInst *LI,
1144                               ArrayRef<ShuffleVectorInst *> Shuffles,
1145                               ArrayRef<unsigned> Indices,
1146                               unsigned Factor) const override;
1147
1148     /// Lower interleaved store(s) into target specific
1149     /// instructions/intrinsics.
1150     bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1151                                unsigned Factor) const override;
1152
1153     SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
1154                                    SDValue Addr, SelectionDAG &DAG)
1155                                    const override;
1156
1157   protected:
1158     std::pair<const TargetRegisterClass *, uint8_t>
1159     findRepresentativeClass(const TargetRegisterInfo *TRI,
1160                             MVT VT) const override;
1161
1162   private:
1163     /// Keep a reference to the X86Subtarget around so that we can
1164     /// make the right decision when generating code for different targets.
1165     const X86Subtarget &Subtarget;
1166
1167     /// Select between SSE or x87 floating point ops.
1168     /// When SSE is available, use it for f32 operations.
1169     /// When SSE2 is available, use it for f64 operations.
1170     bool X86ScalarSSEf32;
1171     bool X86ScalarSSEf64;
1172
1173     /// A list of legal FP immediates.
1174     std::vector<APFloat> LegalFPImmediates;
1175
1176     /// Indicate that this x86 target can instruction
1177     /// select the specified FP immediate natively.
1178     void addLegalFPImmediate(const APFloat& Imm) {
1179       LegalFPImmediates.push_back(Imm);
1180     }
1181
1182     SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
1183                             CallingConv::ID CallConv, bool isVarArg,
1184                             const SmallVectorImpl<ISD::InputArg> &Ins,
1185                             const SDLoc &dl, SelectionDAG &DAG,
1186                             SmallVectorImpl<SDValue> &InVals,
1187                             uint32_t *RegMask) const;
1188     SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1189                              const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1190                              const SDLoc &dl, SelectionDAG &DAG,
1191                              const CCValAssign &VA, MachineFrameInfo &MFI,
1192                              unsigned i) const;
1193     SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1194                              const SDLoc &dl, SelectionDAG &DAG,
1195                              const CCValAssign &VA,
1196                              ISD::ArgFlagsTy Flags) const;
1197
1198     // Call lowering helpers.
1199
1200     /// Check whether the call is eligible for tail call optimization. Targets
1201     /// that want to do tail call optimization should implement this function.
1202     bool IsEligibleForTailCallOptimization(SDValue Callee,
1203                                            CallingConv::ID CalleeCC,
1204                                            bool isVarArg,
1205                                            bool isCalleeStructRet,
1206                                            bool isCallerStructRet,
1207                                            Type *RetTy,
1208                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
1209                                     const SmallVectorImpl<SDValue> &OutVals,
1210                                     const SmallVectorImpl<ISD::InputArg> &Ins,
1211                                            SelectionDAG& DAG) const;
1212     SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1213                                     SDValue Chain, bool IsTailCall,
1214                                     bool Is64Bit, int FPDiff,
1215                                     const SDLoc &dl) const;
1216
1217     unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1218                                          SelectionDAG &DAG) const;
1219
1220     unsigned getAddressSpace(void) const;
1221
1222     std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
1223                                                bool isSigned,
1224                                                bool isReplace) const;
1225
1226     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1227     SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1228     SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1229     SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1230
1231     unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
1232                                   const unsigned char OpFlags = 0) const;
1233     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1234     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1235     SDValue LowerGlobalAddress(const GlobalValue *GV, const SDLoc &dl,
1236                                int64_t Offset, SelectionDAG &DAG) const;
1237     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1238     SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1239     SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1240
1241     SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1242     SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1243     SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1244     SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1245     SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1246     SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1247     SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1248     SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
1249     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1250     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1251     SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1252     SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1253     SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1254     SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1255     SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1256     SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1257     SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
1258     SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1259     SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1260     SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
1261     SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1262     SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
1263     SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
1264     SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
1265     SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
1266     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1267
1268     SDValue
1269     LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1270                          const SmallVectorImpl<ISD::InputArg> &Ins,
1271                          const SDLoc &dl, SelectionDAG &DAG,
1272                          SmallVectorImpl<SDValue> &InVals) const override;
1273     SDValue LowerCall(CallLoweringInfo &CLI,
1274                       SmallVectorImpl<SDValue> &InVals) const override;
1275
1276     SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1277                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1278                         const SmallVectorImpl<SDValue> &OutVals,
1279                         const SDLoc &dl, SelectionDAG &DAG) const override;
1280
1281     bool supportSplitCSR(MachineFunction *MF) const override {
1282       return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
1283           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
1284     }
1285     void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1286     void insertCopiesSplitCSR(
1287       MachineBasicBlock *Entry,
1288       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1289
1290     bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1291
1292     bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1293
1294     EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1295                             ISD::NodeType ExtendKind) const override;
1296
1297     bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1298                         bool isVarArg,
1299                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1300                         LLVMContext &Context) const override;
1301
1302     const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1303
1304     TargetLoweringBase::AtomicExpansionKind
1305     shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
1306     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1307     TargetLoweringBase::AtomicExpansionKind
1308     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1309
1310     LoadInst *
1311     lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1312
1313     bool needsCmpXchgNb(Type *MemType) const;
1314
1315     void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1316                                 MachineBasicBlock *DispatchBB, int FI) const;
1317
1318     // Utility function to emit the low-level va_arg code for X86-64.
1319     MachineBasicBlock *
1320     EmitVAARG64WithCustomInserter(MachineInstr &MI,
1321                                   MachineBasicBlock *MBB) const;
1322
1323     /// Utility function to emit the xmm reg save portion of va_start.
1324     MachineBasicBlock *
1325     EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
1326                                              MachineBasicBlock *BB) const;
1327
1328     MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
1329                                                  MachineInstr &MI2,
1330                                                  MachineBasicBlock *BB) const;
1331
1332     MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1333                                          MachineBasicBlock *BB) const;
1334
1335     MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I,
1336                                            MachineBasicBlock *BB) const;
1337
1338     MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1339                                            MachineBasicBlock *BB) const;
1340
1341     MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
1342                                            MachineBasicBlock *BB) const;
1343
1344     MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1345                                             MachineBasicBlock *BB) const;
1346
1347     MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
1348                                           MachineBasicBlock *BB) const;
1349
1350     MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1351                                           MachineBasicBlock *BB) const;
1352
1353     MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
1354                                             MachineBasicBlock *BB) const;
1355
1356     MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1357                                         MachineBasicBlock *MBB) const;
1358
1359     void emitSetJmpShadowStackFix(MachineInstr &MI,
1360                                   MachineBasicBlock *MBB) const;
1361
1362     MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1363                                          MachineBasicBlock *MBB) const;
1364
1365     MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
1366                                                  MachineBasicBlock *MBB) const;
1367
1368     MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
1369                                      MachineBasicBlock *MBB) const;
1370
1371     MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
1372                                              MachineBasicBlock *MBB) const;
1373
1374     /// Emit nodes that will be selected as "cmp Op0,Op1", or something
1375     /// equivalent, for use with the given x86 condition code.
1376     SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl,
1377                     SelectionDAG &DAG) const;
1378
1379     /// Convert a comparison if required by the subtarget.
1380     SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
1381
1382     /// Emit flags for the given setcc condition and operands. Also returns the
1383     /// corresponding X86 condition code constant in X86CC.
1384     SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1,
1385                               ISD::CondCode CC, const SDLoc &dl,
1386                               SelectionDAG &DAG,
1387                               SDValue &X86CC) const;
1388
1389     /// Check if replacement of SQRT with RSQRT should be disabled.
1390     bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;
1391
1392     /// Use rsqrt* to speed up sqrt calculations.
1393     SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1394                             int &RefinementSteps, bool &UseOneConstNR,
1395                             bool Reciprocal) const override;
1396
1397     /// Use rcp* to speed up fdiv calculations.
1398     SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1399                              int &RefinementSteps) const override;
1400
1401     /// Reassociate floating point divisions into multiply by reciprocal.
1402     unsigned combineRepeatedFPDivisors() const override;
1403   };
1404
1405   namespace X86 {
1406     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1407                              const TargetLibraryInfo *libInfo);
1408   } // end namespace X86
1409
1410   // Base class for all X86 non-masked store operations.
1411   class X86StoreSDNode : public MemSDNode {
1412   public:
1413     X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
1414                    SDVTList VTs, EVT MemVT,
1415                    MachineMemOperand *MMO)
1416       :MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1417     const SDValue &getValue() const { return getOperand(1); }
1418     const SDValue &getBasePtr() const { return getOperand(2); }
1419
1420     static bool classof(const SDNode *N) {
1421       return N->getOpcode() == X86ISD::VTRUNCSTORES ||
1422         N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1423     }
1424   };
1425
1426   // Base class for all X86 masked store operations.
1427   // The class has the same order of operands as MaskedStoreSDNode for
1428   // convenience.
1429   class X86MaskedStoreSDNode : public MemSDNode {
1430   public:
1431     X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
1432                          const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1433                          MachineMemOperand *MMO)
1434       : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1435
1436     const SDValue &getValue()   const { return getOperand(1); }
1437     const SDValue &getBasePtr() const { return getOperand(2); }
1438     const SDValue &getMask()    const { return getOperand(3); }
1439
1440     static bool classof(const SDNode *N) {
1441       return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
1442         N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1443     }
1444   };
1445
1446   // X86 Truncating Store with Signed saturation.
1447   class TruncSStoreSDNode : public X86StoreSDNode {
1448   public:
1449     TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
1450                         SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1451       : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1452
1453     static bool classof(const SDNode *N) {
1454       return N->getOpcode() == X86ISD::VTRUNCSTORES;
1455     }
1456   };
1457
1458   // X86 Truncating Store with Unsigned saturation.
1459   class TruncUSStoreSDNode : public X86StoreSDNode {
1460   public:
1461     TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
1462                       SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1463       : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1464
1465     static bool classof(const SDNode *N) {
1466       return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1467     }
1468   };
1469
1470   // X86 Truncating Masked Store with Signed saturation.
1471   class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
1472   public:
1473     MaskedTruncSStoreSDNode(unsigned Order,
1474                          const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1475                          MachineMemOperand *MMO)
1476       : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1477
1478     static bool classof(const SDNode *N) {
1479       return N->getOpcode() == X86ISD::VMTRUNCSTORES;
1480     }
1481   };
1482
1483   // X86 Truncating Masked Store with Unsigned saturation.
1484   class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
1485   public:
1486     MaskedTruncUSStoreSDNode(unsigned Order,
1487                             const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1488                             MachineMemOperand *MMO)
1489       : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1490
1491     static bool classof(const SDNode *N) {
1492       return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1493     }
1494   };
1495
1496   // X86 specific Gather/Scatter nodes.
1497   // The class has the same order of operands as MaskedGatherScatterSDNode for
1498   // convenience.
1499   class X86MaskedGatherScatterSDNode : public MemSDNode {
1500   public:
1501     X86MaskedGatherScatterSDNode(unsigned Opc, unsigned Order,
1502                                  const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1503                                  MachineMemOperand *MMO)
1504         : MemSDNode(Opc, Order, dl, VTs, MemVT, MMO) {}
1505
1506     const SDValue &getBasePtr() const { return getOperand(3); }
1507     const SDValue &getIndex()   const { return getOperand(4); }
1508     const SDValue &getMask()    const { return getOperand(2); }
1509     const SDValue &getScale()   const { return getOperand(5); }
1510
1511     static bool classof(const SDNode *N) {
1512       return N->getOpcode() == X86ISD::MGATHER ||
1513              N->getOpcode() == X86ISD::MSCATTER;
1514     }
1515   };
1516
1517   class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1518   public:
1519     X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1520                           EVT MemVT, MachineMemOperand *MMO)
1521         : X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT,
1522                                        MMO) {}
1523
1524     const SDValue &getPassThru() const { return getOperand(1); }
1525
1526     static bool classof(const SDNode *N) {
1527       return N->getOpcode() == X86ISD::MGATHER;
1528     }
1529   };
1530
1531   class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1532   public:
1533     X86MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1534                            EVT MemVT, MachineMemOperand *MMO)
1535         : X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs, MemVT,
1536                                        MMO) {}
1537
1538     const SDValue &getValue() const { return getOperand(1); }
1539
1540     static bool classof(const SDNode *N) {
1541       return N->getOpcode() == X86ISD::MSCATTER;
1542     }
1543   };
1544
1545   /// Generate unpacklo/unpackhi shuffle mask.
1546   template <typename T = int>
1547   void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
1548                                bool Unary) {
1549     assert(Mask.empty() && "Expected an empty shuffle mask vector");
1550     int NumElts = VT.getVectorNumElements();
1551     int NumEltsInLane = 128 / VT.getScalarSizeInBits();
1552     for (int i = 0; i < NumElts; ++i) {
1553       unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
1554       int Pos = (i % NumEltsInLane) / 2 + LaneStart;
1555       Pos += (Unary ? 0 : NumElts * (i % 2));
1556       Pos += (Lo ? 0 : NumEltsInLane / 2);
1557       Mask.push_back(Pos);
1558     }
1559   }
1560
1561   /// Helper function to scale a shuffle or target shuffle mask, replacing each
1562   /// mask index with the scaled sequential indices for an equivalent narrowed
1563   /// mask. This is the reverse process to canWidenShuffleElements, but can
1564   /// always succeed.
1565   template <typename T>
1566   void scaleShuffleMask(int Scale, ArrayRef<T> Mask,
1567                         SmallVectorImpl<T> &ScaledMask) {
1568     assert(0 < Scale && "Unexpected scaling factor");
1569     int NumElts = Mask.size();
1570     ScaledMask.assign(static_cast<size_t>(NumElts * Scale), -1);
1571
1572     for (int i = 0; i != NumElts; ++i) {
1573       int M = Mask[i];
1574
1575       // Repeat sentinel values in every mask element.
1576       if (M < 0) {
1577         for (int s = 0; s != Scale; ++s)
1578           ScaledMask[(Scale * i) + s] = M;
1579         continue;
1580       }
1581
1582       // Scale mask element and increment across each mask element.
1583       for (int s = 0; s != Scale; ++s)
1584         ScaledMask[(Scale * i) + s] = (Scale * M) + s;
1585     }
1586   }
1587 } // end namespace llvm
1588
1589 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H