contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h

   1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file defines the interfaces that X86 uses to lower LLVM code into a
  10 // selection DAG.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
  15 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
  16
  17 #include "llvm/CodeGen/CallingConvLower.h"
  18 #include "llvm/CodeGen/SelectionDAG.h"
  19 #include "llvm/CodeGen/TargetLowering.h"
  20 #include "llvm/Target/TargetOptions.h"
  21
  22 namespace llvm {
  23   class X86Subtarget;
  24   class X86TargetMachine;
  25
  26   namespace X86ISD {
  27     // X86-specific DAG nodes.
  28     enum NodeType : unsigned {
  29       // Start the numbering where the builtin ops leave off.
  30       FIRST_NUMBER = ISD::BUILTIN_OP_END,
  31
  32       /// Bit scan forward.
  33       BSF,
  34       /// Bit scan reverse.
  35       BSR,
  36
  37       /// Double shift instructions. These correspond to
  38       /// X86::SHLDxx and X86::SHRDxx instructions.
  39       SHLD,
  40       SHRD,
  41
  42       /// Bitwise logical AND of floating point values. This corresponds
  43       /// to X86::ANDPS or X86::ANDPD.
  44       FAND,
  45
  46       /// Bitwise logical OR of floating point values. This corresponds
  47       /// to X86::ORPS or X86::ORPD.
  48       FOR,
  49
  50       /// Bitwise logical XOR of floating point values. This corresponds
  51       /// to X86::XORPS or X86::XORPD.
  52       FXOR,
  53
  54       /// Bitwise logical ANDNOT of floating point values. This
  55       /// corresponds to X86::ANDNPS or X86::ANDNPD.
  56       FANDN,
  57
  58       /// These operations represent an abstract X86 call
  59       /// instruction, which includes a bunch of information.  In particular the
  60       /// operands of these nodes are:
  61       ///
  62       ///     #0 - The incoming token chain
  63       ///     #1 - The callee
  64       ///     #2 - The number of arg bytes the caller pushes on the stack.
  65       ///     #3 - The number of arg bytes the callee pops off the stack.
  66       ///     #4 - The value to pass in AL/AX/EAX (optional)
  67       ///     #5 - The value to pass in DL/DX/EDX (optional)
  68       ///
  69       /// The result values of these nodes are:
  70       ///
  71       ///     #0 - The outgoing token chain
  72       ///     #1 - The first register result value (optional)
  73       ///     #2 - The second register result value (optional)
  74       ///
  75       CALL,
  76
  77       /// Same as call except it adds the NoTrack prefix.
  78       NT_CALL,
  79
  80       /// X86 compare and logical compare instructions.
  81       CMP, COMI, UCOMI,
  82
  83       /// X86 bit-test instructions.
  84       BT,
  85
  86       /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
  87       /// operand, usually produced by a CMP instruction.
  88       SETCC,
  89
  90       /// X86 Select
  91       SELECTS,
  92
  93       // Same as SETCC except it's materialized with an sbb and the value is all
  94       // ones or all zeros.
  95       SETCC_CARRY,  // R = carry_bit ? ~0 : 0
  96
  97       /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
  98       /// Operands are two FP values to compare; result is a mask of
  99       /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
 100       FSETCC,
 101
 102       /// X86 FP SETCC, similar to above, but with output as an i1 mask and
 103       /// a version with SAE.
 104       FSETCCM, FSETCCM_SAE,
 105
 106       /// X86 conditional moves. Operand 0 and operand 1 are the two values
 107       /// to select from. Operand 2 is the condition code, and operand 3 is the
 108       /// flag operand produced by a CMP or TEST instruction.
 109       CMOV,
 110
 111       /// X86 conditional branches. Operand 0 is the chain operand, operand 1
 112       /// is the block to branch if condition is true, operand 2 is the
 113       /// condition code, and operand 3 is the flag operand produced by a CMP
 114       /// or TEST instruction.
 115       BRCOND,
 116
 117       /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
 118       /// operand 1 is the target address.
 119       NT_BRIND,
 120
 121       /// Return with a flag operand. Operand 0 is the chain operand, operand
 122       /// 1 is the number of bytes of stack to pop.
 123       RET_FLAG,
 124
 125       /// Return from interrupt. Operand 0 is the number of bytes to pop.
 126       IRET,
 127
 128       /// Repeat fill, corresponds to X86::REP_STOSx.
 129       REP_STOS,
 130
 131       /// Repeat move, corresponds to X86::REP_MOVSx.
 132       REP_MOVS,
 133
 134       /// On Darwin, this node represents the result of the popl
 135       /// at function entry, used for PIC code.
 136       GlobalBaseReg,
 137
 138       /// A wrapper node for TargetConstantPool, TargetJumpTable,
 139       /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
 140       /// MCSymbol and TargetBlockAddress.
 141       Wrapper,
 142
 143       /// Special wrapper used under X86-64 PIC mode for RIP
 144       /// relative displacements.
 145       WrapperRIP,
 146
 147       /// Copies a 64-bit value from the low word of an XMM vector
 148       /// to an MMX vector.
 149       MOVDQ2Q,
 150
 151       /// Copies a 32-bit value from the low word of an MMX
 152       /// vector to a GPR.
 153       MMX_MOVD2W,
 154
 155       /// Copies a GPR into the low 32-bit word of an MMX vector
 156       /// and zeroes out the high word.
 157       MMX_MOVW2D,
 158
 159       /// Extract an 8-bit value from a vector and zero extend it to
 160       /// i32, corresponds to X86::PEXTRB.
 161       PEXTRB,
 162
 163       /// Extract a 16-bit value from a vector and zero extend it to
 164       /// i32, corresponds to X86::PEXTRW.
 165       PEXTRW,
 166
 167       /// Insert any element of a 4 x float vector into any element
 168       /// of a destination 4 x float vector.
 169       INSERTPS,
 170
 171       /// Insert the lower 8-bits of a 32-bit value to a vector,
 172       /// corresponds to X86::PINSRB.
 173       PINSRB,
 174
 175       /// Insert the lower 16-bits of a 32-bit value to a vector,
 176       /// corresponds to X86::PINSRW.
 177       PINSRW,
 178
 179       /// Shuffle 16 8-bit values within a vector.
 180       PSHUFB,
 181
 182       /// Compute Sum of Absolute Differences.
 183       PSADBW,
 184       /// Compute Double Block Packed Sum-Absolute-Differences.
 185       DBPSADBW,
 186
 187       /// Bitwise Logical AND NOT of Packed FP values.
 188       ANDNP,
 189
 190       /// Blend where the selector is an immediate.
 191       BLENDI,
 192
 193       /// Dynamic (non-constant condition) vector blend where only the sign bits
 194       /// of the condition elements are used. This is used to enforce that the
 195       /// condition mask is not valid for generic VSELECT optimizations. This
 196       /// is also used to implement the intrinsics.
 197       /// Operands are in VSELECT order: MASK, TRUE, FALSE
 198       BLENDV,
 199
 200       /// Combined add and sub on an FP vector.
 201       ADDSUB,
 202
 203       // FP vector ops with rounding mode.
 204       FADD_RND, FADDS, FADDS_RND,
 205       FSUB_RND, FSUBS, FSUBS_RND,
 206       FMUL_RND, FMULS, FMULS_RND,
 207       FDIV_RND, FDIVS, FDIVS_RND,
 208       FMAX_SAE, FMAXS_SAE,
 209       FMIN_SAE, FMINS_SAE,
 210       FSQRT_RND, FSQRTS, FSQRTS_RND,
 211
 212       // FP vector get exponent.
 213       FGETEXP, FGETEXP_SAE, FGETEXPS, FGETEXPS_SAE,
 214       // Extract Normalized Mantissas.
 215       VGETMANT, VGETMANT_SAE, VGETMANTS, VGETMANTS_SAE,
 216       // FP Scale.
 217       SCALEF, SCALEF_RND,
 218       SCALEFS, SCALEFS_RND,
 219
 220       // Unsigned Integer average.
 221       AVG,
 222
 223       /// Integer horizontal add/sub.
 224       HADD,
 225       HSUB,
 226
 227       /// Floating point horizontal add/sub.
 228       FHADD,
 229       FHSUB,
 230
 231       // Detect Conflicts Within a Vector
 232       CONFLICT,
 233
 234       /// Floating point max and min.
 235       FMAX, FMIN,
 236
 237       /// Commutative FMIN and FMAX.
 238       FMAXC, FMINC,
 239
 240       /// Scalar intrinsic floating point max and min.
 241       FMAXS, FMINS,
 242
 243       /// Floating point reciprocal-sqrt and reciprocal approximation.
 244       /// Note that these typically require refinement
 245       /// in order to obtain suitable precision.
 246       FRSQRT, FRCP,
 247
 248       // AVX-512 reciprocal approximations with a little more precision.
 249       RSQRT14, RSQRT14S, RCP14, RCP14S,
 250
 251       // Thread Local Storage.
 252       TLSADDR,
 253
 254       // Thread Local Storage. A call to get the start address
 255       // of the TLS block for the current module.
 256       TLSBASEADDR,
 257
 258       // Thread Local Storage.  When calling to an OS provided
 259       // thunk at the address from an earlier relocation.
 260       TLSCALL,
 261
 262       // Exception Handling helpers.
 263       EH_RETURN,
 264
 265       // SjLj exception handling setjmp.
 266       EH_SJLJ_SETJMP,
 267
 268       // SjLj exception handling longjmp.
 269       EH_SJLJ_LONGJMP,
 270
 271       // SjLj exception handling dispatch.
 272       EH_SJLJ_SETUP_DISPATCH,
 273
 274       /// Tail call return. See X86TargetLowering::LowerCall for
 275       /// the list of operands.
 276       TC_RETURN,
 277
 278       // Vector move to low scalar and zero higher vector elements.
 279       VZEXT_MOVL,
 280
 281       // Vector integer truncate.
 282       VTRUNC,
 283       // Vector integer truncate with unsigned/signed saturation.
 284       VTRUNCUS, VTRUNCS,
 285
 286       // Masked version of the above. Used when less than a 128-bit result is
 287       // produced since the mask only applies to the lower elements and can't
 288       // be represented by a select.
 289       // SRC, PASSTHRU, MASK
 290       VMTRUNC, VMTRUNCUS, VMTRUNCS,
 291
 292       // Vector FP extend.
 293       VFPEXT, VFPEXT_SAE, VFPEXTS, VFPEXTS_SAE,
 294
 295       // Vector FP round.
 296       VFPROUND, VFPROUND_RND, VFPROUNDS, VFPROUNDS_RND,
 297
 298       // Masked version of above. Used for v2f64->v4f32.
 299       // SRC, PASSTHRU, MASK
 300       VMFPROUND,
 301
 302       // 128-bit vector logical left / right shift
 303       VSHLDQ, VSRLDQ,
 304
 305       // Vector shift elements
 306       VSHL, VSRL, VSRA,
 307
 308       // Vector variable shift
 309       VSHLV, VSRLV, VSRAV,
 310
 311       // Vector shift elements by immediate
 312       VSHLI, VSRLI, VSRAI,
 313
 314       // Shifts of mask registers.
 315       KSHIFTL, KSHIFTR,
 316
 317       // Bit rotate by immediate
 318       VROTLI, VROTRI,
 319
 320       // Vector packed double/float comparison.
 321       CMPP,
 322
 323       // Vector integer comparisons.
 324       PCMPEQ, PCMPGT,
 325
 326       // v8i16 Horizontal minimum and position.
 327       PHMINPOS,
 328
 329       MULTISHIFT,
 330
 331       /// Vector comparison generating mask bits for fp and
 332       /// integer signed and unsigned data types.
 333       CMPM,
 334       // Vector comparison with SAE for FP values.
 335       CMPM_SAE,
 336
 337       // Arithmetic operations with FLAGS results.
 338       ADD, SUB, ADC, SBB, SMUL, UMUL,
 339       OR, XOR, AND,
 340
 341       // Bit field extract.
 342       BEXTR,
 343
 344       // Zero High Bits Starting with Specified Bit Position.
 345       BZHI,
 346
 347       // X86-specific multiply by immediate.
 348       MUL_IMM,
 349
 350       // Vector sign bit extraction.
 351       MOVMSK,
 352
 353       // Vector bitwise comparisons.
 354       PTEST,
 355
 356       // Vector packed fp sign bitwise comparisons.
 357       TESTP,
 358
 359       // OR/AND test for masks.
 360       KORTEST,
 361       KTEST,
 362
 363       // ADD for masks.
 364       KADD,
 365
 366       // Several flavors of instructions with vector shuffle behaviors.
 367       // Saturated signed/unsigned packing.
 368       PACKSS,
 369       PACKUS,
 370       // Intra-lane alignr.
 371       PALIGNR,
 372       // AVX512 inter-lane alignr.
 373       VALIGN,
 374       PSHUFD,
 375       PSHUFHW,
 376       PSHUFLW,
 377       SHUFP,
 378       // VBMI2 Concat & Shift.
 379       VSHLD,
 380       VSHRD,
 381       VSHLDV,
 382       VSHRDV,
 383       // Shuffle Packed Values at 128-bit granularity.
 384       SHUF128,
 385       MOVDDUP,
 386       MOVSHDUP,
 387       MOVSLDUP,
 388       MOVLHPS,
 389       MOVHLPS,
 390       MOVSD,
 391       MOVSS,
 392       UNPCKL,
 393       UNPCKH,
 394       VPERMILPV,
 395       VPERMILPI,
 396       VPERMI,
 397       VPERM2X128,
 398
 399       // Variable Permute (VPERM).
 400       // Res = VPERMV MaskV, V0
 401       VPERMV,
 402
 403       // 3-op Variable Permute (VPERMT2).
 404       // Res = VPERMV3 V0, MaskV, V1
 405       VPERMV3,
 406
 407       // Bitwise ternary logic.
 408       VPTERNLOG,
 409       // Fix Up Special Packed Float32/64 values.
 410       VFIXUPIMM, VFIXUPIMM_SAE,
 411       VFIXUPIMMS, VFIXUPIMMS_SAE,
 412       // Range Restriction Calculation For Packed Pairs of Float32/64 values.
 413       VRANGE, VRANGE_SAE, VRANGES, VRANGES_SAE,
 414       // Reduce - Perform Reduction Transformation on scalar/packed FP.
 415       VREDUCE, VREDUCE_SAE, VREDUCES, VREDUCES_SAE,
 416       // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
 417       // Also used by the legacy (V)ROUND intrinsics where we mask out the
 418       // scaling part of the immediate.
 419       VRNDSCALE, VRNDSCALE_SAE, VRNDSCALES, VRNDSCALES_SAE,
 420       // Tests types of FP values for packed types.
 421       VFPCLASS,
 422       // Tests types of FP values for scalar types.
 423       VFPCLASSS,
 424
 425       // Broadcast (splat) scalar or element 0 of a vector. If the operand is
 426       // a vector, this node may change the vector length as part of the splat.
 427       VBROADCAST,
 428       // Broadcast mask to vector.
 429       VBROADCASTM,
 430       // Broadcast subvector to vector.
 431       SUBV_BROADCAST,
 432
 433       /// SSE4A Extraction and Insertion.
 434       EXTRQI, INSERTQI,
 435
 436       // XOP arithmetic/logical shifts.
 437       VPSHA, VPSHL,
 438       // XOP signed/unsigned integer comparisons.
 439       VPCOM, VPCOMU,
 440       // XOP packed permute bytes.
 441       VPPERM,
 442       // XOP two source permutation.
 443       VPERMIL2,
 444
 445       // Vector multiply packed unsigned doubleword integers.
 446       PMULUDQ,
 447       // Vector multiply packed signed doubleword integers.
 448       PMULDQ,
 449       // Vector Multiply Packed Unsigned Integers with Round and Scale.
 450       MULHRS,
 451
 452       // Multiply and Add Packed Integers.
 453       VPMADDUBSW, VPMADDWD,
 454
 455       // AVX512IFMA multiply and add.
 456       // NOTE: These are different than the instruction and perform
 457       // op0 x op1 + op2.
 458       VPMADD52L, VPMADD52H,
 459
 460       // VNNI
 461       VPDPBUSD,
 462       VPDPBUSDS,
 463       VPDPWSSD,
 464       VPDPWSSDS,
 465
 466       // FMA nodes.
 467       // We use the target independent ISD::FMA for the non-inverted case.
 468       FNMADD,
 469       FMSUB,
 470       FNMSUB,
 471       FMADDSUB,
 472       FMSUBADD,
 473
 474       // FMA with rounding mode.
 475       FMADD_RND,
 476       FNMADD_RND,
 477       FMSUB_RND,
 478       FNMSUB_RND,
 479       FMADDSUB_RND,
 480       FMSUBADD_RND,
 481
 482       // Compress and expand.
 483       COMPRESS,
 484       EXPAND,
 485
 486       // Bits shuffle
 487       VPSHUFBITQMB,
 488
 489       // Convert Signed/Unsigned Integer to Floating-Point Value with rounding mode.
 490       SINT_TO_FP_RND, UINT_TO_FP_RND,
 491       SCALAR_SINT_TO_FP, SCALAR_UINT_TO_FP,
 492       SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,
 493
 494       // Vector float/double to signed/unsigned integer.
 495       CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
 496       // Scalar float/double to signed/unsigned integer.
 497       CVTS2SI, CVTS2UI, CVTS2SI_RND, CVTS2UI_RND,
 498
 499       // Vector float/double to signed/unsigned integer with truncation.
 500       CVTTP2SI, CVTTP2UI, CVTTP2SI_SAE, CVTTP2UI_SAE,
 501       // Scalar float/double to signed/unsigned integer with truncation.
 502       CVTTS2SI, CVTTS2UI, CVTTS2SI_SAE, CVTTS2UI_SAE,
 503
 504       // Vector signed/unsigned integer to float/double.
 505       CVTSI2P, CVTUI2P,
 506
 507       // Masked versions of above. Used for v2f64->v4f32.
 508       // SRC, PASSTHRU, MASK
 509       MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI,
 510       MCVTSI2P, MCVTUI2P,
 511
 512       // Vector float to bfloat16.
 513       // Convert TWO packed single data to one packed BF16 data
 514       CVTNE2PS2BF16,
 515       // Convert packed single data to packed BF16 data
 516       CVTNEPS2BF16,
 517       // Masked version of above.
 518       // SRC, PASSTHRU, MASK
 519       MCVTNEPS2BF16,
 520
 521       // Dot product of BF16 pairs accumulated into
 522       // packed single precision.
 523       DPBF16PS,
 524
 525       // Save xmm argument registers to the stack, according to %al. An operator
 526       // is needed so that this can be expanded with control flow.
 527       VASTART_SAVE_XMM_REGS,
 528
 529       // Windows's _chkstk call to do stack probing.
 530       WIN_ALLOCA,
 531
 532       // For allocating variable amounts of stack space when using
 533       // segmented stacks. Checks if the current stacklet has enough space, and
 534       // falls back to heap allocation if not.
 535       SEG_ALLOCA,
 536
 537       // Memory barriers.
 538       MEMBARRIER,
 539       MFENCE,
 540
 541       // Store FP status word into i16 register.
 542       FNSTSW16r,
 543
 544       // Store contents of %ah into %eflags.
 545       SAHF,
 546
 547       // Get a random integer and indicate whether it is valid in CF.
 548       RDRAND,
 549
 550       // Get a NIST SP800-90B & C compliant random integer and
 551       // indicate whether it is valid in CF.
 552       RDSEED,
 553
 554       // Protection keys
 555       // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
 556       // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
 557       // value for ECX.
 558       RDPKRU, WRPKRU,
 559
 560       // SSE42 string comparisons.
 561       // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
 562       // will emit one or two instructions based on which results are used. If
 563       // flags and index/mask are used, this allows a single instruction since
 564       // we won't have to pick an opcode for flags. Instead we can rely on the
 565       // DAG to CSE everything and decide at isel.
 566       PCMPISTR,
 567       PCMPESTR,
 568
 569       // Test if in transactional execution.
 570       XTEST,
 571
 572       // ERI instructions.
 573       RSQRT28, RSQRT28_SAE, RSQRT28S, RSQRT28S_SAE,
 574       RCP28, RCP28_SAE, RCP28S, RCP28S_SAE, EXP2, EXP2_SAE,
 575
 576       // Conversions between float and half-float.
 577       CVTPS2PH, CVTPH2PS, CVTPH2PS_SAE,
 578
 579       // Masked version of above.
 580       // SRC, RND, PASSTHRU, MASK
 581       MCVTPS2PH,
 582
 583       // Galois Field Arithmetic Instructions
 584       GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,
 585
 586       // LWP insert record.
 587       LWPINS,
 588
 589       // User level wait
 590       UMWAIT, TPAUSE,
 591
 592       // Enqueue Stores Instructions
 593       ENQCMD, ENQCMDS,
 594
 595       // For avx512-vp2intersect
 596       VP2INTERSECT,
 597
 598       // Compare and swap.
 599       LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
 600       LCMPXCHG8_DAG,
 601       LCMPXCHG16_DAG,
 602       LCMPXCHG8_SAVE_EBX_DAG,
 603       LCMPXCHG16_SAVE_RBX_DAG,
 604
 605       /// LOCK-prefixed arithmetic read-modify-write instructions.
 606       /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
 607       LADD, LSUB, LOR, LXOR, LAND,
 608
 609       // Load, scalar_to_vector, and zero extend.
 610       VZEXT_LOAD,
 611
 612       // extract_vector_elt, store.
 613       VEXTRACT_STORE,
 614
 615       // Store FP control word into i16 memory.
 616       FNSTCW16m,
 617
 618       /// This instruction implements FP_TO_SINT with the
 619       /// integer destination in memory and a FP reg source.  This corresponds
 620       /// to the X86::FIST*m instructions and the rounding mode change stuff. It
 621       /// has two inputs (token chain and address) and two outputs (int value
 622       /// and token chain). Memory VT specifies the type to store to.
 623       FP_TO_INT_IN_MEM,
 624
 625       /// This instruction implements SINT_TO_FP with the
 626       /// integer source in memory and FP reg result.  This corresponds to the
 627       /// X86::FILD*m instructions. It has two inputs (token chain and address)
 628       /// and two outputs (FP value and token chain; FILD_FLAG also produces a
 629       /// flag). The integer source type is specified by the memory VT.
 630       FILD,
 631       FILD_FLAG,
 632
 633       /// This instruction implements a fp->int store from FP stack
 634       /// slots. This corresponds to the fist instruction. It takes a
 635       /// chain operand, value to store, address, and glue. The memory VT
 636       /// specifies the type to store as.
 637       FIST,
 638
 639       /// This instruction implements an extending load to FP stack slots.
 640       /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
 641       /// operand, and ptr to load from. The memory VT specifies the type to
 642       /// load from.
 643       FLD,
 644
 645       /// This instruction implements a truncating store from FP stack
 646       /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
 647       /// chain operand, value to store, address, and glue. The memory VT
 648       /// specifies the type to store as.
 649       FST,
 650
 651       /// This instruction grabs the address of the next argument
 652       /// from a va_list. (reads and modifies the va_list in memory)
 653       VAARG_64,
 654
 655       // Vector truncating store with unsigned/signed saturation
 656       VTRUNCSTOREUS, VTRUNCSTORES,
 657       // Vector truncating masked store with unsigned/signed saturation
 658       VMTRUNCSTOREUS, VMTRUNCSTORES,
 659
 660       // X86 specific gather and scatter
 661       MGATHER, MSCATTER,
 662
 663       // WARNING: Do not add anything at the end unless you want the node to
 664       // have a memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
 665       // opcodes are treated as target memory ops!
 666     };
 667   } // end namespace X86ISD
 668
 669   /// Define some predicates that are used for node matching.
 670   namespace X86 {
 671     /// Returns true if Elt is a constant zero or floating point constant +0.0.
 672     bool isZeroNode(SDValue Elt);
 673
 674     /// Returns true if the given offset
 675     /// fits into the displacement field of the instruction.
 676     bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
 677                                       bool hasSymbolicDisplacement = true);
 678
 679     /// Determines whether the callee is required to pop its
 680     /// own arguments. Callee pop is necessary to support tail calls.
 681     bool isCalleePop(CallingConv::ID CallingConv,
 682                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
 683
 684   } // end namespace X86
 685
 686   //===--------------------------------------------------------------------===//
 687   //  X86 Implementation of the TargetLowering interface
 688   class X86TargetLowering final : public TargetLowering {
 689   public:
 690     explicit X86TargetLowering(const X86TargetMachine &TM,
 691                                const X86Subtarget &STI);
 692
 693     unsigned getJumpTableEncoding() const override;
 694     bool useSoftFloat() const override;
 695
 696     void markLibCallAttributes(MachineFunction *MF, unsigned CC,
 697                                ArgListTy &Args) const override;
 698
 699     MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
 700       return MVT::i8; // The shift amount type is i8 regardless of VT.
 701     }
 702
 703     const MCExpr *
 704     LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
 705                               const MachineBasicBlock *MBB, unsigned uid,
 706                               MCContext &Ctx) const override;
 707
 708     /// Returns relocation base for the given PIC jumptable.
 709     SDValue getPICJumpTableRelocBase(SDValue Table,
 710                                      SelectionDAG &DAG) const override;
 711     const MCExpr *
 712     getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
 713                                  unsigned JTI, MCContext &Ctx) const override;
 714
 715     /// Return the desired alignment for ByVal aggregate
 716     /// function arguments in the caller parameter area. For X86, aggregates
 717     /// that contain SSE vectors (TODO confirm; the noun was missing here) are
 718     /// placed at 16-byte boundaries while the rest are at 4-byte boundaries.
 719     unsigned getByValTypeAlignment(Type *Ty,
 720                                    const DataLayout &DL) const override;
 721
 722     /// Returns the target specific optimal type for load
 723     /// and store operations as a result of memset, memcpy, and memmove
 724     /// lowering. If DstAlign is zero that means the destination
 725     /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
 726     /// means there isn't a need to check it against alignment requirement,
 727     /// probably because the source does not need to be loaded. If 'IsMemset' is
 728     /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
 729     /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
 730     /// source is constant so it does not need to be loaded.
 731     /// It returns EVT::Other if the type should be determined using generic
 732     /// target-independent logic.
 733     EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
 734                             bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
 735                             const AttributeList &FuncAttributes) const override;
 736
 737     /// Returns true if it's safe to use load / store of the
 738     /// specified type to expand memcpy / memset inline. This is mostly true
 739     /// for all types except for some special cases. For example, on X86
 740     /// targets without SSE2 f64 load / store are done with fldl / fstpl which
 741     /// also does type conversion. Note the specified type doesn't have to be
 742     /// legal as the hook is used before type legalization.
 743     bool isSafeMemOpType(MVT VT) const override;
 744
 745     /// Returns true if the target allows unaligned memory accesses of the
 746     /// specified type. Returns whether it is "fast" in the last argument.
 747     bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
 748                                         MachineMemOperand::Flags Flags,
 749                                         bool *Fast) const override;
 750
 751     /// Provide custom lowering hooks for some operations.
 752     ///
 753     SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
 754
 755     /// Places new result values for the node in Results (their number
 756     /// and types must exactly match those of the original return values of
 757     /// the node), or leaves Results empty, which indicates that the node is not
 758     /// to be custom lowered after all.
 759     void LowerOperationWrapper(SDNode *N,
 760                                SmallVectorImpl<SDValue> &Results,
 761                                SelectionDAG &DAG) const override;
 762
 763     /// Replace the results of node with an illegal result
 764     /// type with new values built out of custom code.
 765     ///
 766     void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
 767                             SelectionDAG &DAG) const override;
 768
 769     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 770
 771     // Return true if it is profitable to combine a BUILD_VECTOR with a
 772     // stride-pattern to a shuffle and a truncate.
 773     // Example of such a combine:
 774     // v4i32 build_vector((extract_elt V, 1),
 775     //                    (extract_elt V, 3),
 776     //                    (extract_elt V, 5),
 777     //                    (extract_elt V, 7))
 778     //  -->
 779     // v4i32 truncate (bitcast (shuffle<1,u,3,u,4,u,5,u,6,u,7,u> V, u) to
 780     // v4i64)
 781     bool isDesirableToCombineBuildVectorToShuffleTruncate(
 782         ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const override;
 783
 784     /// Return true if the target has native support for
 785     /// the specified value type and it is 'desirable' to use the type for the
 786     /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
 787     /// instruction encodings are longer and some i16 instructions are slow.
 788     bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
 789
 790     /// Return true if the target has native support for the
 791     /// specified value type and it is 'desirable' to use the type. e.g. On x86
 792     /// i16 is legal, but undesirable since i16 instruction encodings are longer
 793     /// and some i16 instructions are slow.
 794     bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
 795
 796     MachineBasicBlock *
 797     EmitInstrWithCustomInserter(MachineInstr &MI,
 798                                 MachineBasicBlock *MBB) const override;
 799
 800     /// This method returns the name of a target specific DAG node.
 801     const char *getTargetNodeName(unsigned Opcode) const override;
 802
 803     /// Do not merge vector stores after legalization because that may conflict
 804     /// with x86-specific store splitting optimizations.
 805     bool mergeStoresAfterLegalization(EVT MemVT) const override {
 806       return !MemVT.isVector(); // Non-vector stores remain mergeable.
 807     }
 808
 809     bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
 810                           const SelectionDAG &DAG) const override;
 811
 812     bool isCheapToSpeculateCttz() const override;
 813
 814     bool isCheapToSpeculateCtlz() const override;
 815
 816     bool isCtlzFast() const override;
 817
 818     bool hasBitPreservingFPLogic(EVT VT) const override {
 819       return VT == MVT::f32 || VT == MVT::f64 || VT.isVector(); // f32, f64, or any vector VT.
 820     }
 821
 822     bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
 823       // If the pair to store is a mixture of float and int values, we will
 824       // save two bitwise instructions and one float-to-int instruction and
 825       // add one store instruction. There is potentially a more
 826       // significant benefit because it avoids the float->int domain switch
 827       // for the input value. So it is more likely a win.
 828       if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
 829           (LTy.isInteger() && HTy.isFloatingPoint()))
 830         return true;
 831       // If the pair only contains int values, we will save two bitwise
 832       // instructions and add one store instruction (costing one more
 833       // store buffer). Since the benefit is less clear, we leave
 834       // such pairs out until we get a testcase that proves it is a win.
 835       return false;
 836     }
 837
 838     bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
 839
 840     bool hasAndNotCompare(SDValue Y) const override;
 841
 842     bool hasAndNot(SDValue Y) const override;
 843
 844     bool shouldFoldConstantShiftPairToMask(const SDNode *N,
 845                                            CombineLevel Level) const override;
 846
 847     bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
 848
 849     bool
 850     shouldTransformSignedTruncationCheck(EVT XVT,
 851                                          unsigned KeptBits) const override {
 852       // For vectors, we don't have a preference..
 853       if (XVT.isVector())
 854         return false;
 855
 856       auto VTIsOk = [](EVT VT) -> bool {
 857         return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
 858                VT == MVT::i64;
 859       };
 860
 861       // We are ok with KeptBitsVT being byte/word/dword, what MOVS supports.
 862       // XVT will be larger than KeptBitsVT.
 863       MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
 864       return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
 865     }
 866
 867     bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
 868
 869     bool shouldSplatInsEltVarIndex(EVT VT) const override;
 870     /// Returns true exactly when VT is a scalar integer type.
 871     bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
 872       return VT.isScalarInteger();
 873     }
 874
 875     /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
 876     MVT hasFastEqualityCompare(unsigned NumBits) const override;
 877
 878     /// Return the value type to use for ISD::SETCC.
 879     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
 880                            EVT VT) const override;
 881
 882     bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
 883                                       TargetLoweringOpt &TLO) const override;
 884
 885     /// Determine which of the bits of the target node Op are known to be
 886     /// either zero or one and return them in Known.
 887     void computeKnownBitsForTargetNode(const SDValue Op,
 888                                        KnownBits &Known,
 889                                        const APInt &DemandedElts,
 890                                        const SelectionDAG &DAG,
 891                                        unsigned Depth = 0) const override;
 892
 893     /// Determine the number of bits in the operation that are sign bits.
 894     unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
 895                                              const APInt &DemandedElts,
 896                                              const SelectionDAG &DAG,
 897                                              unsigned Depth) const override;
 898
 899     bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
 900                                                  const APInt &DemandedElts,
 901                                                  APInt &KnownUndef,
 902                                                  APInt &KnownZero,
 903                                                  TargetLoweringOpt &TLO,
 904                                                  unsigned Depth) const override;
 905
 906     bool SimplifyDemandedBitsForTargetNode(SDValue Op,
 907                                            const APInt &DemandedBits,
 908                                            const APInt &DemandedElts,
 909                                            KnownBits &Known,
 910                                            TargetLoweringOpt &TLO,
 911                                            unsigned Depth) const override;
 912
 913     const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
 914
 915     SDValue unwrapAddress(SDValue N) const override;
 916
 917     SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
 918
 919     bool ExpandInlineAsm(CallInst *CI) const override;
 920
 921     ConstraintType getConstraintType(StringRef Constraint) const override;
 922
 923     /// Examine constraint string and operand type and determine a weight value.
 924     /// The operand object must already have been set up with the operand type.
 925     ConstraintWeight
 926       getSingleConstraintMatchWeight(AsmOperandInfo &info,
 927                                      const char *constraint) const override;
 928
 929     const char *LowerXConstraint(EVT ConstraintVT) const override;
 930
 931     /// Lower the specified operand into the Ops vector. If it is invalid, don't
 932     /// add anything to Ops. NOTE(review): earlier wording described a hasMemory
 933     /// flag ('m' constraint present); this signature has no such parameter.
 934     void LowerAsmOperandForConstraint(SDValue Op,
 935                                       std::string &Constraint,
 936                                       std::vector<SDValue> &Ops,
 937                                       SelectionDAG &DAG) const override;
 938
 939     unsigned
 940     getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
 941       if (ConstraintCode == "i")
 942         return InlineAsm::Constraint_i;
 943       else if (ConstraintCode == "o")
 944         return InlineAsm::Constraint_o;
 945       else if (ConstraintCode == "v")
 946         return InlineAsm::Constraint_v;
 947       else if (ConstraintCode == "X")
 948         return InlineAsm::Constraint_X;
 949       return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
 950     }
 951
 952     /// Handle Lowering flag assembly outputs.
 953     SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, SDLoc DL,
 954                                         const AsmOperandInfo &Constraint,
 955                                         SelectionDAG &DAG) const override;
 956
 957     /// Given a physical register constraint
 958     /// (e.g. {edx}), return the register number and the register class for the
 959     /// register.  This should only be used for C_Register constraints.  On
 960     /// error, this returns a register number of 0.
 961     std::pair<unsigned, const TargetRegisterClass *>
 962     getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
 963                                  StringRef Constraint, MVT VT) const override;
 964
 965     /// Return true if the addressing mode represented
 966     /// by AM is legal for this target, for a load/store of the specified type.
 967     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
 968                                Type *Ty, unsigned AS,
 969                                Instruction *I = nullptr) const override;
 970
 971     /// Return true if the specified immediate is legal
 972     /// icmp immediate, that is the target has icmp instructions which can
 973     /// compare a register against the immediate without having to materialize
 974     /// the immediate into a register.
 975     bool isLegalICmpImmediate(int64_t Imm) const override;
 976
 977     /// Return true if the specified immediate is legal
 978     /// add immediate, that is the target has add instructions which can
 979     /// add a register and the immediate without having to materialize
 980     /// the immediate into a register.
 981     bool isLegalAddImmediate(int64_t Imm) const override;
 982
 983     bool isLegalStoreImmediate(int64_t Imm) const override;
 984
 985     /// Return the cost of the scaling factor used in the addressing
 986     /// mode represented by AM for this target, for a load/store
 987     /// of the specified type.
 988     /// If the AM is supported, the return value must be >= 0.
 989     /// If the AM is not supported, it returns a negative value.
 990     int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
 991                              unsigned AS) const override;
 992
 993     bool isVectorShiftByScalarCheap(Type *Ty) const override;
 994
 995     /// Add x86-specific opcodes to the default list.
 996     bool isBinOp(unsigned Opcode) const override;
 997
 998     /// Returns true if the opcode is a commutative binary operation.
 999     bool isCommutativeBinOp(unsigned Opcode) const override;
1000
1001     /// Return true if it's free to truncate a value of
1002     /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
1003     /// register EAX to i16 by referencing its sub-register AX.
1004     bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
1005     bool isTruncateFree(EVT VT1, EVT VT2) const override;
1006
1007     bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
1008
1009     /// Return true if any actual instruction that defines a
1010     /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result
1011     /// register. This does not necessarily include registers defined in
1012     /// unknown ways, such as incoming arguments, or copies from unknown
1013     /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
1014     /// does not necessarily apply to truncate instructions. e.g. on x86-64,
1015     /// all instructions that define 32-bit values implicitly zero-extend the
1016     /// result out to 64 bits.
1017     bool isZExtFree(Type *Ty1, Type *Ty2) const override;
1018     bool isZExtFree(EVT VT1, EVT VT2) const override;
1019     bool isZExtFree(SDValue Val, EVT VT2) const override;
1020
1021     /// Return true if folding a vector load into ExtVal (a sign, zero, or any
1022     /// extend node) is profitable.
1023     bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
1024
1025     /// Return true if an FMA operation is faster than a pair of fmul and fadd
1026     /// instructions. fmuladd intrinsics will be expanded to FMAs when this
1027     /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
1028     bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
1029
1030     /// Return true if it's profitable to narrow
1031     /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
1032     /// from i32 to i8 but not from i32 to i16.
1033     bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
1034
1035     /// Given an intrinsic, checks if on the target the intrinsic will need to map
1036     /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1037     /// true and stores the intrinsic information into the IntrinsicInfo that was
1038     /// passed to the function.
1039     bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
1040                             MachineFunction &MF,
1041                             unsigned Intrinsic) const override;
1042
1043     /// Returns true if the target can instruction select the
1044     /// specified FP immediate natively. If false, the legalizer will
1045     /// materialize the FP immediate as a load from a constant pool.
1046     bool isFPImmLegal(const APFloat &Imm, EVT VT,
1047                       bool ForCodeSize) const override;
1048
1049     /// Targets can use this to indicate that they only support *some*
1050     /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1051     /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1052     /// be legal.
1053     bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1054
1055     /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1056     /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1057     /// constant pool entry.
1058     bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1059
1060     /// Returns true if lowering to a jump table is allowed.
1061     bool areJTsAllowed(const Function *Fn) const override;
1062
1063     /// Tell instruction selection whether it should seek to shrink an FP
1064     /// constant of type VT to a smaller type to save space and/or runtime.
1065     bool ShouldShrinkFPConstant(EVT VT) const override {
1066       // Long double constants are always worth shrinking: fldt is very slow.
1067       if (VT == MVT::f80)
1068         return true;
1069       // With SSE2, cvtss2sd costs more than a straight movsd: don't shrink.
1070       return !X86ScalarSSEf64;
1071     }
1072
1073     /// Return true if we believe it is correct and profitable to reduce the
1074     /// load node to a smaller type.
1075     bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1076                                EVT NewVT) const override;
1077
1078     /// Return true if the scalar FP type VT is computed in an SSE register
1079     /// rather than on the X87 stack: f32 needs SSE1, f64 needs SSE2.
1080     bool isScalarFPTypeInSSEReg(EVT VT) const {
1081       return VT == MVT::f32 ? X86ScalarSSEf32
1082                             : (VT == MVT::f64 && X86ScalarSSEf64);
1083     }
1084
1085     /// Returns true if it is beneficial to convert a load of a constant
1086     /// to just the constant itself.
1087     bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1088                                            Type *Ty) const override;
1089
1090     bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const override;
1091
1092     bool convertSelectOfConstantsToMath(EVT VT) const override;
1093
1094     bool decomposeMulByConstant(EVT VT, SDValue C) const override;
1095
1096     bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
1097                                   bool IsSigned) const override;
1098
1099     /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1100     /// with this index.
1101     bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1102                                  unsigned Index) const override;
1103
1104     /// Scalar ops always have equal or better analysis/performance/power than
1105     /// the vector equivalent, so this always makes sense if the scalar op is
1106     /// supported.
1107     bool shouldScalarizeBinop(SDValue) const override;
1108
1109     /// Only extracting an f32 or f64 element from index 0 of a vector is free.
1110     bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1111       const EVT ScalarVT = VT.getScalarType();
1112       return Index == 0 && (ScalarVT == MVT::f32 || ScalarVT == MVT::f64);
1113     }
1114
1115     /// Overflow nodes should get combined/lowered to optimal instructions
1116     /// (they should allow eliminating explicit compares by getting flags from
1117     /// math ops).
1118     bool shouldFormOverflowOp(unsigned Opcode, EVT VT) const override;
1119
1120     bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
1121                                       unsigned AddrSpace) const override {
1122       // Replacing three or more scalar stores still reduces the instruction
1123       // count once the vector constant load is added.
1124       return NumElem >= 3;
1125     }
1126
1127     bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1128                                  const SelectionDAG &DAG,
1129                                  const MachineMemOperand &MMO) const override;
1130
1131     /// Intel processors have a unified instruction and data cache.
1132     const char * getClearCacheBuiltinName() const override {
1133       return nullptr; // No builtin name is reported: nothing to clear.
1134     }
1135
1136     unsigned getRegisterByName(const char* RegName, EVT VT,
1137                                SelectionDAG &DAG) const override;
1138
1139     /// If a physical register, this returns the register that receives the
1140     /// exception address on entry to an EH pad.
1141     unsigned
1142     getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1143
1144     /// If a physical register, this returns the register that receives the
1145     /// exception typeid on entry to a landing pad.
1146     unsigned
1147     getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1148
1149     bool needsFixedCatchObjects() const override;
1150
1151     /// This method returns a target specific FastISel object,
1152     /// or null if the target does not support "fast" ISel.
1153     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1154                              const TargetLibraryInfo *libInfo) const override;
1155
1156     /// If the target has a standard location for the stack protector cookie,
1157     /// returns the address of that location. Otherwise, returns nullptr.
1158     Value *getIRStackGuard(IRBuilder<> &IRB) const override;
1159
1160     bool useLoadStackGuardNode() const override;
1161     bool useStackGuardXorFP() const override;
1162     void insertSSPDeclarations(Module &M) const override;
1163     Value *getSDagStackGuard(const Module &M) const override;
1164     Function *getSSPStackGuardCheck(const Module &M) const override;
1165     SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1166                                 const SDLoc &DL) const override;
1167
1168
1169     /// NOTE(review): returns a Value *, not a bool. Presumably this is the
1170     /// SafeStack pointer location when the target keeps it at a fixed offset
1171     /// in some non-standard address space -- confirm the fallback behaviour.
1172     Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
1173
1174     SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
1175                       SelectionDAG &DAG) const;
1176
1177     bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
1178
1179     /// Customize the preferred legalization strategy for certain types.
1180     LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1181
1182     MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1183                                       EVT VT) const override;
1184
1185     unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1186                                            CallingConv::ID CC,
1187                                            EVT VT) const override;
1188
1189     bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1190
1191     bool supportSwiftError() const override;
1192
1193     StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
1194     /// Unconditionally reports vector blend support.
1195     bool hasVectorBlend() const override { return true; }
1196     /// The largest interleave factor reported as supported is 4.
1197     unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1198
1199     /// Lower interleaved load(s) into target specific
1200     /// instructions/intrinsics.
1201     bool lowerInterleavedLoad(LoadInst *LI,
1202                               ArrayRef<ShuffleVectorInst *> Shuffles,
1203                               ArrayRef<unsigned> Indices,
1204                               unsigned Factor) const override;
1205
1206     /// Lower interleaved store(s) into target specific
1207     /// instructions/intrinsics.
1208     bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1209                                unsigned Factor) const override;
1210
1211     SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
1212                                    SDValue Addr, SelectionDAG &DAG)
1213                                    const override;
1214
1215   protected:
1216     std::pair<const TargetRegisterClass *, uint8_t>
1217     findRepresentativeClass(const TargetRegisterInfo *TRI,
1218                             MVT VT) const override;
1219
1220   private:
1221     /// Keep a reference to the X86Subtarget around so that we can
1222     /// make the right decision when generating code for different targets.
1223     const X86Subtarget &Subtarget;
1224
1225     /// Select between SSE or x87 floating point ops.
1226     /// When SSE is available, use it for f32 operations.
1227     /// When SSE2 is available, use it for f64 operations.
1228     bool X86ScalarSSEf32;
1229     bool X86ScalarSSEf64;
1230
1231     /// A list of legal FP immediates.
1232     std::vector<APFloat> LegalFPImmediates;
1233
1234     /// Record Imm in LegalFPImmediates, marking it as an FP immediate that
1235     /// this x86 target can instruction-select natively.
1236     void addLegalFPImmediate(const APFloat &Imm) {
1237       LegalFPImmediates.emplace_back(Imm);
1238     }
1239
1240     SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
1241                             CallingConv::ID CallConv, bool isVarArg,
1242                             const SmallVectorImpl<ISD::InputArg> &Ins,
1243                             const SDLoc &dl, SelectionDAG &DAG,
1244                             SmallVectorImpl<SDValue> &InVals,
1245                             uint32_t *RegMask) const;
1246     SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1247                              const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1248                              const SDLoc &dl, SelectionDAG &DAG,
1249                              const CCValAssign &VA, MachineFrameInfo &MFI,
1250                              unsigned i) const;
1251     SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1252                              const SDLoc &dl, SelectionDAG &DAG,
1253                              const CCValAssign &VA,
1254                              ISD::ArgFlagsTy Flags) const;
1255
1256     // Call lowering helpers.
1257
1258     /// Check whether the call is eligible for tail call optimization. Targets
1259     /// that want to do tail call optimization should implement this function.
1260     bool IsEligibleForTailCallOptimization(SDValue Callee,
1261                                            CallingConv::ID CalleeCC,
1262                                            bool isVarArg,
1263                                            bool isCalleeStructRet,
1264                                            bool isCallerStructRet,
1265                                            Type *RetTy,
1266                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
1267                                     const SmallVectorImpl<SDValue> &OutVals,
1268                                     const SmallVectorImpl<ISD::InputArg> &Ins,
1269                                            SelectionDAG& DAG) const;
1270     SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1271                                     SDValue Chain, bool IsTailCall,
1272                                     bool Is64Bit, int FPDiff,
1273                                     const SDLoc &dl) const;
1274
1275     unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1276                                          SelectionDAG &DAG) const;
1277
1278     unsigned getAddressSpace() const;
1279
1280     SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool isSigned) const;
1281
1282     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1283     SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1284     SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1285     SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1286
1287     unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
1288                                   const unsigned char OpFlags = 0) const;
1289     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1290     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1291     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1292     SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1293     SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1294
1295     /// Creates target global address or external symbol nodes for calls or
1296     /// other uses.
1297     SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1298                                   bool ForCall) const;
1299
1300     SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1301     SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1302     SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1303     SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1304     SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1305     SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1306     SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1307     SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
1308     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1309     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1310     SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1311     SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1312     SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1313     SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1314     SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1315     SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1316     SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
1317     SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1318     SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1319     SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
1320     SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1321     SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
1322     SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
1323     SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
1324     SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
1325     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1326
1327     SDValue
1328     LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1329                          const SmallVectorImpl<ISD::InputArg> &Ins,
1330                          const SDLoc &dl, SelectionDAG &DAG,
1331                          SmallVectorImpl<SDValue> &InVals) const override;
1332     SDValue LowerCall(CallLoweringInfo &CLI,
1333                       SmallVectorImpl<SDValue> &InVals) const override;
1334
1335     SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1336                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1337                         const SmallVectorImpl<SDValue> &OutVals,
1338                         const SDLoc &dl, SelectionDAG &DAG) const override;
1339     bool supportSplitCSR(MachineFunction *MF) const override {
1340       const Function &Fn = MF->getFunction();
1341       return Fn.getCallingConv() == CallingConv::CXX_FAST_TLS &&
1342              Fn.hasFnAttribute(Attribute::NoUnwind);
1343     }
1344     void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1345     void insertCopiesSplitCSR(
1346       MachineBasicBlock *Entry,
1347       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1348
1349     bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1350
1351     bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1352
1353     EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1354                             ISD::NodeType ExtendKind) const override;
1355
1356     bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1357                         bool isVarArg,
1358                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1359                         LLVMContext &Context) const override;
1360
1361     const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1362
1363     TargetLoweringBase::AtomicExpansionKind
1364     shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
1365     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1366     TargetLoweringBase::AtomicExpansionKind
1367     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1368
1369     LoadInst *
1370     lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1371
1372     bool needsCmpXchgNb(Type *MemType) const;
1373
1374     void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1375                                 MachineBasicBlock *DispatchBB, int FI) const;
1376
1377     // Utility function to emit the low-level va_arg code for X86-64.
1378     MachineBasicBlock *
1379     EmitVAARG64WithCustomInserter(MachineInstr &MI,
1380                                   MachineBasicBlock *MBB) const;
1381
1382     /// Utility function to emit the xmm reg save portion of va_start.
1383     MachineBasicBlock *
1384     EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
1385                                              MachineBasicBlock *BB) const;
1386
1387     MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
1388                                                  MachineInstr &MI2,
1389                                                  MachineBasicBlock *BB) const;
1390
1391     MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1392                                          MachineBasicBlock *BB) const;
1393
1394     MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I,
1395                                            MachineBasicBlock *BB) const;
1396
1397     MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1398                                            MachineBasicBlock *BB) const;
1399
1400     MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
1401                                            MachineBasicBlock *BB) const;
1402
1403     MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1404                                             MachineBasicBlock *BB) const;
1405
1406     MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
1407                                           MachineBasicBlock *BB) const;
1408
1409     MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1410                                           MachineBasicBlock *BB) const;
1411
1412     MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
1413                                             MachineBasicBlock *BB) const;
1414
1415     MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1416                                         MachineBasicBlock *MBB) const;
1417
1418     void emitSetJmpShadowStackFix(MachineInstr &MI,
1419                                   MachineBasicBlock *MBB) const;
1420
1421     MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1422                                          MachineBasicBlock *MBB) const;
1423
1424     MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
1425                                                  MachineBasicBlock *MBB) const;
1426
1427     MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
1428                                      MachineBasicBlock *MBB) const;
1429
1430     MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
1431                                              MachineBasicBlock *MBB) const;
1432
1433     /// Emit nodes that will be selected as "cmp Op0,Op1", or something
1434     /// equivalent, for use with the given x86 condition code.
1435     SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl,
1436                     SelectionDAG &DAG) const;
1437
    /// Convert a comparison if required by the subtarget.
    ///
    /// \param Cmp The comparison value to (possibly) convert.
    /// \param DAG The SelectionDAG passed in by the caller.
    /// \returns An SDValue. NOTE(review): presumably \p Cmp itself when no
    /// conversion is needed -- confirm against the definition.
    SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
1440
    /// Emit flags for the given setcc condition and operands. Also returns the
    /// corresponding X86 condition code constant in X86CC.
    ///
    /// \param Op0,Op1 The setcc operands.
    /// \param CC      The target-independent condition code of the setcc.
    /// \param dl      Source location passed in for the emitted nodes.
    /// \param DAG     The SelectionDAG passed in by the caller.
    /// \param X86CC   Output parameter (non-const reference) receiving the
    ///                X86 condition code constant.
    /// \returns An SDValue for the emitted flags.
    SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1,
                              ISD::CondCode CC, const SDLoc &dl,
                              SelectionDAG &DAG,
                              SDValue &X86CC) const;
1447
    /// Check if replacement of SQRT with RSQRT should be disabled.
    ///
    /// Overrides a virtual hook of the base class.
    /// \param Operand The value whose square root is being considered.
    /// \param DAG     The SelectionDAG passed in by the caller.
    /// \returns A bool. NOTE(review): presumably true means "keep the plain
    /// sqrt" -- confirm against the base-class documentation.
    bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;
1450
    /// Use rsqrt* to speed up sqrt calculations.
    ///
    /// Overrides a virtual hook of the base class. \p RefinementSteps and
    /// \p UseOneConstNR are taken by non-const reference, so the
    /// implementation is able to write to them.
    /// NOTE(review): the exact meaning of \p Enabled and \p Reciprocal is not
    /// visible from this declaration -- see the base-class documentation.
    SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                            int &RefinementSteps, bool &UseOneConstNR,
                            bool Reciprocal) const override;
1455
    /// Use rcp* to speed up fdiv calculations.
    ///
    /// Overrides a virtual hook of the base class. \p RefinementSteps is taken
    /// by non-const reference, so the implementation is able to write to it.
    /// NOTE(review): the exact meaning of \p Enabled is not visible from this
    /// declaration -- see the base-class documentation.
    SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                             int &RefinementSteps) const override;
1459
    /// Reassociate floating point divisions into multiply by reciprocal.
    ///
    /// Overrides a virtual hook of the base class.
    /// NOTE(review): the unsigned return value is presumably a threshold on
    /// how many divisions must share a divisor before the transform is
    /// applied -- confirm against the base-class documentation.
    unsigned combineRepeatedFPDivisors() const override;
1462   };
1463
  namespace X86 {
    /// Declaration of the X86 FastISel factory function. Takes the function
    /// lowering state \p funcInfo and the target library info \p libInfo and
    /// returns a FastISel pointer.
    /// NOTE(review): ownership of the returned object is not visible from
    /// this declaration -- confirm at the definition / call sites.
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);
  } // end namespace X86
1468
1469   // Base class for all X86 non-masked store operations.
1470   class X86StoreSDNode : public MemSDNode {
1471   public:
1472     X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
1473                    SDVTList VTs, EVT MemVT,
1474                    MachineMemOperand *MMO)
1475       :MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1476     const SDValue &getValue() const { return getOperand(1); }
1477     const SDValue &getBasePtr() const { return getOperand(2); }
1478
1479     static bool classof(const SDNode *N) {
1480       return N->getOpcode() == X86ISD::VTRUNCSTORES ||
1481         N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1482     }
1483   };
1484
1485   // Base class for all X86 masked store operations.
1486   // The class has the same order of operands as MaskedStoreSDNode for
1487   // convenience.
1488   class X86MaskedStoreSDNode : public MemSDNode {
1489   public:
1490     X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
1491                          const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1492                          MachineMemOperand *MMO)
1493       : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1494
1495     const SDValue &getValue()   const { return getOperand(1); }
1496     const SDValue &getBasePtr() const { return getOperand(2); }
1497     const SDValue &getMask()    const { return getOperand(3); }
1498
1499     static bool classof(const SDNode *N) {
1500       return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
1501         N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1502     }
1503   };
1504
1505   // X86 Truncating Store with Signed saturation.
1506   class TruncSStoreSDNode : public X86StoreSDNode {
1507   public:
1508     TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
1509                         SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1510       : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1511
1512     static bool classof(const SDNode *N) {
1513       return N->getOpcode() == X86ISD::VTRUNCSTORES;
1514     }
1515   };
1516
1517   // X86 Truncating Store with Unsigned saturation.
1518   class TruncUSStoreSDNode : public X86StoreSDNode {
1519   public:
1520     TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
1521                       SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1522       : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1523
1524     static bool classof(const SDNode *N) {
1525       return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1526     }
1527   };
1528
1529   // X86 Truncating Masked Store with Signed saturation.
1530   class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
1531   public:
1532     MaskedTruncSStoreSDNode(unsigned Order,
1533                          const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1534                          MachineMemOperand *MMO)
1535       : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1536
1537     static bool classof(const SDNode *N) {
1538       return N->getOpcode() == X86ISD::VMTRUNCSTORES;
1539     }
1540   };
1541
1542   // X86 Truncating Masked Store with Unsigned saturation.
1543   class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
1544   public:
1545     MaskedTruncUSStoreSDNode(unsigned Order,
1546                             const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1547                             MachineMemOperand *MMO)
1548       : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1549
1550     static bool classof(const SDNode *N) {
1551       return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1552     }
1553   };
1554
1555   // X86 specific Gather/Scatter nodes.
1556   // The class has the same order of operands as MaskedGatherScatterSDNode for
1557   // convenience.
1558   class X86MaskedGatherScatterSDNode : public MemSDNode {
1559   public:
1560     X86MaskedGatherScatterSDNode(unsigned Opc, unsigned Order,
1561                                  const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1562                                  MachineMemOperand *MMO)
1563         : MemSDNode(Opc, Order, dl, VTs, MemVT, MMO) {}
1564
1565     const SDValue &getBasePtr() const { return getOperand(3); }
1566     const SDValue &getIndex()   const { return getOperand(4); }
1567     const SDValue &getMask()    const { return getOperand(2); }
1568     const SDValue &getScale()   const { return getOperand(5); }
1569
1570     static bool classof(const SDNode *N) {
1571       return N->getOpcode() == X86ISD::MGATHER ||
1572              N->getOpcode() == X86ISD::MSCATTER;
1573     }
1574   };
1575
1576   class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1577   public:
1578     X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1579                           EVT MemVT, MachineMemOperand *MMO)
1580         : X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT,
1581                                        MMO) {}
1582
1583     const SDValue &getPassThru() const { return getOperand(1); }
1584
1585     static bool classof(const SDNode *N) {
1586       return N->getOpcode() == X86ISD::MGATHER;
1587     }
1588   };
1589
1590   class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1591   public:
1592     X86MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1593                            EVT MemVT, MachineMemOperand *MMO)
1594         : X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs, MemVT,
1595                                        MMO) {}
1596
1597     const SDValue &getValue() const { return getOperand(1); }
1598
1599     static bool classof(const SDNode *N) {
1600       return N->getOpcode() == X86ISD::MSCATTER;
1601     }
1602   };
1603
1604   /// Generate unpacklo/unpackhi shuffle mask.
1605   template <typename T = int>
1606   void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
1607                                bool Unary) {
1608     assert(Mask.empty() && "Expected an empty shuffle mask vector");
1609     int NumElts = VT.getVectorNumElements();
1610     int NumEltsInLane = 128 / VT.getScalarSizeInBits();
1611     for (int i = 0; i < NumElts; ++i) {
1612       unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
1613       int Pos = (i % NumEltsInLane) / 2 + LaneStart;
1614       Pos += (Unary ? 0 : NumElts * (i % 2));
1615       Pos += (Lo ? 0 : NumEltsInLane / 2);
1616       Mask.push_back(Pos);
1617     }
1618   }
1619
1620   /// Helper function to scale a shuffle or target shuffle mask, replacing each
1621   /// mask index with the scaled sequential indices for an equivalent narrowed
1622   /// mask. This is the reverse process to canWidenShuffleElements, but can
1623   /// always succeed.
1624   template <typename T>
1625   void scaleShuffleMask(int Scale, ArrayRef<T> Mask,
1626                         SmallVectorImpl<T> &ScaledMask) {
1627     assert(0 < Scale && "Unexpected scaling factor");
1628     size_t NumElts = Mask.size();
1629     ScaledMask.assign(NumElts * Scale, -1);
1630
1631     for (int i = 0; i != (int)NumElts; ++i) {
1632       int M = Mask[i];
1633
1634       // Repeat sentinel values in every mask element.
1635       if (M < 0) {
1636         for (int s = 0; s != Scale; ++s)
1637           ScaledMask[(Scale * i) + s] = M;
1638         continue;
1639       }
1640
1641       // Scale mask element and increment across each mask element.
1642       for (int s = 0; s != Scale; ++s)
1643         ScaledMask[(Scale * i) + s] = (Scale * M) + s;
1644     }
1645   }
1646 } // end namespace llvm
1647
1648 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H