//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISC-V uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#include "RISCVISelLowering.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-lower"

STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<unsigned> ExtensionMaxWebSize(
    DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
    cl::desc("Give the maximum size (in number of nodes) of the web of "
             "instructions that we will consider for VW expansion"),
    cl::init(18));

static cl::opt<bool>
    AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
                     cl::desc("Allow the formation of VW_W operations (e.g., "
                              "VWADD_W) with splat constants"),
                     cl::init(false));

static cl::opt<unsigned> NumRepeatedDivisors(
    DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
    cl::desc("Set the minimum number of repetitions of a divisor to allow "
             "transformation to multiplications by the reciprocal"),
    cl::init(2));

static cl::opt<int>
    FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
              cl::desc("Give the maximum number of instructions that we will "
                       "use for creating a floating-point immediate value"),
              cl::init(2));

RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  if (Subtarget.isRVE())
    report_fatal_error("Codegen not yet implemented for RVE");

  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");

  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
              "doesn't support the F instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
             !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
              "doesn't support the D instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  }

  switch (ABI) {
  default:
    report_fatal_error("Don't know how to lower this ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64:
  case RISCVABI::ABI_LP64F:
  case RISCVABI::ABI_LP64D:
    break;
  }

  MVT XLenVT = Subtarget.getXLenVT();

  // Set up the register classes.
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtZfhOrZfhmin())
    addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtZfbfmin())
    addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
  if (Subtarget.hasStdExtZhinxOrZhinxmin())
    addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
  if (Subtarget.hasStdExtZfinx())
    addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
  if (Subtarget.hasStdExtZdinx()) {
    if (Subtarget.is64Bit())
      addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
    else
      addRegisterClass(MVT::f64, &RISCV::GPRPF64RegClass);
  }
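  // Note: with Zdinx on RV32, an f64 value occupies an even/odd GPR pair,
  // which is what the GPRPF64 register class above models.
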
  static const MVT::SimpleValueType BoolVecVTs[] = {
      MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
      MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
  static const MVT::SimpleValueType IntVecVTs[] = {
      MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
      MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
      MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
      MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
      MVT::nxv4i64, MVT::nxv8i64};
  static const MVT::SimpleValueType F16VecVTs[] = {
      MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
      MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
  static const MVT::SimpleValueType F32VecVTs[] = {
      MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
  static const MVT::SimpleValueType F64VecVTs[] = {
      MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};

  if (Subtarget.hasVInstructions()) {
    auto addRegClassForRVV = [this](MVT VT) {
      // Disable the smallest fractional LMUL types if ELEN is less than
      // RVVBitsPerBlock.
      unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELEN();
      if (VT.getVectorMinNumElements() < MinElts)
        return;

      unsigned Size = VT.getSizeInBits().getKnownMinValue();
      const TargetRegisterClass *RC;
      if (Size <= RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRRegClass;
      else if (Size == 2 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM2RegClass;
      else if (Size == 4 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM4RegClass;
      else if (Size == 8 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM8RegClass;
      else
        llvm_unreachable("Unexpected size");

      addRegisterClass(VT, RC);
    };
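    // For example, nxv4i32 has a known minimum size of 128 bits, i.e. two
    // 64-bit vector blocks, so the lambda above assigns it the LMUL=2 class
    // VRM2.
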
    for (MVT VT : BoolVecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : IntVecVTs) {
      if (VT.getVectorElementType() == MVT::i64 &&
          !Subtarget.hasVInstructionsI64())
        continue;
      addRegClassForRVV(VT);
    }

    if (Subtarget.hasVInstructionsF16())
      for (MVT VT : F16VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasVInstructionsF32())
      for (MVT VT : F32VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasVInstructionsF64())
      for (MVT VT : F64VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      auto addRegClassForFixedVectors = [this](MVT VT) {
        MVT ContainerVT = getContainerForFixedLengthVector(VT);
        unsigned RCID = getRegClassIDForVecVT(ContainerVT);
        const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
        addRegisterClass(VT, TRI.getRegClass(RCID));
      };
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);
    }
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(RISCV::X2);

  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT,
                   MVT::i1, Promote);
  // DAGCombiner can call isLoadExtLegal for types that aren't legal.
  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32,
                   MVT::i1, Promote);

  // TODO: add all necessary setOperationAction calls.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);

  setCondCodeAction(ISD::SETLE, XLenVT, Expand);
  setCondCodeAction(ISD::SETGT, XLenVT, Custom);
  setCondCodeAction(ISD::SETGE, XLenVT, Expand);
  setCondCodeAction(ISD::SETULE, XLenVT, Expand);
  setCondCodeAction(ISD::SETUGT, XLenVT, Custom);
  setCondCodeAction(ISD::SETUGE, XLenVT, Expand);

  setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);

  if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
    setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
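  // Without those extensions, an i8/i16 SIGN_EXTEND_INREG is expanded to a
  // shift-left/shift-right-arithmetic pair.
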
  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

    setOperationAction(ISD::LOAD, MVT::i32, Custom);

    setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
                       MVT::i32, Custom);

    setOperationAction(ISD::SADDO, MVT::i32, Custom);
    setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},
                       MVT::i32, Custom);
  } else {
    setLibcallName(
        {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
        nullptr);
    setLibcallName(RTLIB::MULO_I64, nullptr);
  }

  if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul())
    setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand);
  else if (Subtarget.is64Bit())
    setOperationAction(ISD::MUL, {MVT::i32, MVT::i128}, Custom);
  else
    setOperationAction(ISD::MUL, MVT::i64, Custom);

  if (!Subtarget.hasStdExtM())
    setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM},
                       XLenVT, Expand);
  else if (Subtarget.is64Bit())
    setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
                       {MVT::i8, MVT::i16, MVT::i32}, Custom);

  setOperationAction(
      {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT,
      Expand);

  setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, XLenVT,
                     Custom);

  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
    if (Subtarget.is64Bit())
      setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
  } else if (Subtarget.hasVendorXTHeadBb()) {
    if (Subtarget.is64Bit())
      setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
    setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Custom);
  } else {
    setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand);
  }

  // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
  // pattern match it directly in isel.
  setOperationAction(ISD::BSWAP, XLenVT,
                     (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
                      Subtarget.hasVendorXTHeadBb())
                         ? Legal
                         : Expand);
  // Zbkb can use rev8+brev8 to implement bitreverse.
  setOperationAction(ISD::BITREVERSE, XLenVT,
                     Subtarget.hasStdExtZbkb() ? Custom : Expand);

  if (Subtarget.hasStdExtZbb()) {
    setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT,
                       Legal);

    if (Subtarget.is64Bit())
      setOperationAction(
          {ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF},
          MVT::i32, Custom);
  } else {
    setOperationAction({ISD::CTTZ, ISD::CTLZ, ISD::CTPOP}, XLenVT, Expand);
  }

  if (Subtarget.hasVendorXTHeadBb()) {
    setOperationAction(ISD::CTLZ, XLenVT, Legal);

    // We need the custom lowering to make sure that the resulting sequence
    // for the 32bit case is efficient on 64bit targets.
    if (Subtarget.is64Bit())
      setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
  }

  if (Subtarget.is64Bit())
    setOperationAction(ISD::ABS, MVT::i32, Custom);

  if (!Subtarget.hasVendorXTHeadCondMov())
    setOperationAction(ISD::SELECT, XLenVT, Custom);

  static const unsigned FPLegalNodeTypes[] = {
      ISD::FMINNUM,        ISD::FMAXNUM,       ISD::LRINT,
      ISD::LLRINT,         ISD::LROUND,        ISD::LLROUND,
      ISD::STRICT_LRINT,   ISD::STRICT_LLRINT, ISD::STRICT_LROUND,
      ISD::STRICT_LLROUND, ISD::STRICT_FMA,    ISD::STRICT_FADD,
      ISD::STRICT_FSUB,    ISD::STRICT_FMUL,   ISD::STRICT_FDIV,
      ISD::STRICT_FSQRT,   ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS};

  static const ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
      ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
      ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};

  static const unsigned FPOpToExpand[] = {
      ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,
      ISD::FREM};

  static const unsigned FPRndMode[] = {
      ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
      ISD::FROUNDEVEN};

  if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);

  if (Subtarget.hasStdExtZfbfmin()) {
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);
    setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
    setOperationAction(ISD::FP_ROUND, MVT::bf16, Custom);
    setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
    setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
    setOperationAction(ISD::ConstantFP, MVT::bf16, Expand);
  }

  if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) {
    if (Subtarget.hasStdExtZfhOrZhinx()) {
      setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
      setOperationAction(FPRndMode, MVT::f16,
                         Subtarget.hasStdExtZfa() ? Legal : Custom);
      setOperationAction(ISD::SELECT, MVT::f16, Custom);
      setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom);
    } else {
      static const unsigned ZfhminPromoteOps[] = {
          ISD::FMINNUM,      ISD::FMAXNUM,       ISD::FADD,
          ISD::FSUB,         ISD::FMUL,          ISD::FMA,
          ISD::FDIV,         ISD::FSQRT,         ISD::FABS,
          ISD::FNEG,         ISD::STRICT_FMA,    ISD::STRICT_FADD,
          ISD::STRICT_FSUB,  ISD::STRICT_FMUL,   ISD::STRICT_FDIV,
          ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
          ISD::SETCC,        ISD::FCEIL,         ISD::FFLOOR,
          ISD::FTRUNC,       ISD::FRINT,         ISD::FROUND,
          ISD::FROUNDEVEN,   ISD::SELECT};

      setOperationAction(ZfhminPromoteOps, MVT::f16, Promote);
      setOperationAction({ISD::STRICT_LRINT, ISD::STRICT_LLRINT,
                          ISD::STRICT_LROUND, ISD::STRICT_LLROUND},
                         MVT::f16, Legal);
      // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
      // DAGCombiner::visitFP_ROUND probably needs improvements first.
      setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
    }

    setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
    setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
    setOperationAction(ISD::BR_CC, MVT::f16, Expand);

    setOperationAction(ISD::FNEARBYINT, MVT::f16,
                       Subtarget.hasStdExtZfa() ? Legal : Promote);
    setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
                        ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
                        ISD::FEXP2, ISD::FLOG, ISD::FLOG2, ISD::FLOG10},
                       MVT::f16, Promote);

    // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
    // complete support for all operations in LegalizeDAG.
    setOperationAction({ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR,
                        ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT,
                        ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN,
                        ISD::STRICT_FTRUNC},
                       MVT::f16, Promote);

    // We need to custom promote this.
    if (Subtarget.is64Bit())
      setOperationAction(ISD::FPOWI, MVT::i32, Custom);

    if (!Subtarget.hasStdExtZfa())
      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Custom);
  }
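  // Note: FMINIMUM/FMAXIMUM (the IEEE 754-2019 NaN-propagating min/max) have
  // no single-instruction form without Zfa's fminm/fmaxm, hence the custom
  // lowering above via fmin/fmax with explicit NaN handling.
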
  if (Subtarget.hasStdExtFOrZfinx()) {
    setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
    setOperationAction(FPRndMode, MVT::f32,
                       Subtarget.hasStdExtZfa() ? Legal : Custom);
    setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Custom);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    setOperationAction(FPOpToExpand, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
    setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);
    setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
    setOperationAction(ISD::FP_TO_BF16, MVT::f32,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);
    setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
    setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);

    if (Subtarget.hasStdExtZfa())
      setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
    else
      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);
  }

  if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);

  if (Subtarget.hasStdExtDOrZdinx()) {
    setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);

    if (Subtarget.hasStdExtZfa()) {
      setOperationAction(FPRndMode, MVT::f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
      setOperationAction(ISD::BITCAST, MVT::i64, Custom);
      setOperationAction(ISD::BITCAST, MVT::f64, Custom);
    } else {
      if (Subtarget.is64Bit())
        setOperationAction(FPRndMode, MVT::f64, Custom);

      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom);
    }

    setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
    setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    setOperationAction(FPOpToExpand, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);
    setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
    setOperationAction(ISD::FP_TO_BF16, MVT::f64,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);
    setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
    setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
  }

  if (Subtarget.is64Bit()) {
    setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT,
                        ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT},
                       MVT::i32, Custom);
    setOperationAction(ISD::LROUND, MVT::i32, Custom);
  }

  if (Subtarget.hasStdExtFOrZfinx()) {
    setOperationAction({ISD::FP_TO_UINT_SAT, ISD::FP_TO_SINT_SAT}, XLenVT,
                       Custom);

    setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
                        ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
                       XLenVT, Legal);

    setOperationAction(ISD::GET_ROUNDING, XLenVT, Custom);
    setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
  }

  setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
                      ISD::JumpTable},
                     XLenVT, Custom);

  setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);

  if (Subtarget.is64Bit())
    setOperationAction(ISD::Constant, MVT::i64, Custom);
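  // Custom lowering lets RISCVMatInt choose between an instruction sequence
  // and, for expensive constants, a constant pool load.
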
  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
  // Unfortunately this can't be determined just from the ISA naming string.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
                     Subtarget.is64Bit() ? Legal : Custom);

  setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  if (Subtarget.is64Bit())
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);

  if (Subtarget.hasStdExtZicbop()) {
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
  }

  if (Subtarget.hasStdExtA()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    setMinCmpXchgSizeInBits(32);
  } else if (Subtarget.hasForcedAtomics()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
  } else {
    setMaxAtomicSizeInBitsSupported(0);
  }

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasVInstructions()) {
    setBooleanVectorContents(ZeroOrOneBooleanContent);

    setOperationAction(ISD::VSCALE, XLenVT, Custom);

    // RVV intrinsics may have illegal operands.
    // We also need to custom legalize vmv.x.s.
    setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN,
                        ISD::INTRINSIC_VOID},
                       {MVT::i8, MVT::i16}, Custom);
    if (Subtarget.is64Bit())
      setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
                         MVT::i32, Custom);
    else
      setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN},
                         MVT::i64, Custom);

    setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
                       MVT::Other, Custom);

    static const unsigned IntegerVPOps[] = {
        ISD::VP_ADD,         ISD::VP_SUB,         ISD::VP_MUL,
        ISD::VP_SDIV,        ISD::VP_UDIV,        ISD::VP_SREM,
        ISD::VP_UREM,        ISD::VP_AND,         ISD::VP_OR,
        ISD::VP_XOR,         ISD::VP_ASHR,        ISD::VP_LSHR,
        ISD::VP_SHL,         ISD::VP_REDUCE_ADD,  ISD::VP_REDUCE_AND,
        ISD::VP_REDUCE_OR,   ISD::VP_REDUCE_XOR,  ISD::VP_REDUCE_SMAX,
        ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
        ISD::VP_MERGE,       ISD::VP_SELECT,      ISD::VP_FP_TO_SINT,
        ISD::VP_FP_TO_UINT,  ISD::VP_SETCC,       ISD::VP_SIGN_EXTEND,
        ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE,    ISD::VP_SMIN,
        ISD::VP_SMAX,        ISD::VP_UMIN,        ISD::VP_UMAX,
        ISD::VP_ABS};

    static const unsigned FloatingPointVPOps[] = {
        ISD::VP_FADD,        ISD::VP_FSUB,        ISD::VP_FMUL,
        ISD::VP_FDIV,        ISD::VP_FNEG,        ISD::VP_FABS,
        ISD::VP_FMA,         ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
        ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
        ISD::VP_SELECT,      ISD::VP_SINT_TO_FP,  ISD::VP_UINT_TO_FP,
        ISD::VP_SETCC,       ISD::VP_FP_ROUND,    ISD::VP_FP_EXTEND,
        ISD::VP_SQRT,        ISD::VP_FMINNUM,     ISD::VP_FMAXNUM,
        ISD::VP_FCEIL,       ISD::VP_FFLOOR,      ISD::VP_FROUND,
        ISD::VP_FROUNDEVEN,  ISD::VP_FCOPYSIGN,   ISD::VP_FROUNDTOZERO,
        ISD::VP_FRINT,       ISD::VP_FNEARBYINT};

    static const unsigned IntegerVecReduceOps[] = {
        ISD::VECREDUCE_ADD,  ISD::VECREDUCE_AND,  ISD::VECREDUCE_OR,
        ISD::VECREDUCE_XOR,  ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
        ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN};

    static const unsigned FloatingPointVecReduceOps[] = {
        ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN,
        ISD::VECREDUCE_FMAX};

    if (!Subtarget.is64Bit()) {
      // We must custom-lower certain vXi64 operations on RV32 due to the
      // vector element type being illegal.
      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
                         MVT::i64, Custom);

      setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);

      setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
                          ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
                          ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
                          ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
                         MVT::i64, Custom);
    }

    for (MVT VT : BoolVecVTs) {
      if (!isTypeLegal(VT))
        continue;

      setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);

      // Mask VTs are custom-expanded into a series of standard nodes.
      setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS,
                          ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
                          ISD::SCALAR_TO_VECTOR},
                         VT, Custom);

      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
                         Custom);

      setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction(
          {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
          Expand);

      setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);

      setOperationAction(
          {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
          Custom);

      setOperationAction(
          {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
          Custom);

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
                          ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
                          ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
                          ISD::STRICT_FP_TO_UINT},
                         VT, Custom);
      setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
                         Custom);
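      // For example, i8 -> f64 spans more than one power-of-two (8 -> 64), so
      // it is emitted as a staged sequence rather than a single conversion.
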
      // Expand all extending loads to types larger than this, and truncating
      // stores from types larger than this.
      for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
        setTruncStoreAction(OtherVT, VT, Expand);
        setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, OtherVT,
                         VT, Expand);
      }

      setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
                          ISD::VP_TRUNCATE, ISD::VP_SETCC},
                         VT, Custom);

      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);

      setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);

      setOperationPromotedToType(
          ISD::VECTOR_SPLICE, VT,
          MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
    }

    for (MVT VT : IntVecVTs) {
      if (!isTypeLegal(VT))
        continue;

      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);

      // Vectors implement MULHS/MULHU.
      setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand);

      // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
      if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
        setOperationAction({ISD::MULHU, ISD::MULHS}, VT, Expand);

      setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT,
                         Legal);

      setOperationAction({ISD::VP_FSHL, ISD::VP_FSHR}, VT, Expand);

      // Custom-lower extensions and truncations from/to mask types.
      setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND},
                         VT, Custom);

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
                          ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
                          ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
                          ISD::STRICT_FP_TO_UINT},
                         VT, Custom);
      setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
                         Custom);

      setOperationAction(
          {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, Legal);

      // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
      // nodes which truncate by one power of two at a time.
      setOperationAction(ISD::TRUNCATE, VT, Custom);
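      // For example, truncating nxv2i64 to nxv2i8 is emitted as three halving
      // steps: i64 -> i32 -> i16 -> i8.
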
      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
                         Custom);

      // Custom-lower reduction operations to set up the corresponding custom
      // nodes.
      setOperationAction(IntegerVecReduceOps, VT, Custom);

      setOperationAction(IntegerVPOps, VT, Custom);

      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

      setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
                         VT, Custom);

      setOperationAction(
          {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
           ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
          VT, Custom);

      setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                          ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
                         VT, Custom);

      setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction(ISD::SELECT_CC, VT, Expand);

      setOperationAction({ISD::STEP_VECTOR, ISD::VECTOR_REVERSE}, VT, Custom);

      for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
        setTruncStoreAction(VT, OtherVT, Expand);
        setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, OtherVT,
                         VT, Expand);
      }

      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);

      // Splice
      setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);

      if (Subtarget.hasStdExtZvbb()) {
        setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, VT, Legal);
        setOperationAction({ISD::VP_BITREVERSE, ISD::VP_BSWAP}, VT, Custom);
        setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
                            ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
                           VT, Custom);
      } else {
        setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, VT, Expand);
        setOperationAction({ISD::VP_BITREVERSE, ISD::VP_BSWAP}, VT, Expand);
        setOperationAction({ISD::CTLZ, ISD::CTTZ, ISD::CTPOP}, VT, Expand);
        setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
                            ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
                           VT, Expand);

        // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT
        // is in the range of f32.
        EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
        if (isTypeLegal(FloatVT)) {
          setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
                              ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
                              ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
                             VT, Custom);
        }

        setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand);
      }
    }

    // Expand various CCs to best match the RVV ISA, which natively supports UNE
    // but no other unordered comparisons, and supports all ordered comparisons
    // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
    // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
    // and we pattern-match those back to the "original", swapping operands once
    // more. This way we catch both operations and both "vf" and "fv" forms with
    // fewer patterns.
    static const ISD::CondCode VFPCCToExpand[] = {
        ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
        ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
        ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
        ISD::SETNE};
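    // For example, (setogt x, y) is expanded to (setolt y, x), and isel later
    // matches the swapped form back to a single compare.
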
    // Sets common operation actions on RVV floating-point vector types.
    const auto SetCommonVFPActions = [&](MVT VT) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
      // sizes are within one power-of-two of each other. Therefore conversions
      // between vXf16 and vXf64 must be lowered as sequences which convert via
      // f32.
      setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
                         Custom);
      // Expand various condition codes (explained above).
      setCondCodeAction(VFPCCToExpand, VT, Expand);

      setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal);

      setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
                          ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT,
                          ISD::IS_FPCLASS},
                         VT, Custom);

      setOperationAction(FloatingPointVecReduceOps, VT, Custom);

      // Expand FP operations that need libcalls.
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FSINCOS, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);

      setOperationAction(ISD::FCOPYSIGN, VT, Legal);

      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

      setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
                         VT, Custom);

      setOperationAction(
          {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
           ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
          VT, Custom);

      setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction(ISD::SELECT_CC, VT, Expand);

      setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                          ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
                         VT, Custom);

      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);

      setOperationAction({ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom);

      setOperationAction(FloatingPointVPOps, VT, Custom);

      setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
                         Custom);
      setOperationAction({ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
                          ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA},
                         VT, Legal);
      setOperationAction({ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
                          ISD::STRICT_FTRUNC, ISD::STRICT_FCEIL,
                          ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
                          ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
                         VT, Custom);
    };

    // Sets common extload/truncstore actions on RVV floating-point vector
    // types.
    const auto SetCommonVFPExtLoadTruncStoreActions =
        [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
          for (auto SmallVT : SmallerVTs) {
            setTruncStoreAction(VT, SmallVT, Expand);
            setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
          }
        };

    if (Subtarget.hasVInstructionsF16()) {
      for (MVT VT : F16VecVTs) {
        if (!isTypeLegal(VT))
          continue;
        SetCommonVFPActions(VT);
      }
    }

    if (Subtarget.hasVInstructionsF32()) {
      for (MVT VT : F32VecVTs) {
        if (!isTypeLegal(VT))
          continue;
        SetCommonVFPActions(VT);
        SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
      }
    }

    if (Subtarget.hasVInstructionsF64()) {
      for (MVT VT : F64VecVTs) {
        if (!isTypeLegal(VT))
          continue;
        SetCommonVFPActions(VT);
        SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
        SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
      }
    }

    if (Subtarget.useRVVForFixedLengthVectors()) {
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);
        for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
          setTruncStoreAction(VT, OtherVT, Expand);
          setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD},
                           OtherVT, VT, Expand);
        }

        // Custom lower fixed vector undefs to scalable vector undefs to avoid
        // expansion to a build_vector of 0s.
        setOperationAction(ISD::UNDEF, VT, Custom);

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
                           Custom);

        setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS}, VT,
                           Custom);

        setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
                           VT, Custom);

        setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);

        setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

        setOperationAction(ISD::SETCC, VT, Custom);

        setOperationAction(ISD::SELECT, VT, Custom);

        setOperationAction(ISD::TRUNCATE, VT, Custom);

        setOperationAction(ISD::BITCAST, VT, Custom);

        setOperationAction(
            {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
            Custom);

        setOperationAction(
            {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
            Custom);

        setOperationAction(
            {
                ISD::SINT_TO_FP,
                ISD::UINT_TO_FP,
                ISD::FP_TO_SINT,
                ISD::FP_TO_UINT,
                ISD::STRICT_SINT_TO_FP,
                ISD::STRICT_UINT_TO_FP,
                ISD::STRICT_FP_TO_SINT,
                ISD::STRICT_FP_TO_UINT,
            },
            VT, Custom);
        setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
                           Custom);

        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);

        // Operations below differ between mask vectors and other vectors.
        if (VT.getVectorElementType() == MVT::i1) {
          setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
                              ISD::OR, ISD::XOR},
                             VT, Custom);

          setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
                              ISD::VP_SETCC, ISD::VP_TRUNCATE},
                             VT, Custom);
          continue;
        }

        // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
        // it before type legalization for i64 vectors on RV32. It will then be
        // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
        // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
        // improvements first.
        if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
          setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
          setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
        }

        setOperationAction(
            {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);

        setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
                            ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
                            ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
                            ISD::VP_SCATTER},
                           VT, Custom);

        setOperationAction({ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR,
                            ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV,
                            ISD::UREM, ISD::SHL, ISD::SRA, ISD::SRL},
                           VT, Custom);

        setOperationAction(
            {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Custom);

        // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
        if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
          setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);

        setOperationAction(
            {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT,
            Custom);

        setOperationAction(ISD::VSELECT, VT, Custom);
        setOperationAction(ISD::SELECT_CC, VT, Expand);

        setOperationAction(
            {ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, VT, Custom);

        // Custom-lower reduction operations to set up the corresponding custom
        // nodes.
        setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX,
                            ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX,
                            ISD::VECREDUCE_UMIN},
                           VT, Custom);

        setOperationAction(IntegerVPOps, VT, Custom);

        // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT
        // is in the range of f32.
        EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
        if (isTypeLegal(FloatVT))
          setOperationAction(
              {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
              Custom);
      }

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
        // There are no extending loads or truncating stores.
        for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
          setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
          setTruncStoreAction(VT, InnerVT, Expand);
        }

        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);

        // Custom lower fixed vector undefs to scalable vector undefs to avoid
        // expansion to a build_vector of 0s.
        setOperationAction(ISD::UNDEF, VT, Custom);

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
                           Custom);

        setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
                            ISD::VECTOR_SHUFFLE, ISD::INSERT_VECTOR_ELT,
                            ISD::EXTRACT_VECTOR_ELT},
                           VT, Custom);

        setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
                            ISD::MGATHER, ISD::MSCATTER},
                           VT, Custom);

        setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
                            ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
                            ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
                            ISD::VP_SCATTER},
                           VT, Custom);

        setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV,
                            ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
                            ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM,
                            ISD::IS_FPCLASS},
                           VT, Custom);

        setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);

        setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
                            ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT},
                           VT, Custom);

        setCondCodeAction(VFPCCToExpand, VT, Expand);

        setOperationAction(ISD::SETCC, VT, Custom);
        setOperationAction({ISD::VSELECT, ISD::SELECT}, VT, Custom);
        setOperationAction(ISD::SELECT_CC, VT, Expand);

        setOperationAction(ISD::BITCAST, VT, Custom);

        setOperationAction(FloatingPointVecReduceOps, VT, Custom);

        setOperationAction(FloatingPointVPOps, VT, Custom);

        setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
                           Custom);
        setOperationAction(
            {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
             ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA,
             ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, ISD::STRICT_FTRUNC,
             ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
             ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
            VT, Custom);
      }

      // Custom-legalize bitcasts from fixed-length vectors to scalar types.
      setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
                         Custom);
      if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
        setOperationAction(ISD::BITCAST, MVT::f16, Custom);
      if (Subtarget.hasStdExtFOrZfinx())
        setOperationAction(ISD::BITCAST, MVT::f32, Custom);
      if (Subtarget.hasStdExtDOrZdinx())
        setOperationAction(ISD::BITCAST, MVT::f64, Custom);
    }
  }

  if (Subtarget.hasForcedAtomics()) {
    // Set atomic rmw/cas operations to expand to force __sync libcalls.
    setOperationAction(
        {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
         ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
         ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
         ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
        XLenVT, LibCall);
  }

  if (Subtarget.hasVendorXTHeadMemIdx()) {
    for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::POST_DEC;
         ++im) {
      setIndexedLoadAction(im, MVT::i8, Legal);
      setIndexedStoreAction(im, MVT::i8, Legal);
      setIndexedLoadAction(im, MVT::i16, Legal);
      setIndexedStoreAction(im, MVT::i16, Legal);
      setIndexedLoadAction(im, MVT::i32, Legal);
      setIndexedStoreAction(im, MVT::i32, Legal);

      if (Subtarget.is64Bit()) {
        setIndexedLoadAction(im, MVT::i64, Legal);
        setIndexedStoreAction(im, MVT::i64, Legal);
      }
    }
  }

  // Function alignments.
  const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
  setMinFunctionAlignment(FunctionAlignment);
  // Set preferred alignments.
  setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
  setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());

  setMinimumJumpTableEntries(5);

  // Jumps are expensive, compared to logic.
  setJumpIsExpensive();

  setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN,
                       ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND,
                       ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT});
  if (Subtarget.is64Bit())
    setTargetDAGCombine(ISD::SRA);

  if (Subtarget.hasStdExtFOrZfinx())
    setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM});

  if (Subtarget.hasStdExtZbb())
    setTargetDAGCombine({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN});

  if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
    setTargetDAGCombine(ISD::TRUNCATE);

  if (Subtarget.hasStdExtZbkb())
    setTargetDAGCombine(ISD::BITREVERSE);
  if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
    setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
  if (Subtarget.hasStdExtFOrZfinx())
    setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
                         ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT});
  if (Subtarget.hasVInstructions())
    setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
                         ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
                         ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR,
                         ISD::CONCAT_VECTORS});
  if (Subtarget.hasVendorXTHeadMemPair())
    setTargetDAGCombine({ISD::LOAD, ISD::STORE});
  if (Subtarget.useRVVForFixedLengthVectors())
    setTargetDAGCombine(ISD::BITCAST);

  setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
  setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");

  // Disable strict node mutation.
  IsStrictFPEnabled = true;
}

EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
                                            LLVMContext &Context,
                                            EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);
  if (Subtarget.hasVInstructions() &&
      (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
    return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
  return VT.changeVectorElementTypeToInteger();
}

MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
  return Subtarget.getXLenVT();
}

// Return false if we can lower get_vector_length to a vsetvli intrinsic.
bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
                                                      unsigned VF,
                                                      bool IsScalable) const {
  if (!Subtarget.hasVInstructions())
    return true;

  if (!IsScalable)
    return true;

  if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
    return true;

  // Don't allow VF=1 if those types aren't legal.
  if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELEN())
    return true;

  // VLEN=32 support is incomplete.
  if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
    return true;

  // The maximum VF is for the smallest element width with LMUL=8.
  // VF must be a power of 2.
  unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
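  // With RVVBitsPerBlock == 64 this yields MaxVF = 64: eight e8 elements per
  // vector block, times the maximum LMUL of 8.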
  return VF > MaxVF || !isPowerOf2_32(VF);
}

bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                             const CallInst &I,
                                             MachineFunction &MF,
                                             unsigned Intrinsic) const {
  auto &DL = I.getModule()->getDataLayout();

  auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
                                 bool IsUnitStrided) {
    Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
    Info.ptrVal = I.getArgOperand(PtrOp);
    Type *MemTy;
    if (IsStore) {
      // Store value is the first operand.
      MemTy = I.getArgOperand(0)->getType();
    } else {
      // Use return type. If it's segment load, return type is a struct.
      MemTy = I.getType();
      if (MemTy->isStructTy())
        MemTy = MemTy->getStructElementType(0);
    }
    if (!IsUnitStrided)
      MemTy = MemTy->getScalarType();

    Info.memVT = getValueType(DL, MemTy);
    Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
    Info.size = MemoryLocation::UnknownSize;
    Info.flags |=
        IsStore ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
    return true;
  };

  if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
    Info.flags |= MachineMemOperand::MONonTemporal;

  Info.flags |= RISCVTargetLowering::getTargetMMOFlags(I);
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  case Intrinsic::riscv_masked_strided_load:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_masked_strided_store:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_seg2_load:
  case Intrinsic::riscv_seg3_load:
  case Intrinsic::riscv_seg4_load:
  case Intrinsic::riscv_seg5_load:
  case Intrinsic::riscv_seg6_load:
  case Intrinsic::riscv_seg7_load:
  case Intrinsic::riscv_seg8_load:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_seg2_store:
  case Intrinsic::riscv_seg3_store:
  case Intrinsic::riscv_seg4_store:
  case Intrinsic::riscv_seg5_store:
  case Intrinsic::riscv_seg6_store:
  case Intrinsic::riscv_seg7_store:
  case Intrinsic::riscv_seg8_store:
    // Operands are (vec, ..., vec, ptr, vl)
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
                               /*IsStore*/ true,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vle:
  case Intrinsic::riscv_vle_mask:
  case Intrinsic::riscv_vleff:
  case Intrinsic::riscv_vleff_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
                               /*IsStore*/ false,
                               /*IsUnitStrided*/ true);
  case Intrinsic::riscv_vse:
  case Intrinsic::riscv_vse_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
                               /*IsStore*/ true,
                               /*IsUnitStrided*/ true);
  case Intrinsic::riscv_vlse:
  case Intrinsic::riscv_vlse_mask:
  case Intrinsic::riscv_vloxei:
  case Intrinsic::riscv_vloxei_mask:
  case Intrinsic::riscv_vluxei:
  case Intrinsic::riscv_vluxei_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
                               /*IsStore*/ false,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vsse:
  case Intrinsic::riscv_vsse_mask:
  case Intrinsic::riscv_vsoxei:
  case Intrinsic::riscv_vsoxei_mask:
  case Intrinsic::riscv_vsuxei:
  case Intrinsic::riscv_vsuxei_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
                               /*IsStore*/ true,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vlseg2:
  case Intrinsic::riscv_vlseg3:
  case Intrinsic::riscv_vlseg4:
  case Intrinsic::riscv_vlseg5:
  case Intrinsic::riscv_vlseg6:
  case Intrinsic::riscv_vlseg7:
  case Intrinsic::riscv_vlseg8:
  case Intrinsic::riscv_vlseg2ff:
  case Intrinsic::riscv_vlseg3ff:
  case Intrinsic::riscv_vlseg4ff:
  case Intrinsic::riscv_vlseg5ff:
  case Intrinsic::riscv_vlseg6ff:
  case Intrinsic::riscv_vlseg7ff:
  case Intrinsic::riscv_vlseg8ff:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
                               /*IsStore*/ false,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vlseg2_mask:
  case Intrinsic::riscv_vlseg3_mask:
  case Intrinsic::riscv_vlseg4_mask:
  case Intrinsic::riscv_vlseg5_mask:
  case Intrinsic::riscv_vlseg6_mask:
  case Intrinsic::riscv_vlseg7_mask:
  case Intrinsic::riscv_vlseg8_mask:
  case Intrinsic::riscv_vlseg2ff_mask:
  case Intrinsic::riscv_vlseg3ff_mask:
  case Intrinsic::riscv_vlseg4ff_mask:
  case Intrinsic::riscv_vlseg5ff_mask:
  case Intrinsic::riscv_vlseg6ff_mask:
  case Intrinsic::riscv_vlseg7ff_mask:
  case Intrinsic::riscv_vlseg8ff_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
                               /*IsStore*/ false,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vlsseg2:
  case Intrinsic::riscv_vlsseg3:
  case Intrinsic::riscv_vlsseg4:
  case Intrinsic::riscv_vlsseg5:
  case Intrinsic::riscv_vlsseg6:
  case Intrinsic::riscv_vlsseg7:
  case Intrinsic::riscv_vlsseg8:
  case Intrinsic::riscv_vloxseg2:
  case Intrinsic::riscv_vloxseg3:
  case Intrinsic::riscv_vloxseg4:
  case Intrinsic::riscv_vloxseg5:
  case Intrinsic::riscv_vloxseg6:
  case Intrinsic::riscv_vloxseg7:
  case Intrinsic::riscv_vloxseg8:
  case Intrinsic::riscv_vluxseg2:
  case Intrinsic::riscv_vluxseg3:
  case Intrinsic::riscv_vluxseg4:
  case Intrinsic::riscv_vluxseg5:
  case Intrinsic::riscv_vluxseg6:
  case Intrinsic::riscv_vluxseg7:
  case Intrinsic::riscv_vluxseg8:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
                               /*IsStore*/ false,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vlsseg2_mask:
  case Intrinsic::riscv_vlsseg3_mask:
  case Intrinsic::riscv_vlsseg4_mask:
  case Intrinsic::riscv_vlsseg5_mask:
  case Intrinsic::riscv_vlsseg6_mask:
  case Intrinsic::riscv_vlsseg7_mask:
  case Intrinsic::riscv_vlsseg8_mask:
  case Intrinsic::riscv_vloxseg2_mask:
  case Intrinsic::riscv_vloxseg3_mask:
  case Intrinsic::riscv_vloxseg4_mask:
  case Intrinsic::riscv_vloxseg5_mask:
  case Intrinsic::riscv_vloxseg6_mask:
  case Intrinsic::riscv_vloxseg7_mask:
  case Intrinsic::riscv_vloxseg8_mask:
  case Intrinsic::riscv_vluxseg2_mask:
  case Intrinsic::riscv_vluxseg3_mask:
  case Intrinsic::riscv_vluxseg4_mask:
  case Intrinsic::riscv_vluxseg5_mask:
  case Intrinsic::riscv_vluxseg6_mask:
  case Intrinsic::riscv_vluxseg7_mask:
  case Intrinsic::riscv_vluxseg8_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
                               /*IsStore*/ false,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vsseg2:
  case Intrinsic::riscv_vsseg3:
  case Intrinsic::riscv_vsseg4:
  case Intrinsic::riscv_vsseg5:
  case Intrinsic::riscv_vsseg6:
  case Intrinsic::riscv_vsseg7:
  case Intrinsic::riscv_vsseg8:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
                               /*IsStore*/ true,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vsseg2_mask:
  case Intrinsic::riscv_vsseg3_mask:
  case Intrinsic::riscv_vsseg4_mask:
  case Intrinsic::riscv_vsseg5_mask:
  case Intrinsic::riscv_vsseg6_mask:
  case Intrinsic::riscv_vsseg7_mask:
  case Intrinsic::riscv_vsseg8_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
                               /*IsStore*/ true,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vssseg2:
  case Intrinsic::riscv_vssseg3:
  case Intrinsic::riscv_vssseg4:
  case Intrinsic::riscv_vssseg5:
  case Intrinsic::riscv_vssseg6:
  case Intrinsic::riscv_vssseg7:
  case Intrinsic::riscv_vssseg8:
  case Intrinsic::riscv_vsoxseg2:
  case Intrinsic::riscv_vsoxseg3:
  case Intrinsic::riscv_vsoxseg4:
  case Intrinsic::riscv_vsoxseg5:
  case Intrinsic::riscv_vsoxseg6:
  case Intrinsic::riscv_vsoxseg7:
  case Intrinsic::riscv_vsoxseg8:
  case Intrinsic::riscv_vsuxseg2:
  case Intrinsic::riscv_vsuxseg3:
  case Intrinsic::riscv_vsuxseg4:
  case Intrinsic::riscv_vsuxseg5:
  case Intrinsic::riscv_vsuxseg6:
  case Intrinsic::riscv_vsuxseg7:
  case Intrinsic::riscv_vsuxseg8:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
                               /*IsStore*/ true,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vssseg2_mask:
  case Intrinsic::riscv_vssseg3_mask:
  case Intrinsic::riscv_vssseg4_mask:
  case Intrinsic::riscv_vssseg5_mask:
  case Intrinsic::riscv_vssseg6_mask:
  case Intrinsic::riscv_vssseg7_mask:
  case Intrinsic::riscv_vssseg8_mask:
  case Intrinsic::riscv_vsoxseg2_mask:
  case Intrinsic::riscv_vsoxseg3_mask:
  case Intrinsic::riscv_vsoxseg4_mask:
  case Intrinsic::riscv_vsoxseg5_mask:
  case Intrinsic::riscv_vsoxseg6_mask:
  case Intrinsic::riscv_vsoxseg7_mask:
  case Intrinsic::riscv_vsoxseg8_mask:
  case Intrinsic::riscv_vsuxseg2_mask:
  case Intrinsic::riscv_vsuxseg3_mask:
  case Intrinsic::riscv_vsuxseg4_mask:
  case Intrinsic::riscv_vsuxseg5_mask:
  case Intrinsic::riscv_vsuxseg6_mask:
  case Intrinsic::riscv_vsuxseg7_mask:
  case Intrinsic::riscv_vsuxseg8_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
                               /*IsStore*/ true,
                               /*IsUnitStrided*/ false);
  }
}

bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
                                                unsigned AS,
                                                Instruction *I) const {
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // RVV instructions only support register addressing.
  if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
    return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;

  // Require a 12-bit signed offset.
  if (!isInt<12>(AM.BaseOffs))
    return false;

  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (!AM.HasBaseReg) // allow "r+i".
      break;
    return false; // disallow "r+r" or "r+r+i".
  default: // Don't allow n * r
    return false;
  }

  return true;
}
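
// Both the compare and the add immediates below must fit in the 12-bit signed
// immediate of SLTI/ADDI, i.e. the range [-2048, 2047].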
bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

// On RV32, 64-bit integers are split into their high and low parts and held
// in two different registers, so the trunc is free since the low register can
// just be used.
// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
// isTruncateFree?
bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
    return false;
  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
  // We consider i64->i32 free on RV64 since we have good selection of W
  // instructions that make promoting operations back to i64 free in many cases.
  if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
      !DstVT.isInteger())
    return false;
  unsigned SrcBits = SrcVT.getSizeInBits();
  unsigned DestBits = DstVT.getSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
  // Don't advertise i32->i64 zextload as being free for RV64. It interacts
  // poorly with type legalization of compares preferring sext.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}

bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}

bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const {
  return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
}

bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
  return Subtarget.hasStdExtZbb();
}

bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
  return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb();
}

bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
    const Instruction &AndI) const {
  // We expect to be able to match a bit extraction instruction if the Zbs
  // extension is supported and the mask is a power of two. However, we
  // conservatively return false if the mask would fit in an ANDI instruction,
  // on the basis that it's possible the sinking+duplication of the AND in
  // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
  // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
  if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
    return false;
  ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
  if (!Mask)
    return false;
  return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
}

1628 bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
1629 EVT VT = Y.getValueType();
1631 // FIXME: Support vectors once we have tests.
1635 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
1636 !isa<ConstantSDNode>(Y);
1639 bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
1640 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
1641 if (Subtarget.hasStdExtZbs())
1642 return X.getValueType().isScalarInteger();
1643 auto *C = dyn_cast<ConstantSDNode>(Y);
1644 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
1645 if (Subtarget.hasVendorXTHeadBs())
1646 return C != nullptr;
1647 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
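// Positions 0-10 keep the mask (1 << Y) within ANDI's 12-bit signed immediate
// range; bit 11 would need the constant 2048, which does not fit.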
1648 return C && C->getAPIntValue().ule(10);
1651 bool RISCVTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned Opcode,
1653 // Only enable for rvv.
1654 if (!VT.isVector() || !Subtarget.hasVInstructions())
1657 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
1663 bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
1665 assert(Ty->isIntegerTy());
1667 unsigned BitSize = Ty->getIntegerBitWidth();
1668 if (BitSize > Subtarget.getXLen())
1671 // Fast path, assume 32-bit immediates are cheap.
1672 int64_t Val = Imm.getSExtValue();
1676 // A constant pool entry may be more aligned than the load we're trying to
1677 // replace. If we don't support unaligned scalar mem, prefer the constant
1678 // pool.
1679 // TODO: Can the caller pass down the alignment?
1680 if (!Subtarget.enableUnalignedScalarMem())
1683 // Prefer to keep the load if it would require many instructions.
1684 // This uses the same threshold we use for constant pools but doesn't
1685 // check useConstantPoolForLargeInts.
1686 // TODO: Should we keep the load only when we're definitely going to emit a
1687 // constant pool?
1689 RISCVMatInt::InstSeq Seq =
1690 RISCVMatInt::generateInstSeq(Val, Subtarget.getFeatureBits());
1691 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
1694 bool RISCVTargetLowering::
1695 shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
1696 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
1697 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1698 SelectionDAG &DAG) const {
1699 // One interesting pattern that we'd want to form is 'bit extract':
1700 // ((1 >> Y) & 1) ==/!= 0
1701 // But we also need to be careful not to try to reverse that fold.
1703 // Is this '((1 >> Y) & 1)'?
1704 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
1705 return false; // Keep the 'bit extract' pattern.
1707 // Will this be '((1 >> Y) & 1)' after the transform?
1708 if (NewShiftOpcode == ISD::SRL && CC->isOne())
1709 return true; // Do form the 'bit extract' pattern.
1711 // If 'X' is a constant, and we transform, then we will immediately
1712 // try to undo the fold, thus causing endless combine loop.
1713 // So only do the transform if X is not a constant. This matches the default
1714 // implementation of this function.
1718 bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
1720 case Instruction::Add:
1721 case Instruction::Sub:
1722 case Instruction::Mul:
1723 case Instruction::And:
1724 case Instruction::Or:
1725 case Instruction::Xor:
1726 case Instruction::FAdd:
1727 case Instruction::FSub:
1728 case Instruction::FMul:
1729 case Instruction::FDiv:
1730 case Instruction::ICmp:
1731 case Instruction::FCmp:
1733 case Instruction::Shl:
1734 case Instruction::LShr:
1735 case Instruction::AShr:
1736 case Instruction::UDiv:
1737 case Instruction::SDiv:
1738 case Instruction::URem:
1739 case Instruction::SRem:
1740 return Operand == 1;
1747 bool RISCVTargetLowering::canSplatOperand(Instruction *I, int Operand) const {
1748 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1751 if (canSplatOperand(I->getOpcode(), Operand))
1754 auto *II = dyn_cast<IntrinsicInst>(I);
1758 switch (II->getIntrinsicID()) {
1759 case Intrinsic::fma:
1760 case Intrinsic::vp_fma:
1761 return Operand == 0 || Operand == 1;
1762 case Intrinsic::vp_shl:
1763 case Intrinsic::vp_lshr:
1764 case Intrinsic::vp_ashr:
1765 case Intrinsic::vp_udiv:
1766 case Intrinsic::vp_sdiv:
1767 case Intrinsic::vp_urem:
1768 case Intrinsic::vp_srem:
1769 return Operand == 1;
1770 // These intrinsics are commutative.
1771 case Intrinsic::vp_add:
1772 case Intrinsic::vp_mul:
1773 case Intrinsic::vp_and:
1774 case Intrinsic::vp_or:
1775 case Intrinsic::vp_xor:
1776 case Intrinsic::vp_fadd:
1777 case Intrinsic::vp_fmul:
1778 case Intrinsic::vp_icmp:
1779 case Intrinsic::vp_fcmp:
1780 // These intrinsics have 'vr' versions.
1781 case Intrinsic::vp_sub:
1782 case Intrinsic::vp_fsub:
1783 case Intrinsic::vp_fdiv:
1784 return Operand == 0 || Operand == 1;
1790 /// Check if sinking \p I's operands to I's basic block is profitable, because
1791 /// the operands can be folded into a target instruction, e.g.
1792 /// splats of scalars can fold into vector instructions.
1793 bool RISCVTargetLowering::shouldSinkOperands(
1794 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
1795 using namespace llvm::PatternMatch;
1797 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1800 for (auto OpIdx : enumerate(I->operands())) {
1801 if (!canSplatOperand(I, OpIdx.index()))
1804 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
1805 // Make sure we are not already sinking this operand
1806 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
1809 // We are looking for a splat that can be sunk.
1810 if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
1811 m_Undef(), m_ZeroMask())))
1814 // Don't sink i1 splats.
1815 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
1818 // All uses of the shuffle should be sunk to avoid duplicating it across gpr
1819 // and vector registers
1820 for (Use &U : Op->uses()) {
1821 Instruction *Insn = cast<Instruction>(U.getUser());
1822 if (!canSplatOperand(Insn, U.getOperandNo()))
1826 Ops.push_back(&Op->getOperandUse(0));
1827 Ops.push_back(&OpIdx.value());
1832 bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
1833 unsigned Opc = VecOp.getOpcode();
1835 // Assume target opcodes can't be scalarized.
1836 // TODO - do we have any exceptions?
1837 if (Opc >= ISD::BUILTIN_OP_END)
1840 // If the vector op is not supported, try to convert to scalar.
1841 EVT VecVT = VecOp.getValueType();
1842 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
1845 // If the vector op is supported, but the scalar op is not, the transform may
1846 // not be worthwhile.
1847 EVT ScalarVT = VecVT.getScalarType();
1848 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
1851 bool RISCVTargetLowering::isOffsetFoldingLegal(
1852 const GlobalAddressSDNode *GA) const {
1853 // In order to maximise the opportunity for common subexpression elimination,
1854 // keep a separate ADD node for the global address offset instead of folding
1855 // it in the global address node. Later peephole optimisations may choose to
1856 // fold it back in when profitable.
1860 // Returns 0-31 if the fli instruction is available for the type and this is
1861 // a legal FP immediate for the type. Returns -1 otherwise.
1862 int RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm, EVT VT) const {
1863 if (!Subtarget.hasStdExtZfa())
1866 bool IsSupportedVT = false;
1867 if (VT == MVT::f16) {
1868 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
1869 } else if (VT == MVT::f32) {
1870 IsSupportedVT = true;
1871 } else if (VT == MVT::f64) {
1872 assert(Subtarget.hasStdExtD() && "Expect D extension");
1873 IsSupportedVT = true;
1879 return RISCVLoadFPImm::getLoadFPImm(Imm);
1882 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
1883 bool ForCodeSize) const {
1884 bool IsLegalVT = false;
1886 IsLegalVT = Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin();
1887 else if (VT == MVT::f32)
1888 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
1889 else if (VT == MVT::f64)
1890 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
1895 if (getLegalZfaFPImm(Imm, VT) >= 0)
1898 // Cannot create a 64-bit floating-point immediate value for RV32.
1899 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
1900 // td can handle +0.0 or -0.0 already.
1901 // -0.0 can be created by fmv + fneg.
1902 return Imm.isZero();
1904 // Special case: the cost for -0.0 is 1.
1905 int Cost = Imm.isNegZero()
1907 : RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
1908 Subtarget.getXLen(),
1909 Subtarget.getFeatureBits());
1910 // If the constant pool data is already in cache, only Cost 1 is cheaper.
1911 return Cost < FPImmCost;
1914 // TODO: This is very conservative.
1915 bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1916 unsigned Index) const {
1917 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
1920 // Only support extracting a fixed from a fixed vector for now.
1921 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
1924 unsigned ResElts = ResVT.getVectorNumElements();
1925 unsigned SrcElts = SrcVT.getVectorNumElements();
1927 // Conservatively only handle extracting half of a vector.
1928 // TODO: Relax this.
1929 if ((ResElts * 2) != SrcElts)
1932 // The smallest type we can slide is i8.
1933 // TODO: We can extract index 0 from a mask vector without a slide.
1934 if (ResVT.getVectorElementType() == MVT::i1)
1937 // Slide can support an arbitrary index, but we only treat vslidedown.vi as
1938 // cheap.
1942 // TODO: We can do arbitrary slidedowns, but for now only support extracting
1943 // the upper half of a vector until we have more test coverage.
1944 return Index == 0 || Index == ResElts;
1947 MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
1950 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
1951 // We might still end up using a GPR but that will be decided based on ABI.
1952 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
1953 !Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
1956 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
1959 unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
1962 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
1963 // We might still end up using a GPR but that will be decided based on ABI.
1964 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
1965 !Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
1968 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
1971 // Changes the condition code and swaps operands if necessary, so the SetCC
1972 // operation matches one of the comparisons supported directly by branches
1973 // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
1974 // with 1/-1.
1975 static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
1976 ISD::CondCode &CC, SelectionDAG &DAG) {
1977 // If this is a single bit test that can't be handled by ANDI, shift the
1978 // bit to be tested to the MSB and perform a signed compare with 0.
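// Illustrative example on RV64: `(x & (1 << 32)) == 0` becomes
// `(x << 31) >= 0` (signed), which moves bit 32 into the sign bit.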
1979 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
1980 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
1981 isa<ConstantSDNode>(LHS.getOperand(1))) {
1982 uint64_t Mask = LHS.getConstantOperandVal(1);
1983 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
1985 if (isPowerOf2_64(Mask)) {
1986 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
1987 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
1989 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
1992 LHS = LHS.getOperand(0);
1994 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
1995 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2000 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2001 int64_t C = RHSC->getSExtValue();
2005 // Convert X > -1 to X >= 0.
2007 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2013 // Convert X < 1 to 0 <= X.
2016 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2031 CC = ISD::getSetCCSwappedOperands(CC);
2032 std::swap(LHS, RHS);
2037 RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
2038 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2039 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2040 if (VT.getVectorElementType() == MVT::i1)
2043 switch (KnownSize) {
2045 llvm_unreachable("Invalid LMUL.");
2047 return RISCVII::VLMUL::LMUL_F8;
2049 return RISCVII::VLMUL::LMUL_F4;
2051 return RISCVII::VLMUL::LMUL_F2;
2053 return RISCVII::VLMUL::LMUL_1;
2055 return RISCVII::VLMUL::LMUL_2;
2057 return RISCVII::VLMUL::LMUL_4;
2059 return RISCVII::VLMUL::LMUL_8;
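// KnownSize is the minimum size in bits, so with RVVBitsPerBlock = 64 this
// maps e.g. nxv1i8 (8) -> LMUL_F8, nxv1i64/nxv2i32 (64) -> LMUL_1, and
// nxv8i64 (512) -> LMUL_8.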
2063 unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
2066 llvm_unreachable("Invalid LMUL.");
2067 case RISCVII::VLMUL::LMUL_F8:
2068 case RISCVII::VLMUL::LMUL_F4:
2069 case RISCVII::VLMUL::LMUL_F2:
2070 case RISCVII::VLMUL::LMUL_1:
2071 return RISCV::VRRegClassID;
2072 case RISCVII::VLMUL::LMUL_2:
2073 return RISCV::VRM2RegClassID;
2074 case RISCVII::VLMUL::LMUL_4:
2075 return RISCV::VRM4RegClassID;
2076 case RISCVII::VLMUL::LMUL_8:
2077 return RISCV::VRM8RegClassID;
2081 unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2082 RISCVII::VLMUL LMUL = getLMUL(VT);
2083 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2084 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2085 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2086 LMUL == RISCVII::VLMUL::LMUL_1) {
2087 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2088 "Unexpected subreg numbering");
2089 return RISCV::sub_vrm1_0 + Index;
2091 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2092 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2093 "Unexpected subreg numbering");
2094 return RISCV::sub_vrm2_0 + Index;
2096 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2097 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2098 "Unexpected subreg numbering");
2099 return RISCV::sub_vrm4_0 + Index;
2101 llvm_unreachable("Invalid vector type.");
2104 unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
2105 if (VT.getVectorElementType() == MVT::i1)
2106 return RISCV::VRRegClassID;
2107 return getRegClassIDForLMUL(getLMUL(VT));
2110 // Attempt to decompose a subvector insert/extract between VecVT and
2111 // SubVecVT via subregister indices. Returns the subregister index that
2112 // can perform the subvector insert/extract with the given element index, as
2113 // well as the index corresponding to any leftover subvectors that must be
2114 // further inserted/extracted within the register class for SubVecVT.
2115 std::pair<unsigned, unsigned>
2116 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2117 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2118 const RISCVRegisterInfo *TRI) {
2119 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2120 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2121 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2122 "Register classes not ordered");
2123 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2124 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2125 // Try to compose a subregister index that takes us from the incoming
2126 // LMUL>1 register class down to the outgoing one. At each step we halve
2127 // the LMUL:
2128 //   nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2129 // Note that this is not guaranteed to find a subregister index, such as
2130 // when we are extracting from one VR type to another.
2131 unsigned SubRegIdx = RISCV::NoSubRegister;
2132 for (const unsigned RCID :
2133 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2134 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2135 VecVT = VecVT.getHalfNumVectorElementsVT();
2137 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2138 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2139 getSubregIndexByMVT(VecVT, IsHi));
2141 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2143 return {SubRegIdx, InsertExtractIdx};
2146 // Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2147 // stores for those types.
2148 bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2149 return !Subtarget.useRVVForFixedLengthVectors() ||
2150 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2153 bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
2154 if (!ScalarTy.isSimple())
2156 switch (ScalarTy.getSimpleVT().SimpleTy) {
2158 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2164 return Subtarget.hasVInstructionsI64();
2166 return Subtarget.hasVInstructionsF16();
2168 return Subtarget.hasVInstructionsF32();
2170 return Subtarget.hasVInstructionsF64();
2177 unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2178 return NumRepeatedDivisors;
2181 static SDValue getVLOperand(SDValue Op) {
2182 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2183 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2184 "Unexpected opcode");
2185 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2186 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2187 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
2188 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2191 return Op.getOperand(II->VLOperand + 1 + HasChain);
2194 static bool useRVVForFixedLengthVectorVT(MVT VT,
2195 const RISCVSubtarget &Subtarget) {
2196 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2197 if (!Subtarget.useRVVForFixedLengthVectors())
2200 // We only support a set of vector types with a consistent maximum fixed size
2201 // across all supported vector element types to avoid legalization issues.
2202 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2203 // fixed-length vector type we support is 1024 bytes.
2204 if (VT.getFixedSizeInBits() > 1024 * 8)
2207 unsigned MinVLen = Subtarget.getRealMinVLen();
2209 MVT EltVT = VT.getVectorElementType();
2211 // Don't use RVV for vectors we cannot scalarize if required.
2212 switch (EltVT.SimpleTy) {
2213 // i1 is supported but has different rules.
2217 // Masks can only use a single register.
2218 if (VT.getVectorNumElements() > MinVLen)
2227 if (!Subtarget.hasVInstructionsI64())
2231 if (!Subtarget.hasVInstructionsF16())
2235 if (!Subtarget.hasVInstructionsF32())
2239 if (!Subtarget.hasVInstructionsF64())
2244 // Reject elements larger than ELEN.
2245 if (EltVT.getSizeInBits() > Subtarget.getELEN())
2248 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
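// For example, with a minimum VLEN of 128, v32i32 (1024 bits) gives LMul = 8,
// and anything wider is rejected by the LMUL cap (8 by default) below.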
2249 // Don't use RVV for types that don't fit.
2250 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2253 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2254 // the base fixed length RVV support in place.
2255 if (!VT.isPow2VectorType())
2261 bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2262 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2265 // Return the largest legal scalable vector type that matches VT's element type.
2266 static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
2267 const RISCVSubtarget &Subtarget) {
2268 // This may be called before legal types are set up.
2269 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2270 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2271 "Expected legal fixed length vector!");
2273 unsigned MinVLen = Subtarget.getRealMinVLen();
2274 unsigned MaxELen = Subtarget.getELEN();
2276 MVT EltVT = VT.getVectorElementType();
2277 switch (EltVT.SimpleTy) {
2279 llvm_unreachable("unexpected element type for RVV container");
2288 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2289 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2290 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2292 (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
2293 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2294 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2295 return MVT::getScalableVectorVT(EltVT, NumElts);
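// For example, with MinVLen = 128: v8i32 gives NumElts = 8 * 64 / 128 = 4,
// i.e. the container nxv4i32 (an LMUL=2 type when VLEN is 128).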
2300 static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
2301 const RISCVSubtarget &Subtarget) {
2302 return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
2306 MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
2307 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2310 // Grow V to consume an entire RVV register.
2311 static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
2312 const RISCVSubtarget &Subtarget) {
2313 assert(VT.isScalableVector() &&
2314 "Expected to convert into a scalable vector!");
2315 assert(V.getValueType().isFixedLengthVector() &&
2316 "Expected a fixed length vector operand!");
2318 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2319 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2322 // Shrink V so it's just big enough to maintain a VT's worth of data.
2323 static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
2324 const RISCVSubtarget &Subtarget) {
2325 assert(VT.isFixedLengthVector() &&
2326 "Expected to convert into a fixed length vector!");
2327 assert(V.getValueType().isScalableVector() &&
2328 "Expected a scalable vector operand!");
2330 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2331 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2334 /// Return the mask type suitable for masking the provided vector type.
2335 /// This is simply an i1 element type vector of the same (possibly
2336 /// scalable) length.
2337 static MVT getMaskTypeFor(MVT VecVT) {
2338 assert(VecVT.isVector());
2339 ElementCount EC = VecVT.getVectorElementCount();
2340 return MVT::getVectorVT(MVT::i1, EC);
2343 /// Creates an all ones mask suitable for masking a vector of type VecTy with
2344 /// vector length VL.
2345 static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2346 SelectionDAG &DAG) {
2347 MVT MaskVT = getMaskTypeFor(VecVT);
2348 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2351 static SDValue getVLOp(uint64_t NumElts, const SDLoc &DL, SelectionDAG &DAG,
2352 const RISCVSubtarget &Subtarget) {
2353 return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2356 static std::pair<SDValue, SDValue>
2357 getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2358 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2359 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2360 SDValue VL = getVLOp(NumElts, DL, DAG, Subtarget);
2361 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2365 // Gets the two common "VL" operands: an all-ones mask and the vector length.
2366 // VecVT is a vector type, either fixed-length or scalable. If VecVT is
2367 // fixed-length, ContainerVT is the scalable vector type it is contained in;
2368 // otherwise ContainerVT should be the same as VecVT.
2369 static std::pair<SDValue, SDValue>
2370 getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2371 const RISCVSubtarget &Subtarget) {
2372 if (VecVT.isFixedLengthVector())
2373 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2375 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2376 MVT XLenVT = Subtarget.getXLenVT();
2377 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
2378 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2382 // As above but assuming the given type is a scalable vector type.
2383 static std::pair<SDValue, SDValue>
2384 getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,
2385 const RISCVSubtarget &Subtarget) {
2386 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2387 return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
2390 SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,
2391 SelectionDAG &DAG) const {
2392 assert(VecVT.isScalableVector() && "Expected scalable vector");
2393 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2394 VecVT.getVectorElementCount());
2397 // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2398 // of either is (currently) supported. This can get us into an infinite loop
2399 // where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2400 // and so on.
2401 // Until either (or both) of these can reliably lower any node, reporting that
2402 // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2403 // the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2404 // which is not desirable.
2405 bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
2406 EVT VT, unsigned DefinedValues) const {
2410 static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
2411 const RISCVSubtarget &Subtarget) {
2412 // RISC-V FP-to-int conversions saturate to the destination register size, but
2413 // don't produce 0 for nan. We can use a conversion instruction and fix the
2414 // nan case with a compare and a select.
2415 SDValue Src = Op.getOperand(0);
2417 MVT DstVT = Op.getSimpleValueType();
2418 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2420 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2422 if (!DstVT.isVector()) {
2423 // In the absence of Zfh, promote f16 to f32, then saturate the result.
2424 if (Src.getSimpleValueType() == MVT::f16 &&
2425 !Subtarget.hasStdExtZfhOrZhinx()) {
2426 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2431 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2432 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2433 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
2436 // FIXME: Support other SatVTs by clamping before or after the conversion.
2439 SDValue FpToInt = DAG.getNode(
2440 Opc, DL, DstVT, Src,
2441 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));
2443 if (Opc == RISCVISD::FCVT_WU_RV64)
2444 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2446 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
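// SETUO on (Src, Src) is true exactly when Src is NaN, so a NaN input
// selects ZeroInt rather than the converted value.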
2447 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2448 ISD::CondCode::SETUO);
2453 MVT DstEltVT = DstVT.getVectorElementType();
2454 MVT SrcVT = Src.getSimpleValueType();
2455 MVT SrcEltVT = SrcVT.getVectorElementType();
2456 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2457 unsigned DstEltSize = DstEltVT.getSizeInBits();
2459 // Only handle saturating to the destination type.
2460 if (SatVT != DstEltVT)
2463 // FIXME: Don't support narrowing by more than 1 step for now.
2464 if (SrcEltSize > (2 * DstEltSize))
2467 MVT DstContainerVT = DstVT;
2468 MVT SrcContainerVT = SrcVT;
2469 if (DstVT.isFixedLengthVector()) {
2470 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2471 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2472 assert(DstContainerVT.getVectorElementCount() ==
2473 SrcContainerVT.getVectorElementCount() &&
2474 "Expected same element count");
2475 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2480 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2482 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2483 {Src, Src, DAG.getCondCode(ISD::SETNE),
2484 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2486 // Need to widen by more than 1 step, promote the FP type, then do a widening
2487 // convert.
2488 if (DstEltSize > (2 * SrcEltSize)) {
2489 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2490 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2491 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2495 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
2496 SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL);
2498 SDValue SplatZero = DAG.getNode(
2499 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
2500 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
2501 Res = DAG.getNode(RISCVISD::VSELECT_VL, DL, DstContainerVT, IsNan, SplatZero,
2504 if (DstVT.isFixedLengthVector())
2505 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
2510 static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
2512 case ISD::FROUNDEVEN:
2513 case ISD::STRICT_FROUNDEVEN:
2514 case ISD::VP_FROUNDEVEN:
2515 return RISCVFPRndMode::RNE;
2517 case ISD::STRICT_FTRUNC:
2518 case ISD::VP_FROUNDTOZERO:
2519 return RISCVFPRndMode::RTZ;
2521 case ISD::STRICT_FFLOOR:
2522 case ISD::VP_FFLOOR:
2523 return RISCVFPRndMode::RDN;
2525 case ISD::STRICT_FCEIL:
2527 return RISCVFPRndMode::RUP;
2529 case ISD::STRICT_FROUND:
2530 case ISD::VP_FROUND:
2531 return RISCVFPRndMode::RMM;
2533 return RISCVFPRndMode::DYN;
2536 return RISCVFPRndMode::Invalid;
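// For reference, the frm encodings used above: RNE rounds to nearest (ties to
// even), RTZ toward zero, RDN toward -inf, RUP toward +inf, RMM to nearest
// (ties away from zero), and DYN uses the dynamic rounding mode in frm.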
2539 // Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
2540 // VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
2541 // the integer domain and back. Taking care to avoid converting values that are
2542 // nan or already correct.
2544 lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
2545 const RISCVSubtarget &Subtarget) {
2546 MVT VT = Op.getSimpleValueType();
2547 assert(VT.isVector() && "Unexpected type");
2551 SDValue Src = Op.getOperand(0);
2553 MVT ContainerVT = VT;
2554 if (VT.isFixedLengthVector()) {
2555 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2556 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2560 if (Op->isVPOpcode()) {
2561 Mask = Op.getOperand(1);
2562 if (VT.isFixedLengthVector())
2563 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
2565 VL = Op.getOperand(2);
2567 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2570 // Freeze the source since we are increasing the number of uses.
2571 Src = DAG.getFreeze(Src);
2573 // We do the conversion on the absolute value and fix the sign at the end.
2574 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
2576 // Determine the largest integer that can be represented exactly. This and
2577 // values larger than it don't have any fractional bits so don't need to
2578 // be converted.
2579 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
2580 unsigned Precision = APFloat::semanticsPrecision(FltSem);
2581 APFloat MaxVal = APFloat(FltSem);
2582 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2583 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
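// For example, for f32 the precision is 24, so MaxVal is 2^23; any value with
// magnitude >= 2^23 already has no fractional bits.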
2584 SDValue MaxValNode =
2585 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
2586 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
2587 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
2589 // If abs(Src) was larger than MaxVal or nan, keep it.
2590 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2592 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
2593 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
2596 // Truncate to integer and convert back to FP.
2597 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
2598 MVT XLenVT = Subtarget.getXLenVT();
2601 switch (Op.getOpcode()) {
2603 llvm_unreachable("Unexpected opcode");
2607 case ISD::VP_FFLOOR:
2609 case ISD::FROUNDEVEN:
2610 case ISD::VP_FROUND:
2611 case ISD::VP_FROUNDEVEN:
2612 case ISD::VP_FROUNDTOZERO: {
2613 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
2614 assert(FRM != RISCVFPRndMode::Invalid);
2615 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
2616 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
2620 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
2625 Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
2627 case ISD::FNEARBYINT:
2628 case ISD::VP_FNEARBYINT:
2629 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
2634 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
2635 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
2636 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
2639 // Restore the original sign so that -0.0 is preserved.
2640 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
2641 Src, Src, Mask, VL);
2643 if (!VT.isFixedLengthVector())
2646 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
2649 // Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND,
2650 // STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaNs of the source
2651 // to qNaNs and converting the new source to integer and back to FP.
2653 lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
2654 const RISCVSubtarget &Subtarget) {
2656 MVT VT = Op.getSimpleValueType();
2657 SDValue Chain = Op.getOperand(0);
2658 SDValue Src = Op.getOperand(1);
2660 MVT ContainerVT = VT;
2661 if (VT.isFixedLengthVector()) {
2662 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2663 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2666 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2668 // Freeze the source since we are increasing the number of uses.
2669 Src = DAG.getFreeze(Src);
2671 // Convert sNaN to qNaN by computing x + x for every unordered element x in Src.
2672 MVT MaskVT = Mask.getSimpleValueType();
2673 SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
2674 DAG.getVTList(MaskVT, MVT::Other),
2675 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
2676 DAG.getUNDEF(MaskVT), Mask, VL});
2677 Chain = Unorder.getValue(1);
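// The masked x + x below quiets signaling NaNs: FADD of an sNaN raises
// Invalid and returns the corresponding qNaN, and the Unorder mask limits the
// update to the NaN lanes.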
2678 Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
2679 DAG.getVTList(ContainerVT, MVT::Other),
2680 {Chain, Src, Src, DAG.getUNDEF(ContainerVT), Unorder, VL});
2681 Chain = Src.getValue(1);
2683 // We do the conversion on the absolute value and fix the sign at the end.
2684 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
2686 // Determine the largest integer that can be represented exactly. This and
2687 // values larger than it don't have any fractional bits so don't need to
2688 // be converted.
2689 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
2690 unsigned Precision = APFloat::semanticsPrecision(FltSem);
2691 APFloat MaxVal = APFloat(FltSem);
2692 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2693 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2694 SDValue MaxValNode =
2695 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
2696 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
2697 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
2699 // If abs(Src) was larger than MaxVal or nan, keep it.
2701 RISCVISD::SETCC_VL, DL, MaskVT,
2702 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
2704 // Truncate to integer and convert back to FP.
2705 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
2706 MVT XLenVT = Subtarget.getXLenVT();
2709 switch (Op.getOpcode()) {
2711 llvm_unreachable("Unexpected opcode");
2712 case ISD::STRICT_FCEIL:
2713 case ISD::STRICT_FFLOOR:
2714 case ISD::STRICT_FROUND:
2715 case ISD::STRICT_FROUNDEVEN: {
2716 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
2717 assert(FRM != RISCVFPRndMode::Invalid);
2718 Truncated = DAG.getNode(
2719 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
2720 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
2723 case ISD::STRICT_FTRUNC:
2725 DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
2726 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
2728 case ISD::STRICT_FNEARBYINT:
2729 Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
2730 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
2734 Chain = Truncated.getValue(1);
2736 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
2737 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
2738 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
2739 DAG.getVTList(ContainerVT, MVT::Other), Chain,
2740 Truncated, Mask, VL);
2741 Chain = Truncated.getValue(1);
2744 // Restore the original sign so that -0.0 is preserved.
2745 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
2746 Src, Src, Mask, VL);
2748 if (VT.isFixedLengthVector())
2749 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
2750 return DAG.getMergeValues({Truncated, Chain}, DL);
2754 lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
2755 const RISCVSubtarget &Subtarget) {
2756 MVT VT = Op.getSimpleValueType();
2758 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
2760 if (DAG.shouldOptForSize())
2764 SDValue Src = Op.getOperand(0);
2766 // Create an integer the size of the mantissa with the MSB set. This and all
2767 // values larger than it don't have any fractional bits so don't need to be
2768 // converted.
2769 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
2770 unsigned Precision = APFloat::semanticsPrecision(FltSem);
2771 APFloat MaxVal = APFloat(FltSem);
2772 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2773 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2774 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
2776 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
2777 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
2778 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
2782 getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget,
2783 const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op,
2784 SDValue Offset, SDValue Mask, SDValue VL,
2785 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
2786 if (Merge.isUndef())
2787 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
2788 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
2789 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
2790 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
2794 getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
2795 EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask,
2797 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
2798 if (Merge.isUndef())
2799 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
2800 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
2801 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
2802 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
2805 struct VIDSequence {
2806 int64_t StepNumerator;
2807 unsigned StepDenominator;
2811 static std::optional<uint64_t> getExactInteger(const APFloat &APF,
2812 uint32_t BitWidth) {
2813 APSInt ValInt(BitWidth, !APF.isNegative());
2814 // We use an arbitrary rounding mode here. If a floating-point value is an
2815 // exact integer (e.g., 1.0), the rounding mode does not affect the output. If
2816 // the rounding mode changes the output value, then it is not an exact
2817 // integer.
2818 RoundingMode ArbitraryRM = RoundingMode::TowardZero;
2820 // If it is out of signed integer range, it will return an invalid operation.
2821 // If it is not an exact integer, IsExact is false.
2822 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
2823 APFloatBase::opInvalidOp) ||
2825 return std::nullopt;
2826 return ValInt.extractBitsAsZExtValue(BitWidth, 0);
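// For example, 3.0 converts exactly and yields 3, while 2.5 is inexact and
// yields std::nullopt; -1.0 at BitWidth 16 comes back as 0xFFFF, since the
// result is the value's two's-complement bit pattern.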
2829 // Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
2830 // to the (non-zero) step S and start value X. This can then be lowered as the
2831 // RVV sequence (VID * S) + X, for example.
2832 // The step S is represented as an integer numerator divided by a positive
2833 // denominator. Note that the implementation currently only identifies
2834 // sequences in which either the numerator is +/- 1 or the denominator is 1. It
2835 // cannot detect 2/3, for example.
2836 // Note that this method will also match potentially unappealing index
2837 // sequences, like <i32 0, i32 50939494>, however it is left to the caller to
2838 // determine whether this is worth generating code for.
2839 static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
2840 unsigned NumElts = Op.getNumOperands();
2841 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
2842 bool IsInteger = Op.getValueType().isInteger();
2844 std::optional<unsigned> SeqStepDenom;
2845 std::optional<int64_t> SeqStepNum, SeqAddend;
2846 std::optional<std::pair<uint64_t, unsigned>> PrevElt;
2847 unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
2848 for (unsigned Idx = 0; Idx < NumElts; Idx++) {
2849 // Assume undef elements match the sequence; we just have to be careful
2850 // when interpolating across them.
2851 if (Op.getOperand(Idx).isUndef())
2856 // The BUILD_VECTOR must be all constants.
2857 if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
2858 return std::nullopt;
2859 Val = Op.getConstantOperandVal(Idx) &
2860 maskTrailingOnes<uint64_t>(EltSizeInBits);
2862 // The BUILD_VECTOR must be all constants.
2863 if (!isa<ConstantFPSDNode>(Op.getOperand(Idx)))
2864 return std::nullopt;
2865 if (auto ExactInteger = getExactInteger(
2866 cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
2868 Val = *ExactInteger;
2870 return std::nullopt;
2874 // Calculate the step since the last non-undef element, and ensure
2875 // it's consistent across the entire sequence.
2876 unsigned IdxDiff = Idx - PrevElt->second;
2877 int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);
2879 // A zero value difference means that we're somewhere in the middle
2880 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
2881 // step change before evaluating the sequence.
2885 int64_t Remainder = ValDiff % IdxDiff;
2886 // Normalize the step if it's greater than 1.
2887 if (Remainder != ValDiff) {
2888 // The difference must cleanly divide the element span.
2890 return std::nullopt;
2896 SeqStepNum = ValDiff;
2897 else if (ValDiff != SeqStepNum)
2898 return std::nullopt;
2901 SeqStepDenom = IdxDiff;
2902 else if (IdxDiff != *SeqStepDenom)
2903 return std::nullopt;
2906 // Record this non-undef element for later.
2907 if (!PrevElt || PrevElt->first != Val)
2908 PrevElt = std::make_pair(Val, Idx);
2911 // We need to have logged a step for this to count as a legal index sequence.
2912 if (!SeqStepNum || !SeqStepDenom)
2913 return std::nullopt;
2915 // Loop back through the sequence and validate elements we might have skipped
2916 // while waiting for a valid step. While doing this, log any sequence addend.
2917 for (unsigned Idx = 0; Idx < NumElts; Idx++) {
2918 if (Op.getOperand(Idx).isUndef())
2922 Val = Op.getConstantOperandVal(Idx) &
2923 maskTrailingOnes<uint64_t>(EltSizeInBits);
2925 Val = *getExactInteger(
2926 cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
2929 uint64_t ExpectedVal =
2930 (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
2931 int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
2934 else if (Addend != SeqAddend)
2935 return std::nullopt;
2938 assert(SeqAddend && "Must have an addend if we have a step");
2940 return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
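// Worked example: <1, 1, 2, 2> matches with StepNumerator = 1,
// StepDenominator = 2 and Addend = 1, since element i is (i * 1) / 2 + 1.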
2943 // Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
2944 // and lower it as a VRGATHER_VX_VL from the source vector.
2945 static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
2947 const RISCVSubtarget &Subtarget) {
2948 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2950 SDValue Vec = SplatVal.getOperand(0);
2951 // Only perform this optimization on vectors of the same size for simplicity.
2952 // Don't perform this optimization for i1 vectors.
2953 // FIXME: Support i1 vectors, maybe by promoting to i8?
2954 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
2956 SDValue Idx = SplatVal.getOperand(1);
2957 // The index must be a legal type.
2958 if (Idx.getValueType() != Subtarget.getXLenVT())
2961 MVT ContainerVT = VT;
2962 if (VT.isFixedLengthVector()) {
2963 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2964 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
2967 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2969 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
2970 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
2972 if (!VT.isFixedLengthVector())
2975 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
2978 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
2979 const RISCVSubtarget &Subtarget) {
2980 MVT VT = Op.getSimpleValueType();
2981 assert(VT.isFixedLengthVector() && "Unexpected vector!");
2983 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2986 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2988 MVT XLenVT = Subtarget.getXLenVT();
2989 unsigned NumElts = Op.getNumOperands();
2991 if (VT.getVectorElementType() == MVT::i1) {
2992 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
2993 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
2994 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
2997 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
2998 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
2999 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3002 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3003 // scalar integer chunks whose bit-width depends on the number of mask
3004 // bits.
3005 // First, determine the most appropriate scalar integer type to use. This
3006 // is at most XLenVT, but may be shrunk to a smaller vector element type
3007 // according to the size of the final vector - use i8 chunks rather than
3008 // XLenVT if we're producing a v8i1. This results in more consistent
3009 // codegen across RV32 and RV64.
3010 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3011 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELEN());
3012 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
3013 // If we have to use more than one INSERT_VECTOR_ELT then this
3014 // optimization is likely to increase code size; avoid performing it in
3015 // such a case. We can use a load from a constant pool in this case.
3016 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3018 // Now we can create our integer vector type. Note that it may be larger
3019 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3020 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3021 MVT IntegerViaVecVT =
3022 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3026 unsigned BitPos = 0, IntegerEltIdx = 0;
3027 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3029 for (unsigned I = 0; I < NumElts;) {
3030 SDValue V = Op.getOperand(I);
3031 bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
3032 Bits |= ((uint64_t)BitValue << BitPos);
3036 // Once we accumulate enough bits to fill our scalar type or process the
3037 // last element, insert into our vector and clear our accumulated data.
3038 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3039 if (NumViaIntegerBits <= 32)
3040 Bits = SignExtend64<32>(Bits);
3041 SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
3042 Elts[IntegerEltIdx] = Elt;
3049 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3051 if (NumElts < NumViaIntegerBits) {
3052 // If we're producing a smaller vector than our minimum legal integer
3053 // type, bitcast to the equivalent (known-legal) mask type, and extract
3054 // our final mask.
3055 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3056 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3057 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3058 DAG.getConstant(0, DL, XLenVT));
3060 // Else we must have produced an integer type with the same size as the
3061 // mask type; bitcast for the final result.
3062 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3063 Vec = DAG.getBitcast(VT, Vec);
3069 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
3070 // vector type, we have a legal equivalently-sized i8 type, so we can use
3071 // that.
3072 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
3073 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
3076 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3077 // For a splat, perform a scalar truncate before creating the wider
3078 // vector.
3079 assert(Splat.getValueType() == XLenVT &&
3080 "Unexpected type for i1 splat value");
3081 Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
3082 DAG.getConstant(1, DL, XLenVT));
3083 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
3085 SmallVector<SDValue, 8> Ops(Op->op_values());
3086 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
3087 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
3088 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
3091 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
3094 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3095 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
3097 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3098 : RISCVISD::VMV_V_X_VL;
3100 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3101 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3104 // Try and match index sequences, which we can lower to the vid instruction
3105 // with optional modifications. An all-undef vector is matched by
3106 // getSplatValue, above.
3107 if (auto SimpleVID = isSimpleVIDSequence(Op)) {
3108 int64_t StepNumerator = SimpleVID->StepNumerator;
3109 unsigned StepDenominator = SimpleVID->StepDenominator;
3110 int64_t Addend = SimpleVID->Addend;
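// Worked example: <2, 4, 6, 8> yields StepNumerator 2, StepDenominator 1 and
// Addend 2, lowered below as splat(2) + (vid.v << 1).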
3112 assert(StepNumerator != 0 && "Invalid step");
3113 bool Negate = false;
3114 int64_t SplatStepVal = StepNumerator;
3115 unsigned StepOpcode = ISD::MUL;
3116 if (StepNumerator != 1) {
3117 if (isPowerOf2_64(std::abs(StepNumerator))) {
3118 Negate = StepNumerator < 0;
3119 StepOpcode = ISD::SHL;
3120 SplatStepVal = Log2_64(std::abs(StepNumerator));
3124 // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
3125 // threshold since it's the immediate value many RVV instructions accept.
3126 // There is no vmul.vi instruction, so ensure the multiply constant can fit
3127 // in a single addi instruction.
3128 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3129 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3130 isPowerOf2_32(StepDenominator) &&
3131 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3133 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3134 MVT VIDContainerVT =
3135 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3136 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3137 // Convert right out of the scalable type so we can use standard ISD
3138 // nodes for the rest of the computation. If we used scalable types with
3139 // these, we'd lose the fixed-length vector info and generate worse
3140 // vsetvli code.
3141 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3142 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3143 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3144 SDValue SplatStep = DAG.getSplatBuildVector(
3145 VIDVT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
3146 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3148 if (StepDenominator != 1) {
3149 SDValue SplatStep = DAG.getSplatBuildVector(
3150 VIDVT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
3151 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3153 if (Addend != 0 || Negate) {
3154 SDValue SplatAddend = DAG.getSplatBuildVector(
3155 VIDVT, DL, DAG.getConstant(Addend, DL, XLenVT));
3156 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3159 if (VT.isFloatingPoint()) {
3160 // TODO: Use vfwcvt to reduce register pressure.
3161 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3167 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3168 // when re-interpreted as a vector with a larger element type. For example,
3169 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3170 // could be instead splat as
3171 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3172 // TODO: This optimization could also work on non-constant splats, but it
3173 // would require bit-manipulation instructions to construct the splat value.
3174 SmallVector<SDValue> Sequence;
3175 unsigned EltBitSize = VT.getScalarSizeInBits();
3176 const auto *BV = cast<BuildVectorSDNode>(Op);
3177 if (VT.isInteger() && EltBitSize < 64 &&
3178 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
3179 BV->getRepeatedSequence(Sequence) &&
3180 (Sequence.size() * EltBitSize) <= 64) {
3181 unsigned SeqLen = Sequence.size();
3182 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3183 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
3184 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3185 ViaIntVT == MVT::i64) &&
3186 "Unexpected sequence type");
3188 unsigned EltIdx = 0;
3189 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3190 uint64_t SplatValue = 0;
3191 // Construct the amalgamated value which can be splatted as this larger
3192 // vector type.
3193 for (const auto &SeqV : Sequence) {
3194 if (!SeqV.isUndef())
3195 SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
3196 << (EltIdx * EltBitSize));
3200 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3201 // achieve better constant materialization.
3202 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3203 SplatValue = SignExtend64<32>(SplatValue);
3205 // Since we can't introduce illegal i64 types at this stage, we can only
3206 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3207 // way we can use RVV instructions to splat.
3208 assert((ViaIntVT.bitsLE(XLenVT) ||
3209 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3210 "Unexpected bitcast sequence");
3211 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3213 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3214 MVT ViaContainerVT =
3215 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3217 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3218 DAG.getUNDEF(ViaContainerVT),
3219 DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
3220 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3221 return DAG.getBitcast(VT, Splat);
3225 // Try and optimize BUILD_VECTORs with "dominant values" - these are values
3226 // which constitute a large proportion of the elements. In such cases we can
3227 // splat a vector with the dominant element and make up the shortfall with
3228 // INSERT_VECTOR_ELTs.
3229 // Note that this includes vectors of 2 elements by association. The
3230 // upper-most element is the "dominant" one, allowing us to use a splat to
3231 // "insert" the upper element, and an insert of the lower element at position
3232 // 0, which improves codegen.
3233 SDValue DominantValue;
3234 unsigned MostCommonCount = 0;
3235 DenseMap<SDValue, unsigned> ValueCounts;
3236 unsigned NumUndefElts =
3237 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3239 // Track the number of scalar loads we know we'd be inserting, estimated as
3240 // any non-zero floating-point constant. Other kinds of elements are either
3241 // already in registers or are materialized on demand. The threshold at which
3242 // a vector load is more desirable than several scalar materializations and
3243 // vector-insertion instructions is not known.
3244 unsigned NumScalarLoads = 0;
3246 for (SDValue V : Op->op_values()) {
3250 ValueCounts.insert(std::make_pair(V, 0));
3251 unsigned &Count = ValueCounts[V];
3253 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3254 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3256 // Is this value dominant? In case of a tie, prefer the highest element as
3257 // it's cheaper to insert near the beginning of a vector than it is at the
3258 // end.
3259 if (++Count >= MostCommonCount) {
3261 MostCommonCount = Count;
3265 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3266 unsigned NumDefElts = NumElts - NumUndefElts;
3267 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3269 // Don't perform this optimization when optimizing for size, since
3270 // materializing elements and inserting them tends to cause code bloat.
3271 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3272 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3273 ((MostCommonCount > DominantValueCountThreshold) ||
3274 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3275 // Start by splatting the most common element.
3276 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3278 DenseSet<SDValue> Processed{DominantValue};
3279 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3280 for (const auto &OpIdx : enumerate(Op->ops())) {
3281 const SDValue &V = OpIdx.value();
3282 if (V.isUndef() || !Processed.insert(V).second)
3283 continue;
3284 if (ValueCounts[V] == 1) {
3285 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3286 DAG.getConstant(OpIdx.index(), DL, XLenVT));
3287 } else {
3288 // Blend in all instances of this value using a VSELECT, using a
3289 // mask where each bit signals whether that element is the one
3290 // we're after.
3291 SmallVector<SDValue> Ops;
3292 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3293 return DAG.getConstant(V == V1, DL, XLenVT);
3294 });
3295 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3296 DAG.getBuildVector(SelMaskTy, DL, Ops),
3297 DAG.getSplatBuildVector(VT, DL, V), Vec);
3298 }
3299 }
3301 return Vec;
3302 }
3304 // For constant vectors, use generic constant pool lowering. Otherwise,
3305 // we'd have to materialize constants in GPRs just to move them into the
3306 // vector.
3307 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
3308 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
3309 return SDValue();
3311 assert((!VT.isFloatingPoint() ||
3312 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
3313 "Illegal type which will result in reserved encoding");
3315 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
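// For example (a sketch): building <4 x float> from four scalars becomes a
// chain of four vfslide1down.vf operations below, sliding each scalar in from
// the high end; a run of undef elements is skipped with a single vslidedown.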
3317 SDValue Vec = DAG.getUNDEF(ContainerVT);
3318 unsigned UndefCount = 0;
3319 for (const SDValue &V : Op->ops()) {
3320 if (V.isUndef()) {
3321 UndefCount++;
3322 continue;
3323 }
3324 if (UndefCount) {
3325 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
3326 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
3327 Vec, Offset, Mask, VL, Policy);
3328 UndefCount = 0;
3329 }
3330 auto OpCode =
3331 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3332 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3333 V, Mask, VL);
3334 }
3335 if (UndefCount) {
3336 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
3337 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
3338 Vec, Offset, Mask, VL, Policy);
3339 }
3340 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
3341 }
3343 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
3344 SDValue Lo, SDValue Hi, SDValue VL,
3345 SelectionDAG &DAG) {
3346 if (!Passthru)
3347 Passthru = DAG.getUNDEF(VT);
3348 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
3349 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
3350 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
3351 // If Hi constant is all the same sign bit as Lo, lower this as a custom
3352 // node in order to try and match RVV vector/scalar instructions.
3353 if ((LoC >> 31) == HiC)
3354 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
3356 // If vl is equal to XLEN_MAX and Hi constant is equal to Lo, we could use
3357 // vmv.v.x whose EEW = 32 to lower it.
3358 if (LoC == HiC && isAllOnesConstant(VL)) {
3359 MVT InterVT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
3360 // TODO: if vl <= min(VLMAX), we can also do this. But we could not
3361 // access the subtarget here now.
3362 auto InterVec = DAG.getNode(
3363 RISCVISD::VMV_V_X_VL, DL, InterVT, DAG.getUNDEF(InterVT), Lo,
3364 DAG.getRegister(RISCV::X0, MVT::i32));
3365 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
3366 }
3367 }
3369 // Fall back to a stack store and stride x0 vector load.
3370 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
3371 Hi, VL);
3372 }
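// For example (a sketch, on RV32): splatting the i64 constant -1 satisfies
// (LoC >> 31) == HiC above and becomes a single vmv.v.x (or vmv.v.i), while
// an arbitrary 64-bit pattern takes the stack-store + stride-x0 load fallback.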
3374 // Called by type legalization to handle splat of i64 on RV32.
3375 // FIXME: We can optimize this when the type has sign or zero bits in one
3376 // half.
3377 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
3378 SDValue Scalar, SDValue VL,
3379 SelectionDAG &DAG) {
3380 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
3381 SDValue Lo, Hi;
3382 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
3383 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
3384 }
3386 // This function lowers a splat of a scalar operand Splat with the vector
3387 // length VL. It ensures the final sequence is type legal, which is useful when
3388 // lowering a splat after type legalization.
3389 static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
3390 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
3391 const RISCVSubtarget &Subtarget) {
3392 bool HasPassthru = Passthru && !Passthru.isUndef();
3393 if (!HasPassthru && !Passthru)
3394 Passthru = DAG.getUNDEF(VT);
3395 if (VT.isFloatingPoint()) {
3396 // If VL is 1, we could use vfmv.s.f.
3397 if (isOneConstant(VL))
3398 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
3399 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
3400 }
3402 MVT XLenVT = Subtarget.getXLenVT();
3404 // Simplest case is that the operand needs to be promoted to XLenVT.
3405 if (Scalar.getValueType().bitsLE(XLenVT)) {
3406 // If the operand is a constant, sign extend to increase our chances
3407 // of being able to use a .vi instruction. ANY_EXTEND would become a
3408 // a zero extend and the simm5 check in isel would fail.
3409 // FIXME: Should we ignore the upper bits in isel instead?
3410 unsigned ExtOpc =
3411 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
3412 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
3413 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
3414 // If VL is 1 and the scalar value won't benefit from immediate, we could
3415 // use vmv.s.x.
3416 if (isOneConstant(VL) &&
3417 (!Const || isNullConstant(Scalar) || !isInt<5>(Const->getSExtValue())))
3418 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
3419 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
3420 }
3422 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
3423 "Unexpected scalar for splat lowering!");
3425 if (isOneConstant(VL) && isNullConstant(Scalar))
3426 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
3427 DAG.getConstant(0, DL, XLenVT), VL);
3429 // Otherwise use the more complicated splatting algorithm.
3430 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
3431 }
3433 static MVT getLMUL1VT(MVT VT) {
3434 assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
3435 "Unexpected vector MVT");
3436 return MVT::getScalableVectorVT(
3437 VT.getVectorElementType(),
3438 RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
3439 }
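// For example (a sketch, assuming RVVBitsPerBlock is 64): getLMUL1VT(nxv8i32)
// yields nxv2i32, i.e. 64/32 = 2 elements per single-register group.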
3441 // This function lowers an insert of a scalar operand Scalar into lane
3442 // 0 of the vector regardless of the value of VL. The contents of the
3443 // remaining lanes of the result vector are unspecified. VL is assumed
3444 // to be non-zero.
3445 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
3446 const SDLoc &DL, SelectionDAG &DAG,
3447 const RISCVSubtarget &Subtarget) {
3448 const MVT XLenVT = Subtarget.getXLenVT();
3450 SDValue Passthru = DAG.getUNDEF(VT);
3451 if (VT.isFloatingPoint()) {
3452 // TODO: Use vmv.v.i for appropriate constants
3453 // Use M1 or smaller to avoid over constraining register allocation
3454 const MVT M1VT = getLMUL1VT(VT);
3455 auto InnerVT = VT.bitsLE(M1VT) ? VT : M1VT;
3456 SDValue Result = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, InnerVT,
3457 DAG.getUNDEF(InnerVT), Scalar, VL);
3458 if (VT != InnerVT)
3459 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3460 DAG.getUNDEF(VT),
3461 Result, DAG.getConstant(0, DL, XLenVT));
3462 return Result;
3463 }
3466 // Avoid the tricky legalization cases by falling back to using the
3467 // splat code which already handles it gracefully.
3468 if (!Scalar.getValueType().bitsLE(XLenVT))
3469 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
3470 DAG.getConstant(1, DL, XLenVT),
3471 VT, DL, DAG, Subtarget);
3473 // If the operand is a constant, sign extend to increase our chances
3474 // of being able to use a .vi instruction. ANY_EXTEND would become a
3475 // a zero extend and the simm5 check in isel would fail.
3476 // FIXME: Should we ignore the upper bits in isel instead?
3477 unsigned ExtOpc =
3478 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
3479 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
3480 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
3481 // higher would involve overly constraining the register allocator for
3482 // no purpose.
3483 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar)) {
3484 if (!isNullConstant(Scalar) && isInt<5>(Const->getSExtValue()) &&
3485 VT.bitsLE(getLMUL1VT(VT)))
3486 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
3487 }
3488 // Use M1 or smaller to avoid over constraining register allocation
3489 const MVT M1VT = getLMUL1VT(VT);
3490 auto InnerVT = VT.bitsLE(M1VT) ? VT : M1VT;
3491 SDValue Result = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, InnerVT,
3492 DAG.getUNDEF(InnerVT), Scalar, VL);
3493 if (VT != InnerVT)
3494 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3495 DAG.getUNDEF(VT),
3496 Result, DAG.getConstant(0, DL, XLenVT));
3497 return Result;
3498 }
3500 // Does this shuffle extract either the even or odd elements of a vector?
3501 // That is, specifically, either (a) or (b) below.
3502 //   t34: v8i8 = extract_subvector t11, Constant:i64<0>
3503 //   t33: v8i8 = extract_subvector t11, Constant:i64<8>
3504 //   a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
3505 //   b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
3506 // Returns {Src Vector, Even Elements} on success
3507 static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
3508 SDValue V2, ArrayRef<int> Mask,
3509 const RISCVSubtarget &Subtarget) {
3510 // Need to be able to widen the vector.
3511 if (VT.getScalarSizeInBits() >= Subtarget.getELEN())
3512 return false;
3514 // Both inputs must be extracts.
3515 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
3516 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
3517 return false;
3519 // Extracting from the same source.
3520 SDValue Src = V1.getOperand(0);
3521 if (Src != V2.getOperand(0))
3522 return false;
3524 // Src needs to have twice the number of elements.
3525 if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
3526 return false;
3528 // The extracts must extract the two halves of the source.
3529 if (V1.getConstantOperandVal(1) != 0 ||
3530 V2.getConstantOperandVal(1) != Mask.size())
3531 return false;
3533 // First index must be the first even or odd element from V1.
3534 if (Mask[0] != 0 && Mask[0] != 1)
3535 return false;
3537 // The others must increase by 2 each time.
3538 // TODO: Support undef elements?
3539 for (unsigned i = 1; i != Mask.size(); ++i)
3540 if (Mask[i] != Mask[i - 1] + 2)
3541 return false;
3543 return true;
3544 }
3546 /// Is this shuffle interleaving contiguous elements from one vector into the
3547 /// even elements and contiguous elements from another vector into the odd
3548 /// elements. \p EvenSrc will contain the element that should be in the first
3549 /// even element. \p OddSrc will contain the element that should be in the first
3550 /// odd element. These can be the first element in a source or the element half
3551 /// way through the source.
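/// For example (a sketch): with two v4i32 sources, the mask <0, 4, 1, 5>
/// interleaves the low half of the first source (EvenSrc = 0) with the low
/// half of the second (OddSrc = 4).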
3552 static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
3553 int &OddSrc, const RISCVSubtarget &Subtarget) {
3554 // We need to be able to widen elements to the next larger integer type.
3555 if (VT.getScalarSizeInBits() >= Subtarget.getELEN())
3556 return false;
3558 int Size = Mask.size();
3559 int NumElts = VT.getVectorNumElements();
3560 assert(Size == (int)NumElts && "Unexpected mask size");
3562 SmallVector<unsigned, 2> StartIndexes;
3563 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
3564 return false;
3566 EvenSrc = StartIndexes[0];
3567 OddSrc = StartIndexes[1];
3569 // One source should be low half of first vector.
3570 if (EvenSrc != 0 && OddSrc != 0)
3571 return false;
3573 // Subvectors will be extracted from either the start of the two input
3574 // vectors, or from the start and middle of the first vector if it's a unary
3575 // interleave.
3576 // In both cases, HalfNumElts will be extracted.
3577 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
3578 // we'll create an illegal extract_subvector.
3579 // FIXME: We could support other values using a slidedown first.
3580 int HalfNumElts = NumElts / 2;
3581 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
3582 }
3584 /// Match shuffles that concatenate two vectors, rotate the concatenation,
3585 /// and then extract the original number of elements from the rotated result.
3586 /// This is equivalent to vector.splice or X86's PALIGNR instruction. The
3587 /// returned rotation amount is for a rotate right, where elements move from
3588 /// higher elements to lower elements. \p LoSrc indicates the first source
3589 /// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
3590 /// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
3591 /// 0 or 1 if a rotation is found.
3593 /// NOTE: We talk about rotate to the right which matches how bit shift and
3594 /// rotate instructions are described where LSBs are on the right, but LLVM IR
3595 /// and the table below write vectors with the lowest elements on the left.
3596 static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
3597 int Size = Mask.size();
3599 // We need to detect various ways of spelling a rotation:
3600 // [11, 12, 13, 14, 15, 0, 1, 2]
3601 // [-1, 12, 13, 14, -1, -1, 1, -1]
3602 // [-1, -1, -1, -1, -1, -1, 1, 2]
3603 // [ 3, 4, 5, 6, 7, 8, 9, 10]
3604 // [-1, 4, 5, 6, -1, -1, 9, -1]
3605 // [-1,  4,  5,  6, -1, -1, -1, -1]
3606 int Rotation = 0;
3607 LoSrc = -1;
3608 HiSrc = -1;
3609 for (int i = 0; i != Size; ++i) {
3610 int M = Mask[i];
3611 if (M < 0)
3612 continue;
3614 // Determine where a rotate vector would have started.
3615 int StartIdx = i - (M % Size);
3616 // The identity rotation isn't interesting, stop.
3617 if (StartIdx == 0)
3618 return -1;
3620 // If we found the tail of a vector the rotation must be the missing
3621 // front. If we found the head of a vector, it must be how much of the
3623 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
3625 if (Rotation == 0)
3626 Rotation = CandidateRotation;
3627 else if (Rotation != CandidateRotation)
3628 // The rotations don't match, so we can't match this mask.
3629 return -1;
3631 // Compute which value this mask is pointing at.
3632 int MaskSrc = M < Size ? 0 : 1;
3634 // Compute which of the two target values this index should be assigned to.
3635 // This reflects whether the high elements are remaining or the low elements
3636 // are coming in.
3637 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
3639 // Either set up this value if we've not encountered it before, or check
3640 // that it remains consistent.
3641 if (TargetSrc < 0)
3642 TargetSrc = MaskSrc;
3643 else if (TargetSrc != MaskSrc)
3644 // This may be a rotation, but it pulls from the inputs in some
3645 // unsupported interleaving.
3646 return -1;
3647 }
3649 // Check that we successfully analyzed the mask, and normalize the results.
3650 assert(Rotation != 0 && "Failed to locate a viable rotation!");
3651 assert((LoSrc >= 0 || HiSrc >= 0) &&
3652 "Failed to find a rotated input vector!");
3657 // Lower a deinterleave shuffle to vnsrl.
3658 // [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
3659 // -> [p, q, r, s] (EvenElts == false)
3660 // VT is the type of the vector to return, <[vscale x ]n x ty>
3661 // Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
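// For example (a sketch, SEW=8): the source is reinterpreted at double the
// element width and narrowed back down, roughly:
//   vnsrl.wi v8, v10, 0   ; keep even elements (shift by 0)
//   vnsrl.wi v8, v10, 8   ; keep odd elements (shift by the element size)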
3662 static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
3663 bool EvenElts,
3664 const RISCVSubtarget &Subtarget,
3665 SelectionDAG &DAG) {
3666 // The result is a vector of type <m x n x ty>
3667 MVT ContainerVT = VT;
3668 // Convert fixed vectors to scalable if needed
3669 if (ContainerVT.isFixedLengthVector()) {
3670 assert(Src.getSimpleValueType().isFixedLengthVector());
3671 ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
3673 // The source is a vector of type <m x n*2 x ty>
3674 MVT SrcContainerVT =
3675 MVT::getVectorVT(ContainerVT.getVectorElementType(),
3676 ContainerVT.getVectorElementCount() * 2);
3677 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3678 }
3680 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3682 // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>
3683 // This also converts FP to int.
3684 unsigned EltBits = ContainerVT.getScalarSizeInBits();
3685 MVT WideSrcContainerVT = MVT::getVectorVT(
3686 MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount());
3687 Src = DAG.getBitcast(WideSrcContainerVT, Src);
3689 // The integer version of the container type.
3690 MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger();
3692 // If we want even elements, then the shift amount is 0. Otherwise, shift by
3693 // the original element size.
3694 unsigned Shift = EvenElts ? 0 : EltBits;
3695 SDValue SplatShift = DAG.getNode(
3696 RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT),
3697 DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL);
3698 SDValue Res =
3699 DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift,
3700 DAG.getUNDEF(IntContainerVT), TrueMask, VL);
3701 // Cast back to FP if needed.
3702 Res = DAG.getBitcast(ContainerVT, Res);
3704 if (VT.isFixedLengthVector())
3705 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
3706 return Res;
3707 }
3709 // Lower the following shuffle to vslidedown.
3710 // a)
3711 //   t49: v8i8 = extract_subvector t13, Constant:i64<0>
3712 //   t109: v8i8 = extract_subvector t13, Constant:i64<8>
3713 //   t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
3714 // b)
3715 //   t69: v16i16 = extract_subvector t68, Constant:i64<0>
3716 //   t23: v8i16 = extract_subvector t69, Constant:i64<0>
3717 //   t29: v4i16 = extract_subvector t23, Constant:i64<4>
3718 //   t26: v8i16 = extract_subvector t69, Constant:i64<8>
3719 //   t30: v4i16 = extract_subvector t26, Constant:i64<0>
3720 //   t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
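// For case a) above this can become, roughly (a sketch; the exact VL/LMUL
// depend on the source type):
//   vsetivli zero, 8, e8, mf2, ta, ma
//   vslidedown.vi v8, v8, 1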
3721 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
3722 SDValue V1, SDValue V2,
3724 const RISCVSubtarget &Subtarget,
3725 SelectionDAG &DAG) {
3726 auto findNonEXTRACT_SUBVECTORParent =
3727 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
3728 uint64_t Offset = 0;
3729 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
3730 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
3731 // a scalable vector. But we don't want to match the case.
3732 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
3733 Offset += Parent.getConstantOperandVal(1);
3734 Parent = Parent.getOperand(0);
3736 return std::make_pair(Parent, Offset);
3737 };
3739 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
3740 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
3742 // Extracting from the same source.
3743 SDValue Src = V1Src;
3744 if (Src != V2Src)
3745 return SDValue();
3747 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
3748 SmallVector<int, 16> NewMask(Mask);
3749 for (size_t i = 0; i != NewMask.size(); ++i) {
3750 if (NewMask[i] == -1)
3751 continue;
3753 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
3754 NewMask[i] = NewMask[i] + V1IndexOffset;
3755 } else {
3756 // Minus NewMask.size() is needed. Otherwise, the b case would be
3757 // <5,6,7,12> instead of <5,6,7,8>.
3758 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
3759 }
3760 }
3762 // First index must be known and non-zero. It will be used as the slidedown
3763 // amount.
3764 if (NewMask[0] <= 0)
3765 return SDValue();
3767 // NewMask is also continuous.
3768 for (unsigned i = 1; i != NewMask.size(); ++i)
3769 if (NewMask[i - 1] + 1 != NewMask[i])
3770 return SDValue();
3772 MVT XLenVT = Subtarget.getXLenVT();
3773 MVT SrcVT = Src.getSimpleValueType();
3774 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3775 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
3776 SDValue Slidedown =
3777 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
3778 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
3779 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
3780 return DAG.getNode(
3781 ISD::EXTRACT_SUBVECTOR, DL, VT,
3782 convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
3783 DAG.getConstant(0, DL, XLenVT));
3784 }
3786 // Because vslideup leaves the destination elements at the start intact, we can
3787 // use it to perform shuffles that insert subvectors:
3789 // vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
3790 // ->
3791 // vsetvli zero, 8, e8, mf2, ta, ma
3792 // vslideup.vi v8, v9, 4
3793 //
3794 // vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
3795 // ->
3796 // vsetvli zero, 5, e8, mf2, tu, ma
3797 // vslideup.vi v8, v9, 2
3798 static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
3799 SDValue V1, SDValue V2,
3801 const RISCVSubtarget &Subtarget,
3802 SelectionDAG &DAG) {
3803 unsigned NumElts = VT.getVectorNumElements();
3804 int NumSubElts, Index;
3805 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
3806 Index))
3807 return SDValue();
3809 bool OpsSwapped = Mask[Index] < (int)NumElts;
3810 SDValue InPlace = OpsSwapped ? V2 : V1;
3811 SDValue ToInsert = OpsSwapped ? V1 : V2;
3813 MVT XLenVT = Subtarget.getXLenVT();
3814 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3815 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
3816 // We slide up by the index that the subvector is being inserted at, and set
3817 // VL to the index + the number of elements being inserted.
3818 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED | RISCVII::MASK_AGNOSTIC;
3819 // If we're adding a suffix to the in-place vector, i.e. inserting right
3820 // up to the very end of it, then we don't actually care about the tail.
3821 if (NumSubElts + Index >= (int)NumElts)
3822 Policy |= RISCVII::TAIL_AGNOSTIC;
3824 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
3825 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
3826 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
3828 SDValue Res;
3829 // If we're inserting into the lowest elements, use a tail undisturbed
3830 // vmv.v.v instead.
3831 if (Index == 0)
3832 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
3833 VL);
3834 else
3835 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
3836 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
3837 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3838 }
3840 /// Match v(f)slide1up/down idioms. These operations involve sliding
3841 /// N-1 elements to make room for an inserted scalar at one end.
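/// For example (a sketch): with V1 a build_vector splat of s and V2 =
/// <a, b, c, d>, the mask <0, 4, 5, 6> produces <s, a, b, c>, i.e. a single
/// vslide1up of V2 with scalar s.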
3842 static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
3843 SDValue V1, SDValue V2,
3845 const RISCVSubtarget &Subtarget,
3846 SelectionDAG &DAG) {
3847 bool OpsSwapped = false;
3848 if (!isa<BuildVectorSDNode>(V1)) {
3849 if (!isa<BuildVectorSDNode>(V2))
3850 return SDValue();
3851 std::swap(V1, V2);
3852 OpsSwapped = true;
3853 }
3854 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
3855 if (!Splat)
3856 return SDValue();
3858 // Return true if the mask could describe a slide of Mask.size() - 1
3859 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
3860 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
3861 const unsigned S = (Offset > 0) ? 0 : -Offset;
3862 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
3863 for (unsigned i = S; i != E; ++i)
3864 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
3865 return false;
3866 return true;
3867 };
3869 const unsigned NumElts = VT.getVectorNumElements();
3870 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
3871 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
3872 return SDValue();
3874 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
3875 // Inserted lane must come from splat, undef scalar is legal but not profitable.
3876 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
3877 return SDValue();
3879 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3880 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3881 auto OpCode = IsVSlidedown ?
3882 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
3883 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
3884 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
3885 DAG.getUNDEF(ContainerVT),
3886 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
3887 Splat, TrueMask, VL);
3888 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
3889 }
3891 // Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
3892 // to create an interleaved vector of <[vscale x] n*2 x ty>.
3893 // This requires that the size of ty is less than the subtarget's maximum ELEN.
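// For example (a sketch, SEW=8): EvenV = <a, b> and OddV = <p, q> combine as
// the i16 elements (p << 8) + a and (q << 8) + b, which reinterpret as the
// interleaved <a, p, b, q> in i8 element order.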
3894 static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
3895 const SDLoc &DL, SelectionDAG &DAG,
3896 const RISCVSubtarget &Subtarget) {
3897 MVT VecVT = EvenV.getSimpleValueType();
3898 MVT VecContainerVT = VecVT; // <vscale x n x ty>
3899 // Convert fixed vectors to scalable if needed
3900 if (VecContainerVT.isFixedLengthVector()) {
3901 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
3902 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
3903 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
3904 }
3906 assert(VecVT.getScalarSizeInBits() < Subtarget.getELEN());
3908 // We're working with a vector of the same size as the resulting
3909 // interleaved vector, but with half the number of elements and
3910 // twice the SEW (hence the restriction on not using the maximum
3911 // SEW).
3912 MVT WideVT =
3913 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
3914 VecVT.getVectorElementCount());
3915 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
3916 if (WideContainerVT.isFixedLengthVector())
3917 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
3919 // Bitcast the input vectors to integers in case they are FP
3920 VecContainerVT = VecContainerVT.changeTypeToInteger();
3921 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
3922 OddV = DAG.getBitcast(VecContainerVT, OddV);
3924 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
3925 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
3927 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
3928 // vwaddu.vv.
3929 SDValue Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT,
3930 EvenV, OddV, Passthru, Mask, VL);
3932 // Then get OddV * (2^VecVT.getScalarSizeInBits() - 1), i.e. OddV * 0xff..ff.
3933 SDValue AllOnesVec = DAG.getSplatVector(
3934 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
3935 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT, OddV,
3936 AllOnesVec, Passthru, Mask, VL);
3938 // Add the two together so we get
3939 // (OddV * 0xff...ff) + (OddV + EvenV)
3940 // = (OddV * 0x100...00) + EvenV
3941 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
3942 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
3943 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT, Interleaved,
3944 OddsMul, Passthru, Mask, VL);
3946 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
3947 MVT ResultContainerVT = MVT::getVectorVT(
3948 VecVT.getVectorElementType(), // Make sure to use original type
3949 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
3950 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
3952 // Convert back to a fixed vector if needed
3953 MVT ResultVT =
3954 MVT::getVectorVT(VecVT.getVectorElementType(),
3955 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
3956 if (ResultVT.isFixedLengthVector())
3957 Interleaved =
3958 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
3960 return Interleaved;
3961 }
3963 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
3964 const RISCVSubtarget &Subtarget) {
3965 SDValue V1 = Op.getOperand(0);
3966 SDValue V2 = Op.getOperand(1);
3967 SDLoc DL(Op);
3968 MVT XLenVT = Subtarget.getXLenVT();
3969 MVT VT = Op.getSimpleValueType();
3970 unsigned NumElts = VT.getVectorNumElements();
3971 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
3973 // Promote i1 shuffle to i8 shuffle.
3974 if (VT.getVectorElementType() == MVT::i1) {
3975 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
3976 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
3977 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
3978 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
3979 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
3980 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
3981 ISD::SETNE);
3982 }
3984 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3986 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3988 if (SVN->isSplat()) {
3989 const int Lane = SVN->getSplatIndex();
3990 if (Lane >= 0) {
3991 MVT SVT = VT.getVectorElementType();
3993 // Turn splatted vector load into a strided load with an X0 stride.
3994 SDValue V = V1;
3995 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
3996 // with undef.
3997 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
3998 int Offset = Lane;
3999 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
4000 int OpElements =
4001 V.getOperand(0).getSimpleValueType().getVectorNumElements();
4002 V = V.getOperand(Offset / OpElements);
4003 Offset %= OpElements;
4004 }
4006 // We need to ensure the load isn't atomic or volatile.
4007 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
4008 auto *Ld = cast<LoadSDNode>(V);
4009 Offset *= SVT.getStoreSize();
4010 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(),
4011 TypeSize::Fixed(Offset), DL);
4013 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
4014 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
4015 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
4016 SDValue IntID =
4017 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
4018 SDValue Ops[] = {Ld->getChain(),
4019 IntID,
4020 DAG.getUNDEF(ContainerVT),
4021 NewAddr,
4022 DAG.getRegister(RISCV::X0, XLenVT),
4023 VL};
4024 SDValue NewLoad = DAG.getMemIntrinsicNode(
4025 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
4026 DAG.getMachineFunction().getMachineMemOperand(
4027 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
4028 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
4029 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
4030 }
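// For example (a sketch): the path above re-reads one address into every
// element with a zero-strided load, roughly:
//   vsetivli zero, 4, e64, m2, ta, ma
//   vlse64.v v8, (a0), zero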
4032 // Otherwise use a scalar load and splat. This will give the best
4033 // opportunity to fold a splat into the operation. ISel can turn it into
4034 // the x0 strided load if we aren't able to fold away the select.
4035 if (SVT.isFloatingPoint())
4036 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
4037 Ld->getPointerInfo().getWithOffset(Offset),
4038 Ld->getOriginalAlign(),
4039 Ld->getMemOperand()->getFlags());
4040 else
4041 V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
4042 Ld->getPointerInfo().getWithOffset(Offset), SVT,
4043 Ld->getOriginalAlign(),
4044 Ld->getMemOperand()->getFlags());
4045 DAG.makeEquivalentMemoryOrdering(Ld, V);
4047 unsigned Opc =
4048 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
4049 SDValue Splat =
4050 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL);
4051 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4052 }
4054 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
4055 assert(Lane < (int)NumElts && "Unexpected lane!");
4056 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
4057 V1, DAG.getConstant(Lane, DL, XLenVT),
4058 DAG.getUNDEF(ContainerVT), TrueMask, VL);
4059 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
4060 }
4061 }
4063 ArrayRef<int> Mask = SVN->getMask();
4065 if (SDValue V =
4066 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
4067 return V;
4069 if (SDValue V =
4070 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
4071 return V;
4073 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
4074 // be undef which can be handled with a single SLIDEDOWN/UP.
4075 int LoSrc, HiSrc;
4076 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
4077 if (Rotation > 0) {
4078 SDValue LoV, HiV;
4079 if (LoSrc >= 0) {
4080 LoV = LoSrc == 0 ? V1 : V2;
4081 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
4082 }
4083 if (HiSrc >= 0) {
4084 HiV = HiSrc == 0 ? V1 : V2;
4085 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
4086 }
4088 // We found a rotation. We need to slide HiV down by Rotation. Then we need
4089 // to slide LoV up by (NumElts - Rotation).
4090 unsigned InvRotate = NumElts - Rotation;
4092 SDValue Res = DAG.getUNDEF(ContainerVT);
4093 if (HiV) {
4094 // Even though we could use a smaller VL, don't, to avoid a vsetivli
4095 // toggle.
4096 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
4097 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
4098 }
4099 if (LoV)
4100 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
4101 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
4102 RISCVII::TAIL_AGNOSTIC);
4104 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4105 }
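// For example (a sketch): rotating a single v8i8 source right by 2 maps to
//   vslidedown.vi v9, v8, 2
//   vslideup.vi v9, v8, 6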
4107 // If this is a deinterleave and we can widen the vector, then we can use
4108 // vnsrl to deinterleave.
4109 if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {
4110 return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0,
4111 Subtarget, DAG);
4112 }
4114 if (SDValue V =
4115 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
4116 return V;
4118 // Detect an interleave shuffle and lower to
4119 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
4120 int EvenSrc, OddSrc;
4121 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
4122 // Extract the halves of the vectors.
4123 MVT HalfVT = VT.getHalfNumVectorElementsVT();
4125 int Size = Mask.size();
4126 SDValue EvenV, OddV;
4127 assert(EvenSrc >= 0 && "Undef source?");
4128 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
4129 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
4130 DAG.getConstant(EvenSrc % Size, DL, XLenVT));
4132 assert(OddSrc >= 0 && "Undef source?");
4133 OddV = (OddSrc / Size) == 0 ? V1 : V2;
4134 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
4135 DAG.getConstant(OddSrc % Size, DL, XLenVT));
4137 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
4138 }
4140 // Detect shuffles which can be re-expressed as vector selects; these are
4141 // shuffles in which each element in the destination is taken from an element
4142 // at the corresponding index in either source vectors.
4143 bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
4144 int MaskIndex = MaskIdx.value();
4145 return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
4146 });
4148 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
4150 SmallVector<SDValue> MaskVals;
4151 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
4152 // merged with a second vrgather.
4153 SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;
4155 // By default we preserve the original operand order, and use a mask to
4156 // select LHS as true and RHS as false. However, since RVV vector selects may
4157 // feature splats but only on the LHS, we may choose to invert our mask and
4158 // instead select between RHS and LHS.
4159 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
4160 bool InvertMask = IsSelect == SwapOps;
4162 // Keep a track of which non-undef indices are used by each LHS/RHS shuffle
4163 // half.
4164 DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts;
4166 // Now construct the mask that will be used by the vselect or blended
4167 // vrgather operation. For vrgathers, construct the appropriate indices into
4168 // each vector.
4169 for (int MaskIndex : Mask) {
4170 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
4171 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4173 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
4174 GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
4175 ? DAG.getConstant(MaskIndex, DL, XLenVT)
4176 : DAG.getUNDEF(XLenVT));
4177 GatherIndicesRHS.push_back(
4178 IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
4179 : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
4180 if (IsLHSOrUndefIndex && MaskIndex >= 0)
4181 ++LHSIndexCounts[MaskIndex];
4182 if (!IsLHSOrUndefIndex)
4183 ++RHSIndexCounts[MaskIndex - NumElts];
4184 }
4186 if (SwapOps) {
4187 std::swap(V1, V2);
4188 std::swap(GatherIndicesLHS, GatherIndicesRHS);
4189 }
4192 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
4193 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4194 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4196 if (IsSelect)
4197 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
4199 if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
4200 // On such a large vector we're unable to use i8 as the index type.
4201 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
4202 // may involve vector splitting if we're already at LMUL=8, or our
4203 // user-supplied maximum fixed-length LMUL.
4204 return SDValue();
4205 }
4207 unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL;
4208 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
4209 MVT IndexVT = VT.changeTypeToInteger();
4210 // Since we can't introduce illegal index types at this stage, use i16 and
4211 // vrgatherei16 if the corresponding index type for plain vrgather is greater
4212 // than XLenVT.
4213 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
4214 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
4215 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
4216 }
4218 MVT IndexContainerVT =
4219 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
4221 SDValue Gather;
4222 // TODO: This doesn't trigger for i64 vectors on RV32, since there we
4223 // encounter a bitcasted BUILD_VECTOR with low/high i32 values.
4224 if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
4225 Gather = lowerScalarSplat(SDValue(), SplatValue, VL, ContainerVT, DL, DAG,
4226 Subtarget);
4227 } else {
4228 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
4229 // If only one index is used, we can use a "splat" vrgather.
4230 // TODO: We can splat the most-common index and fix-up any stragglers, if
4231 // that's beneficial.
4232 if (LHSIndexCounts.size() == 1) {
4233 int SplatIndex = LHSIndexCounts.begin()->getFirst();
4234 Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
4235 DAG.getConstant(SplatIndex, DL, XLenVT),
4236 DAG.getUNDEF(ContainerVT), TrueMask, VL);
4237 } else {
4238 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
4239 LHSIndices =
4240 convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
4242 Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
4243 DAG.getUNDEF(ContainerVT), TrueMask, VL);
4244 }
4245 }
4247 // If a second vector operand is used by this shuffle, blend it in with an
4248 // additional vrgather.
4249 if (!V2.isUndef()) {
4250 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
4252 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
4253 SelectMask =
4254 convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
4256 // If only one index is used, we can use a "splat" vrgather.
4257 // TODO: We can splat the most-common index and fix-up any stragglers, if
4258 // that's beneficial.
4259 if (RHSIndexCounts.size() == 1) {
4260 int SplatIndex = RHSIndexCounts.begin()->getFirst();
4261 Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
4262 DAG.getConstant(SplatIndex, DL, XLenVT), Gather,
4263 SelectMask, VL);
4264 } else {
4265 SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
4266 RHSIndices =
4267 convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
4268 Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather,
4269 SelectMask, VL);
4270 }
4271 }
4273 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
4274 }
4276 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
4277 // Support splats for any type. These should type legalize well.
4278 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
4279 return true;
4281 // Only support legal VTs for other shuffles for now.
4282 if (!isTypeLegal(VT))
4283 return false;
4285 MVT SVT = VT.getSimpleVT();
4287 // Not for i1 vectors.
4288 if (SVT.getScalarType() == MVT::i1)
4289 return false;
4291 int Dummy1, Dummy2;
4292 return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
4293 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
4294 }
4296 // Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
4297 // the exponent.
4298 SDValue
4299 RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
4300 SelectionDAG &DAG) const {
4301 MVT VT = Op.getSimpleValueType();
4302 unsigned EltSize = VT.getScalarSizeInBits();
4303 SDValue Src = Op.getOperand(0);
4304 SDLoc DL(Op);
4305 MVT ContainerVT = VT;
4307 SDValue Mask, VL;
4308 if (Op->isVPOpcode()) {
4309 Mask = Op.getOperand(1);
4310 if (VT.isFixedLengthVector())
4311 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
4312 Subtarget);
4313 VL = Op.getOperand(2);
4314 }
4316 // We choose FP type that can represent the value if possible. Otherwise, we
4317 // use rounding to zero conversion for correct exponent of the result.
4318 // TODO: Use f16 for i8 when possible?
4319 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
4320 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
4321 FloatEltVT = MVT::f32;
4322 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
4324 // Legal types should have been checked in the RISCVTargetLowering
4325 // constructor.
4326 // TODO: Splitting may make sense in some cases.
4327 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
4328 "Expected legal float type!");
4330 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
4331 // The trailing zero count is equal to log2 of this single bit value.
4332 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
4333 SDValue Neg = DAG.getNegative(Src, DL, VT);
4334 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
4335 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
4336 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
4337 Src, Mask, VL);
4338 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
4339 }
4341 // We have a legal FP type, convert to it.
4342 SDValue FloatVal;
4343 if (FloatVT.bitsGT(VT)) {
4344 if (Op->isVPOpcode())
4345 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
4347 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
4348 } else {
4349 // Use RTZ to avoid rounding influencing exponent of FloatVal.
4350 if (VT.isFixedLengthVector()) {
4351 ContainerVT = getContainerForFixedLengthVector(VT);
4352 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
4353 }
4354 if (!Op->isVPOpcode())
4355 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4356 SDValue RTZRM =
4357 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
4358 MVT ContainerFloatVT =
4359 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
4360 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
4361 Src, Mask, RTZRM, VL);
4362 if (VT.isFixedLengthVector())
4363 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
4364 }
4365 // Bitcast to integer and shift the exponent to the LSB.
4366 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
4367 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
4368 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
4370 SDValue Exp;
4371 // Restore back to original type. Truncation after SRL is to generate vnsrl.
4372 if (Op->isVPOpcode()) {
4373 Exp = DAG.getNode(ISD::VP_LSHR, DL, IntVT, Bitcast,
4374 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
4375 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
4376 } else {
4377 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
4378 DAG.getConstant(ShiftAmt, DL, IntVT));
4379 if (IntVT.bitsLT(VT))
4380 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
4381 else if (IntVT.bitsGT(VT))
4382 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
4383 }
4385 // The exponent contains log2 of the value in biased form.
4386 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
4387 // For trailing zeros, we just need to subtract the bias.
4388 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
4389 return DAG.getNode(ISD::SUB, DL, VT, Exp,
4390 DAG.getConstant(ExponentBias, DL, VT));
4391 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
4392 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
4393 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
4395 // For leading zeros, we need to remove the bias and convert from log2 to
4396 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
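// For example (a sketch, i32 elements via f64): x = 16 converts to 2^4, whose
// biased exponent is 1023 + 4 = 1027, so cttz = 1027 - 1023 = 4 and
// ctlz = (1023 + 31) - 1027 = 27.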
4397 unsigned Adjust = ExponentBias + (EltSize - 1);
4398 SDValue Res;
4399 if (Op->isVPOpcode())
4400 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
4401 Mask, VL);
4402 else
4403 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
4405 // The above result with zero input equals to Adjust which is greater than
4406 // EltSize. Hence, we can do min(Res, EltSize) for CTLZ.
4407 if (Op.getOpcode() == ISD::CTLZ)
4408 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
4409 else if (Op.getOpcode() == ISD::VP_CTLZ)
4410 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
4411 DAG.getConstant(EltSize, DL, VT), Mask, VL);
4412 return Res;
4413 }
4415 // While RVV has alignment restrictions, we should always be able to load as a
4416 // legal equivalently-sized byte-typed vector instead. This method is
4417 // responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If
4418 // the load is already correctly-aligned, it returns SDValue().
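// For example (a sketch): an align-1 load of v4i32 can instead be issued as a
// v16i8 load of the same 16 bytes, followed by a bitcast back to v4i32.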
4419 SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
4420 SelectionDAG &DAG) const {
4421 auto *Load = cast<LoadSDNode>(Op);
4422 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
4424 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
4425 Load->getMemoryVT(),
4426 *Load->getMemOperand()))
4427 return SDValue();
4429 SDLoc DL(Op);
4430 MVT VT = Op.getSimpleValueType();
4431 unsigned EltSizeBits = VT.getScalarSizeInBits();
4432 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
4433 "Unexpected unaligned RVV load type");
4435 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
4436 assert(NewVT.isValid() &&
4437 "Expecting equally-sized RVV vector types to be legal");
4438 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
4439 Load->getPointerInfo(), Load->getOriginalAlign(),
4440 Load->getMemOperand()->getFlags());
4441 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
4442 }
4444 // While RVV has alignment restrictions, we should always be able to store as a
4445 // legal equivalently-sized byte-typed vector instead. This method is
4446 // responsible for re-expressing a ISD::STORE via a correctly-aligned type. It
4447 // returns SDValue() if the store is already correctly aligned.
4448 SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
4449 SelectionDAG &DAG) const {
4450 auto *Store = cast<StoreSDNode>(Op);
4451 assert(Store && Store->getValue().getValueType().isVector() &&
4452 "Expected vector store");
4454 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
4455 Store->getMemoryVT(),
4456 *Store->getMemOperand()))
4457 return SDValue();
4459 SDLoc DL(Op);
4460 SDValue StoredVal = Store->getValue();
4461 MVT VT = StoredVal.getSimpleValueType();
4462 unsigned EltSizeBits = VT.getScalarSizeInBits();
4463 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
4464 "Unexpected unaligned RVV store type");
4466 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
4467 assert(NewVT.isValid() &&
4468 "Expecting equally-sized RVV vector types to be legal");
4469 StoredVal = DAG.getBitcast(NewVT, StoredVal);
4470 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
4471 Store->getPointerInfo(), Store->getOriginalAlign(),
4472 Store->getMemOperand()->getFlags());
4473 }
4475 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
4476 const RISCVSubtarget &Subtarget) {
4477 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
4479 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
4481 // All simm32 constants should be handled by isel.
4482 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
4483 // this check redundant, but small immediates are common so this check
4484 // should have better compile time.
4485 if (isInt<32>(Imm))
4486 return Op;
4488 // We only need to cost the immediate, if constant pool lowering is enabled.
4489 if (!Subtarget.useConstantPoolForLargeInts())
4490 return Op;
4492 RISCVMatInt::InstSeq Seq =
4493 RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
4494 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
4495 return Op;
4497 // Special case. See if we can build the constant as (ADD (SLLI X, 32), X) do
4498 // that if it will avoid a constant pool.
4499 // It will require an extra temporary register though.
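// For example (a sketch): 0x0000000500000005 has LoVal == HiVal == 5 and can
// be built as addi a0, zero, 5; slli a1, a0, 32; add a0, a0, a1.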
4500 if (!DAG.shouldOptForSize()) {
4501 int64_t LoVal = SignExtend64<32>(Imm);
4502 int64_t HiVal = SignExtend64<32>(((uint64_t)Imm - (uint64_t)LoVal) >> 32);
4503 if (LoVal == HiVal) {
4504 RISCVMatInt::InstSeq SeqLo =
4505 RISCVMatInt::generateInstSeq(LoVal, Subtarget.getFeatureBits());
4506 if ((SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
4507 return Op;
4508 }
4509 }
4511 // Expand to a constant pool using the default expansion code.
4512 return SDValue();
4513 }
4515 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
4516 const RISCVSubtarget &Subtarget) {
4517 SDLoc dl(Op);
4518 AtomicOrdering FenceOrdering =
4519 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
4520 SyncScope::ID FenceSSID =
4521 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
4523 if (Subtarget.hasStdExtZtso()) {
4524 // The only fence that needs an instruction is a sequentially-consistent
4525 // cross-thread fence.
4526 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
4527 FenceSSID == SyncScope::System)
4528 return Op;
4530 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
4531 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
4532 }
4534 // singlethread fences only synchronize with signal handlers on the same
4535 // thread and thus only need to preserve instruction order, not actually
4536 // enforce memory ordering.
4537 if (FenceSSID == SyncScope::SingleThread)
4538 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
4539 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
4541 return Op;
4542 }
4544 SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
4545 SelectionDAG &DAG) const {
4546 SDLoc DL(Op);
4547 MVT VT = Op.getSimpleValueType();
4548 MVT XLenVT = Subtarget.getXLenVT();
4549 auto CNode = cast<ConstantSDNode>(Op.getOperand(1));
4550 unsigned Check = CNode->getZExtValue();
4551 unsigned TDCMask = 0;
4552 if (Check & fcSNan)
4553 TDCMask |= RISCV::FPMASK_Signaling_NaN;
4554 if (Check & fcQNan)
4555 TDCMask |= RISCV::FPMASK_Quiet_NaN;
4556 if (Check & fcPosInf)
4557 TDCMask |= RISCV::FPMASK_Positive_Infinity;
4558 if (Check & fcNegInf)
4559 TDCMask |= RISCV::FPMASK_Negative_Infinity;
4560 if (Check & fcPosNormal)
4561 TDCMask |= RISCV::FPMASK_Positive_Normal;
4562 if (Check & fcNegNormal)
4563 TDCMask |= RISCV::FPMASK_Negative_Normal;
4564 if (Check & fcPosSubnormal)
4565 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
4566 if (Check & fcNegSubnormal)
4567 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
4568 if (Check & fcPosZero)
4569 TDCMask |= RISCV::FPMASK_Positive_Zero;
4570 if (Check & fcNegZero)
4571 TDCMask |= RISCV::FPMASK_Negative_Zero;
4573 bool IsOneBitMask = isPowerOf2_32(TDCMask);
4575 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
4577 if (VT.isVector()) {
4578 SDValue Op0 = Op.getOperand(0);
4579 MVT VT0 = Op.getOperand(0).getSimpleValueType();
4581 if (VT.isScalableVector()) {
4582 MVT DstVT = VT0.changeVectorElementTypeToInteger();
4583 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
4584 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
4585 VL, Op->getFlags());
4586 if (IsOneBitMask)
4587 return DAG.getSetCC(DL, VT, FPCLASS,
4588 DAG.getConstant(TDCMask, DL, DstVT),
4589 ISD::CondCode::SETEQ);
4590 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
4591 DAG.getConstant(TDCMask, DL, DstVT));
4592 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
4593 ISD::CondCode::SETNE);
4594 }
4596 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
4597 MVT ContainerVT = getContainerForFixedLengthVector(VT);
4598 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
4599 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
4601 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
4603 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
4604 Mask, VL, Op->getFlags());
4606 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
4607 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
4608 if (IsOneBitMask) {
4609 SDValue VMSEQ =
4610 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
4611 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
4612 DAG.getUNDEF(ContainerVT), Mask, VL});
4613 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
4614 }
4615 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
4616 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
4618 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
4619 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
4620 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
4622 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
4623 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
4624 DAG.getUNDEF(ContainerVT), Mask, VL});
4625 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
4626 }
4628 SDValue FPCLASS = DAG.getNode(RISCVISD::FPCLASS, DL, VT, Op.getOperand(0));
4629 SDValue AND = DAG.getNode(ISD::AND, DL, VT, FPCLASS, TDCMaskV);
4630 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, XLenVT),
4631 ISD::CondCode::SETNE);
4632 }
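// For example (a sketch): a scalar is_fpclass(x, fcPosInf | fcNegInf) sets
// only the two infinity bits in TDCMask above, lowering to fclass.s/fclass.d,
// an AND with that mask, and a compare against zero.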
4634 // Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
4635 // operations propagate nans.
4636 static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
4637 const RISCVSubtarget &Subtarget) {
4638 SDLoc DL(Op);
4639 EVT VT = Op.getValueType();
4641 SDValue X = Op.getOperand(0);
4642 SDValue Y = Op.getOperand(1);
4644 MVT XLenVT = Subtarget.getXLenVT();
4646 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
4647 // ensures that when one input is a nan, the other will also be a nan allowing
4648 // the nan to propagate. If both inputs are nan, this will swap the inputs
4649 // which is harmless.
4650 // FIXME: Handle nonans FMF and use isKnownNeverNaN.
4651 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
4652 SDValue NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
4654 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
4655 SDValue NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
4657 unsigned Opc =
4658 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
4659 return DAG.getNode(Opc, DL, VT, NewX, NewY);
4660 }
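// For example (a sketch): fmaximum(NaN, 3.0) fails X's ordered self-compare,
// so Y is replaced with X and both operands become NaN; the final fmax then
// propagates NaN instead of returning 3.0.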
4662 /// Get a RISCV target specified VL op for a given SDNode.
4663 static unsigned getRISCVVLOp(SDValue Op) {
4664 #define OP_CASE(NODE)                                                          \
4665   case ISD::NODE:                                                              \
4666     return RISCVISD::NODE##_VL;
4667 switch (Op.getOpcode()) {
4668 default:
4669 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
4700 OP_CASE(STRICT_FADD)
4701 OP_CASE(STRICT_FSUB)
4702 OP_CASE(STRICT_FMUL)
4703 OP_CASE(STRICT_FDIV)
4704 OP_CASE(STRICT_FSQRT)
4705 #undef OP_CASE
4707 case ISD::FMA:
4708 return RISCVISD::VFMADD_VL;
4709 case ISD::STRICT_FMA:
4710 return RISCVISD::STRICT_VFMADD_VL;
4711 case ISD::AND:
4712 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
4713 return RISCVISD::VMAND_VL;
4714 return RISCVISD::AND_VL;
4715 case ISD::OR:
4716 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
4717 return RISCVISD::VMOR_VL;
4718 return RISCVISD::OR_VL;
4719 case ISD::XOR:
4720 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
4721 return RISCVISD::VMXOR_VL;
4722 return RISCVISD::XOR_VL;
4723 }
4724 }
4726 /// Return true if a RISC-V target specified op has a merge operand.
4727 static bool hasMergeOp(unsigned Opcode) {
4728 assert(Opcode > RISCVISD::FIRST_NUMBER &&
4729 Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL &&
4730 "not a RISC-V target specific op");
4731 assert(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL - RISCVISD::FIRST_NUMBER == 421 &&
4732 "adding target specific op should update this function");
4733 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::FMAXNUM_VL)
4734 return true;
4735 if (Opcode == RISCVISD::FCOPYSIGN_VL)
4736 return true;
4737 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
4738 return true;
4739 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
4740 return true;
4741 return false;
4742 }
4744 /// Return true if a RISC-V target specified op has a mask operand.
4745 static bool hasMaskOp(unsigned Opcode) {
4746 assert(Opcode > RISCVISD::FIRST_NUMBER &&
4747 Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL &&
4748 "not a RISC-V target specific op");
4749 assert(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL - RISCVISD::FIRST_NUMBER == 421 &&
4750 "adding target specific op should update this function");
4751 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
4752 return true;
4753 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
4754 return true;
4755 if (Opcode >= RISCVISD::STRICT_FADD_VL &&
4756 Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL)
4757 return true;
4758 return false;
4759 }
4761 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
4762 SelectionDAG &DAG) const {
4763 switch (Op.getOpcode()) {
4764 default:
4765 report_fatal_error("unimplemented operand");
4766 case ISD::ATOMIC_FENCE:
4767 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
4768 case ISD::GlobalAddress:
4769 return lowerGlobalAddress(Op, DAG);
4770 case ISD::BlockAddress:
4771 return lowerBlockAddress(Op, DAG);
4772 case ISD::ConstantPool:
4773 return lowerConstantPool(Op, DAG);
4774 case ISD::JumpTable:
4775 return lowerJumpTable(Op, DAG);
4776 case ISD::GlobalTLSAddress:
4777 return lowerGlobalTLSAddress(Op, DAG);
4778 case ISD::Constant:
4779 return lowerConstant(Op, DAG, Subtarget);
4780 case ISD::SELECT:
4781 return lowerSELECT(Op, DAG);
4782 case ISD::BRCOND:
4783 return lowerBRCOND(Op, DAG);
4784 case ISD::VASTART:
4785 return lowerVASTART(Op, DAG);
4786 case ISD::FRAMEADDR:
4787 return lowerFRAMEADDR(Op, DAG);
4788 case ISD::RETURNADDR:
4789 return lowerRETURNADDR(Op, DAG);
4790 case ISD::SHL_PARTS:
4791 return lowerShiftLeftParts(Op, DAG);
4792 case ISD::SRA_PARTS:
4793 return lowerShiftRightParts(Op, DAG, true);
4794 case ISD::SRL_PARTS:
4795 return lowerShiftRightParts(Op, DAG, false);
4796 case ISD::ROTL:
4797 case ISD::ROTR:
4798 assert(Subtarget.hasVendorXTHeadBb() &&
4799 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
4800 "Unexpected custom legalization");
4801 // XTHeadBb only supports rotate by constant.
4802 if (!isa<ConstantSDNode>(Op.getOperand(1)))
4803 return SDValue();
4804 return Op;
4805 case ISD::BITCAST: {
4807 EVT VT = Op.getValueType();
4808 SDValue Op0 = Op.getOperand(0);
4809 EVT Op0VT = Op0.getValueType();
4810 MVT XLenVT = Subtarget.getXLenVT();
4811 if (VT == MVT::f16 && Op0VT == MVT::i16 &&
4812 Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) {
4813 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
4814 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
4815 return FPConv;
4816 }
4817 if (VT == MVT::bf16 && Op0VT == MVT::i16 &&
4818 Subtarget.hasStdExtZfbfmin()) {
4819 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
4820 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0);
4821 return FPConv;
4822 }
4823 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
4824 Subtarget.hasStdExtFOrZfinx()) {
4825 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
4826 SDValue FPConv =
4827 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
4828 return FPConv;
4829 }
4830 if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32 &&
4831 Subtarget.hasStdExtZfa()) {
4832 SDValue Lo, Hi;
4833 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
4834 SDValue RetReg =
4835 DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
4836 return RetReg;
4837 }
4839 // Consider other scalar<->scalar casts as legal if the types are legal.
4840 // Otherwise expand them.
4841 if (!VT.isVector() && !Op0VT.isVector()) {
4842 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
4843 return Op;
4844 return SDValue();
4845 }
4847 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
4848 "Unexpected types");
4850 if (VT.isFixedLengthVector()) {
4851 // We can handle fixed length vector bitcasts with a simple replacement
4852 // in both directions.
4853 if (Op0VT.isFixedLengthVector())
4854 return Op;
4855 // When bitcasting from scalar to fixed-length vector, insert the scalar
4856 // into a one-element vector of the result type, and perform a vector
4857 // bitcast.
4858 if (!Op0VT.isVector()) {
4859 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
4860 if (!isTypeLegal(BVT))
4861 return SDValue();
4862 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
4863 DAG.getUNDEF(BVT), Op0,
4864 DAG.getConstant(0, DL, XLenVT)));
4865 }
4866 }
4868 // Custom-legalize bitcasts from fixed-length vector types to scalar types
4869 // thus: bitcast the vector to a one-element vector type whose element type
4870 // is the same as the result type, and extract the first element.
4871 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
4872 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
4873 if (!isTypeLegal(BVT))
4874 return SDValue();
4875 SDValue BVec = DAG.getBitcast(BVT, Op0);
4876 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
4877 DAG.getConstant(0, DL, XLenVT));
4878 }
4879 return SDValue();
4880 }
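// Worked example (editorial): bitcasting i64 -> v8i8 on RV64 takes the
// scalar-to-fixed-vector path above: the i64 is inserted into a one-element
// v1i64 (when that type is legal) and the result is bitcast to v8i8; the
// opposite direction bitcasts to v1i64 and extracts element 0.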
4881 case ISD::INTRINSIC_WO_CHAIN:
4882 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
4883 case ISD::INTRINSIC_W_CHAIN:
4884 return LowerINTRINSIC_W_CHAIN(Op, DAG);
4885 case ISD::INTRINSIC_VOID:
4886 return LowerINTRINSIC_VOID(Op, DAG);
4887 case ISD::IS_FPCLASS:
4888 return LowerIS_FPCLASS(Op, DAG);
4889 case ISD::BITREVERSE: {
4890 MVT VT = Op.getSimpleValueType();
4891 SDLoc DL(Op);
4892 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
4893 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
4894 // Expand bitreverse to a bswap(rev8) followed by brev8.
4895 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
4896 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
4897 }
4898 case ISD::TRUNCATE:
4899 // Only custom-lower vector truncates
4900 if (!Op.getSimpleValueType().isVector())
4901 return Op;
4902 return lowerVectorTruncLike(Op, DAG);
4903 case ISD::ANY_EXTEND:
4904 case ISD::ZERO_EXTEND:
4905 if (Op.getOperand(0).getValueType().isVector() &&
4906 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
4907 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
4908 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
4909 case ISD::SIGN_EXTEND:
4910 if (Op.getOperand(0).getValueType().isVector() &&
4911 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
4912 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
4913 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
4914 case ISD::SPLAT_VECTOR_PARTS:
4915 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
4916 case ISD::INSERT_VECTOR_ELT:
4917 return lowerINSERT_VECTOR_ELT(Op, DAG);
4918 case ISD::EXTRACT_VECTOR_ELT:
4919 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
4920 case ISD::SCALAR_TO_VECTOR: {
4921 MVT VT = Op.getSimpleValueType();
4922 SDLoc DL(Op);
4923 SDValue Scalar = Op.getOperand(0);
4924 if (VT.getVectorElementType() == MVT::i1) {
4925 MVT WideVT = VT.changeVectorElementType(MVT::i8);
4926 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
4927 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
4928 }
4929 MVT ContainerVT = VT;
4930 if (VT.isFixedLengthVector())
4931 ContainerVT = getContainerForFixedLengthVector(VT);
4932 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
4933 SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
4934 DAG.getUNDEF(ContainerVT), Scalar, VL);
4935 if (VT.isFixedLengthVector())
4936 V = convertFromScalableVector(VT, V, DAG, Subtarget);
4937 return V;
4938 }
4939 case ISD::VSCALE: {
4940 MVT VT = Op.getSimpleValueType();
4941 SDLoc DL(Op);
4942 SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
4943 // We define our scalable vector types for lmul=1 to use a 64 bit known
4944 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
4945 // vscale as VLENB / 8.
4946 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
4947 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
4948 report_fatal_error("Support for VLEN==32 is incomplete.");
4949 // We assume VLENB is a multiple of 8. We manually choose the best shift
4950 // here because SimplifyDemandedBits isn't always able to simplify it.
4951 uint64_t Val = Op.getConstantOperandVal(0);
4952 if (isPowerOf2_64(Val)) {
4953 uint64_t Log2 = Log2_64(Val);
4954 if (Log2 < 3)
4955 return DAG.getNode(ISD::SRL, DL, VT, VLENB,
4956 DAG.getConstant(3 - Log2, DL, VT));
4957 if (Log2 > 3)
4958 return DAG.getNode(ISD::SHL, DL, VT, VLENB,
4959 DAG.getConstant(Log2 - 3, DL, VT));
4960 return VLENB;
4961 }
4962 // If the multiplier is a multiple of 8, scale it down to avoid needing
4963 // to shift the VLENB value.
4964 if ((Val % 8) == 0)
4965 return DAG.getNode(ISD::MUL, DL, VT, VLENB,
4966 DAG.getConstant(Val / 8, DL, VT));
4968 SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
4969 DAG.getConstant(3, DL, VT));
4970 return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
4971 }
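// Worked example (editorial): with RVVBitsPerBlock == 64, vscale equals
// VLENB / 8. For (vscale x 4), Val == 4 is a power of two with Log2 == 2,
// so the code above folds the divide-then-multiply into a single
// (srl VLENB, 3 - 2), i.e. VLENB >> 1.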
4972 case ISD::FPOWI: {
4973 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
4974 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
4975 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
4976 Op.getOperand(1).getValueType() == MVT::i32) {
4977 SDLoc DL(Op);
4978 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
4979 SDValue Powi =
4980 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
4981 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
4982 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
4983 }
4984 return SDValue();
4985 }
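// Editorial note: after this promotion an f16 powi is
// (fp_round (fpowi (fp_extend f32 x), i32 n)); the f32 fpowi is then turned
// into a libcall by the legalizer, as the comment above says. The custom
// step exists only because i32 is not a legal type on RV64, which would
// otherwise defeat the default FPOWI promotion.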
4986 case ISD::FMAXIMUM:
4987 case ISD::FMINIMUM:
4988 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
4989 case ISD::FP_EXTEND: {
4990 SDLoc DL(Op);
4991 EVT VT = Op.getValueType();
4992 SDValue Op0 = Op.getOperand(0);
4993 EVT Op0VT = Op0.getValueType();
4994 if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin())
4995 return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
4996 if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) {
4997 SDValue FloatVal =
4998 DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
4999 return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal);
5000 }
5002 if (!Op.getValueType().isVector())
5003 return Op;
5004 return lowerVectorFPExtendOrRoundLike(Op, DAG);
5005 }
5006 case ISD::FP_ROUND: {
5007 SDLoc DL(Op);
5008 EVT VT = Op.getValueType();
5009 SDValue Op0 = Op.getOperand(0);
5010 EVT Op0VT = Op0.getValueType();
5011 if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin())
5012 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0);
5013 if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() &&
5014 Subtarget.hasStdExtDOrZdinx()) {
5015 SDValue FloatVal =
5016 DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0,
5017 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
5018 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal);
5019 }
5021 if (!Op.getValueType().isVector())
5022 return Op;
5023 return lowerVectorFPExtendOrRoundLike(Op, DAG);
5024 }
5025 case ISD::STRICT_FP_ROUND:
5026 case ISD::STRICT_FP_EXTEND:
5027 return lowerStrictFPExtendOrRoundLike(Op, DAG);
5028 case ISD::FP_TO_SINT:
5029 case ISD::FP_TO_UINT:
5030 case ISD::SINT_TO_FP:
5031 case ISD::UINT_TO_FP:
5032 case ISD::STRICT_FP_TO_SINT:
5033 case ISD::STRICT_FP_TO_UINT:
5034 case ISD::STRICT_SINT_TO_FP:
5035 case ISD::STRICT_UINT_TO_FP: {
5036 // RVV can only do fp<->int conversions to types half/double the size as
5037 // the source. We custom-lower any conversions that do two hops into
5038 // sequences of one hop.
5039 MVT VT = Op.getSimpleValueType();
5040 if (!VT.isVector())
5041 return Op;
5042 SDLoc DL(Op);
5043 bool IsStrict = Op->isStrictFPOpcode();
5044 SDValue Src = Op.getOperand(0 + IsStrict);
5045 MVT EltVT = VT.getVectorElementType();
5046 MVT SrcVT = Src.getSimpleValueType();
5047 MVT SrcEltVT = SrcVT.getVectorElementType();
5048 unsigned EltSize = EltVT.getSizeInBits();
5049 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
5050 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
5051 "Unexpected vector element types");
5053 bool IsInt2FP = SrcEltVT.isInteger();
5054 // Widening conversions
5055 if (EltSize > (2 * SrcEltSize)) {
5056 if (IsInt2FP) {
5057 // Do a regular integer sign/zero extension then convert to float.
5058 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
5059 VT.getVectorElementCount());
5060 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
5061 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
5062 ? ISD::ZERO_EXTEND
5063 : ISD::SIGN_EXTEND;
5064 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
5065 if (IsStrict)
5066 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
5067 Op.getOperand(0), Ext);
5068 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
5069 }
5070 // FP2Int
5071 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
5072 // Do one doubling fp_extend then complete the operation by converting
5073 // to the wider type.
5074 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
5075 if (IsStrict) {
5076 auto [FExt, Chain] =
5077 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
5078 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
5079 }
5080 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
5081 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
5082 }
5084 // Narrowing conversions
5085 if (SrcEltSize > (2 * EltSize)) {
5086 if (IsInt2FP) {
5087 // One narrowing int_to_fp, then an fp_round.
5088 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
5089 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
5090 if (IsStrict) {
5091 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
5092 DAG.getVTList(InterimFVT, MVT::Other),
5093 Op.getOperand(0), Src);
5094 SDValue Chain = Int2FP.getValue(1);
5095 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
5096 }
5097 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
5098 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
5099 }
5100 // FP2Int
5101 // One narrowing fp_to_int, then truncate the integer. If the float isn't
5102 // representable by the integer, the result is poison.
5103 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
5104 VT.getVectorElementCount());
5105 if (IsStrict) {
5106 SDValue FP2Int =
5107 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
5108 Op.getOperand(0), Src);
5109 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
5110 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
5111 }
5112 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
5113 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
5114 }
5116 // Scalable vectors can exit here. Patterns will handle equally-sized
5117 // conversions halving/doubling ones.
5118 if (!VT.isFixedLengthVector())
5119 return Op;
5121 // For fixed-length vectors we lower to a custom "VL" node.
5122 unsigned RVVOpc = 0;
5123 switch (Op.getOpcode()) {
5124 default:
5125 llvm_unreachable("Impossible opcode");
5126 case ISD::FP_TO_SINT:
5127 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
5128 break;
5129 case ISD::FP_TO_UINT:
5130 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
5131 break;
5132 case ISD::SINT_TO_FP:
5133 RVVOpc = RISCVISD::SINT_TO_FP_VL;
5134 break;
5135 case ISD::UINT_TO_FP:
5136 RVVOpc = RISCVISD::UINT_TO_FP_VL;
5137 break;
5138 case ISD::STRICT_FP_TO_SINT:
5139 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
5140 break;
5141 case ISD::STRICT_FP_TO_UINT:
5142 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
5143 break;
5144 case ISD::STRICT_SINT_TO_FP:
5145 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
5146 break;
5147 case ISD::STRICT_UINT_TO_FP:
5148 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
5149 break;
5150 }
5152 MVT ContainerVT = getContainerForFixedLengthVector(VT);
5153 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
5154 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
5155 "Expected same element count");
5157 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5159 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
5160 if (IsStrict) {
5161 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
5162 Op.getOperand(0), Src, Mask, VL);
5163 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
5164 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
5165 }
5166 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
5167 return convertFromScalableVector(VT, Src, DAG, Subtarget);
5168 }
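// Worked example (editorial): (sint_to_fp v4f32, v4i8) has a 4x element
// size difference, so the code above first sign-extends v4i8 to v4i16
// (half the destination width) and then emits one widening i16 -> f32
// conversion, which RVV supports directly.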
5169 case ISD::FP_TO_SINT_SAT:
5170 case ISD::FP_TO_UINT_SAT:
5171 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
5172 case ISD::FP_TO_BF16: {
5173 // Custom lower to ensure the libcall return is passed in an FPR on hard
5174 // float ABIs.
5175 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
5176 SDLoc DL(Op);
5177 MakeLibCallOptions CallOptions;
5178 RTLIB::Libcall LC =
5179 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
5180 SDValue Res =
5181 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
5182 if (Subtarget.is64Bit())
5183 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
5184 return DAG.getBitcast(MVT::i32, Res);
5185 }
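// Editorial note: the FPROUND libcall returns its result in an FPR under
// the hard-float ABIs asserted above, so the lowering moves the raw bits
// into a GPR (FMV_X_ANYEXTW_RV64 on RV64, a plain bitcast to i32 on RV32)
// instead of spilling through memory.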
5186 case ISD::BF16_TO_FP: {
5187 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
5188 MVT VT = Op.getSimpleValueType();
5189 SDLoc DL(Op);
5190 Op = DAG.getNode(
5191 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
5192 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
5193 SDValue Res = Subtarget.is64Bit()
5194 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
5195 : DAG.getBitcast(MVT::f32, Op);
5196 // fp_extend if the target VT is bigger than f32.
5197 if (VT != MVT::f32)
5198 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
5199 return Res;
5200 }
5201 case ISD::FP_TO_FP16: {
5202 // Custom lower to ensure the libcall return is passed in an FPR on hard
5203 // float ABIs.
5204 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
5205 SDLoc DL(Op);
5206 MakeLibCallOptions CallOptions;
5207 RTLIB::Libcall LC =
5208 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
5209 SDValue Res =
5210 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
5211 if (Subtarget.is64Bit())
5212 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
5213 return DAG.getBitcast(MVT::i32, Res);
5214 }
5215 case ISD::FP16_TO_FP: {
5216 // Custom lower to ensure the libcall argument is passed in an FPR on hard
5217 // float ABIs.
5218 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
5219 SDLoc DL(Op);
5220 MakeLibCallOptions CallOptions;
5221 SDValue Arg = Subtarget.is64Bit()
5222 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32,
5223 Op.getOperand(0))
5224 : DAG.getBitcast(MVT::f32, Op.getOperand(0));
5225 SDValue Res =
5226 makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL)
5227 .first;
5228 return Res;
5229 }
5230 case ISD::FTRUNC:
5231 case ISD::FCEIL:
5232 case ISD::FFLOOR:
5233 case ISD::FNEARBYINT:
5234 case ISD::FRINT:
5235 case ISD::FROUND:
5236 case ISD::FROUNDEVEN:
5237 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
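// Editorial note: none of these rounding ops has a single RVV instruction;
// lowerFTRUNC_FCEIL_FFLOOR_FROUND is a shared helper that implements them
// (roughly, via a float->int->float round trip under the appropriate
// rounding behaviour), which is why all seven ISD opcodes funnel into it.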
5238 case ISD::VECREDUCE_ADD:
5239 case ISD::VECREDUCE_UMAX:
5240 case ISD::VECREDUCE_SMAX:
5241 case ISD::VECREDUCE_UMIN:
5242 case ISD::VECREDUCE_SMIN:
5243 return lowerVECREDUCE(Op, DAG);
5244 case ISD::VECREDUCE_AND:
5245 case ISD::VECREDUCE_OR:
5246 case ISD::VECREDUCE_XOR:
5247 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
5248 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
5249 return lowerVECREDUCE(Op, DAG);
5250 case ISD::VECREDUCE_FADD:
5251 case ISD::VECREDUCE_SEQ_FADD:
5252 case ISD::VECREDUCE_FMIN:
5253 case ISD::VECREDUCE_FMAX:
5254 return lowerFPVECREDUCE(Op, DAG);
5255 case ISD::VP_REDUCE_ADD:
5256 case ISD::VP_REDUCE_UMAX:
5257 case ISD::VP_REDUCE_SMAX:
5258 case ISD::VP_REDUCE_UMIN:
5259 case ISD::VP_REDUCE_SMIN:
5260 case ISD::VP_REDUCE_FADD:
5261 case ISD::VP_REDUCE_SEQ_FADD:
5262 case ISD::VP_REDUCE_FMIN:
5263 case ISD::VP_REDUCE_FMAX:
5264 return lowerVPREDUCE(Op, DAG);
5265 case ISD::VP_REDUCE_AND:
5266 case ISD::VP_REDUCE_OR:
5267 case ISD::VP_REDUCE_XOR:
5268 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
5269 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
5270 return lowerVPREDUCE(Op, DAG);
5271 case ISD::UNDEF: {
5272 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
5273 return convertFromScalableVector(Op.getSimpleValueType(),
5274 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
5275 }
5276 case ISD::INSERT_SUBVECTOR:
5277 return lowerINSERT_SUBVECTOR(Op, DAG);
5278 case ISD::EXTRACT_SUBVECTOR:
5279 return lowerEXTRACT_SUBVECTOR(Op, DAG);
5280 case ISD::VECTOR_DEINTERLEAVE:
5281 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
5282 case ISD::VECTOR_INTERLEAVE:
5283 return lowerVECTOR_INTERLEAVE(Op, DAG);
5284 case ISD::STEP_VECTOR:
5285 return lowerSTEP_VECTOR(Op, DAG);
5286 case ISD::VECTOR_REVERSE:
5287 return lowerVECTOR_REVERSE(Op, DAG);
5288 case ISD::VECTOR_SPLICE:
5289 return lowerVECTOR_SPLICE(Op, DAG);
5290 case ISD::BUILD_VECTOR:
5291 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
5292 case ISD::SPLAT_VECTOR:
5293 if (Op.getValueType().getVectorElementType() == MVT::i1)
5294 return lowerVectorMaskSplat(Op, DAG);
5295 return SDValue();
5296 case ISD::VECTOR_SHUFFLE:
5297 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
5298 case ISD::CONCAT_VECTORS: {
5299 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
5300 // better than going through the stack, as the default expansion does.
5301 SDLoc DL(Op);
5302 MVT VT = Op.getSimpleValueType();
5303 unsigned NumOpElts =
5304 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
5305 SDValue Vec = DAG.getUNDEF(VT);
5306 for (const auto &OpIdx : enumerate(Op->ops())) {
5307 SDValue SubVec = OpIdx.value();
5308 // Don't insert undef subvectors.
5309 if (SubVec.isUndef())
5310 continue;
5311 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
5312 DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
5313 }
5314 return Vec;
5315 }
5316 case ISD::LOAD:
5317 if (auto V = expandUnalignedRVVLoad(Op, DAG))
5318 return V;
5319 if (Op.getValueType().isFixedLengthVector())
5320 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
5321 return Op;
5322 case ISD::STORE:
5323 if (auto V = expandUnalignedRVVStore(Op, DAG))
5324 return V;
5325 if (Op.getOperand(1).getValueType().isFixedLengthVector())
5326 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
5327 return Op;
5328 case ISD::MLOAD:
5329 case ISD::VP_LOAD:
5330 return lowerMaskedLoad(Op, DAG);
5331 case ISD::MSTORE:
5332 case ISD::VP_STORE:
5333 return lowerMaskedStore(Op, DAG);
5334 case ISD::SELECT_CC: {
5335 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
5336 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
5337 // into separate SETCC+SELECT just like LegalizeDAG.
5338 SDValue Tmp1 = Op.getOperand(0);
5339 SDValue Tmp2 = Op.getOperand(1);
5340 SDValue True = Op.getOperand(2);
5341 SDValue False = Op.getOperand(3);
5342 EVT VT = Op.getValueType();
5343 SDValue CC = Op.getOperand(4);
5344 EVT CmpVT = Tmp1.getValueType();
5345 EVT CCVT =
5346 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
5347 SDLoc DL(Op);
5348 SDValue Cond =
5349 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
5350 return DAG.getSelect(DL, VT, Cond, True, False);
5351 }
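// Editorial example: (select_cc x, y, setugt, a, b) is rebuilt above as
// (select (setcc x, y, setugt), a, b); the separate setcc is then free to
// hit the custom SETGT/SETUGT legalization in the ISD::SETCC case that
// follows.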
5352 case ISD::SETCC: {
5353 MVT OpVT = Op.getOperand(0).getSimpleValueType();
5354 if (OpVT.isScalarInteger()) {
5355 MVT VT = Op.getSimpleValueType();
5356 SDValue LHS = Op.getOperand(0);
5357 SDValue RHS = Op.getOperand(1);
5358 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
5359 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
5360 "Unexpected CondCode");
5364 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
5365 // convert this to the equivalent of (set(u)ge X, C+1) by using
5366 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
5367 // for the RHS.
5368 if (isa<ConstantSDNode>(RHS)) {
5369 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
5370 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
5371 // X > -1 should have been replaced with false.
5372 assert((CCVal != ISD::SETUGT || Imm != -1) &&
5373 "Missing canonicalization");
5374 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
5375 CCVal = ISD::getSetCCSwappedOperands(CCVal);
5376 SDValue SetCC = DAG.getSetCC(
5377 DL, VT, LHS, DAG.getConstant(Imm + 1, DL, OpVT), CCVal);
5378 return DAG.getLogicalNOT(DL, SetCC, VT);
5379 }
5380 }
5382 // Not a constant we could handle, swap the operands and condition code to
5383 // SET(U)LT.
5384 CCVal = ISD::getSetCCSwappedOperands(CCVal);
5385 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
5386 }
5388 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
5389 }
5390 case ISD::ADD:
5391 case ISD::SUB:
5392 case ISD::MUL:
5393 case ISD::MULHS:
5394 case ISD::MULHU:
5395 case ISD::AND:
5396 case ISD::OR:
5397 case ISD::XOR:
5398 case ISD::SDIV:
5399 case ISD::SREM:
5400 case ISD::UDIV:
5401 case ISD::UREM:
5402 return lowerToScalableOp(Op, DAG);
5403 case ISD::SHL:
5404 case ISD::SRA:
5405 case ISD::SRL:
5406 if (Op.getSimpleValueType().isFixedLengthVector())
5407 return lowerToScalableOp(Op, DAG);
5408 // This can be called for an i32 shift amount that needs to be promoted.
5409 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
5410 "Unexpected custom legalisation");
5411 return SDValue();
5412 case ISD::SADDSAT:
5413 case ISD::UADDSAT:
5414 case ISD::SSUBSAT:
5415 case ISD::USUBSAT:
5416 case ISD::FADD:
5417 case ISD::FSUB:
5418 case ISD::FMUL:
5419 case ISD::FDIV:
5420 case ISD::FNEG:
5421 case ISD::FABS:
5422 case ISD::FSQRT:
5423 case ISD::FMA:
5424 case ISD::SMIN:
5425 case ISD::SMAX:
5426 case ISD::UMIN:
5427 case ISD::UMAX:
5428 case ISD::FMINNUM:
5429 case ISD::FMAXNUM:
5430 return lowerToScalableOp(Op, DAG);
5431 case ISD::ABS:
5432 case ISD::VP_ABS:
5433 return lowerABS(Op, DAG);
5434 case ISD::CTLZ:
5435 case ISD::CTLZ_ZERO_UNDEF:
5436 case ISD::CTTZ_ZERO_UNDEF:
5437 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
5438 case ISD::VSELECT:
5439 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
5440 case ISD::FCOPYSIGN:
5441 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
5442 case ISD::STRICT_FADD:
5443 case ISD::STRICT_FSUB:
5444 case ISD::STRICT_FMUL:
5445 case ISD::STRICT_FDIV:
5446 case ISD::STRICT_FSQRT:
5447 case ISD::STRICT_FMA:
5448 return lowerToScalableOp(Op, DAG);
5449 case ISD::STRICT_FSETCC:
5450 case ISD::STRICT_FSETCCS:
5451 return lowerVectorStrictFSetcc(Op, DAG);
5452 case ISD::STRICT_FCEIL:
5453 case ISD::STRICT_FRINT:
5454 case ISD::STRICT_FFLOOR:
5455 case ISD::STRICT_FTRUNC:
5456 case ISD::STRICT_FNEARBYINT:
5457 case ISD::STRICT_FROUND:
5458 case ISD::STRICT_FROUNDEVEN:
5459 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
5460 case ISD::MGATHER:
5461 case ISD::VP_GATHER:
5462 return lowerMaskedGather(Op, DAG);
5463 case ISD::MSCATTER:
5464 case ISD::VP_SCATTER:
5465 return lowerMaskedScatter(Op, DAG);
5466 case ISD::GET_ROUNDING:
5467 return lowerGET_ROUNDING(Op, DAG);
5468 case ISD::SET_ROUNDING:
5469 return lowerSET_ROUNDING(Op, DAG);
5470 case ISD::EH_DWARF_CFA:
5471 return lowerEH_DWARF_CFA(Op, DAG);
5472 case ISD::VP_SELECT:
5473 return lowerVPOp(Op, DAG, RISCVISD::VSELECT_VL);
5474 case ISD::VP_MERGE:
5475 return lowerVPOp(Op, DAG, RISCVISD::VP_MERGE_VL);
5476 case ISD::VP_ADD:
5477 return lowerVPOp(Op, DAG, RISCVISD::ADD_VL, /*HasMergeOp*/ true);
5478 case ISD::VP_SUB:
5479 return lowerVPOp(Op, DAG, RISCVISD::SUB_VL, /*HasMergeOp*/ true);
5480 case ISD::VP_MUL:
5481 return lowerVPOp(Op, DAG, RISCVISD::MUL_VL, /*HasMergeOp*/ true);
5482 case ISD::VP_SDIV:
5483 return lowerVPOp(Op, DAG, RISCVISD::SDIV_VL, /*HasMergeOp*/ true);
5484 case ISD::VP_UDIV:
5485 return lowerVPOp(Op, DAG, RISCVISD::UDIV_VL, /*HasMergeOp*/ true);
5486 case ISD::VP_SREM:
5487 return lowerVPOp(Op, DAG, RISCVISD::SREM_VL, /*HasMergeOp*/ true);
5488 case ISD::VP_UREM:
5489 return lowerVPOp(Op, DAG, RISCVISD::UREM_VL, /*HasMergeOp*/ true);
5490 case ISD::VP_AND:
5491 return lowerLogicVPOp(Op, DAG, RISCVISD::VMAND_VL, RISCVISD::AND_VL);
5492 case ISD::VP_OR:
5493 return lowerLogicVPOp(Op, DAG, RISCVISD::VMOR_VL, RISCVISD::OR_VL);
5494 case ISD::VP_XOR:
5495 return lowerLogicVPOp(Op, DAG, RISCVISD::VMXOR_VL, RISCVISD::XOR_VL);
5496 case ISD::VP_ASHR:
5497 return lowerVPOp(Op, DAG, RISCVISD::SRA_VL, /*HasMergeOp*/ true);
5498 case ISD::VP_LSHR:
5499 return lowerVPOp(Op, DAG, RISCVISD::SRL_VL, /*HasMergeOp*/ true);
5500 case ISD::VP_SHL:
5501 return lowerVPOp(Op, DAG, RISCVISD::SHL_VL, /*HasMergeOp*/ true);
5502 case ISD::VP_FADD:
5503 return lowerVPOp(Op, DAG, RISCVISD::FADD_VL, /*HasMergeOp*/ true);
5504 case ISD::VP_FSUB:
5505 return lowerVPOp(Op, DAG, RISCVISD::FSUB_VL, /*HasMergeOp*/ true);
5506 case ISD::VP_FMUL:
5507 return lowerVPOp(Op, DAG, RISCVISD::FMUL_VL, /*HasMergeOp*/ true);
5508 case ISD::VP_FDIV:
5509 return lowerVPOp(Op, DAG, RISCVISD::FDIV_VL, /*HasMergeOp*/ true);
5510 case ISD::VP_FNEG:
5511 return lowerVPOp(Op, DAG, RISCVISD::FNEG_VL);
5512 case ISD::VP_FABS:
5513 return lowerVPOp(Op, DAG, RISCVISD::FABS_VL);
5514 case ISD::VP_SQRT:
5515 return lowerVPOp(Op, DAG, RISCVISD::FSQRT_VL);
5516 case ISD::VP_FMA:
5517 return lowerVPOp(Op, DAG, RISCVISD::VFMADD_VL);
5518 case ISD::VP_FMINNUM:
5519 return lowerVPOp(Op, DAG, RISCVISD::FMINNUM_VL, /*HasMergeOp*/ true);
5520 case ISD::VP_FMAXNUM:
5521 return lowerVPOp(Op, DAG, RISCVISD::FMAXNUM_VL, /*HasMergeOp*/ true);
5522 case ISD::VP_FCOPYSIGN:
5523 return lowerVPOp(Op, DAG, RISCVISD::FCOPYSIGN_VL, /*HasMergeOp*/ true);
5524 case ISD::VP_SIGN_EXTEND:
5525 case ISD::VP_ZERO_EXTEND:
5526 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
5527 return lowerVPExtMaskOp(Op, DAG);
5528 return lowerVPOp(Op, DAG,
5529 Op.getOpcode() == ISD::VP_SIGN_EXTEND
5530 ? RISCVISD::VSEXT_VL
5531 : RISCVISD::VZEXT_VL);
5532 case ISD::VP_TRUNCATE:
5533 return lowerVectorTruncLike(Op, DAG);
5534 case ISD::VP_FP_EXTEND:
5535 case ISD::VP_FP_ROUND:
5536 return lowerVectorFPExtendOrRoundLike(Op, DAG);
5537 case ISD::VP_FP_TO_SINT:
5538 return lowerVPFPIntConvOp(Op, DAG, RISCVISD::VFCVT_RTZ_X_F_VL);
5539 case ISD::VP_FP_TO_UINT:
5540 return lowerVPFPIntConvOp(Op, DAG, RISCVISD::VFCVT_RTZ_XU_F_VL);
5541 case ISD::VP_SINT_TO_FP:
5542 return lowerVPFPIntConvOp(Op, DAG, RISCVISD::SINT_TO_FP_VL);
5543 case ISD::VP_UINT_TO_FP:
5544 return lowerVPFPIntConvOp(Op, DAG, RISCVISD::UINT_TO_FP_VL);
5545 case ISD::VP_SETCC:
5546 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
5547 return lowerVPSetCCMaskOp(Op, DAG);
5548 return lowerVPOp(Op, DAG, RISCVISD::SETCC_VL, /*HasMergeOp*/ true);
5549 case ISD::VP_SMIN:
5550 return lowerVPOp(Op, DAG, RISCVISD::SMIN_VL, /*HasMergeOp*/ true);
5551 case ISD::VP_SMAX:
5552 return lowerVPOp(Op, DAG, RISCVISD::SMAX_VL, /*HasMergeOp*/ true);
5553 case ISD::VP_UMIN:
5554 return lowerVPOp(Op, DAG, RISCVISD::UMIN_VL, /*HasMergeOp*/ true);
5555 case ISD::VP_UMAX:
5556 return lowerVPOp(Op, DAG, RISCVISD::UMAX_VL, /*HasMergeOp*/ true);
5557 case ISD::VP_BITREVERSE:
5558 return lowerVPOp(Op, DAG, RISCVISD::BITREVERSE_VL, /*HasMergeOp*/ true);
5559 case ISD::VP_BSWAP:
5560 return lowerVPOp(Op, DAG, RISCVISD::BSWAP_VL, /*HasMergeOp*/ true);
5561 case ISD::VP_CTLZ:
5562 case ISD::VP_CTLZ_ZERO_UNDEF:
5563 if (Subtarget.hasStdExtZvbb())
5564 return lowerVPOp(Op, DAG, RISCVISD::CTLZ_VL, /*HasMergeOp*/ true);
5565 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
5566 case ISD::VP_CTTZ:
5567 case ISD::VP_CTTZ_ZERO_UNDEF:
5568 if (Subtarget.hasStdExtZvbb())
5569 return lowerVPOp(Op, DAG, RISCVISD::CTTZ_VL, /*HasMergeOp*/ true);
5570 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
5571 case ISD::VP_CTPOP:
5572 return lowerVPOp(Op, DAG, RISCVISD::CTPOP_VL, /*HasMergeOp*/ true);
5573 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
5574 return lowerVPStridedLoad(Op, DAG);
5575 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
5576 return lowerVPStridedStore(Op, DAG);
5577 case ISD::VP_FCEIL:
5578 case ISD::VP_FFLOOR:
5579 case ISD::VP_FRINT:
5580 case ISD::VP_FNEARBYINT:
5581 case ISD::VP_FROUND:
5582 case ISD::VP_FROUNDEVEN:
5583 case ISD::VP_FROUNDTOZERO:
5584 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
5585 }
5586 }
5588 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
5589 SelectionDAG &DAG, unsigned Flags) {
5590 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
5591 }
5593 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
5594 SelectionDAG &DAG, unsigned Flags) {
5595 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
5596 Flags);
5597 }
5599 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
5600 SelectionDAG &DAG, unsigned Flags) {
5601 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
5602 N->getOffset(), Flags);
5603 }
5605 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
5606 SelectionDAG &DAG, unsigned Flags) {
5607 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
5608 }
5610 template <class NodeTy>
5611 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
5612 bool IsLocal, bool IsExternWeak) const {
5613 SDLoc DL(N);
5614 EVT Ty = getPointerTy(DAG.getDataLayout());
5616 // When HWASAN is used and tagging of global variables is enabled
5617 // they should be accessed via the GOT, since the tagged address of a global
5618 // is incompatible with existing code models. This also applies to non-pic
5619 // mode.
5620 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
5621 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
5622 if (IsLocal && !Subtarget.allowTaggedGlobals())
5623 // Use PC-relative addressing to access the symbol. This generates the
5624 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
5625 // %pcrel_lo(auipc)).
5626 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
5628 // Use PC-relative addressing to access the GOT for this symbol, then load
5629 // the address from the GOT. This generates the pattern (PseudoLGA sym),
5630 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
5631 MachineFunction &MF = DAG.getMachineFunction();
5632 MachineMemOperand *MemOp = MF.getMachineMemOperand(
5633 MachinePointerInfo::getGOT(MF),
5634 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
5635 MachineMemOperand::MOInvariant,
5636 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
5637 SDValue Load =
5638 DAG.getMemIntrinsicNode(RISCVISD::LGA, DL, DAG.getVTList(Ty, MVT::Other),
5639 {DAG.getEntryNode(), Addr}, Ty, MemOp);
5640 return Load;
5641 }
5643 switch (getTargetMachine().getCodeModel()) {
5645 report_fatal_error("Unsupported code model for lowering");
5646 case CodeModel::Small: {
5647 // Generate a sequence for accessing addresses within the first 2 GiB of
5648 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
5649 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
5650 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
5651 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
5652 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
5653 }
5654 case CodeModel::Medium: {
5655 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
5656 if (IsExternWeak) {
5657 // An extern weak symbol may be undefined, i.e. have value 0, which may
5658 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
5659 // symbol. This generates the pattern (PseudoLGA sym), which expands to
5660 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
5661 MachineFunction &MF = DAG.getMachineFunction();
5662 MachineMemOperand *MemOp = MF.getMachineMemOperand(
5663 MachinePointerInfo::getGOT(MF),
5664 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
5665 MachineMemOperand::MOInvariant,
5666 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
5667 SDValue Load =
5668 DAG.getMemIntrinsicNode(RISCVISD::LGA, DL,
5669 DAG.getVTList(Ty, MVT::Other),
5670 {DAG.getEntryNode(), Addr}, Ty, MemOp);
5671 return Load;
5672 }
5674 // Generate a sequence for accessing addresses within any 2GiB range within
5675 // the address space. This generates the pattern (PseudoLLA sym), which
5676 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
5677 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
5678 }
5679 }
5680 }
5682 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
5683 SelectionDAG &DAG) const {
5684 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
5685 assert(N->getOffset() == 0 && "unexpected offset in global node");
5686 const GlobalValue *GV = N->getGlobal();
5687 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
5688 }
5690 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
5691 SelectionDAG &DAG) const {
5692 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
5694 return getAddr(N, DAG);
5695 }
5697 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
5698 SelectionDAG &DAG) const {
5699 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
5701 return getAddr(N, DAG);
5702 }
5704 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
5705 SelectionDAG &DAG) const {
5706 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
5708 return getAddr(N, DAG);
5709 }
5711 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
5712 SelectionDAG &DAG,
5713 bool UseGOT) const {
5714 SDLoc DL(N);
5715 EVT Ty = getPointerTy(DAG.getDataLayout());
5716 const GlobalValue *GV = N->getGlobal();
5717 MVT XLenVT = Subtarget.getXLenVT();
5719 if (UseGOT) {
5720 // Use PC-relative addressing to access the GOT for this TLS symbol, then
5721 // load the address from the GOT and add the thread pointer. This generates
5722 // the pattern (PseudoLA_TLS_IE sym), which expands to
5723 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
5724 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
5725 MachineFunction &MF = DAG.getMachineFunction();
5726 MachineMemOperand *MemOp = MF.getMachineMemOperand(
5727 MachinePointerInfo::getGOT(MF),
5728 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
5729 MachineMemOperand::MOInvariant,
5730 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
5731 SDValue Load = DAG.getMemIntrinsicNode(
5732 RISCVISD::LA_TLS_IE, DL, DAG.getVTList(Ty, MVT::Other),
5733 {DAG.getEntryNode(), Addr}, Ty, MemOp);
5735 // Add the thread pointer.
5736 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
5737 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
5738 }
5740 // Generate a sequence for accessing the address relative to the thread
5741 // pointer, with the appropriate adjustment for the thread pointer offset.
5742 // This generates the pattern
5743 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
5744 SDValue AddrHi =
5745 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
5746 SDValue AddrAdd =
5747 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
5748 SDValue AddrLo =
5749 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
5751 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
5752 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
5753 SDValue MNAdd =
5754 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
5755 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
5756 }
5758 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
5759 SelectionDAG &DAG) const {
5760 SDLoc DL(N);
5761 EVT Ty = getPointerTy(DAG.getDataLayout());
5762 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
5763 const GlobalValue *GV = N->getGlobal();
5765 // Use a PC-relative addressing mode to access the global dynamic GOT address.
5766 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
5767 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
5768 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
5769 SDValue Load = DAG.getNode(RISCVISD::LA_TLS_GD, DL, Ty, Addr);
5771 // Prepare argument list to generate call.
5772 ArgListTy Args;
5773 ArgListEntry Entry;
5774 Entry.Node = Load;
5775 Entry.Ty = CallTy;
5776 Args.push_back(Entry);
5778 // Setup call to __tls_get_addr.
5779 TargetLowering::CallLoweringInfo CLI(DAG);
5780 CLI.setDebugLoc(DL)
5781 .setChain(DAG.getEntryNode())
5782 .setLibCallee(CallingConv::C, CallTy,
5783 DAG.getExternalSymbol("__tls_get_addr", Ty),
5784 std::move(Args));
5786 return LowerCallTo(CLI).first;
5787 }
5789 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
5790 SelectionDAG &DAG) const {
5791 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
5792 assert(N->getOffset() == 0 && "unexpected offset in global node");
5794 if (DAG.getTarget().useEmulatedTLS())
5795 return LowerToTLSEmulatedModel(N, DAG);
5797 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
5799 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
5800 CallingConv::GHC)
5801 report_fatal_error("In GHC calling convention TLS is not supported");
5803 SDValue Addr;
5804 switch (Model) {
5805 case TLSModel::LocalExec:
5806 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
5807 break;
5808 case TLSModel::InitialExec:
5809 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
5810 break;
5811 case TLSModel::LocalDynamic:
5812 case TLSModel::GeneralDynamic:
5813 Addr = getDynamicTLSAddr(N, DAG);
5814 break;
5815 }
5817 return Addr;
5818 }
5820 // Return true if Val is equal to (setcc LHS, RHS, CC).
5821 // Return false if Val is the inverse of (setcc LHS, RHS, CC).
5822 // Otherwise, return std::nullopt.
5823 static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
5824 ISD::CondCode CC, SDValue Val) {
5825 assert(Val->getOpcode() == ISD::SETCC);
5826 SDValue LHS2 = Val.getOperand(0);
5827 SDValue RHS2 = Val.getOperand(1);
5828 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
5830 if (LHS == LHS2 && RHS == RHS2) {
5831 if (CC == CC2)
5832 return true;
5833 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
5834 return false;
5835 } else if (LHS == RHS2 && RHS == LHS2) {
5836 CC2 = ISD::getSetCCSwappedOperands(CC2);
5837 if (CC == CC2)
5838 return true;
5839 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
5840 return false;
5841 }
5843 return std::nullopt;
5844 }
5846 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
5847 const RISCVSubtarget &Subtarget) {
5848 SDValue CondV = N->getOperand(0);
5849 SDValue TrueV = N->getOperand(1);
5850 SDValue FalseV = N->getOperand(2);
5851 MVT VT = N->getSimpleValueType(0);
5852 SDLoc DL(N);
5854 if (!Subtarget.hasShortForwardBranchOpt()) {
5855 // (select c, -1, y) -> -c | y
5856 if (isAllOnesConstant(TrueV)) {
5857 SDValue Neg = DAG.getNegative(CondV, DL, VT);
5858 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
5859 }
5860 // (select c, y, -1) -> (c-1) | y
5861 if (isAllOnesConstant(FalseV)) {
5862 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
5863 DAG.getAllOnesConstant(DL, VT));
5864 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
5865 }
5867 // (select c, 0, y) -> (c-1) & y
5868 if (isNullConstant(TrueV)) {
5869 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
5870 DAG.getAllOnesConstant(DL, VT));
5871 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
5872 }
5873 // (select c, y, 0) -> -c & y
5874 if (isNullConstant(FalseV)) {
5875 SDValue Neg = DAG.getNegative(CondV, DL, VT);
5876 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
5877 }
5878 }
5880 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
5881 // when both truev and falsev are also setcc.
5882 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
5883 FalseV.getOpcode() == ISD::SETCC) {
5884 SDValue LHS = CondV.getOperand(0);
5885 SDValue RHS = CondV.getOperand(1);
5886 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
5888 // (select x, x, y) -> x | y
5889 // (select !x, x, y) -> x & y
5890 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
5891 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
5892 FalseV);
5893 }
5894 // (select x, y, x) -> x & y
5895 // (select !x, y, x) -> x | y
5896 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
5897 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT, TrueV,
5898 FalseV);
5899 }
5900 }
5902 return SDValue();
5903 }
5905 /// RISC-V doesn't have general instructions for integer setne/seteq, but we can
5906 /// check for equality with 0. This function emits nodes that convert the
5907 /// seteq/setne into something that can be compared with 0.
5908 /// Based on RISCVDAGToDAGISel::selectSETCC but modified to produce
5909 /// target-independent SelectionDAG nodes rather than machine nodes.
5910 static SDValue selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
5911 SelectionDAG &DAG) {
5912 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
5913 "Unexpected condition code!");
5915 // We're looking for a setcc.
5916 if (N->getOpcode() != ISD::SETCC)
5917 return SDValue();
5919 // Must be an equality comparison.
5920 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
5921 if (CCVal != ExpectedCCVal)
5922 return SDValue();
5924 SDValue LHS = N->getOperand(0);
5925 SDValue RHS = N->getOperand(1);
5927 if (!LHS.getValueType().isScalarInteger())
5928 return SDValue();
5930 // If the RHS side is 0, we don't need any extra instructions, return the LHS.
5931 if (isNullConstant(RHS))
5932 return LHS;
5934 SDLoc DL(N);
5936 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
5937 int64_t CVal = C->getSExtValue();
5938 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
5939 // non-zero otherwise.
5940 if (CVal == -2048)
5941 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), LHS,
5942 DAG.getConstant(CVal, DL, N->getValueType(0)));
5943 // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the
5944 // LHS is equal to the RHS and non-zero otherwise.
5945 if (isInt<12>(CVal) || CVal == 2048)
5946 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), LHS,
5947 DAG.getConstant(-CVal, DL, N->getValueType(0)));
5948 }
5950 // If nothing else we can XOR the LHS and RHS to produce zero if they are
5951 // equal and a non-zero value if they aren't.
5952 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), LHS, RHS);
5953 }
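// Worked example (editorial): for (setcc x, 5, seteq) this helper emits
// (add x, -5), which is zero exactly when x == 5. The CZERO_EQZ/CZERO_NEZ
// users in lowerSELECT below only test their condition against zero, so no
// extra sequence to form a boolean first is needed.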
5955 // Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
5956 // into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
5957 // For now we only consider transformation profitable if `binOp(c0, c1)` ends up
5958 // being `0` or `-1`. In such cases we can replace `select` with `and`.
5959 // TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
5960 // than `c0`?
5961 static SDValue
5962 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
5963 const RISCVSubtarget &Subtarget) {
5964 if (Subtarget.hasShortForwardBranchOpt())
5965 return SDValue();
5967 unsigned SelOpNo = 0;
5968 SDValue Sel = BO->getOperand(0);
5969 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
5970 SelOpNo = 1;
5971 Sel = BO->getOperand(1);
5972 }
5974 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
5975 return SDValue();
5977 unsigned ConstSelOpNo = 1;
5978 unsigned OtherSelOpNo = 2;
5979 if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
5980 ConstSelOpNo = 2;
5981 OtherSelOpNo = 1;
5982 }
5983 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
5984 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
5985 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
5986 return SDValue();
5988 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
5989 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
5990 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
5991 return SDValue();
5993 SDLoc DL(Sel);
5994 EVT VT = BO->getValueType(0);
5996 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
5997 if (SelOpNo == 1)
5998 std::swap(NewConstOps[0], NewConstOps[1]);
6000 SDValue NewConstOp =
6001 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
6002 if (!NewConstOp)
6003 return SDValue();
6005 const APInt &NewConstAPInt =
6006 cast<ConstantSDNode>(NewConstOp)->getAPIntValue();
6007 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
6008 return SDValue();
6010 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
6011 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
6012 if (SelOpNo == 1)
6013 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
6014 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
6016 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
6017 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
6018 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
6019 }
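// Worked example (editorial): (add (select c, x, 1), -1) folds to
// (select c, (add x, -1), 0), since the constant arm evaluates to
// 1 + -1 == 0; the all-zeros arm then lets combineSelectToBinOp above
// rewrite the remaining select as (-c & (add x, -1)) with no branch.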
6021 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
6022 SDValue CondV = Op.getOperand(0);
6023 SDValue TrueV = Op.getOperand(1);
6024 SDValue FalseV = Op.getOperand(2);
6025 SDLoc DL(Op);
6026 MVT VT = Op.getSimpleValueType();
6027 MVT XLenVT = Subtarget.getXLenVT();
6029 // Lower vector SELECTs to VSELECTs by splatting the condition.
6030 if (VT.isVector()) {
6031 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
6032 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
6033 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
6036 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
6037 // nodes to implement the SELECT. Performing the lowering here allows for
6038 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
6039 // sequence or RISCVISD::SELECT_CC node (branch-based select).
6040 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
6041 VT.isScalarInteger()) {
6042 if (SDValue NewCondV = selectSETCC(CondV, ISD::SETNE, DAG)) {
6043 // (select (riscv_setne c), t, 0) -> (czero_eqz t, c)
6044 if (isNullConstant(FalseV))
6045 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, NewCondV);
6046 // (select (riscv_setne c), 0, f) -> (czero_nez f, c)
6047 if (isNullConstant(TrueV))
6048 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, NewCondV);
6049 // (select (riscv_setne c), t, f) -> (or (czero_eqz t, c), (czero_nez f,
6050 // c))
6051 return DAG.getNode(
6052 ISD::OR, DL, VT,
6053 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, NewCondV),
6054 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, NewCondV));
6055 }
6056 if (SDValue NewCondV = selectSETCC(CondV, ISD::SETEQ, DAG)) {
6057 // (select (riscv_seteq c), t, 0) -> (czero_nez t, c)
6058 if (isNullConstant(FalseV))
6059 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, TrueV, NewCondV);
6060 // (select (riscv_seteq c), 0, f) -> (czero_eqz f, c)
6061 if (isNullConstant(TrueV))
6062 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, FalseV, NewCondV);
6063 // (select (riscv_seteq c), t, f) -> (or (czero_eqz f, c), (czero_nez t,
6064 // c))
6065 return DAG.getNode(
6066 ISD::OR, DL, VT,
6067 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, FalseV, NewCondV),
6068 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, TrueV, NewCondV));
6069 }
6071 // (select c, t, 0) -> (czero_eqz t, c)
6072 if (isNullConstant(FalseV))
6073 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
6074 // (select c, 0, f) -> (czero_nez f, c)
6075 if (isNullConstant(TrueV))
6076 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
6078 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
6079 if (TrueV.getOpcode() == ISD::AND &&
6080 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
6081 return DAG.getNode(
6082 ISD::OR, DL, VT, TrueV,
6083 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
6084 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
6085 if (FalseV.getOpcode() == ISD::AND &&
6086 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
6087 return DAG.getNode(
6088 ISD::OR, DL, VT, FalseV,
6089 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
6091 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
6092 return DAG.getNode(ISD::OR, DL, VT,
6093 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
6094 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
6095 }
6097 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
6098 return V;
6100 if (Op.hasOneUse()) {
6101 unsigned UseOpc = Op->use_begin()->getOpcode();
6102 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
6103 SDNode *BinOp = *Op->use_begin();
6104 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->use_begin(),
6105 DAG, Subtarget)) {
6106 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
6107 return lowerSELECT(NewSel, DAG);
6108 }
6109 }
6110 }
6112 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
6113 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
6114 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
6115 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
6116 if (FPTV && FPFV) {
6117 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
6118 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
6119 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
6120 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
6121 DAG.getConstant(1, DL, XLenVT));
6122 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
6123 }
6124 }
6126 // If the condition is not an integer SETCC which operates on XLenVT, we need
6127 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
6128 // (select condv, truev, falsev)
6129 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
6130 if (CondV.getOpcode() != ISD::SETCC ||
6131 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
6132 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
6133 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
6135 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
6137 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
6138 }
6140 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
6141 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
6142 // advantage of the integer compare+branch instructions. i.e.:
6143 // (select (setcc lhs, rhs, cc), truev, falsev)
6144 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
6145 SDValue LHS = CondV.getOperand(0);
6146 SDValue RHS = CondV.getOperand(1);
6147 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
6149 // Special case for a select of 2 constants that have a difference of 1.
6150 // Normally this is done by DAGCombine, but if the select is introduced by
6151 // type legalization or op legalization, we miss it. Restricting to SETLT
6152 // case for now because that is what signed saturating add/sub need.
6153 // FIXME: We don't need the condition to be SETLT or even a SETCC,
6154 // but we would probably want to swap the true/false values if the condition
6155 // is SETGE/SETLE to avoid an XORI.
6156 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
6157 CCVal == ISD::SETLT) {
6158 const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue();
6159 const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue();
6160 if (TrueVal - 1 == FalseVal)
6161 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
6162 if (TrueVal + 1 == FalseVal)
6163 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
6164 }
6166 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
6167 // 1 < x ? x : 1 -> 0 < x ? x : 1
6168 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
6169 RHS == TrueV && LHS == FalseV) {
6170 LHS = DAG.getConstant(0, DL, VT);
6171 // 0 <u x is the same as x != 0.
6172 if (CCVal == ISD::SETULT) {
6173 std::swap(LHS, RHS);
6174 CCVal = ISD::SETNE;
6175 }
6176 }
6178 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
6179 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
6180 RHS == FalseV)
6181 RHS = DAG.getConstant(0, DL, VT);
6184 SDValue TargetCC = DAG.getCondCode(CCVal);
6186 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
6187 // (select (setcc lhs, rhs, CC), constant, falsev)
6188 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
6189 std::swap(TrueV, FalseV);
6190 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
6191 }
6193 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
6194 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
6195 }
6197 SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
6198 SDValue CondV = Op.getOperand(1);
6199 SDLoc DL(Op);
6200 MVT XLenVT = Subtarget.getXLenVT();
6202 if (CondV.getOpcode() == ISD::SETCC &&
6203 CondV.getOperand(0).getValueType() == XLenVT) {
6204 SDValue LHS = CondV.getOperand(0);
6205 SDValue RHS = CondV.getOperand(1);
6206 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
6208 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
6210 SDValue TargetCC = DAG.getCondCode(CCVal);
6211 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
6212 LHS, RHS, TargetCC, Op.getOperand(2));
6213 }
6215 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
6216 CondV, DAG.getConstant(0, DL, XLenVT),
6217 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
6218 }
6220 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
6221 MachineFunction &MF = DAG.getMachineFunction();
6222 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
6224 SDLoc DL(Op);
6225 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
6226 getPointerTy(MF.getDataLayout()));
6228 // vastart just stores the address of the VarArgsFrameIndex slot into the
6229 // memory location argument.
6230 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
6231 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
6232 MachinePointerInfo(SV));
6233 }
6235 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
6236 SelectionDAG &DAG) const {
6237 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
6238 MachineFunction &MF = DAG.getMachineFunction();
6239 MachineFrameInfo &MFI = MF.getFrameInfo();
6240 MFI.setFrameAddressIsTaken(true);
6241 Register FrameReg = RI.getFrameRegister(MF);
6242 int XLenInBytes = Subtarget.getXLen() / 8;
6244 EVT VT = Op.getValueType();
6245 SDLoc DL(Op);
6246 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
6247 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
6248 while (Depth--) {
6249 int Offset = -(XLenInBytes * 2);
6250 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
6251 DAG.getIntPtrConstant(Offset, DL));
6252 FrameAddr =
6253 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
6254 }
6255 return FrameAddr;
6256 }
6258 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
6259 SelectionDAG &DAG) const {
6260 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
6261 MachineFunction &MF = DAG.getMachineFunction();
6262 MachineFrameInfo &MFI = MF.getFrameInfo();
6263 MFI.setReturnAddressIsTaken(true);
6264 MVT XLenVT = Subtarget.getXLenVT();
6265 int XLenInBytes = Subtarget.getXLen() / 8;
6267 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
6268 return SDValue();
6270 EVT VT = Op.getValueType();
6271 SDLoc DL(Op);
6272 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
6273 if (Depth) {
6274 int Off = -XLenInBytes;
6275 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
6276 SDValue Offset = DAG.getConstant(Off, DL, VT);
6277 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
6278 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
6279 MachinePointerInfo());
6280 }
6282 // Return the value of the return address register, marking it an implicit
6283 // live-in.
6284 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
6285 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
6286 }
6288 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
6289 SelectionDAG &DAG) const {
6290 SDLoc DL(Op);
6291 SDValue Lo = Op.getOperand(0);
6292 SDValue Hi = Op.getOperand(1);
6293 SDValue Shamt = Op.getOperand(2);
6294 EVT VT = Lo.getValueType();
6296 // if Shamt-XLEN < 0: // Shamt < XLEN
6297 // Lo = Lo << Shamt
6298 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 ^ Shamt))
6299 // else:
6300 // Lo = 0
6301 // Hi = Lo << (Shamt-XLEN)
6303 SDValue Zero = DAG.getConstant(0, DL, VT);
6304 SDValue One = DAG.getConstant(1, DL, VT);
6305 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
6306 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
6307 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
6308 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
6310 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
6311 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
6312 SDValue ShiftRightLo =
6313 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
6314 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
6315 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
6316 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
6318 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
6320 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
6321 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
6323 SDValue Parts[2] = {Lo, Hi};
6324 return DAG.getMergeValues(Parts, DL);
6325 }
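// Worked example (editorial): on RV32 with Shamt == 40, ShamtMinusXLen is
// +8, so the setcc computed above is false and the selects pick the "else"
// arm: Lo = 0 and Hi = Lo << 8. Both arms are computed unconditionally and
// chosen with ISD::SELECT, keeping the expansion branchless.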
6327 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
6328 bool IsSRA) const {
6329 SDLoc DL(Op);
6330 SDValue Lo = Op.getOperand(0);
6331 SDValue Hi = Op.getOperand(1);
6332 SDValue Shamt = Op.getOperand(2);
6333 EVT VT = Lo.getValueType();
6335 // SRA expansion:
6336 // if Shamt-XLEN < 0: // Shamt < XLEN
6337 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ XLEN-1))
6338 // Hi = Hi >>s Shamt
6339 // else:
6340 // Lo = Hi >>s (Shamt-XLEN);
6341 // Hi = Hi >>s (XLEN-1)
6342 //
6343 // SRL expansion:
6344 // if Shamt-XLEN < 0: // Shamt < XLEN
6345 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ XLEN-1))
6346 // Hi = Hi >>u Shamt
6347 // else:
6348 // Lo = Hi >>u (Shamt-XLEN);
6349 // Hi = 0
6351 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
6353 SDValue Zero = DAG.getConstant(0, DL, VT);
6354 SDValue One = DAG.getConstant(1, DL, VT);
6355 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
6356 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
6357 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
6358 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
6360 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
6361 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
6362 SDValue ShiftLeftHi =
6363 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
6364 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
6365 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
6366 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
6367 SDValue HiFalse =
6368 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
6370 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
6372 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
6373 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
6375 SDValue Parts[2] = {Lo, Hi};
6376 return DAG.getMergeValues(Parts, DL);
6377 }
6379 // Lower splats of i1 types to SETCC. For each mask vector type, we have a
6380 // legal equivalently-sized i8 type, so we can use that as a go-between.
6381 SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
6382 SelectionDAG &DAG) const {
6383 SDLoc DL(Op);
6384 MVT VT = Op.getSimpleValueType();
6385 SDValue SplatVal = Op.getOperand(0);
6386 // All-zeros or all-ones splats are handled specially.
6387 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
6388 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
6389 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
6390 }
6391 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
6392 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
6393 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
6394 }
6395 MVT XLenVT = Subtarget.getXLenVT();
6396 assert(SplatVal.getValueType() == XLenVT &&
6397 "Unexpected type for i1 splat value");
6398 MVT InterVT = VT.changeVectorElementType(MVT::i8);
6399 SplatVal = DAG.getNode(ISD::AND, DL, XLenVT, SplatVal,
6400 DAG.getConstant(1, DL, XLenVT));
6401 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
6402 SDValue Zero = DAG.getConstant(0, DL, InterVT);
6403 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
// illegal (currently only vXi64 RV32).
// FIXME: We could also catch non-constant sign-extended i32 values and lower
// them to VMV_V_X_VL.
SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
         "Unexpected SPLAT_VECTOR_PARTS lowering");

  assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);

  if (VecVT.isFixedLengthVector()) {
    MVT ContainerVT = getContainerForFixedLengthVector(VecVT);

    auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;

    SDValue Res =
        splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
    return convertFromScalableVector(VecVT, Res, DAG, Subtarget);
  }

  if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
    int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
    int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
    // If the Hi constant is just the sign bit of Lo replicated (i.e. the i64
    // value is sign-extended from Lo), lower this as a custom node in order
    // to try and match RVV vector/scalar instructions.
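    // E.g., Lo = -2 (0xFFFFFFFE) with Hi = -1 splats the i64 value -2, which
    // VMV_V_X_VL can produce directly from the sign-extended 32-bit Lo.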
    if ((LoC >> 31) == HiC)
      return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
                         Lo, DAG.getRegister(RISCV::X0, MVT::i32));
  }

  // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
  if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
      isa<ConstantSDNode>(Hi.getOperand(1)) &&
      Hi.getConstantOperandVal(1) == 31)
    return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT), Lo,
                       DAG.getRegister(RISCV::X0, MVT::i32));

  // Fall back to using a stack store and a stride-x0 vector load. Use X0 as VL.
  return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT,
                     DAG.getUNDEF(VecVT), Lo, Hi,
                     DAG.getRegister(RISCV::X0, MVT::i32));
}
// Custom-lower extensions from mask vectors by using a vselect either with 1
// for zero/any-extension or -1 for sign-extension:
//   (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
// Note that any-extension is lowered identically to zero-extension.
SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
                                                int64_t ExtTrueVal) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  SDValue Src = Op.getOperand(0);
  // Only custom-lower extensions from mask types
  assert(Src.getValueType().isVector() &&
         Src.getValueType().getVectorElementType() == MVT::i1);

  if (VecVT.isScalableVector()) {
    SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
    SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT);
    return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
  }

  MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
  MVT I1ContainerVT =
      MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());

  SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);

  SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;

  MVT XLenVT = Subtarget.getXLenVT();
  SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
  SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);

  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                          DAG.getUNDEF(ContainerVT), SplatZero, VL);
  SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                             DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
  SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
                               SplatTrueVal, SplatZero, VL);

  return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
}
SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
    SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
  MVT ExtVT = Op.getSimpleValueType();
  // Only custom-lower extensions from fixed-length vector types.
  if (!ExtVT.isFixedLengthVector())
    return Op;
  MVT VT = Op.getOperand(0).getSimpleValueType();
  // Grab the canonical container type for the extended type. Infer the smaller
  // type from that to ensure the same number of vector elements, as we know
  // the LMUL will be sufficient to hold the smaller type.
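  // E.g., with VLEN=128, a v4i16 -> v4i32 extension uses ContainerExtVT
  // nxv2i32 and an inferred source container of nxv2i16 (the exact container
  // depends on the subtarget's VLEN; this is illustrative only).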
  MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
  // Get the container type for the smaller (source) type manually to ensure
  // the same number of vector elements between source and dest.
  MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
                                     ContainerExtVT.getVectorElementCount());

  SDValue Op1 =
      convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);

  SDLoc DL(Op);
  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);

  return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
}
// Custom-lower truncations from vectors to mask vectors by using a mask and a
// setcc operation:
//   (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
                                                      SelectionDAG &DAG) const {
  bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
  SDLoc DL(Op);
  EVT MaskVT = Op.getValueType();
  // Only expect to custom-lower truncations to mask types
  assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
         "Unexpected type for vector mask lowering");
  SDValue Src = Op.getOperand(0);
  MVT VecVT = Src.getSimpleValueType();
  SDValue Mask, VL;
  if (IsVPTrunc) {
    Mask = Op.getOperand(1);
    VL = Op.getOperand(2);
  }
  // If this is a fixed vector, we need to convert it to a scalable vector.
  MVT ContainerVT = VecVT;

  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
    if (IsVPTrunc) {
      MVT MaskContainerVT =
          getContainerForFixedLengthVector(Mask.getSimpleValueType());
      Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
    }
  }

  if (!IsVPTrunc) {
    std::tie(Mask, VL) =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
  }

  SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
  SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());

  SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                         DAG.getUNDEF(ContainerVT), SplatOne, VL);
  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                          DAG.getUNDEF(ContainerVT), SplatZero, VL);

  MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
  SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
                              DAG.getUNDEF(ContainerVT), Mask, VL);
  Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
                      {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
                       DAG.getUNDEF(MaskContainerVT), Mask, VL});
  if (MaskVT.isFixedLengthVector())
    Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
  return Trunc;
}
SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
                                                  SelectionDAG &DAG) const {
  bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
  SDLoc DL(Op);

  MVT VT = Op.getSimpleValueType();
  // Only custom-lower vector truncates
  assert(VT.isVector() && "Unexpected type for vector truncate lowering");

  // Truncates to mask types are handled differently
  if (VT.getVectorElementType() == MVT::i1)
    return lowerVectorMaskTruncLike(Op, DAG);

  // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
  // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
  // truncate by one power of two at a time.
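  // E.g., an i64->i8 truncate is emitted as three narrowing steps:
  // i64->i32, i32->i16 and i16->i8.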
  MVT DstEltVT = VT.getVectorElementType();

  SDValue Src = Op.getOperand(0);
  MVT SrcVT = Src.getSimpleValueType();
  MVT SrcEltVT = SrcVT.getVectorElementType();

  assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
         isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
         "Unexpected vector truncate lowering");

  MVT ContainerVT = SrcVT;
  SDValue Mask, VL;
  if (IsVPTrunc) {
    Mask = Op.getOperand(1);
    VL = Op.getOperand(2);
  }
  if (SrcVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(SrcVT);
    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
    if (IsVPTrunc) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  SDValue Result = Src;
  if (!IsVPTrunc) {
    std::tie(Mask, VL) =
        getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
  }

  LLVMContext &Context = *DAG.getContext();
  const ElementCount Count = ContainerVT.getVectorElementCount();
  do {
    SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
    EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
    Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
                         Mask, VL);
  } while (SrcEltVT != DstEltVT);

  if (SrcVT.isFixedLengthVector())
    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

  return Result;
}
SDValue
RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Src = Op.getOperand(1);
  MVT VT = Op.getSimpleValueType();
  MVT SrcVT = Src.getSimpleValueType();
  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
    ContainerVT =
        SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
  }

  auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);

  // RVV can only widen/truncate fp to types double/half the size of the
  // source, so f16<->f64 conversions need an intermediate f32 step.
  if ((VT.getVectorElementType() == MVT::f64 &&
       SrcVT.getVectorElementType() == MVT::f16) ||
      (VT.getVectorElementType() == MVT::f16 &&
       SrcVT.getVectorElementType() == MVT::f64)) {
    // For double rounding, the intermediate rounding should be round-to-odd.
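    // (Rounding f64->f32->f16 to-nearest in both steps could double-round;
    // round-to-odd in the first step preserves enough information for the
    // final rounding to produce the correctly rounded f16 result.)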
    unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
                                ? RISCVISD::STRICT_FP_EXTEND_VL
                                : RISCVISD::STRICT_VFNCVT_ROD_VL;
    MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
    Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
                      Chain, Src, Mask, VL);
    Chain = Src.getValue(1);
  }

  unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
                         ? RISCVISD::STRICT_FP_EXTEND_VL
                         : RISCVISD::STRICT_FP_ROUND_VL;
  SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
                            Chain, Src, Mask, VL);
  if (VT.isFixedLengthVector()) {
    // StrictFP operations have two result values; the lowered result must
    // have the same number of results.
    SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
    Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
  }
  return Res;
}
SDValue
RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
                                                    SelectionDAG &DAG) const {
  bool IsVP =
      Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
  bool IsExtend =
      Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
  // RVV can only truncate fp to types half the size of the source, so we
  // custom-lower f64->f16 rounds via RVV's round-to-odd float conversion
  // instruction.
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  assert(VT.isVector() && "Unexpected type for vector truncate lowering");

  SDValue Src = Op.getOperand(0);
  MVT SrcVT = Src.getSimpleValueType();

  bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 ||
                                     SrcVT.getVectorElementType() != MVT::f16);
  bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 ||
                                     SrcVT.getVectorElementType() != MVT::f64);

  bool IsDirectConv = IsDirectExtend || IsDirectTrunc;

  // Prepare any fixed-length vector operands.
  MVT ContainerVT = VT;
  SDValue Mask, VL;
  if (IsVP) {
    Mask = Op.getOperand(1);
    VL = Op.getOperand(2);
  }
  if (VT.isFixedLengthVector()) {
    MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
    ContainerVT =
        SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
    if (IsVP) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  if (!IsVP)
    std::tie(Mask, VL) =
        getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);

  unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;

  if (IsDirectConv) {
    Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
    if (VT.isFixedLengthVector())
      Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
    return Src;
  }

  unsigned InterConvOpc =
      IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;

  MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
  SDValue IntermediateConv =
      DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
  SDValue Result =
      DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
  if (VT.isFixedLengthVector())
    return convertFromScalableVector(VT, Result, DAG, Subtarget);
  return Result;
}
// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
// first position of a vector, and that vector is slid up to the insert index.
// By limiting the active vector length to index+1 and merging with the
// original vector (with an undisturbed tail policy for elements >= VL), we
// achieve the desired result of leaving all elements untouched except the one
// at VL-1, which is replaced with the desired value.
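// For example, inserting into index 2 of a v4i32: the value is placed at
// element 0 of a temporary vector, then a slideup with OFFSET=2 and VL=3
// merges it in, leaving elements 0, 1 and 3 of the original undisturbed.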
SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  SDValue Vec = Op.getOperand(0);
  SDValue Val = Op.getOperand(1);
  SDValue Idx = Op.getOperand(2);

  if (VecVT.getVectorElementType() == MVT::i1) {
    // FIXME: For now we just promote to an i8 vector and insert into that,
    // but this is probably not optimal.
    MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
    Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
    Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
    return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
  }

  MVT ContainerVT = VecVT;
  // If the operand is a fixed-length vector, convert to a scalable one.
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  MVT XLenVT = Subtarget.getXLenVT();

  bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
  // Even i64-element vectors on RV32 can be lowered without scalar
  // legalization if the most-significant 32 bits of the value are not affected
  // by the sign-extension of the lower 32 bits.
  // TODO: We could also catch sign extensions of a 32-bit value.
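  // E.g., the i64 constant 0xFFFFFFFF80000000 passes isInt<32> and can be
  // inserted as a 32-bit scalar, relying on the sign extension performed by
  // vmv.s.x.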
  if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
    const auto *CVal = cast<ConstantSDNode>(Val);
    if (isInt<32>(CVal->getSExtValue())) {
      IsLegalInsert = true;
      Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
    }
  }

  auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  SDValue ValInVec;

  if (IsLegalInsert) {
    unsigned Opc =
        VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
    if (isNullConstant(Idx)) {
      Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
      if (!VecVT.isFixedLengthVector())
        return Vec;
      return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
    }
    ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
  } else {
    // On RV32, i64-element vectors must be specially handled to place the
    // value at element 0, by using two vslide1down instructions in sequence on
    // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
    // this.
    SDValue ValLo, ValHi;
    std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
    MVT I32ContainerVT =
        MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
    SDValue I32Mask =
        getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
    // Limit the active VL to two.
    SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
    // If the Idx is 0 we can insert directly into the vector.
    if (isNullConstant(Idx)) {
      // First slide in the lo value, then the hi value above it. We use
      // slide1down to avoid the register group overlap constraint of
      // vslide1up.
      ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
                             Vec, Vec, ValLo, I32Mask, InsertI64VL);
      // If the source vector is undef don't pass along the tail elements from
      // the previous slide1down.
      SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
      ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
                             Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
      // Bitcast back to the right container type.
      ValInVec = DAG.getBitcast(ContainerVT, ValInVec);

      if (!VecVT.isFixedLengthVector())
        return ValInVec;
      return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
    }

    // First slide in the lo value, then the hi value above it. We use
    // slide1down to avoid the register group overlap constraint of vslide1up.
    ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
                           DAG.getUNDEF(I32ContainerVT),
                           DAG.getUNDEF(I32ContainerVT), ValLo,
                           I32Mask, InsertI64VL);
    ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
                           DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
                           I32Mask, InsertI64VL);
    // Bitcast back to the right container type.
    ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
  }

  // Now that the value is in a vector, slide it into position.
  SDValue InsertVL =
      DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));

  // Use a tail agnostic policy if Idx is the last index of Vec.
  unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
  if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
      cast<ConstantSDNode>(Idx)->getZExtValue() + 1 ==
          VecVT.getVectorNumElements())
    Policy = RISCVII::TAIL_AGNOSTIC;
  SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
                                Idx, Mask, InsertVL, Policy);

  if (!VecVT.isFixedLengthVector())
    return Slideup;
  return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
}
// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
// types this is done using VMV_X_S to allow us to glean information about the
// sign bits of the result.
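// E.g., (extractelt v8i32 %v, 5) becomes a vslidedown by 5 followed by a
// vmv.x.s of element 0.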
SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Idx = Op.getOperand(1);
  SDValue Vec = Op.getOperand(0);
  EVT EltVT = Op.getValueType();
  MVT VecVT = Vec.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  if (VecVT.getVectorElementType() == MVT::i1) {
    // Use vfirst.m to extract the first bit.
    if (isNullConstant(Idx)) {
      MVT ContainerVT = VecVT;
      if (VecVT.isFixedLengthVector()) {
        ContainerVT = getContainerForFixedLengthVector(VecVT);
        Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
      }
      auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
      SDValue Vfirst =
          DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
      return DAG.getSetCC(DL, XLenVT, Vfirst, DAG.getConstant(0, DL, XLenVT),
                          ISD::SETEQ);
    }
    if (VecVT.isFixedLengthVector()) {
      unsigned NumElts = VecVT.getVectorNumElements();
      if (NumElts >= 8) {
        MVT WideEltVT;
        unsigned WidenVecLen;
        SDValue ExtractElementIdx;
        SDValue ExtractBitIdx;
        unsigned MaxEEW = Subtarget.getELEN();
        MVT LargestEltVT = MVT::getIntegerVT(
            std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
        if (NumElts <= LargestEltVT.getSizeInBits()) {
          assert(isPowerOf2_32(NumElts) &&
                 "the number of elements should be power of 2");
          WideEltVT = MVT::getIntegerVT(NumElts);
          WidenVecLen = 1;
          ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
          ExtractBitIdx = Idx;
        } else {
          WideEltVT = LargestEltVT;
          WidenVecLen = NumElts / WideEltVT.getSizeInBits();
          // extract element index = index / element width
          ExtractElementIdx = DAG.getNode(
              ISD::SRL, DL, XLenVT, Idx,
              DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
          // mask bit index = index % element width
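          // E.g., for v128i1 with XLEN=64 (WidenVecLen = 2), extracting bit
          // 70 reads element 70/64 = 1 of the v2i64 and then bit 70%64 = 6
          // within it.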
          ExtractBitIdx = DAG.getNode(
              ISD::AND, DL, XLenVT, Idx,
              DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
        }
        MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
        Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
        SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
                                         Vec, ExtractElementIdx);
        // Extract the bit from the GPR.
        SDValue ShiftRight =
            DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
        return DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
                           DAG.getConstant(1, DL, XLenVT));
      }
    }
    // Otherwise, promote to an i8 vector and extract from that.
    MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
    Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
  }

  // If this is a fixed vector, we need to convert it to a scalable vector.
  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  // If the index is 0, the vector is already in the right position.
  if (!isNullConstant(Idx)) {
    // Use a VL of 1 to avoid processing more elements than we need.
    auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
    Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
                        DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
  }

  if (!EltVT.isInteger()) {
    // Floating-point extracts are handled in TableGen.
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
                       DAG.getConstant(0, DL, XLenVT));
  }

  SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
  return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
}
// Some RVV intrinsics may claim that they want an integer operand to be
// promoted or expanded.
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
                                           const RISCVSubtarget &Subtarget) {
  assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
         "Unexpected opcode");

  if (!Subtarget.hasVInstructions())
    return SDValue();

  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
                  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);

  SDLoc DL(Op);

  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
  if (!II || !II->hasScalarOperand())
    return SDValue();

  unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
  assert(SplatOp < Op.getNumOperands());

  SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
  SDValue &ScalarOp = Operands[SplatOp];
  MVT OpVT = ScalarOp.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  // If this isn't a scalar, or its type is XLenVT, we're done.
  if (!OpVT.isScalarInteger() || OpVT == XLenVT)
    return SDValue();

  // The simplest case is that the operand needs to be promoted to XLenVT.
  if (OpVT.bitsLT(XLenVT)) {
    // If the operand is a constant, sign extend to increase our chances
    // of being able to use a .vi instruction. ANY_EXTEND would become a
    // zero extend and the simm5 check in isel would fail.
    // FIXME: Should we ignore the upper bits in isel instead?
    unsigned ExtOpc =
        isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
    ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
    return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
  }

  // Use the previous operand to get the vXi64 VT. The result might be a mask
  // VT for compares. Using the previous operand assumes that the previous
  // operand will never have a smaller element size than a scalar operand and
  // that a widening operation never uses SEW=64.
  // NOTE: If this fails the below assert, we can probably just find the
  // element count from any operand or result and use it to construct the VT.
  assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
  MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();

  // The more complex case is when the scalar is larger than XLenVT.
  assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
         VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");

  // If this is a sign-extended 32-bit value, we can truncate it and rely on
  // the instruction to sign-extend since SEW > XLEN.
  if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
    ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
    return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
  }

  switch (IntNo) {
  case Intrinsic::riscv_vslide1up:
  case Intrinsic::riscv_vslide1down:
  case Intrinsic::riscv_vslide1up_mask:
  case Intrinsic::riscv_vslide1down_mask: {
    // We need to special case these when the scalar is larger than XLen.
    unsigned NumOps = Op.getNumOperands();
    bool IsMasked = NumOps == 7;

    // Convert the vector source to the equivalent nxvXi32 vector.
    MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
    SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
    SDValue ScalarLo, ScalarHi;
    std::tie(ScalarLo, ScalarHi) =
        DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);

    // Double the VL since we halved SEW.
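    // E.g., an nxv2i64 slide with AVL=4 becomes an nxv4i32 operation with
    // VL=8; each i64 element is represented by two adjacent i32 elements.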
    SDValue AVL = getVLOperand(Op);
    SDValue I32VL;

    // Optimize for constant AVL
    if (isa<ConstantSDNode>(AVL)) {
      unsigned EltSize = VT.getScalarSizeInBits();
      unsigned MinSize = VT.getSizeInBits().getKnownMinValue();

      unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
      unsigned MaxVLMAX =
          RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);

      unsigned VectorBitsMin = Subtarget.getRealMinVLen();
      unsigned MinVLMAX =
          RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);

      uint64_t AVLInt = cast<ConstantSDNode>(AVL)->getZExtValue();
      if (AVLInt <= MinVLMAX) {
        I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
      } else if (AVLInt >= 2 * MaxVLMAX) {
        // Just set vl to VLMAX in this situation.
        RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT);
        SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
        unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits());
        SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
        SDValue SETVLMAX = DAG.getTargetConstant(
            Intrinsic::riscv_vsetvlimax, DL, MVT::i32);
        I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW,
                            LMUL);
      } else {
        // For an AVL in (MinVLMAX, 2 * MaxVLMAX), the actual working vl
        // depends on the hardware implementation, so let the vsetvli-based
        // code below handle it.
      }
    }
    if (!I32VL) {
      RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
      SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
      unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
      SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
      SDValue SETVL =
          DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
      // Use the vsetvli instruction to get the actually used length, which
      // depends on the hardware implementation.
      SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
                               SEW, LMUL);
      I32VL =
          DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
    }

    SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);

    // Shift the two scalar parts in using SEW=32 slide1up/slide1down
    // instructions.
    SDValue Passthru;
    if (IsMasked)
      Passthru = DAG.getUNDEF(I32VT);
    else
      Passthru = DAG.getBitcast(I32VT, Operands[1]);

    if (IntNo == Intrinsic::riscv_vslide1up ||
        IntNo == Intrinsic::riscv_vslide1up_mask) {
      Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
                        ScalarHi, I32Mask, I32VL);
      Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
                        ScalarLo, I32Mask, I32VL);
    } else {
      Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
                        ScalarLo, I32Mask, I32VL);
      Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
                        ScalarHi, I32Mask, I32VL);
    }

    // Convert back to nxvXi64.
    Vec = DAG.getBitcast(VT, Vec);

    if (!IsMasked)
      return Vec;
    // Apply mask after the operation.
    SDValue Mask = Operands[NumOps - 3];
    SDValue MaskedOff = Operands[1];
    // Assume the Policy operand is the last operand.
    uint64_t Policy =
        cast<ConstantSDNode>(Operands[NumOps - 1])->getZExtValue();
    // We don't need to select maskedoff if it's undef.
    if (MaskedOff.isUndef())
      return Vec;
    // TAMU
    if (Policy == RISCVII::TAIL_AGNOSTIC)
      return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff,
                         Operands[NumOps - 2]);
    // TUMA or TUMU: Currently we always emit a tumu policy regardless of tuma.
    // That's fine because vmerge does not care about the mask policy.
    return DAG.getNode(RISCVISD::VP_MERGE_VL, DL, VT, Mask, Vec, MaskedOff,
                       Operands[NumOps - 2]);
  }
  }

  // We need to convert the scalar to a splat vector.
  SDValue VL = getVLOperand(Op);
  assert(VL.getValueType() == XLenVT);
  ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
  return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
}
// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
// scalable vector llvm.get.vector.length for now.
//
// We need to convert from a scalable VF to a vsetvli with VLMax equal to
// (vscale * VF). The vscale and VF are independent of element width. We use
// SEW=8 for the vsetvli because it is the only element width that supports all
// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
// (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
// SEW and LMUL are better for the surrounding vector instructions.
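//
// For example, with RVVBitsPerBlock=64: VF=4 maps to SEW=8/LMUL=1/2, since
// VLMax is then VLEN/16 = vscale * 4, while VF=16 maps to SEW=8/LMUL=2.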
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
                                    const RISCVSubtarget &Subtarget) {
  MVT XLenVT = Subtarget.getXLenVT();

  // The smallest LMUL is only valid for the smallest element width.
  const unsigned ElementWidth = 8;

  // Determine the VF that corresponds to LMUL 1 for ElementWidth.
  unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
  // We don't support VF==1 with ELEN==32.
  unsigned MinVF = RISCV::RVVBitsPerBlock / Subtarget.getELEN();

  unsigned VF = N->getConstantOperandVal(2);
  assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
         "Unexpected VF");
  (void)MinVF;

  bool Fractional = VF < LMul1VF;
  unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
  unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
  unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);

  SDLoc DL(N);

  SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
  SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);

  SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));

  SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
}
SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned IntNo = Op.getConstantOperandVal(0);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  switch (IntNo) {
  default:
    break; // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(RISCV::X4, PtrVT);
  }
  case Intrinsic::riscv_orc_b:
  case Intrinsic::riscv_brev8:
  case Intrinsic::riscv_sha256sig0:
  case Intrinsic::riscv_sha256sig1:
  case Intrinsic::riscv_sha256sum0:
  case Intrinsic::riscv_sha256sum1:
  case Intrinsic::riscv_sm3p0:
  case Intrinsic::riscv_sm3p1: {
    unsigned Opc;
    switch (IntNo) {
    case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
    case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
    case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
    case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
    case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
    case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
    case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
    case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
    }

    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
  }
  case Intrinsic::riscv_sm4ks:
  case Intrinsic::riscv_sm4ed: {
    unsigned Opc =
        IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
                       Op.getOperand(3));
  }
  case Intrinsic::riscv_zip:
  case Intrinsic::riscv_unzip: {
    unsigned Opc =
        IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
  }
  case Intrinsic::riscv_clmul:
    return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
                       Op.getOperand(2));
  case Intrinsic::riscv_clmulh:
    return DAG.getNode(RISCVISD::CLMULH, DL, XLenVT, Op.getOperand(1),
                       Op.getOperand(2));
  case Intrinsic::riscv_clmulr:
    return DAG.getNode(RISCVISD::CLMULR, DL, XLenVT, Op.getOperand(1),
                       Op.getOperand(2));
  case Intrinsic::experimental_get_vector_length:
    return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
  case Intrinsic::riscv_vmv_x_s:
    assert(Op.getValueType() == XLenVT && "Unexpected VT!");
    return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
                       Op.getOperand(1));
  case Intrinsic::riscv_vfmv_f_s:
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
                       Op.getOperand(1), DAG.getConstant(0, DL, XLenVT));
  case Intrinsic::riscv_vmv_v_x:
    return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
                            Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
                            Subtarget);
  case Intrinsic::riscv_vfmv_v_f:
    return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
  case Intrinsic::riscv_vmv_s_x: {
    SDValue Scalar = Op.getOperand(2);

    if (Scalar.getValueType().bitsLE(XLenVT)) {
      Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
      return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
                         Op.getOperand(1), Scalar, Op.getOperand(3));
    }

    assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");

    // This is an i64 value that lives in two scalar registers. We have to
    // insert this in a convoluted way. First we build a vXi64 splat containing
    // the two values that we assemble using some bit math. Next we'll use
    // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
    // to merge element 0 from our splat into the source vector.
    // FIXME: This is probably not the best way to do this, but it is
    // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
    // point.
    //   vid.v vVid
    //   vmseq.vx mMask, vVid, 0
    //   vmerge.vvm vDest, vSrc, vVal, mMask
    MVT VT = Op.getSimpleValueType();
    SDValue Vec = Op.getOperand(1);
    SDValue VL = getVLOperand(Op);

    SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
    if (Op.getOperand(1).isUndef())
      return SplattedVal;
    SDValue SplattedIdx =
        DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
                    DAG.getConstant(0, DL, MVT::i32), VL);

    MVT MaskVT = getMaskTypeFor(VT);
    SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
    SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
    SDValue SelectCond =
        DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
                    {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
                     DAG.getUNDEF(MaskVT), Mask, VL});
    return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
                       Vec, VL);
  }
  }

  return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
}
SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
                                                    SelectionDAG &DAG) const {
  unsigned IntNo = Op.getConstantOperandVal(1);
  switch (IntNo) {
  default:
    break;
  case Intrinsic::riscv_masked_strided_load: {
    SDLoc DL(Op);
    MVT XLenVT = Subtarget.getXLenVT();

    // If the mask is known to be all ones, optimize to an unmasked intrinsic;
    // the selection of the masked intrinsics doesn't do this for us.
    SDValue Mask = Op.getOperand(5);
    bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

    MVT VT = Op->getSimpleValueType(0);
    MVT ContainerVT = VT;
    if (VT.isFixedLengthVector())
      ContainerVT = getContainerForFixedLengthVector(VT);

    SDValue PassThru = Op.getOperand(2);
    if (!IsUnmasked) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      if (VT.isFixedLengthVector()) {
        Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
        PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
      }
    }

    auto *Load = cast<MemIntrinsicSDNode>(Op);
    SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
    SDValue Ptr = Op.getOperand(3);
    SDValue Stride = Op.getOperand(4);
    SDValue Result, Chain;

    // TODO: We restrict this to unmasked loads currently in consideration of
    // the complexity of handling all-false masks.
    if (IsUnmasked && isNullConstant(Stride)) {
      MVT ScalarVT = ContainerVT.getVectorElementType();
      SDValue ScalarLoad =
          DAG.getExtLoad(ISD::ZEXTLOAD, DL, XLenVT, Load->getChain(), Ptr,
                         ScalarVT, Load->getMemOperand());
      Chain = ScalarLoad.getValue(1);
      Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG,
                                Subtarget);
    } else {
      SDValue IntID = DAG.getTargetConstant(
          IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
          XLenVT);

      SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
      if (IsUnmasked)
        Ops.push_back(DAG.getUNDEF(ContainerVT));
      else
        Ops.push_back(PassThru);
      Ops.push_back(Ptr);
      Ops.push_back(Stride);
      if (!IsUnmasked)
        Ops.push_back(Mask);
      Ops.push_back(VL);
      if (!IsUnmasked) {
        SDValue Policy =
            DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
        Ops.push_back(Policy);
      }

      SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
      Result =
          DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
                                  Load->getMemoryVT(), Load->getMemOperand());
      Chain = Result.getValue(1);
    }
    if (VT.isFixedLengthVector())
      Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
    return DAG.getMergeValues({Result, Chain}, DL);
  }
  case Intrinsic::riscv_seg2_load:
  case Intrinsic::riscv_seg3_load:
  case Intrinsic::riscv_seg4_load:
  case Intrinsic::riscv_seg5_load:
  case Intrinsic::riscv_seg6_load:
  case Intrinsic::riscv_seg7_load:
  case Intrinsic::riscv_seg8_load: {
    SDLoc DL(Op);
    static const Intrinsic::ID VlsegInts[7] = {
        Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
        Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
        Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
        Intrinsic::riscv_vlseg8};
    unsigned NF = Op->getNumValues() - 1;
    assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
    MVT XLenVT = Subtarget.getXLenVT();
    MVT VT = Op->getSimpleValueType(0);
    MVT ContainerVT = getContainerForFixedLengthVector(VT);

    SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);
    SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
    auto *Load = cast<MemIntrinsicSDNode>(Op);
    SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
    ContainerVTs.push_back(MVT::Other);
    SDVTList VTs = DAG.getVTList(ContainerVTs);
    SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID};
    Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT));
    Ops.push_back(Op.getOperand(2));
    Ops.push_back(VL);
    SDValue Result =
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
                                Load->getMemoryVT(), Load->getMemOperand());
    SmallVector<SDValue, 9> Results;
    for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
      Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx),
                                                  DAG, Subtarget));
    Results.push_back(Result.getValue(NF));
    return DAG.getMergeValues(Results, DL);
  }
  }

  return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
}
SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                                                 SelectionDAG &DAG) const {
  unsigned IntNo = Op.getConstantOperandVal(1);
  switch (IntNo) {
  default:
    break;
  case Intrinsic::riscv_masked_strided_store: {
    SDLoc DL(Op);
    MVT XLenVT = Subtarget.getXLenVT();

    // If the mask is known to be all ones, optimize to an unmasked intrinsic;
    // the selection of the masked intrinsics doesn't do this for us.
    SDValue Mask = Op.getOperand(5);
    bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

    SDValue Val = Op.getOperand(2);
    MVT VT = Val.getSimpleValueType();
    MVT ContainerVT = VT;
    if (VT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VT);
      Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
    }
    if (!IsUnmasked) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      if (VT.isFixedLengthVector())
        Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }

    SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;

    SDValue IntID = DAG.getTargetConstant(
        IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
        XLenVT);

    auto *Store = cast<MemIntrinsicSDNode>(Op);
    SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
    Ops.push_back(Val);
    Ops.push_back(Op.getOperand(3)); // Ptr
    Ops.push_back(Op.getOperand(4)); // Stride
    if (!IsUnmasked)
      Ops.push_back(Mask);
    Ops.push_back(VL);

    return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
                                   Ops, Store->getMemoryVT(),
                                   Store->getMemOperand());
  }
  case Intrinsic::riscv_seg2_store:
  case Intrinsic::riscv_seg3_store:
  case Intrinsic::riscv_seg4_store:
  case Intrinsic::riscv_seg5_store:
  case Intrinsic::riscv_seg6_store:
  case Intrinsic::riscv_seg7_store:
  case Intrinsic::riscv_seg8_store: {
    SDLoc DL(Op);
    static const Intrinsic::ID VssegInts[] = {
        Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
        Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
        Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
        Intrinsic::riscv_vsseg8};
    // Operands are (chain, int_id, vec*, ptr, vl)
    unsigned NF = Op->getNumOperands() - 4;
    assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
    MVT XLenVT = Subtarget.getXLenVT();
    MVT VT = Op->getOperand(2).getSimpleValueType();
    MVT ContainerVT = getContainerForFixedLengthVector(VT);

    SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);
    SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
    SDValue Ptr = Op->getOperand(NF + 2);

    auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
    SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID};
    for (unsigned i = 0; i < NF; i++)
      Ops.push_back(convertToScalableVector(
          ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget));
    Ops.append({Ptr, VL});

    return DAG.getMemIntrinsicNode(
        ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
        FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
  }
  }

  return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
}
static unsigned getRVVReductionOp(unsigned ISDOpcode) {
  switch (ISDOpcode) {
  default:
    llvm_unreachable("Unhandled reduction");
  case ISD::VECREDUCE_ADD:
    return RISCVISD::VECREDUCE_ADD_VL;
  case ISD::VECREDUCE_UMAX:
    return RISCVISD::VECREDUCE_UMAX_VL;
  case ISD::VECREDUCE_SMAX:
    return RISCVISD::VECREDUCE_SMAX_VL;
  case ISD::VECREDUCE_UMIN:
    return RISCVISD::VECREDUCE_UMIN_VL;
  case ISD::VECREDUCE_SMIN:
    return RISCVISD::VECREDUCE_SMIN_VL;
  case ISD::VECREDUCE_AND:
    return RISCVISD::VECREDUCE_AND_VL;
  case ISD::VECREDUCE_OR:
    return RISCVISD::VECREDUCE_OR_VL;
  case ISD::VECREDUCE_XOR:
    return RISCVISD::VECREDUCE_XOR_VL;
  }
}
SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
                                                         SelectionDAG &DAG,
                                                         bool IsVP) const {
  SDLoc DL(Op);
  SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
  MVT VecVT = Vec.getSimpleValueType();
  assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
          Op.getOpcode() == ISD::VECREDUCE_OR ||
          Op.getOpcode() == ISD::VECREDUCE_XOR ||
          Op.getOpcode() == ISD::VP_REDUCE_AND ||
          Op.getOpcode() == ISD::VP_REDUCE_OR ||
          Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
         "Unexpected reduction lowering");

  MVT XLenVT = Subtarget.getXLenVT();
  assert(Op.getValueType() == XLenVT &&
         "Expected reduction output to be legalized to XLenVT");

  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  SDValue Mask, VL;
  if (IsVP) {
    Mask = Op.getOperand(2);
    VL = Op.getOperand(3);
  } else {
    std::tie(Mask, VL) =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
  }

  unsigned BaseOpc;
  ISD::CondCode CC;
  SDValue Zero = DAG.getConstant(0, DL, XLenVT);

  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Unhandled reduction");
  case ISD::VECREDUCE_AND:
  case ISD::VP_REDUCE_AND: {
    // vcpop(~x) == 0
    SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
    Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
    Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
    CC = ISD::SETEQ;
    BaseOpc = ISD::AND;
    break;
  }
  case ISD::VECREDUCE_OR:
  case ISD::VP_REDUCE_OR:
    // vcpop(x) != 0
    Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
    CC = ISD::SETNE;
    BaseOpc = ISD::OR;
    break;
  case ISD::VECREDUCE_XOR:
  case ISD::VP_REDUCE_XOR: {
    // ((vcpop x) & 1) != 0
    SDValue One = DAG.getConstant(1, DL, XLenVT);
    Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
    Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
    CC = ISD::SETNE;
    BaseOpc = ISD::XOR;
    break;
  }
  }

  SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);

  if (!IsVP)
    return SetCC;

  // Now include the start value in the operation.
  // Note that we must return the start value when no elements are operated
  // upon. The vcpop instructions we've emitted in each case above will return
  // 0 for an inactive vector, and so we've already received the neutral value:
  // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
  // can simply include the start value.
  return DAG.getNode(BaseOpc, DL, XLenVT, SetCC, Op.getOperand(0));
}
static bool isNonZeroAVL(SDValue AVL) {
  auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
  auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
  return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
         (ImmAVL && ImmAVL->getZExtValue() >= 1);
}

/// Helper to lower a reduction sequence of the form:
///   scalar = reduce_op vec, scalar_start
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
                                 SDValue StartValue, SDValue Vec, SDValue Mask,
                                 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  const MVT VecVT = Vec.getSimpleValueType();
  const MVT M1VT = getLMUL1VT(VecVT);
  const MVT XLenVT = Subtarget.getXLenVT();
  const bool NonZeroAVL = isNonZeroAVL(VL);

  // The reduction needs an LMUL1 input; do the splat at either LMUL1
  // or the original VT if fractional.
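  // E.g., for an nxv8i32 input the start-value splat is done at nxv2i32
  // (LMUL=1), while a fractional nxv1i32 input keeps its own type.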
  auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
  // We reuse the VL of the reduction to reduce vsetvli toggles if we can
  // prove it is non-zero. For the AVL=0 case, we need the scalar to
  // be the result of the reduction operation.
  auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
  SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
                                           DAG, Subtarget);
  if (M1VT != InnerVT)
    InitialValue = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT,
                               DAG.getUNDEF(M1VT),
                               InitialValue, DAG.getConstant(0, DL, XLenVT));
  SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
  SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
  SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
  SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
                     DAG.getConstant(0, DL, XLenVT));
}
SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vec = Op.getOperand(0);
  EVT VecEVT = Vec.getValueType();

  unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());

  // Due to ordering in legalize types we may have a vector type that needs to
  // be split. Do that manually so we can get down to a legal type.
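  // E.g., a reduce of v64i32 on a target where that type must be split
  // becomes a v32i32 reduce of (v32i32 op v32i32).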
  while (getTypeAction(*DAG.getContext(), VecEVT) ==
         TargetLowering::TypeSplitVector) {
    auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
    VecEVT = Lo.getValueType();
    Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
  }

  // TODO: The type may need to be widened rather than split. Or widened before
  // it can be split.
  if (!isTypeLegal(VecEVT))
    return SDValue();

  MVT VecVT = VecEVT.getSimpleVT();
  MVT VecEltVT = VecVT.getVectorElementType();
  unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());

  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  SDValue NeutralElem =
      DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
  return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), NeutralElem, Vec,
                           Mask, VL, DL, DAG, Subtarget);
}
// Given a reduction op, this function returns the matching reduction opcode,
// the vector SDValue and the scalar SDValue required to lower this to a
// RISCVISD node.
static std::tuple<unsigned, SDValue, SDValue>
getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) {
  SDLoc DL(Op);
  auto Flags = Op->getFlags();
  unsigned Opcode = Op.getOpcode();
  unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Opcode);
  switch (Opcode) {
  default:
    llvm_unreachable("Unhandled reduction");
  case ISD::VECREDUCE_FADD: {
    // Use positive zero if we can. It is cheaper to materialize. (The true
    // neutral element of fadd is -0.0, but with no-signed-zeros +0.0 works.)
    SDValue Zero =
        DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
    return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
  }
  case ISD::VECREDUCE_SEQ_FADD:
    return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
                           Op.getOperand(0));
  case ISD::VECREDUCE_FMIN:
    return std::make_tuple(RISCVISD::VECREDUCE_FMIN_VL, Op.getOperand(0),
                           DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
  case ISD::VECREDUCE_FMAX:
    return std::make_tuple(RISCVISD::VECREDUCE_FMAX_VL, Op.getOperand(0),
                           DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
  }
}
SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecEltVT = Op.getSimpleValueType();

  unsigned RVVOpcode;
  SDValue VectorVal, ScalarVal;
  std::tie(RVVOpcode, VectorVal, ScalarVal) =
      getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT);
  MVT VecVT = VectorVal.getSimpleValueType();

  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
  }

  auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
  return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), ScalarVal,
                           VectorVal, Mask, VL, DL, DAG, Subtarget);
}
static unsigned getRVVVPReductionOp(unsigned ISDOpcode) {
  switch (ISDOpcode) {
  default:
    llvm_unreachable("Unhandled reduction");
  case ISD::VP_REDUCE_ADD:
    return RISCVISD::VECREDUCE_ADD_VL;
  case ISD::VP_REDUCE_UMAX:
    return RISCVISD::VECREDUCE_UMAX_VL;
  case ISD::VP_REDUCE_SMAX:
    return RISCVISD::VECREDUCE_SMAX_VL;
  case ISD::VP_REDUCE_UMIN:
    return RISCVISD::VECREDUCE_UMIN_VL;
  case ISD::VP_REDUCE_SMIN:
    return RISCVISD::VECREDUCE_SMIN_VL;
  case ISD::VP_REDUCE_AND:
    return RISCVISD::VECREDUCE_AND_VL;
  case ISD::VP_REDUCE_OR:
    return RISCVISD::VECREDUCE_OR_VL;
  case ISD::VP_REDUCE_XOR:
    return RISCVISD::VECREDUCE_XOR_VL;
  case ISD::VP_REDUCE_FADD:
    return RISCVISD::VECREDUCE_FADD_VL;
  case ISD::VP_REDUCE_SEQ_FADD:
    return RISCVISD::VECREDUCE_SEQ_FADD_VL;
  case ISD::VP_REDUCE_FMAX:
    return RISCVISD::VECREDUCE_FMAX_VL;
  case ISD::VP_REDUCE_FMIN:
    return RISCVISD::VECREDUCE_FMIN_VL;
  }
}
SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vec = Op.getOperand(1);
  EVT VecEVT = Vec.getValueType();

  // TODO: The type may need to be widened rather than split. Or widened before
  // it can be split.
  if (!isTypeLegal(VecEVT))
    return SDValue();

  MVT VecVT = VecEVT.getSimpleVT();
  unsigned RVVOpcode = getRVVVPReductionOp(Op.getOpcode());

  if (VecVT.isFixedLengthVector()) {
    auto ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  SDValue VL = Op.getOperand(3);
  SDValue Mask = Op.getOperand(2);
  return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
                           Vec, Mask, VL, DL, DAG, Subtarget);
}
SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDValue Vec = Op.getOperand(0);
  SDValue SubVec = Op.getOperand(1);
  MVT VecVT = Vec.getSimpleValueType();
  MVT SubVecVT = SubVec.getSimpleValueType();

  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned OrigIdx = Op.getConstantOperandVal(2);
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();

  // We don't have the ability to slide mask vectors up indexed by their i1
  // elements; the smallest we can do is i8. Often we are able to bitcast to
  // equivalent i8 vectors. Note that when inserting a fixed-length vector
  // into a scalable one, we might not necessarily have enough scalable
  // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
  if (SubVecVT.getVectorElementType() == MVT::i1 &&
      (OrigIdx != 0 || !Vec.isUndef())) {
    if (VecVT.getVectorMinNumElements() >= 8 &&
        SubVecVT.getVectorMinNumElements() >= 8) {
      assert(OrigIdx % 8 == 0 && "Invalid index");
      assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
             SubVecVT.getVectorMinNumElements() % 8 == 0 &&
             "Unexpected mask vector lowering");
      OrigIdx /= 8;
      SubVecVT =
          MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
                           SubVecVT.isScalableVector());
      VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
                               VecVT.isScalableVector());
      Vec = DAG.getBitcast(VecVT, Vec);
      SubVec = DAG.getBitcast(SubVecVT, SubVec);
    } else {
      // We can't slide this mask vector up indexed by its i1 elements.
      // This poses a problem when we wish to insert a scalable vector which
      // can't be re-expressed as a larger type. Just choose the slow path and
      // extend to a larger type, then truncate back down.
      MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
      MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
      Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
      SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
                        Op.getOperand(2));
      SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
      return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
    }
  }
  // If the subvector is a fixed-length type, we cannot use subregister
  // manipulation to simplify the codegen; we don't know which register of a
  // LMUL group contains the specific subvector as we only know the minimum
  // register size. Therefore we must slide the vector group up the full
  // amount.
  if (SubVecVT.isFixedLengthVector()) {
    if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
      return Op;
    MVT ContainerVT = VecVT;
    if (VecVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VecVT);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
    }
    SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
                         DAG.getUNDEF(ContainerVT), SubVec,
                         DAG.getConstant(0, DL, XLenVT));
    if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
      SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
      return DAG.getBitcast(Op.getValueType(), SubVec);
    }
    SDValue Mask =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
    // Set the vector length to only the number of elements we care about. Note
    // that for slideup this includes the offset.
    unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
    SDValue VL = getVLOp(EndIndex, DL, DAG, Subtarget);

    // Use tail agnostic policy if we're inserting over Vec's tail.
    unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
    if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
      Policy = RISCVII::TAIL_AGNOSTIC;

    // If we're inserting into the lowest elements, use a tail undisturbed
    // vmv.v.v.
    if (OrigIdx == 0) {
      SubVec =
          DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
    } else {
      SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
      SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
                           SlideupAmt, Mask, VL, Policy);
    }

    if (VecVT.isFixedLengthVector())
      SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
    return DAG.getBitcast(Op.getValueType(), SubVec);
  }
  unsigned SubRegIdx, RemIdx;
  std::tie(SubRegIdx, RemIdx) =
      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
          VecVT, SubVecVT, OrigIdx, TRI);
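  // RemIdx is the leftover element offset of the subvector within the vector
  // register selected by SubRegIdx; RemIdx == 0 means the insertion point is
  // exactly register-aligned.
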
  RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
  bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
                         SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
                         SubVecLMUL == RISCVII::VLMUL::LMUL_F8;

  // 1. If the Idx has been completely eliminated and this subvector's size is
  // a vector register or a multiple thereof, or the surrounding elements are
  // undef, then this is a subvector insert which naturally aligns to a vector
  // register. These can easily be handled using subregister manipulation.
  // 2. If the subvector is smaller than a vector register, then the insertion
  // must preserve the undisturbed elements of the register. We do this by
  // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
  // (which resolves to a subregister copy), performing a VSLIDEUP to place the
  // subvector within the vector register, and an INSERT_SUBVECTOR of that
  // LMUL=1 type back into the larger vector (resolving to another subregister
  // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
  // to avoid allocating a large register group to hold our subvector.
  if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
    return Op;

  // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
  // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
  // (in our case undisturbed). This means we can set up a subvector insertion
  // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
  // size of the subvector.
  MVT InterSubVT = VecVT;
  SDValue AlignedExtract = Vec;
  unsigned AlignedIdx = OrigIdx - RemIdx;
  if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
    InterSubVT = getLMUL1VT(VecVT);
    // Extract a subvector equal to the nearest full vector register type. This
    // should resolve to an EXTRACT_SUBREG instruction.
    AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
                                 DAG.getConstant(AlignedIdx, DL, XLenVT));
  }

  SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
                       DAG.getUNDEF(InterSubVT), SubVec,
                       DAG.getConstant(0, DL, XLenVT));
  auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);

  VL = computeVLMax(SubVecVT, DL, DAG);

  // If we're inserting into the lowest elements, use a tail undisturbed
  // vmv.v.v.
  if (RemIdx == 0) {
    SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
                         SubVec, VL);
  } else {
    SDValue SlideupAmt =
        DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));

    // Construct the vector length corresponding to RemIdx + length(SubVecVT).
    VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);

    SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
                         SlideupAmt, Mask, VL);
  }

  // If required, insert this subvector back into the correct vector register.
  // This should resolve to an INSERT_SUBREG instruction.
  if (VecVT.bitsGT(InterSubVT))
    SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, SubVec,
                         DAG.getConstant(AlignedIdx, DL, XLenVT));

  // We might have bitcast from a mask type: cast back to the original type if
  // required.
  return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
}

SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDValue Vec = Op.getOperand(0);
  MVT SubVecVT = Op.getSimpleValueType();
  MVT VecVT = Vec.getSimpleValueType();

  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned OrigIdx = Op.getConstantOperandVal(1);
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();

  // We don't have the ability to slide mask vectors down indexed by their i1
  // elements; the smallest we can do is i8. Often we are able to bitcast to
  // equivalent i8 vectors. Note that when extracting a fixed-length vector
  // from a scalable one, we might not necessarily have enough scalable
  // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
  if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
    if (VecVT.getVectorMinNumElements() >= 8 &&
        SubVecVT.getVectorMinNumElements() >= 8) {
      assert(OrigIdx % 8 == 0 && "Invalid index");
      assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
             SubVecVT.getVectorMinNumElements() % 8 == 0 &&
             "Unexpected mask vector lowering");
      OrigIdx /= 8;
      SubVecVT =
          MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
                           SubVecVT.isScalableVector());
      VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
                               VecVT.isScalableVector());
      Vec = DAG.getBitcast(VecVT, Vec);
    } else {
      // We can't slide this mask vector down, indexed by its i1 elements.
      // This poses a problem when we wish to extract a scalable vector which
      // can't be re-expressed as a larger type. Just choose the slow path and
      // extend to a larger type, then truncate back down.
      // TODO: We could probably improve this when extracting certain fixed
      // from fixed, where we can extract as i8 and shift the correct element
      // right to reach the desired subvector?
      MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
      MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
      Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
                        Op.getOperand(1));
      SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
      return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
    }
  }

  // If the subvector is a fixed-length type, we cannot use subregister
  // manipulation to simplify the codegen; we don't know which register of a
  // LMUL group contains the specific subvector as we only know the minimum
  // register size. Therefore we must slide the vector group down the full
  // amount.
  if (SubVecVT.isFixedLengthVector()) {
    // With an index of 0 this is a cast-like subvector, which can be performed
    // with subregister operations.
    if (OrigIdx == 0)
      return Op;
    MVT ContainerVT = VecVT;
    if (VecVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VecVT);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
    }
    SDValue Mask =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
    // Set the vector length to only the number of elements we care about. This
    // avoids sliding down elements we're going to discard straight away.
    SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), DL, DAG, Subtarget);
    SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
    SDValue Slidedown =
        getVSlidedown(DAG, Subtarget, DL, ContainerVT,
                      DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
    // Now we can use a cast-like subvector extract to get the result.
    Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
                            DAG.getConstant(0, DL, XLenVT));
    return DAG.getBitcast(Op.getValueType(), Slidedown);
  }

  unsigned SubRegIdx, RemIdx;
  std::tie(SubRegIdx, RemIdx) =
      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
          VecVT, SubVecVT, OrigIdx, TRI);

  // If the Idx has been completely eliminated then this is a subvector extract
  // which naturally aligns to a vector register. These can easily be handled
  // using subregister manipulation.
  if (RemIdx == 0)
    return Op;

  // Else we must shift our vector register directly to extract the subvector.
  // Do this using VSLIDEDOWN.

  // If the vector type is an LMUL-group type, extract a subvector equal to the
  // nearest full vector register type. This should resolve to an
  // EXTRACT_SUBREG instruction.
  MVT InterSubVT = VecVT;
  if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
    InterSubVT = getLMUL1VT(VecVT);
    Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
                      DAG.getConstant(OrigIdx - RemIdx, DL, XLenVT));
  }

  // Slide this vector register down by the desired number of elements in order
  // to place the desired subvector starting at element 0.
  SDValue SlidedownAmt =
      DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));

  auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
  SDValue Slidedown =
      getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
                    Vec, SlidedownAmt, Mask, VL);

  // Now the vector is in the right position, extract our final subvector. This
  // should resolve to a COPY.
  Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
                          DAG.getConstant(0, DL, XLenVT));

  // We might have bitcast from a mask type: cast back to the original type if
  // required.
  return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
}

// Widen a vector's operands to i8, then truncate its results back to the
// original type, typically i1. All operand and result types must be the same.
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
                                  SelectionDAG &DAG) {
  MVT VT = N.getSimpleValueType();
  MVT WideVT = VT.changeVectorElementType(MVT::i8);
  SmallVector<SDValue, 4> WideOps;
  for (SDValue Op : N->ops()) {
    assert(Op.getSimpleValueType() == VT &&
           "Operands and result must be same type");
    WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
  }
  unsigned NumVals = N->getNumValues();

  SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
      NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
  SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
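  // Truncate each widened result back down to the original element type by
  // comparing it against zero.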
  SmallVector<SDValue, 4> TruncVals;
  for (unsigned I = 0; I < NumVals; I++) {
    TruncVals.push_back(
        DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
                     DAG.getConstant(0, DL, WideVT), ISD::SETNE));
  }

  if (TruncVals.size() > 1)
    return DAG.getMergeValues(TruncVals, DL);
  return TruncVals.front();
}

SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  assert(VecVT.isScalableVector() &&
         "vector_deinterleave on non-scalable vector!");

  // 1-bit element vectors need to be widened to e8.
  if (VecVT.getVectorElementType() == MVT::i1)
    return widenVectorOpsToi8(Op, DL, DAG);

  // If the VT is LMUL=8, we need to split and reassemble.
  if (VecVT.getSizeInBits().getKnownMinValue() ==
      (8 * RISCV::RVVBitsPerBlock)) {
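    // Deinterleaving the concatenation of two LMUL=8 operands would need an
    // LMUL=16 type, which doesn't exist; instead, split each operand and
    // deinterleave the halves, then concatenate the matching results.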
    auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
    auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
    EVT SplitVT = Op0Lo.getValueType();

    SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
                                DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
    SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
                                DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);

    SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
                               ResLo.getValue(0), ResHi.getValue(0));
    SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
                              ResHi.getValue(1));
    return DAG.getMergeValues({Even, Odd}, DL);
  }

  // Concatenate the two vectors as one vector to deinterleave.
  MVT ConcatVT =
      MVT::getVectorVT(VecVT.getVectorElementType(),
                       VecVT.getVectorElementCount().multiplyCoefficientBy(2));
  SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
                               Op.getOperand(0), Op.getOperand(1));

  // We want to operate on all lanes, so get the mask and VL for it.
  auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget);
  SDValue Passthru = DAG.getUNDEF(ConcatVT);

  // We can deinterleave through vnsrl.wi if the element type is smaller than
  // ELEN.
  if (VecVT.getScalarSizeInBits() < Subtarget.getELEN()) {
    SDValue Even =
        getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG);
    SDValue Odd =
        getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, Subtarget, DAG);
    return DAG.getMergeValues({Even, Odd}, DL);
  }

  // For the indices, use the same SEW to avoid an extra vsetvli.
  MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger();
  // Create a vector of even indices {0, 2, 4, ...}
  SDValue EvenIdx =
      DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2));
  // Create a vector of odd indices {1, 3, 5, ... }
  SDValue OddIdx =
      DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT));

  // Gather the even and odd elements into two separate vectors.
  SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
                                 Concat, EvenIdx, Passthru, Mask, VL);
  SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
                                Concat, OddIdx, Passthru, Mask, VL);

  // Extract the result half of the gather for even and odd.
  SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
                             DAG.getConstant(0, DL, XLenVT));
  SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
                            DAG.getConstant(0, DL, XLenVT));

  return DAG.getMergeValues({Even, Odd}, DL);
}

SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();

  assert(VecVT.isScalableVector() &&
         "vector_interleave on non-scalable vector!");

  // i1 vectors need to be widened to i8.
  if (VecVT.getVectorElementType() == MVT::i1)
    return widenVectorOpsToi8(Op, DL, DAG);

  MVT XLenVT = Subtarget.getXLenVT();
  SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
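  // Using the X0 register as the VL operand encodes VLMAX.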

  // If the VT is LMUL=8, we need to split and reassemble.
  if (VecVT.getSizeInBits().getKnownMinValue() ==
      (8 * RISCV::RVVBitsPerBlock)) {
    auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
    auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
    EVT SplitVT = Op0Lo.getValueType();

    SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
                                DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
    SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
                                DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);

    SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
                             ResLo.getValue(0), ResLo.getValue(1));
    SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
                             ResHi.getValue(0), ResHi.getValue(1));
    return DAG.getMergeValues({Lo, Hi}, DL);
  }

  SDValue Interleaved;

  // If the element type is smaller than ELEN, then we can interleave with
  // vwaddu.vv and vwmaccu.vx.
  if (VecVT.getScalarSizeInBits() < Subtarget.getELEN()) {
    Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
                                        DAG, Subtarget);
  } else {
    // Otherwise, fall back to using vrgatherei16.vv.
    MVT ConcatVT =
        MVT::getVectorVT(VecVT.getVectorElementType(),
                         VecVT.getVectorElementCount().multiplyCoefficientBy(2));
    SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
                                 Op.getOperand(0), Op.getOperand(1));

    MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);

    // 0 1 2 3 4 5 6 7 ...
    SDValue StepVec = DAG.getStepVector(DL, IdxVT);

    // 1 1 1 1 1 1 1 1 ...
    SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));

    // 1 0 1 0 1 0 1 0 ...
    SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
    OddMask = DAG.getSetCC(
        DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
        DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
        ISD::CondCode::SETNE);

    SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));

    // Build up the index vector for interleaving the concatenated vector:
    // 0 0 1 1 2 2 3 3 ...
    SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
    // 0 n 1 n+1 2 n+2 3 n+3 ...
    Idx =
        DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);

    // Then perform the interleave:
    // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
    SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
    Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
                              Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
  }

  // Extract the two halves from the interleaved result.
  SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
                           DAG.getVectorIdxConstant(0, DL));
  SDValue Hi = DAG.getNode(
      ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
      DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));

  return DAG.getMergeValues({Lo, Hi}, DL);
}

// Lower step_vector to the vid instruction. Any non-identity step value must
// be accounted for by manual expansion.
SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  assert(VT.isScalableVector() && "Expected scalable vector");
  MVT XLenVT = Subtarget.getXLenVT();
  auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
  SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
  uint64_t StepValImm = Op.getConstantOperandVal(0);
  if (StepValImm != 1) {
    if (isPowerOf2_64(StepValImm)) {
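      // A power-of-two step can use a cheaper vector shift of the vid result
      // instead of a full vector multiply.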
      SDValue StepVal =
          DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
                      DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
      StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
    } else {
      SDValue StepVal = lowerScalarSplat(
          SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
          VL, VT, DL, DAG, Subtarget);
      StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
    }
  }
  return StepVec;
}

// Implement vector_reverse using vrgather.vv with indices determined by
// subtracting the id of each element from (VLMAX-1). This will convert
// the indices like so:
// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  if (VecVT.getVectorElementType() == MVT::i1) {
    MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
    SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
    SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
    return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);
  }
  unsigned EltSize = VecVT.getScalarSizeInBits();
  unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
  unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
  unsigned MaxVLMAX =
      RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);

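  // MaxVLMAX bounds the largest index the reverse gather can produce, which
  // determines below whether the indices still fit in the source element type.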
  unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
  MVT IntVT = VecVT.changeVectorElementTypeToInteger();

  // If this is SEW=8 and VLMAX is potentially more than 256, we need
  // to use vrgatherei16.vv.
  // TODO: It's also possible to use vrgatherei16.vv for other types to
  // decrease register width for the index calculation.
  if (MaxVLMAX > 256 && EltSize == 8) {
    // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
    // Reverse each half, then reassemble them in reverse order.
    // NOTE: It's also possible that after splitting, VLMAX no longer
    // requires vrgatherei16.vv.
    if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
      auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
      auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
      Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
      Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
      // Reassemble the low and high pieces reversed.
      // FIXME: This is a CONCAT_VECTORS.
      SDValue Res =
          DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
                      DAG.getIntPtrConstant(0, DL));
      return DAG.getNode(
          ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
          DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL));
    }

    // Just promote the int type to i16 which will double the LMUL.
    IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
    GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
  }

  MVT XLenVT = Subtarget.getXLenVT();
  auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);

  // Calculate VLMAX-1 for the desired SEW.
  SDValue VLMinus1 = DAG.getNode(ISD::SUB, DL, XLenVT,
                                 computeVLMax(VecVT, DL, DAG),
                                 DAG.getConstant(1, DL, XLenVT));

  // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
  bool IsRV32E64 =
      !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
  SDValue SplatVL;
  if (!IsRV32E64)
    SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
  else
    SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
                          VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));

  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
  SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
                                DAG.getUNDEF(IntVT), Mask, VL);

  return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices,
                     DAG.getUNDEF(VecVT), Mask, VL);
}

SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  MVT XLenVT = Subtarget.getXLenVT();
  MVT VecVT = Op.getSimpleValueType();

  SDValue VLMax = computeVLMax(VecVT, DL, DAG);

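  // vector_splice concatenates V1 and V2 and extracts a VLMAX-element window;
  // lower it as a slidedown of V1 followed by a slideup of V2 over the result.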
  int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
  SDValue DownOffset, UpOffset;
  if (ImmValue >= 0) {
    // The operand is a TargetConstant, we need to rebuild it as a regular
    // constant.
    DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
    UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
  } else {
    // The operand is a TargetConstant, we need to rebuild it as a regular
    // constant rather than negating the original operand.
    UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
    DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
  }

  SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);

  SDValue SlideDown =
      getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
                    DownOffset, TrueMask, UpOffset);
  return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
                     TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
                     RISCVII::TAIL_AGNOSTIC);
}

SDValue
RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  auto *Load = cast<LoadSDNode>(Op);

  assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
                                        Load->getMemoryVT(),
                                        *Load->getMemOperand()) &&
         "Expecting a correctly-aligned load");

  MVT VT = Op.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();
  MVT ContainerVT = getContainerForFixedLengthVector(VT);

  SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);

  bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
  SDValue IntID = DAG.getTargetConstant(
      IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
  SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
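  // vlm takes no passthru operand, so the undef passthru is only added for
  // vle.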
  if (!IsMaskOp)
    Ops.push_back(DAG.getUNDEF(ContainerVT));
  Ops.push_back(Load->getBasePtr());
  Ops.push_back(VL);

  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
  SDValue NewLoad =
      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
                              Load->getMemoryVT(), Load->getMemOperand());

  SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
  return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
}

SDValue
RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);
  auto *Store = cast<StoreSDNode>(Op);

  assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
                                        Store->getMemoryVT(),
                                        *Store->getMemOperand()) &&
         "Expecting a correctly-aligned store");

  SDValue StoreVal = Store->getValue();
  MVT VT = StoreVal.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  // If the size is less than a byte, we need to pad with zeros to make a byte.
  if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
    VT = MVT::v8i1;
    StoreVal = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
                           DAG.getConstant(0, DL, VT), StoreVal,
                           DAG.getIntPtrConstant(0, DL));
  }

  MVT ContainerVT = getContainerForFixedLengthVector(VT);

  SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);

  SDValue NewValue =
      convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);

  bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
  SDValue IntID = DAG.getTargetConstant(
      IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
  return DAG.getMemIntrinsicNode(
      ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
      {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
      Store->getMemoryVT(), Store->getMemOperand());
}

SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  const auto *MemSD = cast<MemSDNode>(Op);
  EVT MemVT = MemSD->getMemoryVT();
  MachineMemOperand *MMO = MemSD->getMemOperand();
  SDValue Chain = MemSD->getChain();
  SDValue BasePtr = MemSD->getBasePtr();

  SDValue Mask, PassThru, VL;
  if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
    Mask = VPLoad->getMask();
    PassThru = DAG.getUNDEF(VT);
    VL = VPLoad->getVectorLength();
  } else {
    const auto *MLoad = cast<MaskedLoadSDNode>(Op);
    Mask = MLoad->getMask();
    PassThru = MLoad->getPassThru();
  }

  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
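  // A mask known to be all ones lets us select the unmasked vle intrinsic and
  // drop the mask and policy operands below.
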
  MVT XLenVT = Subtarget.getXLenVT();

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
    if (!IsUnmasked) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  if (!VL)
    VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;

  unsigned IntID =
      IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
  SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
  if (IsUnmasked)
    Ops.push_back(DAG.getUNDEF(ContainerVT));
  else
    Ops.push_back(PassThru);
  Ops.push_back(BasePtr);
  if (!IsUnmasked)
    Ops.push_back(Mask);
  Ops.push_back(VL);
  if (!IsUnmasked)
    Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));

  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});

  SDValue Result =
      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
  Chain = Result.getValue(1);

  if (VT.isFixedLengthVector())
    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

  return DAG.getMergeValues({Result, Chain}, DL);
}

SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);

  const auto *MemSD = cast<MemSDNode>(Op);
  EVT MemVT = MemSD->getMemoryVT();
  MachineMemOperand *MMO = MemSD->getMemOperand();
  SDValue Chain = MemSD->getChain();
  SDValue BasePtr = MemSD->getBasePtr();
  SDValue Val, Mask, VL;

  if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
    Val = VPStore->getValue();
    Mask = VPStore->getMask();
    VL = VPStore->getVectorLength();
  } else {
    const auto *MStore = cast<MaskedStoreSDNode>(Op);
    Val = MStore->getValue();
    Mask = MStore->getMask();
  }

  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
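  // As with loads, an all-ones mask lets us emit the cheaper unmasked vse.
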
  MVT VT = Val.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);

    Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
    if (!IsUnmasked) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  if (!VL)
    VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;

  unsigned IntID =
      IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
  SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
  Ops.push_back(Val);
  Ops.push_back(BasePtr);
  if (!IsUnmasked)
    Ops.push_back(Mask);
  Ops.push_back(VL);

  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
                                 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
}

SDValue
RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
                                                      SelectionDAG &DAG) const {
  MVT InVT = Op.getOperand(0).getSimpleValueType();
  MVT ContainerVT = getContainerForFixedLengthVector(InVT);

  MVT VT = Op.getSimpleValueType();

  SDValue Op1 =
      convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
  SDValue Op2 =
      convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);

  SDLoc DL(Op);
  auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
                                    DAG, Subtarget);
  MVT MaskVT = getMaskTypeFor(ContainerVT);

  SDValue Cmp =
      DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
                  {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});

  return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
}

SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned Opc = Op.getOpcode();
  SDLoc DL(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDValue Op2 = Op.getOperand(2);
  SDValue CC = Op.getOperand(3);
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
  MVT VT = Op.getSimpleValueType();
  MVT InVT = Op1.getSimpleValueType();

  // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
  // in terms of ordered comparisons below.
  if (Opc == ISD::STRICT_FSETCCS) {
    // Expand strict_fsetccs(x, oeq) to
    // (and strict_fsetccs(x, y, ole), strict_fsetccs(y, x, ole))
    SDVTList VTList = Op->getVTList();
    if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
      SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
      SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
                                 Op2, OLECCVal);
      SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
                                 Op1, OLECCVal);
      SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                                     Tmp1.getValue(1), Tmp2.getValue(1));
      // Tmp1 and Tmp2 might be the same node.
      if (Tmp1 != Tmp2)
        Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
      return DAG.getMergeValues({Tmp1, OutChain}, DL);
    }

    // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq)).
    if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
      SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
      SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
                                Op2, OEQCCVal);
      SDValue Res = DAG.getNOT(DL, OEQ, VT);
      return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
    }
  }

  MVT ContainerInVT = InVT;
  if (InVT.isFixedLengthVector()) {
    ContainerInVT = getContainerForFixedLengthVector(InVT);
    Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
    Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
  }
  MVT MaskVT = getMaskTypeFor(ContainerInVT);

  auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);

  SDValue Res;
  if (Opc == ISD::STRICT_FSETCC &&
      (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
       CCVal == ISD::SETOLE)) {
    // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask
    // that is only active when both input elements are ordered.
    SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
8736 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
8737 SDValue OrderMask1 = DAG.getNode(
8738 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
8739 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
8741 SDValue OrderMask2 = DAG.getNode(
8742 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
8743 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
8746 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
8747 // Use Mask as the merge operand to let the result be 0 if either of the
8748 // inputs is unordered.
8749 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
8750 DAG.getVTList(MaskVT, MVT::Other),
8751 {Chain, Op1, Op2, CC, Mask, Mask, VL});
8753 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
8754 : RISCVISD::STRICT_FSETCCS_VL;
8755 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
8756 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
8759 if (VT.isFixedLengthVector()) {
8760 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
8761 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
// Lower vector ABS to smax(X, sub(0, X)).
SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue X = Op.getOperand(0);

  assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
         "Unexpected type for ISD::ABS");

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
  }

  SDValue Mask, VL;
  if (Op->getOpcode() == ISD::VP_ABS) {
    Mask = Op->getOperand(1);
    if (VT.isFixedLengthVector())
      Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
                                     Subtarget);
    VL = Op->getOperand(2);
  } else
    std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  SDValue SplatZero = DAG.getNode(
      RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
      DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
  SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
                             DAG.getUNDEF(ContainerVT), Mask, VL);
  SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
                            DAG.getUNDEF(ContainerVT), Mask, VL);

  if (VT.isFixedLengthVector())
    Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
  return Max;
}

SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
    SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue Mag = Op.getOperand(0);
  SDValue Sign = Op.getOperand(1);
  assert(Mag.getValueType() == Sign.getValueType() &&
         "Can only handle COPYSIGN with matching types.");

  MVT ContainerVT = getContainerForFixedLengthVector(VT);
  Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
  Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);

  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
                                 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);

  return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
}

SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
    SDValue Op, SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
  MVT ContainerVT = getContainerForFixedLengthVector(VT);

  MVT I1ContainerVT =
      MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());

  SDValue CC =
      convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
  SDValue Op1 =
      convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
  SDValue Op2 =
      convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);

  SDLoc DL(Op);
  SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;

  SDValue Select =
      DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL);

  return convertFromScalableVector(VT, Select, DAG, Subtarget);
}

SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
                                               SelectionDAG &DAG) const {
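  // Map the generic opcode to its VL-predicated RISCVISD equivalent and query
  // which extra merge/mask operands that form expects.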
  unsigned NewOpc = getRISCVVLOp(Op);
  bool HasMergeOp = hasMergeOp(NewOpc);
  bool HasMask = hasMaskOp(NewOpc);

  MVT VT = Op.getSimpleValueType();
  MVT ContainerVT = getContainerForFixedLengthVector(VT);

  // Create list of operands by converting existing ones to scalable types.
  SmallVector<SDValue, 6> Ops;
  for (const SDValue &V : Op->op_values()) {
    assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");

    // Pass through non-vector operands.
    if (!V.getValueType().isVector()) {
      Ops.push_back(V);
      continue;
    }

    // "cast" fixed length vector to a scalable vector.
    assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
           "Only fixed length vectors are supported!");
    Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
  }

  SDLoc DL(Op);
  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
  if (HasMergeOp)
    Ops.push_back(DAG.getUNDEF(ContainerVT));
  if (HasMask)
    Ops.push_back(Mask);
  Ops.push_back(VL);

  // StrictFP operations have two result values. Their lowered result should
  // have the same result count.
  if (Op->isStrictFPOpcode()) {
    SDValue ScalableRes =
        DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
                    Op->getFlags());
    SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
    return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
  }

  SDValue ScalableRes =
      DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
  return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
}

// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
// * Operands of each node are assumed to be in the same order.
// * The EVL operand is promoted from i32 to i64 on RV64.
// * Fixed-length vectors are converted to their scalable-vector container
//   types.
SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG,
                                       unsigned RISCVISDOpc,
                                       bool HasMergeOp) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SmallVector<SDValue, 4> Ops;

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector())
    ContainerVT = getContainerForFixedLengthVector(VT);

  for (const auto &OpIdx : enumerate(Op->ops())) {
    SDValue V = OpIdx.value();
    assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
    // Add dummy merge value before the mask.
    if (HasMergeOp && *ISD::getVPMaskIdx(Op.getOpcode()) == OpIdx.index())
      Ops.push_back(DAG.getUNDEF(ContainerVT));
    // Pass through operands which aren't fixed-length vectors.
    if (!V.getValueType().isFixedLengthVector()) {
      Ops.push_back(V);
      continue;
    }
    // "cast" fixed length vector to a scalable vector.
    MVT OpVT = V.getSimpleValueType();
    MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
    assert(useRVVForFixedLengthVectorVT(OpVT) &&
           "Only fixed length vectors are supported!");
    Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
  }

  if (!VT.isFixedLengthVector())
    return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());

  SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());

  return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
}

SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  SDValue Src = Op.getOperand(0);
  // NOTE: Mask is dropped.
  SDValue VL = Op.getOperand(2);

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
    Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
  }

  MVT XLenVT = Subtarget.getXLenVT();
  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
  SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                                  DAG.getUNDEF(ContainerVT), Zero, VL);

  SDValue SplatValue = DAG.getConstant(
      Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
  SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                              DAG.getUNDEF(ContainerVT), SplatValue, VL);

  SDValue Result = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, Src,
                               Splat, ZeroSplat, VL);
  if (!VT.isFixedLengthVector())
    return Result;
  return convertFromScalableVector(VT, Result, DAG, Subtarget);
}

SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  SDValue Op1 = Op.getOperand(0);
  SDValue Op2 = Op.getOperand(1);
  ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  // NOTE: Mask is dropped.
  SDValue VL = Op.getOperand(4);

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
    Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
  }

  SDValue Result;
  SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
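
  // Comparisons of i1 vectors lower to mask-register logic; XOR with the
  // all-ones mask from vmset serves as NOT in the cases below.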
  switch (Condition) {
  default:
    break;
  // X != Y --> (X^Y)
  case ISD::SETNE:
    Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
    break;
  // X == Y --> ~(X^Y)
  case ISD::SETEQ: {
    SDValue Temp =
        DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
    Result =
        DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
    break;
  }
  // X >s Y --> X == 0 & Y == 1 --> ~X & Y
  // X <u Y --> X == 0 & Y == 1 --> ~X & Y
  case ISD::SETGT:
  case ISD::SETULT: {
    SDValue Temp =
        DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
    Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
    break;
  }
  // X <s Y --> X == 1 & Y == 0 --> ~Y & X
  // X >u Y --> X == 1 & Y == 0 --> ~Y & X
  case ISD::SETLT:
  case ISD::SETUGT: {
    SDValue Temp =
        DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
    Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
    break;
  }
  // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
  // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
  case ISD::SETGE:
  case ISD::SETULE: {
    SDValue Temp =
        DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
    Result = DAG.getNode(RISCVISD::VMOR_VL, DL, ContainerVT, Temp, Op2, VL);
    break;
  }
  // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
  // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
  case ISD::SETLE:
  case ISD::SETUGE: {
    SDValue Temp =
        DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
    Result = DAG.getNode(RISCVISD::VMOR_VL, DL, ContainerVT, Temp, Op1, VL);
    break;
  }
  }

  if (!VT.isFixedLengthVector())
    return Result;
  return convertFromScalableVector(VT, Result, DAG, Subtarget);
}

// Lower Floating-Point/Integer Type-Convert VP SDNodes.
SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG,
                                                unsigned RISCVISDOpc) const {
  SDLoc DL(Op);

  SDValue Src = Op.getOperand(0);
  SDValue Mask = Op.getOperand(1);
  SDValue VL = Op.getOperand(2);

  MVT DstVT = Op.getSimpleValueType();
  MVT SrcVT = Src.getSimpleValueType();
  if (DstVT.isFixedLengthVector()) {
    DstVT = getContainerForFixedLengthVector(DstVT);
    SrcVT = getContainerForFixedLengthVector(SrcVT);
    Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
    MVT MaskVT = getMaskTypeFor(DstVT);
    Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
  }

  unsigned DstEltSize = DstVT.getScalarSizeInBits();
  unsigned SrcEltSize = SrcVT.getScalarSizeInBits();

  SDValue Result;
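  // The vector FP<->int conversion instructions only change the element width
  // by a factor of two, so conversions with a larger ratio are staged through
  // intermediate types.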
  if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
    if (SrcVT.isInteger()) {
      assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");

      unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
                                    ? RISCVISD::VSEXT_VL
                                    : RISCVISD::VZEXT_VL;

      // Do we need to do any pre-widening before converting?
      if (SrcEltSize == 1) {
        MVT IntVT = DstVT.changeVectorElementTypeToInteger();
        MVT XLenVT = Subtarget.getXLenVT();
        SDValue Zero = DAG.getConstant(0, DL, XLenVT);
        SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
                                        DAG.getUNDEF(IntVT), Zero, VL);
        SDValue One = DAG.getConstant(
            RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
        SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
                                       DAG.getUNDEF(IntVT), One, VL);
        Src = DAG.getNode(RISCVISD::VSELECT_VL, DL, IntVT, Src, OneSplat,
                          ZeroSplat, VL);
      } else if (DstEltSize > (2 * SrcEltSize)) {
        // Widen before converting.
        MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
                                     DstVT.getVectorElementCount());
        Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
      }

      Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
    } else {
      assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
             "Wrong input/output vector types");

      // Convert f16 to f32 then convert f32 to i64.
      if (DstEltSize > (2 * SrcEltSize)) {
        assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
        MVT InterimFVT =
            MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
        Src =
            DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
      }

      Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
    }
  } else { // Narrowing + Conversion
    if (SrcVT.isInteger()) {
      assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
      // First do a narrowing convert to an FP type half the size, then round
      // the FP type to a small FP type if needed.

      MVT InterimFVT = DstVT;
      if (SrcEltSize > (2 * DstEltSize)) {
        assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
        assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
        InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
      }

      Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);

      if (InterimFVT != DstVT) {
        Src = Result;
        Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
      }
    } else {
      assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
             "Wrong input/output vector types");
      // First do a narrowing conversion to an integer half the size, then
      // truncate if needed.

      if (DstEltSize == 1) {
        // First convert to the same size integer, then convert to mask using
        // a setcc.
        assert(SrcEltSize >= 16 && "Unexpected FP type!");
        MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
                                          DstVT.getVectorElementCount());
        Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);

        // Compare the integer result to 0. The integer should be 0 or 1/-1,
        // otherwise the conversion was undefined.
        MVT XLenVT = Subtarget.getXLenVT();
        SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
        SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
                                DAG.getUNDEF(InterimIVT), SplatZero, VL);
        Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
                             {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
                              DAG.getUNDEF(DstVT), Mask, VL});
      } else {
        MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
                                          DstVT.getVectorElementCount());

        Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);

        while (InterimIVT != DstVT) {
          SrcEltSize /= 2;
          Src = Result;
          InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
                                        DstVT.getVectorElementCount());
          Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
                               Src, Mask, VL);
        }
      }
    }
  }

  MVT VT = Op.getSimpleValueType();
  if (!VT.isFixedLengthVector())
    return Result;
  return convertFromScalableVector(VT, Result, DAG, Subtarget);
}

SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, SelectionDAG &DAG,
                                            unsigned MaskOpc,
                                            unsigned VecOpc) const {
  MVT VT = Op.getSimpleValueType();
  if (VT.getVectorElementType() != MVT::i1)
    return lowerVPOp(Op, DAG, VecOpc, true);

  // It is safe to drop the mask parameter as masked-off elements are undef.
  SDValue Op1 = Op->getOperand(0);
  SDValue Op2 = Op->getOperand(1);
  SDValue VL = Op->getOperand(3);

  MVT ContainerVT = VT;
  const bool IsFixed = VT.isFixedLengthVector();
  if (IsFixed) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
    Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
  }

  SDLoc DL(Op);
  SDValue Val = DAG.getNode(MaskOpc, DL, ContainerVT, Op1, Op2, VL);
  if (!IsFixed)
    return Val;
  return convertFromScalableVector(VT, Val, DAG, Subtarget);
}

SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();
  MVT VT = Op.getSimpleValueType();
  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector())
    ContainerVT = getContainerForFixedLengthVector(VT);

  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});

  auto *VPNode = cast<VPStridedLoadSDNode>(Op);
  // Check if the mask is known to be all ones.
  SDValue Mask = VPNode->getMask();
  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
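
  // An all-ones mask selects the unmasked vlse form, which needs neither the
  // mask nor the policy operand.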
  SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
                                                   : Intrinsic::riscv_vlse_mask,
                                        DL, XLenVT);
  SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
                              DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
                              VPNode->getStride()};
  if (!IsUnmasked) {
    if (VT.isFixedLengthVector()) {
      MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
    Ops.push_back(Mask);
  }
  Ops.push_back(VPNode->getVectorLength());
  if (!IsUnmasked) {
    SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
    Ops.push_back(Policy);
  }

  SDValue Result =
      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
                              VPNode->getMemoryVT(), VPNode->getMemOperand());
  SDValue Chain = Result.getValue(1);

  if (VT.isFixedLengthVector())
    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

  return DAG.getMergeValues({Result, Chain}, DL);
}

SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  auto *VPNode = cast<VPStridedStoreSDNode>(Op);
  SDValue StoreVal = VPNode->getValue();
  MVT VT = StoreVal.getSimpleValueType();
  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
  }

  // Check if the mask is known to be all ones.
  SDValue Mask = VPNode->getMask();
  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

  SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
                                                   : Intrinsic::riscv_vsse_mask,
                                        DL, XLenVT);
  SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
                              VPNode->getBasePtr(), VPNode->getStride()};
  if (!IsUnmasked) {
    if (VT.isFixedLengthVector()) {
      MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
    Ops.push_back(Mask);
  }
  Ops.push_back(VPNode->getVectorLength());

  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
                                 Ops, VPNode->getMemoryVT(),
                                 VPNode->getMemOperand());
}

// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
// matched to a RVV indexed load. The RVV indexed load instructions only
// support the "unsigned unscaled" addressing mode; indices are implicitly
// zero-extended or truncated to XLEN and are treated as byte offsets. Any
// signed or scaled indexing is extended to the XLEN value type and scaled
// accordingly.
SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  const auto *MemSD = cast<MemSDNode>(Op.getNode());
  EVT MemVT = MemSD->getMemoryVT();
  MachineMemOperand *MMO = MemSD->getMemOperand();
  SDValue Chain = MemSD->getChain();
  SDValue BasePtr = MemSD->getBasePtr();

  ISD::LoadExtType LoadExtType;
  SDValue Index, Mask, PassThru, VL;

  if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
    Index = VPGN->getIndex();
    Mask = VPGN->getMask();
    PassThru = DAG.getUNDEF(VT);
    VL = VPGN->getVectorLength();
    // VP doesn't support extending loads.
    LoadExtType = ISD::NON_EXTLOAD;
  } else {
    // Else it must be a MGATHER.
    auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
    Index = MGN->getIndex();
    Mask = MGN->getMask();
    PassThru = MGN->getPassThru();
    LoadExtType = MGN->getExtensionType();
  }

  MVT IndexVT = Index.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
         "Unexpected VTs!");
  assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
  // Targets have to explicitly opt-in for extending vector loads.
  assert(LoadExtType == ISD::NON_EXTLOAD &&
         "Unexpected extending MGATHER/VP_GATHER");
  (void)LoadExtType;

  // If the mask is known to be all ones, optimize to an unmasked intrinsic;
  // the selection of the masked intrinsics doesn't do this for us.
  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
                               ContainerVT.getVectorElementCount());

    Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);

    if (!IsUnmasked) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
      PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
    }
  }

  if (!VL)
    VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;

  if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
    IndexVT = IndexVT.changeVectorElementType(XLenVT);
    SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, Mask.getValueType(),
                                   VL);
    Index = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, IndexVT, Index,
                        TrueMask, VL);
  }

  unsigned IntID =
      IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
  SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
  if (IsUnmasked)
    Ops.push_back(DAG.getUNDEF(ContainerVT));
  else
    Ops.push_back(PassThru);
  Ops.push_back(BasePtr);
  Ops.push_back(Index);
  if (!IsUnmasked)
    Ops.push_back(Mask);
  Ops.push_back(VL);
  if (!IsUnmasked)
    Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));

  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
  SDValue Result =
      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
  Chain = Result.getValue(1);

  if (VT.isFixedLengthVector())
    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

  return DAG.getMergeValues({Result, Chain}, DL);
}

// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then
// be matched to a RVV indexed store. The RVV indexed store instructions only
// support the "unsigned unscaled" addressing mode; indices are implicitly
// zero-extended or truncated to XLEN and are treated as byte offsets. Any
// signed or scaled indexing is extended to the XLEN value type and scaled
// accordingly.
SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const auto *MemSD = cast<MemSDNode>(Op.getNode());
  EVT MemVT = MemSD->getMemoryVT();
  MachineMemOperand *MMO = MemSD->getMemOperand();
  SDValue Chain = MemSD->getChain();
  SDValue BasePtr = MemSD->getBasePtr();

  bool IsTruncatingStore = false;
  SDValue Index, Mask, Val, VL;

  if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
    Index = VPSN->getIndex();
    Mask = VPSN->getMask();
    Val = VPSN->getValue();
    VL = VPSN->getVectorLength();
    // VP doesn't support truncating stores.
    IsTruncatingStore = false;
  } else {
    // Else it must be a MSCATTER.
    auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
    Index = MSN->getIndex();
    Mask = MSN->getMask();
    Val = MSN->getValue();
    IsTruncatingStore = MSN->isTruncatingStore();
  }

  MVT VT = Val.getSimpleValueType();
  MVT IndexVT = Index.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
         "Unexpected VTs!");
  assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
  // Targets have to explicitly opt-in for extending vector loads and
  // truncating vector stores.
  assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
  (void)IsTruncatingStore;

  // If the mask is known to be all ones, optimize to an unmasked intrinsic;
  // the selection of the masked intrinsics doesn't do this for us.
  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());

  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VT);
    IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
                               ContainerVT.getVectorElementCount());

    Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
    Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);

    if (!IsUnmasked) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  if (!VL)
    VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;

  if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
    IndexVT = IndexVT.changeVectorElementType(XLenVT);
    SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, Mask.getValueType(),
                                   VL);
    Index = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, IndexVT, Index,
                        TrueMask, VL);
  }

  unsigned IntID =
      IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
  SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
  Ops.push_back(Val);
  Ops.push_back(BasePtr);
  Ops.push_back(Index);
  if (!IsUnmasked)
    Ops.push_back(Mask);
  Ops.push_back(VL);

  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
                                 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
}

SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
                                               SelectionDAG &DAG) const {
  const MVT XLenVT = Subtarget.getXLenVT();
  SDLoc DL(Op);
  SDValue Chain = Op->getOperand(0);
  SDValue SysRegNo = DAG.getTargetConstant(
      RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
  SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
  SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
  Chain = RM.getValue(1);

  // The encoding used for the rounding mode in RISC-V differs from that used
  // in FLT_ROUNDS. To convert it, the RISC-V rounding mode is used as an index
  // into a table, which consists of a sequence of 4-bit fields, each
  // representing the corresponding FLT_ROUNDS mode.
  static const int Table =
      (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
      (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
      (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
      (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
      (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);

  SDValue Shift =
      DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
  SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
                                DAG.getConstant(Table, DL, XLenVT), Shift);
  SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
                               DAG.getConstant(7, DL, XLenVT));

  return DAG.getMergeValues({Masked, Chain}, DL);
}
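
// Worked example for the lookup in lowerGET_ROUNDING above (illustrative
// only, not part of the lowering itself): if FRM holds RTZ (RISC-V encoding
// 1), then Shift = 1 << 2 = 4, and (Table >> 4) & 7 yields the 4-bit field
// that was written at position 4 * RISCVFPRndMode::RTZ, i.e.
// int(RoundingMode::TowardZero) -- the FLT_ROUNDS value the caller expects.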
SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
                                               SelectionDAG &DAG) const {
  const MVT XLenVT = Subtarget.getXLenVT();
  SDLoc DL(Op);
  SDValue Chain = Op->getOperand(0);
  SDValue RMValue = Op->getOperand(1);
  SDValue SysRegNo = DAG.getTargetConstant(
      RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);

  // The encoding used for the rounding mode in RISC-V differs from that used
  // in FLT_ROUNDS. To convert it, the C rounding mode is used as an index into
  // a table, which consists of a sequence of 4-bit fields, each representing
  // the corresponding RISC-V mode.
  static const unsigned Table =
      (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
      (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
      (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
      (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
      (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));

  SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
                              DAG.getConstant(2, DL, XLenVT));
  SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
                                DAG.getConstant(Table, DL, XLenVT), Shift);
  RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
                        DAG.getConstant(0x7, DL, XLenVT));
  return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
                     RMValue);
}
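
// Worked example for lowerSET_ROUNDING above (illustrative only): for
// SET_ROUNDING(RoundingMode::TowardNegative) the incoming value is 3, so
// Shifted = Table >> 12 and the masked result is RISCVFPRndMode::RDN (2),
// which is then written to the FRM CSR.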
SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
                                               SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();

  bool isRISCV64 = Subtarget.is64Bit();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
  return DAG.getFrameIndex(FI, PtrVT);
}
// Returns the opcode of the target-specific SDNode that implements the 32-bit
// form of the given Opcode.
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::SHL:
    return RISCVISD::SLLW;
  case ISD::SRA:
    return RISCVISD::SRAW;
  case ISD::SRL:
    return RISCVISD::SRLW;
  case ISD::SDIV:
    return RISCVISD::DIVW;
  case ISD::UDIV:
    return RISCVISD::DIVUW;
  case ISD::UREM:
    return RISCVISD::REMUW;
  case ISD::ROTL:
    return RISCVISD::ROLW;
  case ISD::ROTR:
    return RISCVISD::RORW;
  }
}
// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
// otherwise be promoted to i64, making it difficult to select the
// SLLW/DIVUW/.../*W later on, because the fact that the operation was
// originally of type i8/i16/i32 is lost.
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
                                   unsigned ExtOpc = ISD::ANY_EXTEND) {
  SDLoc DL(N);
  RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
  SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
  SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
  // ReplaceNodeResults requires we maintain the same type for the return value.
  return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
}
// Converts the given 32-bit operation to an i64 operation with sign extension
// semantics so that redundant sign-extension instructions can be reduced.
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
  SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
  SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
                               DAG.getValueType(MVT::i32));
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}
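
// Illustrative example: an i32 (add x, y) on RV64 becomes
//   (trunc (sext_inreg (add (any_extend x), (any_extend y)), i32)),
// and the inner sext_inreg pattern can later be selected as a single ADDW,
// whose result is already sign-extended to 64 bits.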
void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
                                             SmallVectorImpl<SDValue> &Results,
                                             SelectionDAG &DAG) const {
  SDLoc DL(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Don't know how to custom type legalize this operation!");
  case ISD::STRICT_FP_TO_SINT:
  case ISD::STRICT_FP_TO_UINT:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    bool IsStrict = N->isStrictFPOpcode();
    bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
                    N->getOpcode() == ISD::STRICT_FP_TO_SINT;
    SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
    if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
        TargetLowering::TypeSoftenFloat) {
      if (!isTypeLegal(Op0.getValueType()))
        return;
      if (IsStrict) {
        SDValue Chain = N->getOperand(0);
        // In absence of Zfh, promote f16 to f32, then convert.
        if (Op0.getValueType() == MVT::f16 &&
            !Subtarget.hasStdExtZfhOrZhinx()) {
          Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
                            {Chain, Op0});
          Chain = Op0.getValue(1);
        }
        unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
                                : RISCVISD::STRICT_FCVT_WU_RV64;
        SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
        SDValue Res = DAG.getNode(
            Opc, DL, VTs, Chain, Op0,
            DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
        Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
        Results.push_back(Res.getValue(1));
        return;
      }
      // In absence of Zfh, promote f16 to f32, then convert.
      if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
        Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);

      unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
      SDValue Res =
          DAG.getNode(Opc, DL, MVT::i64, Op0,
                      DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }
    // If the FP type needs to be softened, emit a library call using the 'si'
    // version. If we left it to default legalization we'd end up with 'di'. If
    // the FP type doesn't need to be softened just let generic type
    // legalization promote the result type.
    RTLIB::Libcall LC;
    if (IsSigned)
      LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
    else
      LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
    MakeLibCallOptions CallOptions;
    EVT OpVT = Op0.getValueType();
    CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
    SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
    SDValue Result;
    std::tie(Result, Chain) =
        makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
    Results.push_back(Result);
    if (IsStrict)
      Results.push_back(Chain);
    break;
  }
  case ISD::LROUND: {
    SDValue Op0 = N->getOperand(0);
    EVT Op0VT = Op0.getValueType();
    if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
        TargetLowering::TypeSoftenFloat) {
      if (!isTypeLegal(Op0VT))
        return;

      // In absence of Zfh, promote f16 to f32, then convert.
      if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
        Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);

      SDValue Res =
          DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
                      DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }
    // If the FP type needs to be softened, emit a library call to lround. We'll
    // need to truncate the result. We assume any value that doesn't fit in i32
    // is allowed to return an unspecified value.
    RTLIB::Libcall LC =
        Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
    MakeLibCallOptions CallOptions;
    EVT OpVT = Op0.getValueType();
    CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
    SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
    Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
    Results.push_back(Result);
    break;
  }
  case ISD::READCYCLECOUNTER: {
    assert(!Subtarget.is64Bit() &&
           "READCYCLECOUNTER only has custom type legalization on riscv32");

    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
    SDValue RCW =
        DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));

    Results.push_back(
        DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
    Results.push_back(RCW.getValue(2));
    break;
  }
  case ISD::LOAD: {
    if (!ISD::isNON_EXTLoad(N))
      return;

    // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
    // sext_inreg we emit for ADD/SUB/MUL/SLLI.
    LoadSDNode *Ld = cast<LoadSDNode>(N);

    SDLoc dl(N);
    SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
                                 Ld->getBasePtr(), Ld->getMemoryVT(),
                                 Ld->getMemOperand());
    Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
    Results.push_back(Res.getValue(1));
    break;
  }
  case ISD::MUL: {
    unsigned Size = N->getSimpleValueType(0).getSizeInBits();
    unsigned XLen = Subtarget.getXLen();
    // This multiply needs to be expanded; try to use MULHSU+MUL if possible.
    if (Size > XLen) {
      assert(Size == (XLen * 2) && "Unexpected custom legalisation");
      SDValue LHS = N->getOperand(0);
      SDValue RHS = N->getOperand(1);
      APInt HighMask = APInt::getHighBitsSet(Size, XLen);

      bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
      bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
      // We need exactly one side to be unsigned.
      if (LHSIsU == RHSIsU)
        return;

      auto MakeMULPair = [&](SDValue S, SDValue U) {
        MVT XLenVT = Subtarget.getXLenVT();
        S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
        U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
        SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
        SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
        return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
      };

      bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
      bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;

      // The other operand should be signed, but still prefer MULH when
      // possible.
      if (RHSIsU && LHSIsS && !RHSIsS)
        Results.push_back(MakeMULPair(LHS, RHS));
      else if (LHSIsU && RHSIsS && !LHSIsS)
        Results.push_back(MakeMULPair(RHS, LHS));

      return;
    }
    [[fallthrough]];
  }
  case ISD::ADD:
  case ISD::SUB:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
    break;
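
  // Illustrative example for the ISD::MUL expansion above: for an i64
  // multiply on RV32 where RHS has its upper 32 bits known zero and LHS is
  // sign extended, the product is formed as
  //   Lo = mul(lo(LHS), lo(RHS)), Hi = mulhsu(lo(LHS), lo(RHS)),
  // and the two halves are recombined with BUILD_PAIR, avoiding a full
  // 64x64 multiply libcall.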
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    if (N->getOperand(1).getOpcode() != ISD::Constant) {
      // If we can use a BSET instruction, allow default promotion to apply.
      if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
          isOneConstant(N->getOperand(0)))
        break;
      Results.push_back(customLegalizeToWOp(N, DAG));
      break;
    }

    // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
    // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
    // shift amount.
    if (N->getOpcode() == ISD::SHL) {
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
      SDValue NewOp1 =
          DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
      SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
      SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
                                   DAG.getValueType(MVT::i32));
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
    }
    break;

  case ISD::ROTL:
  case ISD::ROTR:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
            Subtarget.hasVendorXTHeadBb()) &&
           "Unexpected custom legalization");
    if (!isa<ConstantSDNode>(N->getOperand(1)) &&
        !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
      return;
    Results.push_back(customLegalizeToWOp(N, DAG));
    break;
  case ISD::CTTZ:
  case ISD::CTTZ_ZERO_UNDEF:
  case ISD::CTLZ:
  case ISD::CTLZ_ZERO_UNDEF: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");

    SDValue NewOp0 =
        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
    bool IsCTZ =
        N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
    unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
    SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
    return;
  }
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::UREM: {
    MVT VT = N->getSimpleValueType(0);
    assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
           Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
           "Unexpected custom legalisation");
    // Don't promote division/remainder by constant since we should expand
    // those to multiplies by a magic constant.
    AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
    if (N->getOperand(1).getOpcode() == ISD::Constant &&
        !isIntDivCheap(N->getValueType(0), Attr))
      return;

    // If the input is i32, use ANY_EXTEND since the W instructions don't read
    // the upper 32 bits. For other types we need to sign or zero extend
    // based on the opcode.
    unsigned ExtOpc = ISD::ANY_EXTEND;
    if (VT != MVT::i32)
      ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
                                           : ISD::ZERO_EXTEND;

    Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
    break;
  }
  case ISD::SADDO: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");

    // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
    // use the default legalization.
    if (!isa<ConstantSDNode>(N->getOperand(1)))
      return;

    SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
    SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
    SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
    Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
                      DAG.getValueType(MVT::i32));

    SDValue Zero = DAG.getConstant(0, DL, MVT::i64);

    // For an addition, the result should be less than one of the operands
    // (LHS) if and only if the other operand (RHS) is negative, otherwise
    // there will be overflow.
    // For a subtraction, the result should be less than one of the operands
    // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
    // otherwise there will be overflow.
    EVT OType = N->getValueType(1);
    SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
    SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);

    SDValue Overflow =
        DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
    Results.push_back(Overflow);
    return;
  }
  case ISD::UADDO:
  case ISD::USUBO: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    bool IsAdd = N->getOpcode() == ISD::UADDO;
    // Create an ADDW or SUBW.
    SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
    SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
    SDValue Res =
        DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
    Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
                      DAG.getValueType(MVT::i32));

    SDValue Overflow;
    if (IsAdd && isOneConstant(RHS)) {
      // Special case: uaddo X, 1 overflowed if the addition result is 0.
      // The general case (X + C) < C is not necessarily beneficial. Although we
      // reduce the live range of X, we may introduce the materialization of
      // constant C, especially when the setcc result is used by a branch. We
      // have no compare-with-constant-and-branch instructions.
      Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
                              DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
    } else if (IsAdd && isAllOnesConstant(RHS)) {
      // Special case: uaddo X, -1 overflowed if X != 0.
      Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
                              DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
    } else {
      // Sign extend the LHS and perform an unsigned compare with the ADDW
      // result. Since the inputs are sign extended from i32, this is equivalent
      // to comparing the lower 32 bits.
      LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
      Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
                              IsAdd ? ISD::SETULT : ISD::SETUGT);
    }

    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
    Results.push_back(Overflow);
    return;
  }
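
  // Illustrative example for the special case above: for (uaddo X, 1) the
  // overflow flag becomes (setcc (addw X, 1), 0, eq), which can typically be
  // materialized as an add-immediate followed by seqz, without keeping a
  // second copy of X alive just for the compare.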
  case ISD::UADDSAT:
  case ISD::USUBSAT: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    if (Subtarget.hasStdExtZbb()) {
      // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
      // sign extend allows overflow of the lower 32 bits to be detected on
      // the promoted size.
      SDValue LHS =
          DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
      SDValue RHS =
          DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
      SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }

    // Without Zbb, expand to UADDO/USUBO+select, which will trigger our custom
    // promotion for UADDO/USUBO.
    Results.push_back(expandAddSubSat(N, DAG));
    return;
  }
  case ISD::ABS: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");

    if (Subtarget.hasStdExtZbb()) {
      // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
      // This allows us to remember that the result is sign extended. Expanding
      // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
      SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
                                N->getOperand(0));
      SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
      return;
    }

    // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
    SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));

    // Freeze the source so we can increase its use count.
    Src = DAG.getFreeze(Src);

    // Copy the sign bit to all bits using the sraiw pattern.
    SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
                                   DAG.getValueType(MVT::i32));
    SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
                           DAG.getConstant(31, DL, MVT::i64));

    SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
    NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);

    // NOTE: The result is only required to be anyextended, but sext is
    // consistent with type legalization of sub.
    NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
                         DAG.getValueType(MVT::i32));
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
    return;
  }
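
  // Illustrative trace of the expansion above for X = -5 (without Zbb):
  // SignFill = sraiw(X, 31) = -1, xor(X, SignFill) = 4, and
  // sub(4, SignFill) = 5, i.e. |X|. For non-negative X, SignFill is 0 and
  // both the xor and the sub are no-ops.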
  case ISD::BITCAST: {
    EVT VT = N->getValueType(0);
    assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
    SDValue Op0 = N->getOperand(0);
    EVT Op0VT = Op0.getValueType();
    MVT XLenVT = Subtarget.getXLenVT();
    if (VT == MVT::i16 && Op0VT == MVT::f16 &&
        Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) {
      SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
    } else if (VT == MVT::i16 && Op0VT == MVT::bf16 &&
               Subtarget.hasStdExtZfbfmin()) {
      SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
    } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
               Subtarget.hasStdExtFOrZfinx()) {
      SDValue FPConv =
          DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
    } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32 &&
               Subtarget.hasStdExtZfa()) {
      SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
                                   DAG.getVTList(MVT::i32, MVT::i32), Op0);
      SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
                                   NewReg.getValue(0), NewReg.getValue(1));
      Results.push_back(RetReg);
    } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
               isTypeLegal(Op0VT)) {
      // Custom-legalize bitcasts from fixed-length vector types to illegal
      // scalar types in order to improve codegen. Bitcast the vector to a
      // one-element vector type whose element type is the same as the result
      // type, and extract the first element.
      EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
      if (isTypeLegal(BVT)) {
        SDValue BVec = DAG.getBitcast(BVT, Op0);
        Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
                                      DAG.getConstant(0, DL, XLenVT)));
      }
    }
    break;
  }
  case RISCVISD::BREV8: {
    MVT VT = N->getSimpleValueType(0);
    MVT XLenVT = Subtarget.getXLenVT();
    assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
           "Unexpected custom legalisation");
    assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
    SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
    SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
    // ReplaceNodeResults requires we maintain the same type for the return
    // value.
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
    break;
  }
  case ISD::EXTRACT_VECTOR_ELT: {
    // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
    // type is illegal (currently only vXi64 RV32).
    // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
    // transferred to the destination register. We issue two of these from the
    // upper- and lower- halves of the SEW-bit vector element, slid down to the
    // first element.
    SDValue Vec = N->getOperand(0);
    SDValue Idx = N->getOperand(1);

    // The vector type hasn't been legalized yet so we can't issue target
    // specific nodes if it needs legalization.
    // FIXME: We would manually legalize if it's important.
    if (!isTypeLegal(Vec.getValueType()))
      return;

    MVT VecVT = Vec.getSimpleValueType();

    assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
           VecVT.getVectorElementType() == MVT::i64 &&
           "Unexpected EXTRACT_VECTOR_ELT legalization");

    // If this is a fixed vector, we need to convert it to a scalable vector.
    MVT ContainerVT = VecVT;
    if (VecVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VecVT);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
    }

    MVT XLenVT = Subtarget.getXLenVT();

    // Use a VL of 1 to avoid processing more elements than we need.
    auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);

    // Unless the index is known to be 0, we must slide the vector down to get
    // the desired element into index 0.
    if (!isNullConstant(Idx)) {
      Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
                          DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
    }

    // Extract the lower XLEN bits of the correct vector element.
    SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);

    // To extract the upper XLEN bits of the vector element, shift the first
    // element right by 32 bits and re-extract the lower XLEN bits.
    SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                                     DAG.getUNDEF(ContainerVT),
                                     DAG.getConstant(32, DL, XLenVT), VL);
    SDValue LShr32 =
        DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
                    DAG.getUNDEF(ContainerVT), Mask, VL);

    SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);

    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    switch (IntNo) {
    default:
      llvm_unreachable(
          "Don't know how to custom type legalize this intrinsic!");
    case Intrinsic::experimental_get_vector_length: {
      SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }
    case Intrinsic::riscv_orc_b:
    case Intrinsic::riscv_brev8:
    case Intrinsic::riscv_sha256sig0:
    case Intrinsic::riscv_sha256sig1:
    case Intrinsic::riscv_sha256sum0:
    case Intrinsic::riscv_sha256sum1:
    case Intrinsic::riscv_sm3p0:
    case Intrinsic::riscv_sm3p1: {
      if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
        return;
      unsigned Opc;
      switch (IntNo) {
      case Intrinsic::riscv_orc_b:      Opc = RISCVISD::ORC_B;      break;
      case Intrinsic::riscv_brev8:      Opc = RISCVISD::BREV8;      break;
      case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
      case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
      case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
      case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
      case Intrinsic::riscv_sm3p0:      Opc = RISCVISD::SM3P0;      break;
      case Intrinsic::riscv_sm3p1:      Opc = RISCVISD::SM3P1;      break;
      }

      SDValue NewOp =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
      SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }
    case Intrinsic::riscv_sm4ks:
    case Intrinsic::riscv_sm4ed: {
      unsigned Opc =
          IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
      SDValue NewOp1 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
      SDValue Res =
          DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }
    case Intrinsic::riscv_clmul: {
      if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
        return;

      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
      SDValue NewOp1 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
      SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }
    case Intrinsic::riscv_clmulh:
    case Intrinsic::riscv_clmulr: {
      if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
        return;

      // Extend inputs to XLen, and shift by 32. This will add 64 trailing
      // zeros to the full 128-bit clmul result of multiplying two xlen values.
      // Perform clmulr or clmulh on the shifted values. Finally, extract the
      // upper 32 bits.
      //
      // The alternative is to mask the inputs to 32 bits and use clmul, but
      // that requires two shifts to mask each input without zext.w.
      // FIXME: If the inputs are known zero extended or could be freely
      // zero extended, the mask form would be better.
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
      SDValue NewOp1 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
      NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
                           DAG.getConstant(32, DL, MVT::i64));
      NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
                           DAG.getConstant(32, DL, MVT::i64));
      unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
                                                      : RISCVISD::CLMULR;
      SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
      Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
                        DAG.getConstant(32, DL, MVT::i64));
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }
    case Intrinsic::riscv_vmv_x_s: {
      EVT VT = N->getValueType(0);
      MVT XLenVT = Subtarget.getXLenVT();
      if (VT.bitsLT(XLenVT)) {
        // Simple case: just extract using vmv.x.s and truncate.
        SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
                                      Subtarget.getXLenVT(), N->getOperand(1));
        Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
        return;
      }

      assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
             "Unexpected custom legalization");

      // We need to do the move in two steps.
      SDValue Vec = N->getOperand(1);
      MVT VecVT = Vec.getSimpleValueType();

      // First extract the lower XLEN bits of the element.
      SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);

      // To extract the upper XLEN bits of the vector element, shift the first
      // element right by 32 bits and re-extract the lower XLEN bits.
      auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);

      SDValue ThirtyTwoV =
          DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
                      DAG.getConstant(32, DL, XLenVT), VL);
      SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
                                   DAG.getUNDEF(VecVT), Mask, VL);
      SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);

      Results.push_back(
          DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
      break;
    }
    }
    break;
  }
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMIN:
    if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
      Results.push_back(V);
    break;
  case ISD::VP_REDUCE_ADD:
  case ISD::VP_REDUCE_AND:
  case ISD::VP_REDUCE_OR:
  case ISD::VP_REDUCE_XOR:
  case ISD::VP_REDUCE_SMAX:
  case ISD::VP_REDUCE_UMAX:
  case ISD::VP_REDUCE_SMIN:
  case ISD::VP_REDUCE_UMIN:
    if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
      Results.push_back(V);
    break;
  case ISD::GET_ROUNDING: {
    SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
    SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
    Results.push_back(Res.getValue(0));
    Results.push_back(Res.getValue(1));
    break;
  }
  }
}
// Try to fold (<bop> x, (reduction.<bop> vec, start))
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
                                    const RISCVSubtarget &Subtarget) {
  auto BinOpToRVVReduce = [](unsigned Opc) {
    switch (Opc) {
    default:
      llvm_unreachable("Unhandled binary to transform reduction");
    case ISD::ADD:
      return RISCVISD::VECREDUCE_ADD_VL;
    case ISD::UMAX:
      return RISCVISD::VECREDUCE_UMAX_VL;
    case ISD::SMAX:
      return RISCVISD::VECREDUCE_SMAX_VL;
    case ISD::UMIN:
      return RISCVISD::VECREDUCE_UMIN_VL;
    case ISD::SMIN:
      return RISCVISD::VECREDUCE_SMIN_VL;
    case ISD::AND:
      return RISCVISD::VECREDUCE_AND_VL;
    case ISD::OR:
      return RISCVISD::VECREDUCE_OR_VL;
    case ISD::XOR:
      return RISCVISD::VECREDUCE_XOR_VL;
    case ISD::FADD:
      return RISCVISD::VECREDUCE_FADD_VL;
    case ISD::FMAXNUM:
      return RISCVISD::VECREDUCE_FMAX_VL;
    case ISD::FMINNUM:
      return RISCVISD::VECREDUCE_FMIN_VL;
    }
  };

  auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
    return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
           isNullConstant(V.getOperand(1)) &&
           V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
  };

  unsigned Opc = N->getOpcode();
  unsigned ReduceIdx;
  if (IsReduction(N->getOperand(0), Opc))
    ReduceIdx = 0;
  else if (IsReduction(N->getOperand(1), Opc))
    ReduceIdx = 1;
  else
    return SDValue();

  // Skip if FADD disallows reassociation but the combine needs it.
  if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
    return SDValue();

  SDValue Extract = N->getOperand(ReduceIdx);
  SDValue Reduce = Extract.getOperand(0);
  if (!Extract.hasOneUse() || !Reduce.hasOneUse())
    return SDValue();

  SDValue ScalarV = Reduce.getOperand(2);
  EVT ScalarVT = ScalarV.getValueType();
  if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
      ScalarV.getOperand(0)->isUndef() &&
      isNullConstant(ScalarV.getOperand(2)))
    ScalarV = ScalarV.getOperand(1);

  // Make sure that ScalarV is a splat with VL=1.
  if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
      ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
      ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
    return SDValue();

  if (!isNonZeroAVL(ScalarV.getOperand(2)))
    return SDValue();

  // Check that the scalar of ScalarV is the neutral element.
  // TODO: Deal with values other than the neutral element.
  if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
                         0))
    return SDValue();

  // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
  // FIXME: We might be able to improve this if operand 0 is undef.
  if (!isNonZeroAVL(Reduce.getOperand(5)))
    return SDValue();

  SDValue NewStart = N->getOperand(1 - ReduceIdx);

  SDLoc DL(N);
  SDValue NewScalarV =
      lowerScalarInsert(NewStart, ScalarV.getOperand(2),
                        ScalarV.getSimpleValueType(), DL, DAG, Subtarget);

  // If we looked through an INSERT_SUBVECTOR we need to restore it.
  if (ScalarVT != ScalarV.getValueType())
    NewScalarV =
        DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
                    NewScalarV, DAG.getConstant(0, DL, Subtarget.getXLenVT()));

  SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
                   NewScalarV,           Reduce.getOperand(3),
                   Reduce.getOperand(4), Reduce.getOperand(5)};
  SDValue NewReduce =
      DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
  return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
                     Extract.getOperand(1));
}
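
// Illustrative example of the fold above: in
//   (add x, (extract_elt (vecreduce_add_vl ..., (splat 0), ..., avl), 0))
// the neutral start value (the VL=1 splat of 0) is replaced by a VL=1 splat
// of x and the outer add is deleted, leaving just the extract of the new
// reduction.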
// Optimize (add (shl x, c0), (shl y, c1)) ->
// (SLLI (SH*ADD x, y), c0), if c1-c0 equals 1, 2, or 3.
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget) {
  // Perform this optimization only in the zba extension.
  if (!Subtarget.hasStdExtZba())
    return SDValue();

  // Skip for vector types and larger types.
  EVT VT = N->getValueType(0);
  if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
    return SDValue();

  // The two operand nodes must be SHL and have no other use.
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
      !N0->hasOneUse() || !N1->hasOneUse())
    return SDValue();

  // Check c0 and c1.
  auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
  auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
  if (!N0C || !N1C)
    return SDValue();
  int64_t C0 = N0C->getSExtValue();
  int64_t C1 = N1C->getSExtValue();
  if (C0 <= 0 || C1 <= 0)
    return SDValue();

  // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
  int64_t Bits = std::min(C0, C1);
  int64_t Diff = std::abs(C0 - C1);
  if (Diff != 1 && Diff != 2 && Diff != 3)
    return SDValue();

  // Build nodes.
  SDLoc DL(N);
  SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
  SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
  SDValue NA0 =
      DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT));
  SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS);
  return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT));
}
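
// Illustrative example with Zba: c0 = 1, c1 = 3 gives Diff = 2 and Bits = 1,
// so (add (shl x, 1), (shl y, 3)) is rewritten to
// (shl (add (shl y, 2), x), 1), which can be selected as SH2ADD y, x
// followed by SLLI 1.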
// Combine a constant select operand into its use:
//
// (and (select cond, -1, c), x)
//   -> (select cond, x, (and x, c))  [AllOnes=1]
// (or  (select cond, 0, c), x)
//   -> (select cond, x, (or x, c))  [AllOnes=0]
// (xor (select cond, 0, c), x)
//   -> (select cond, x, (xor x, c))  [AllOnes=0]
// (add (select cond, 0, c), x)
//   -> (select cond, x, (add x, c))  [AllOnes=0]
// (sub x, (select cond, 0, c))
//   -> (select cond, x, (sub x, c))  [AllOnes=0]
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
                                   SelectionDAG &DAG, bool AllOnes,
                                   const RISCVSubtarget &Subtarget) {
  EVT VT = N->getValueType(0);

  // Skip vectors.
  if (VT.isVector())
    return SDValue();

  if (!Subtarget.hasShortForwardBranchOpt() ||
      (Slct.getOpcode() != ISD::SELECT &&
       Slct.getOpcode() != RISCVISD::SELECT_CC) ||
      !Slct.hasOneUse())
    return SDValue();

  auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
    return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
  };

  bool SwapSelectOps;
  unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
  SDValue TrueVal = Slct.getOperand(1 + OpOffset);
  SDValue FalseVal = Slct.getOperand(2 + OpOffset);
  SDValue NonConstantVal;
  if (isZeroOrAllOnes(TrueVal, AllOnes)) {
    SwapSelectOps = false;
    NonConstantVal = FalseVal;
  } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
    SwapSelectOps = true;
    NonConstantVal = TrueVal;
  } else
    return SDValue();

  // Slct is now known to be the desired identity constant when CC is true.
  TrueVal = OtherOp;
  FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
  // Unless SwapSelectOps says the condition should be false.
  if (SwapSelectOps)
    std::swap(TrueVal, FalseVal);

  if (Slct.getOpcode() == RISCVISD::SELECT_CC)
    return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
                       {Slct.getOperand(0), Slct.getOperand(1),
                        Slct.getOperand(2), TrueVal, FalseVal});

  return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
                     {Slct.getOperand(0), TrueVal, FalseVal});
}
// Attempt combineSelectAndUse on each operand of a commutative operator N.
static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
                                              bool AllOnes,
                                              const RISCVSubtarget &Subtarget) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
    return Result;
  if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
    return Result;
  return SDValue();
}
// Transform (add (mul x, c0), c1) ->
//           (add (mul (add x, c1/c0), c0), c1%c0),
// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
// that should be excluded is when c0*(c1/c0) is simm12, which will lead
// to an infinite loop in DAGCombine if transformed.
// Or transform (add (mul x, c0), c1) ->
//              (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
// lead to an infinite loop in DAGCombine if transformed.
// Or transform (add (mul x, c0), c1) ->
//              (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
// lead to an infinite loop in DAGCombine if transformed.
// Or transform (add (mul x, c0), c1) ->
//              (mul (add x, c1/c0), c0),
// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
                                     const RISCVSubtarget &Subtarget) {
  // Skip for vector types and larger types.
  EVT VT = N->getValueType(0);
  if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
    return SDValue();
  // The first operand node must be a MUL and have no other use.
  SDValue N0 = N->getOperand(0);
  if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
    return SDValue();
  // Check if c0 and c1 match the above conditions.
  auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
  auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!N0C || !N1C)
    return SDValue();
  // If N0C has multiple uses it's possible one of the cases in
  // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
  // in an infinite loop.
  if (!N0C->hasOneUse())
    return SDValue();
  int64_t C0 = N0C->getSExtValue();
  int64_t C1 = N1C->getSExtValue();
  int64_t CA, CB;
  if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
    return SDValue();
  // Search for a proper CA (non-zero) and CB that are both simm12.
  if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
      !isInt<12>(C0 * (C1 / C0))) {
    CA = C1 / C0;
    CB = C1 % C0;
  } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
             isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
    CA = C1 / C0 + 1;
    CB = C1 % C0 - C0;
  } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
             isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
    CA = C1 / C0 - 1;
    CB = C1 % C0 + C0;
  } else
    return SDValue();
  // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
  SDLoc DL(N);
  SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
                             DAG.getConstant(CA, DL, VT));
  SDValue New1 =
      DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT));
  return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));
}
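
// Illustrative example: C0 = 100 and C1 = 4098 (not a simm12). The first rule
// applies with CA = 4098 / 100 = 40 and CB = 4098 % 100 = 98, both simm12,
// while C0 * (C1 / C0) = 4000 is not, so (add (mul x, 100), 4098) becomes
// (add (mul (add x, 40), 100), 98).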
// Try to turn (add (xor (setcc X, Y), 1) -1) into (neg (setcc X, Y)).
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // RHS should be -1.
  if (!isAllOnesConstant(N1))
    return SDValue();

  // Look for an (xor (setcc X, Y), 1).
  if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)) ||
      N0.getOperand(0).getOpcode() != ISD::SETCC)
    return SDValue();

  // Emit a negate of the setcc.
  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                     N0.getOperand(0));
}
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  if (SDValue V = combineAddOfBooleanXor(N, DAG))
    return V;
  if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
    return V;
  if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
    return V;
  if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
    return V;
  // fold (add (select lhs, rhs, cc, 0, y), x) ->
  //      (select lhs, rhs, cc, x, (add x, y))
  return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
}
// Try to turn a sub with a boolean RHS and a constant LHS into an addi.
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // Require a constant LHS.
  auto *N0C = dyn_cast<ConstantSDNode>(N0);
  if (!N0C)
    return SDValue();

  // All our optimizations involve subtracting 1 from the immediate and forming
  // an ADDI. Make sure the new immediate is valid for an ADDI.
  APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
  if (!ImmValMinus1.isSignedIntN(12))
    return SDValue();

  SDValue NewLHS;
  if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
    // (sub constant, (setcc x, y, eq/neq)) ->
    // (add (setcc x, y, neq/eq), constant - 1)
    ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
    EVT SetCCOpVT = N1.getOperand(0).getValueType();
    if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
      return SDValue();
    CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
    NewLHS =
        DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
  } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
             N1.getOperand(0).getOpcode() == ISD::SETCC) {
    // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
    // Since setcc returns a bool the xor is equivalent to 1-setcc.
    NewLHS = N1.getOperand(0);
  } else
    return SDValue();

  SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
  return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
}
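
// Illustrative example: (sub 2, (seteq x, y)) has ImmValMinus1 = 1, so it is
// rewritten to (add (setne x, y), 1), trading the subtract for an ADDI with
// the inverted condition.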
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  if (SDValue V = combineSubOfBoolean(N, DAG))
    return V;

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
  if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
      isNullConstant(N1.getOperand(1))) {
    ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
    if (CCVal == ISD::SETLT) {
      EVT VT = N->getValueType(0);
      SDLoc DL(N);
      unsigned ShAmt = N0.getValueSizeInBits() - 1;
      return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
                         DAG.getConstant(ShAmt, DL, VT));
    }
  }

  // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
  //      (select lhs, rhs, cc, x, (sub x, y))
  return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
}
// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
// Legalizing setcc can introduce xors like this. Doing this transform reduces
// the number of xors and may allow the xor to fold into a branch condition.
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  bool IsAnd = N->getOpcode() == ISD::AND;

  if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
    return SDValue();

  if (!N0.hasOneUse() || !N1.hasOneUse())
    return SDValue();

  SDValue N01 = N0.getOperand(1);
  SDValue N11 = N1.getOperand(1);

  // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
  // (xor X, -1) based on the upper bits of the other operand being 0. If the
  // operation is And, allow one of the Xors to use -1.
  if (isOneConstant(N01)) {
    if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
      return SDValue();
  } else if (isOneConstant(N11)) {
    // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
    if (!(IsAnd && isAllOnesConstant(N01)))
      return SDValue();
  } else
    return SDValue();

  EVT VT = N->getValueType(0);

  SDValue N00 = N0.getOperand(0);
  SDValue N10 = N1.getOperand(0);

  // The LHS of the xors needs to be 0/1.
  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
  if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
    return SDValue();

  // Invert the opcode and insert a new xor.
  SDLoc DL(N);
  unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
  SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
  return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
}
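
// Illustrative example: with boolean (0/1) values a and b,
//   (and (xor a, 1), (xor b, 1)) -> (xor (or a, b), 1)
// replaces two xors with one, and the remaining xor can often fold into the
// branch that consumes the result.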
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
                                      const RISCVSubtarget &Subtarget) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
  // extending X. This is safe since we only need the LSB after the shift and
  // shift amounts larger than 31 would produce poison. If we wait until
  // type legalization, we'll create RISCVISD::SRLW and we can't recover it
  // to use a BEXT instruction.
  if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
      N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
      !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
    SDLoc DL(N0);
    SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
    SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
    SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
  }

  return SDValue();
}
// Combines two comparison operations and a logic operation into one selection
// operation (min, max) and a logic operation. Returns the newly constructed
// node if the conditions for the optimization are satisfied.
static SDValue performANDCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const RISCVSubtarget &Subtarget) {
  SelectionDAG &DAG = DCI.DAG;

  SDValue N0 = N->getOperand(0);
  // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
  // extending X. This is safe since we only need the LSB after the shift and
  // shift amounts larger than 31 would produce poison. If we wait until
  // type legalization, we'll create RISCVISD::SRLW and we can't recover it
  // to use a BEXT instruction.
  if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
      N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
      N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.hasOneUse()) {
    SDLoc DL(N);
    SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
    SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
    SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
    SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
                              DAG.getConstant(1, DL, MVT::i64));
    return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
  }

  if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
    return V;

  if (DCI.isAfterLegalizeDAG())
    if (SDValue V = combineDeMorganOfBoolean(N, DAG))
      return V;

  // fold (and (select lhs, rhs, cc, -1, y), x) ->
  //      (select lhs, rhs, cc, x, (and x, y))
  return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
}
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
                                const RISCVSubtarget &Subtarget) {
  SelectionDAG &DAG = DCI.DAG;

  if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
    return V;

  if (DCI.isAfterLegalizeDAG())
    if (SDValue V = combineDeMorganOfBoolean(N, DAG))
      return V;

  // fold (or (select cond, 0, y), x) ->
  //      (select cond, x, (or x, y))
  return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
}
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
  // NOTE: Assumes ROL being legal means ROLW is legal.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (N0.getOpcode() == RISCVISD::SLLW &&
      isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
      TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
    SDLoc DL(N);
    return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
                       DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
  }

  // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SETCC && isOneConstant(N1)) {
    auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    if (ConstN00 && CC == ISD::SETLT) {
      EVT VT = N0.getValueType();
      SDLoc DL(N0);
      const APInt &Imm = ConstN00->getAPIntValue();
      if ((Imm + 1).isSignedIntN(12))
        return DAG.getSetCC(DL, VT, N0.getOperand(1),
                            DAG.getConstant(Imm + 1, DL, VT), CC);
    }
  }

  if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
    return V;
  // fold (xor (select cond, 0, y), x) ->
  //      (select cond, x, (xor x, y))
  return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
}
// Indexed load/store instructions implicitly zero-extend their index operand,
// so \p narrowIndex tries to narrow the type of the index operand if it
// matches the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
static SDValue narrowIndex(SDValue N, SelectionDAG &DAG) {
  if (N.getOpcode() != ISD::SHL || !N->hasOneUse())
    return SDValue();

  SDValue N0 = N.getOperand(0);
  if (N0.getOpcode() != ISD::ZERO_EXTEND &&
      N0.getOpcode() != RISCVISD::VZEXT_VL)
    return SDValue();
  if (!N0->hasOneUse())
    return SDValue();

  APInt ShAmt;
  SDValue N1 = N.getOperand(1);
  if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
    return SDValue();

  SDLoc DL(N);
  SDValue Src = N0.getOperand(0);
  EVT SrcVT = Src.getValueType();
  unsigned SrcElen = SrcVT.getScalarSizeInBits();
  unsigned ShAmtV = ShAmt.getZExtValue();
  unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
  NewElen = std::max(NewElen, 8U);

  // Skip if NewElen is not narrower than the original extended type.
  if (NewElen >= N0.getValueType().getScalarSizeInBits())
    return SDValue();

  EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
  EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);

  SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
  SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
  return DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
}
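
// Illustrative example: for (shl (zext v4i8 to v4i64), splat 2) the shifted
// index needs at most 8 + 2 = 10 bits, so NewElen = 16 and the node is
// rewritten as (shl (zext v4i8 to v4i16), splat 2), leaving the indexed
// load/store to implicitly zero-extend the index to XLEN.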
// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
// can become a sext.w instead of a shift pair.
static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
                                   const RISCVSubtarget &Subtarget) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  if (OpVT != MVT::i64 || !Subtarget.is64Bit())
    return SDValue();

  // RHS needs to be a constant.
  auto *N1C = dyn_cast<ConstantSDNode>(N1);
  if (!N1C)
    return SDValue();

  // LHS needs to be (and X, 0xffffffff).
  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
      !isa<ConstantSDNode>(N0.getOperand(1)) ||
      N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
    return SDValue();

  // Looking for an equality compare.
  ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
  if (!isIntEqualitySetCC(Cond))
    return SDValue();

  // Don't do this if the sign bit is provably zero, it will be turned back
  // into an AND.
  APInt SignMask = APInt::getOneBitSet(64, 31);
  if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
    return SDValue();

  const APInt &C1 = N1C->getAPIntValue();

  SDLoc dl(N);
  // If the constant is larger than 2^32 - 1 it is impossible for both sides
  // to be equal.
  if (C1.getActiveBits() > 32)
    return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);

  SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
                               N0.getOperand(0), DAG.getValueType(MVT::i32));
  return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
                                                      dl, OpVT), Cond);
}
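
// Illustrative example: (seteq (and X, 0xffffffff), 0x80000000) becomes
// (seteq (sext_inreg X, i32), 0xffffffff80000000). The sign-extended
// constant can be cheaper to materialize, and the sext_inreg selects to a
// single sext.w rather than a shift pair.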
static SDValue
performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
                                const RISCVSubtarget &Subtarget) {
  SDValue Src = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
  if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
      cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16))
    return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
                       Src.getOperand(0));

  return SDValue();
}

namespace {
// Forward declaration of the structure holding the necessary information to
// apply a combine.
struct CombineResult;

/// Helper class for folding sign/zero extensions.
/// In particular, this class is used for the following combines:
/// add_vl -> vwadd(u) | vwadd(u)_w
/// sub_vl -> vwsub(u) | vwsub(u)_w
/// mul_vl -> vwmul(u) | vwmul_su
///
/// An object of this class represents an operand of the operation we want to
/// combine.
/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
/// NodeExtensionHelper for `a` and one for `b`.
///
/// This class abstracts away how the extension is materialized and
/// how its Mask, VL, and number of users affect the combines.
///
/// In particular:
/// - VWADD_W is conceptually == add(op0, sext(op1))
/// - VWADDU_W == add(op0, zext(op1))
/// - VWSUB_W == sub(op0, sext(op1))
/// - VWSUBU_W == sub(op0, zext(op1))
///
/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
/// zext|sext(smaller_value).
struct NodeExtensionHelper {
  /// Records if this operand is like being zero extended.
  bool SupportsZExt;
  /// Records if this operand is like being sign extended.
  /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
  /// instance, a splat constant (e.g., 3) would support being both sign and
  /// zero extended.
  bool SupportsSExt;
  /// This boolean captures whether we care if this operand would still be
  /// around after the folding happens.
  bool EnforceOneUse;
  /// Records if this operand's mask needs to match the mask of the operation
  /// that it will fold into.
  bool CheckMask;
  /// Value of the Mask for this operand.
  /// It may be SDValue().
  SDValue Mask;
  /// Value of the vector length operand.
  /// It may be SDValue().
  SDValue VL;
  /// Original value that this NodeExtensionHelper represents.
  SDValue OrigOperand;

  /// Get the value feeding the extension or the value itself.
  /// E.g., for zext(a), this would return a.
  SDValue getSource() const {
    switch (OrigOperand.getOpcode()) {
    case RISCVISD::VSEXT_VL:
    case RISCVISD::VZEXT_VL:
      return OrigOperand.getOperand(0);
    default:
      return OrigOperand;
    }
  }

  /// Check if this instance represents a splat.
  bool isSplat() const {
    return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL;
  }
11010 /// Get or create a value that can feed \p Root with the given extension \p
11011 /// SExt. If \p SExt is std::nullopt, this returns the source of this operand.
11012 /// \see ::getSource().
11013 SDValue getOrCreateExtendedOp(const SDNode *Root, SelectionDAG &DAG,
11014 std::optional<bool> SExt) const {
11015 if (!SExt.has_value())
11016 return OrigOperand;
11018 MVT NarrowVT = getNarrowType(Root);
11020 SDValue Source = getSource();
11021 if (Source.getValueType() == NarrowVT)
11022 return Source;
11024 unsigned ExtOpc = *SExt ? RISCVISD::VSEXT_VL : RISCVISD::VZEXT_VL;
11026 // If we need an extension, we should be changing the type.
11027 SDLoc DL(Root);
11028 auto [Mask, VL] = getMaskAndVL(Root);
11029 switch (OrigOperand.getOpcode()) {
11030 case RISCVISD::VSEXT_VL:
11031 case RISCVISD::VZEXT_VL:
11032 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
11033 case RISCVISD::VMV_V_X_VL:
11034 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
11035 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
11036 default:
11037 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
11038 // and that operand should already have the right NarrowVT so no
11039 // extension should be required at this point.
11040 llvm_unreachable("Unsupported opcode");
11041 }
11042 }
11044 /// Helper function to get the narrow type for \p Root.
11045 /// The narrow type is the type of \p Root where we divided the size of each
11046 /// element by 2. E.g., if Root's type is <2 x i16>, the narrow type is <2 x i8>.
11047 /// \pre The size of the type of the elements of Root must be a multiple of 2
11048 /// and be at least 16.
11049 static MVT getNarrowType(const SDNode *Root) {
11050 MVT VT = Root->getSimpleValueType(0);
11052 // Determine the narrow size.
11053 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
11054 assert(NarrowSize >= 8 && "Trying to extend something we can't represent");
11055 MVT NarrowVT = MVT::getVectorVT(MVT::getIntegerVT(NarrowSize),
11056 VT.getVectorElementCount());
11057 return NarrowVT;
11058 }
11060 /// Return the opcode required to materialize the folding of the sign
11061 /// extensions (\p IsSExt == true) or zero extensions (IsSExt == false) for
11062 /// both operands for \p Opcode.
11063 /// Put differently, get the opcode to materialize:
11064 /// - IsSExt == true: \p Opcode(sext(a), sext(b)) -> newOpcode(a, b)
11065 /// - IsSExt == false: \p Opcode(zext(a), zext(b)) -> newOpcode(a, b)
11066 /// \pre \p Opcode represents a supported root (\see ::isSupportedRoot()).
11067 static unsigned getSameExtensionOpcode(unsigned Opcode, bool IsSExt) {
11068 switch (Opcode) {
11069 case RISCVISD::ADD_VL:
11070 case RISCVISD::VWADD_W_VL:
11071 case RISCVISD::VWADDU_W_VL:
11072 return IsSExt ? RISCVISD::VWADD_VL : RISCVISD::VWADDU_VL;
11073 case RISCVISD::MUL_VL:
11074 return IsSExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL;
11075 case RISCVISD::SUB_VL:
11076 case RISCVISD::VWSUB_W_VL:
11077 case RISCVISD::VWSUBU_W_VL:
11078 return IsSExt ? RISCVISD::VWSUB_VL : RISCVISD::VWSUBU_VL;
11079 default:
11080 llvm_unreachable("Unexpected opcode");
11081 }
11082 }
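// For example (illustration only): getSameExtensionOpcode(RISCVISD::ADD_VL,
// /*IsSExt=*/true) returns RISCVISD::VWADD_VL, i.e. add(sext(a), sext(b))
// becomes a single widening vwadd.vv.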
11084 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
11085 /// newOpcode(a, b).
11086 static unsigned getSUOpcode(unsigned Opcode) {
11087 assert(Opcode == RISCVISD::MUL_VL && "SU is only supported for MUL");
11088 return RISCVISD::VWMULSU_VL;
11091 /// Get the opcode to materialize \p Opcode(a, s|zext(b)) ->
11092 /// newOpcode(a, b).
11093 static unsigned getWOpcode(unsigned Opcode, bool IsSExt) {
11094 switch (Opcode) {
11095 case RISCVISD::ADD_VL:
11096 return IsSExt ? RISCVISD::VWADD_W_VL : RISCVISD::VWADDU_W_VL;
11097 case RISCVISD::SUB_VL:
11098 return IsSExt ? RISCVISD::VWSUB_W_VL : RISCVISD::VWSUBU_W_VL;
11099 default:
11100 llvm_unreachable("Unexpected opcode");
11101 }
11102 }
11104 using CombineToTry = std::function<std::optional<CombineResult>(
11105 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
11106 const NodeExtensionHelper & /*RHS*/)>;
11108 /// Check if this node needs to be fully folded or extended for all users.
11109 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
11111 /// Helper method to set the various fields of this struct based on the
11112 /// type of \p Root.
11113 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG) {
11114 SupportsZExt = false;
11115 SupportsSExt = false;
11116 EnforceOneUse = true;
11117 CheckMask = true;
11118 switch (OrigOperand.getOpcode()) {
11119 case RISCVISD::VZEXT_VL:
11120 SupportsZExt = true;
11121 Mask = OrigOperand.getOperand(1);
11122 VL = OrigOperand.getOperand(2);
11123 break;
11124 case RISCVISD::VSEXT_VL:
11125 SupportsSExt = true;
11126 Mask = OrigOperand.getOperand(1);
11127 VL = OrigOperand.getOperand(2);
11128 break;
11129 case RISCVISD::VMV_V_X_VL: {
11130 // Historically, we didn't care about splat values not disappearing during
11131 // combines.
11132 EnforceOneUse = false;
11133 CheckMask = false;
11134 VL = OrigOperand.getOperand(2);
11136 // The operand is a splat of a scalar.
11138 // The passthru must be undef for tail agnostic.
11139 if (!OrigOperand.getOperand(0).isUndef())
11140 break;
11142 // Get the scalar value.
11143 SDValue Op = OrigOperand.getOperand(1);
11145 // See if we have enough sign bits or zero bits in the scalar to use a
11146 // widening opcode by splatting to smaller element size.
11147 MVT VT = Root->getSimpleValueType(0);
11148 unsigned EltBits = VT.getScalarSizeInBits();
11149 unsigned ScalarBits = Op.getValueSizeInBits();
11150 // Make sure we're getting all element bits from the scalar register.
11151 // FIXME: Support implicit sign extension of vmv.v.x?
11152 if (ScalarBits < EltBits)
11153 break;
11155 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
11156 // If the narrow type cannot be expressed with a legal VMV,
11157 // this is not a valid candidate.
11158 if (NarrowSize < 8)
11159 break;
11161 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
11162 SupportsSExt = true;
11163 if (DAG.MaskedValueIsZero(Op,
11164 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
11165 SupportsZExt = true;
11166 break;
11167 }
11168 default:
11169 break;
11170 }
11171 }
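// Example (for illustration): a splat such as (vmv_v_x_vl undef, 3, VL)
// feeding a root with i32 elements has enough sign bits and leading zeros to
// report both SupportsSExt and SupportsZExt, so it can be re-splatted at the
// narrow element type (i16) to feed a widening op.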
11173 /// Check if \p Root supports any extension folding combines.
11174 static bool isSupportedRoot(const SDNode *Root) {
11175 switch (Root->getOpcode()) {
11176 case RISCVISD::ADD_VL:
11177 case RISCVISD::MUL_VL:
11178 case RISCVISD::VWADD_W_VL:
11179 case RISCVISD::VWADDU_W_VL:
11180 case RISCVISD::SUB_VL:
11181 case RISCVISD::VWSUB_W_VL:
11182 case RISCVISD::VWSUBU_W_VL:
11183 return true;
11184 default:
11185 return false;
11186 }
11187 }
11189 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
11190 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG) {
11191 assert(isSupportedRoot(Root) && "Trying to build a helper with an "
11192 "unsupported root");
11193 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
11194 OrigOperand = Root->getOperand(OperandIdx);
11196 unsigned Opc = Root->getOpcode();
11197 switch (Opc) {
11198 // We consider VW<ADD|SUB>(U)_W(LHS, RHS) as if they were
11199 // <ADD|SUB>(LHS, S|ZEXT(RHS))
11200 case RISCVISD::VWADD_W_VL:
11201 case RISCVISD::VWADDU_W_VL:
11202 case RISCVISD::VWSUB_W_VL:
11203 case RISCVISD::VWSUBU_W_VL:
11204 if (OperandIdx == 1) {
11205 SupportsZExt =
11206 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
11207 SupportsSExt = !SupportsZExt;
11208 std::tie(Mask, VL) = getMaskAndVL(Root);
11209 CheckMask = true;
11210 // There's no existing extension here, so we don't have to worry about
11211 // making sure it gets removed.
11212 EnforceOneUse = false;
11213 break;
11214 }
11215 [[fallthrough]];
11216 default:
11217 fillUpExtensionSupport(Root, DAG);
11218 break;
11219 }
11220 }
11222 /// Check if this operand is compatible with the given vector length \p VL.
11223 bool isVLCompatible(SDValue VL) const {
11224 return this->VL != SDValue() && this->VL == VL;
11227 /// Check if this operand is compatible with the given \p Mask.
11228 bool isMaskCompatible(SDValue Mask) const {
11229 return !CheckMask || (this->Mask != SDValue() && this->Mask == Mask);
11232 /// Helper function to get the Mask and VL from \p Root.
11233 static std::pair<SDValue, SDValue> getMaskAndVL(const SDNode *Root) {
11234 assert(isSupportedRoot(Root) && "Unexpected root");
11235 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
11238 /// Check if the Mask and VL of this operand are compatible with \p Root.
11239 bool areVLAndMaskCompatible(const SDNode *Root) const {
11240 auto [Mask, VL] = getMaskAndVL(Root);
11241 return isMaskCompatible(Mask) && isVLCompatible(VL);
11244 /// Helper function to check if \p N is commutative with respect to the
11245 /// foldings that are supported by this class.
11246 static bool isCommutative(const SDNode *N) {
11247 switch (N->getOpcode()) {
11248 case RISCVISD::ADD_VL:
11249 case RISCVISD::MUL_VL:
11250 case RISCVISD::VWADD_W_VL:
11251 case RISCVISD::VWADDU_W_VL:
11252 return true;
11253 case RISCVISD::SUB_VL:
11254 case RISCVISD::VWSUB_W_VL:
11255 case RISCVISD::VWSUBU_W_VL:
11256 return false;
11257 default:
11258 llvm_unreachable("Unexpected opcode");
11259 }
11260 }
11262 /// Get a list of combine to try for folding extensions in \p Root.
11263 /// Note that each returned CombineToTry function doesn't actually modify
11264 /// anything. Instead they produce an optional CombineResult that, if not
11265 /// std::nullopt, needs to be materialized for the combine to be applied.
11266 /// \see CombineResult::materialize.
11267 /// If the related CombineToTry function returns std::nullopt, that means the
11268 /// combine didn't match.
11269 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
11270 };
11272 /// Helper structure that holds all the necessary information to materialize a
11273 /// combine that does some extension folding.
11274 struct CombineResult {
11275 /// Opcode to be generated when materializing the combine.
11276 unsigned TargetOpcode;
11277 // No value means no extension is needed. If extension is needed, the value
11278 // indicates if it needs to be sign extended.
11279 std::optional<bool> SExtLHS;
11280 std::optional<bool> SExtRHS;
11281 /// Root of the combine.
11282 SDNode *Root;
11283 /// LHS of the TargetOpcode.
11284 NodeExtensionHelper LHS;
11285 /// RHS of the TargetOpcode.
11286 NodeExtensionHelper RHS;
11288 CombineResult(unsigned TargetOpcode, SDNode *Root,
11289 const NodeExtensionHelper &LHS, std::optional<bool> SExtLHS,
11290 const NodeExtensionHelper &RHS, std::optional<bool> SExtRHS)
11291 : TargetOpcode(TargetOpcode), SExtLHS(SExtLHS), SExtRHS(SExtRHS),
11292 Root(Root), LHS(LHS), RHS(RHS) {}
11294 /// Return a value that uses TargetOpcode and that can be used to replace
11295 /// Root.
11296 /// The actual replacement is *not* done in that method.
11297 SDValue materialize(SelectionDAG &DAG) const {
11298 SDValue Mask, VL, Merge;
11299 std::tie(Mask, VL) = NodeExtensionHelper::getMaskAndVL(Root);
11300 Merge = Root->getOperand(2);
11301 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
11302 LHS.getOrCreateExtendedOp(Root, DAG, SExtLHS),
11303 RHS.getOrCreateExtendedOp(Root, DAG, SExtRHS), Merge,
11304 Mask, VL);
11305 }
11306 };
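// Sketch of the intended usage (illustrative only):
//   if (std::optional<CombineResult> Res = canFoldToVW_SU(Root, LHS, RHS))
//     SDValue Replacement = Res->materialize(DAG);
// The caller is still responsible for the RAUW, as done in
// combineBinOp_VLToVWBinOp_VL below.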
11308 /// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
11309 /// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
11310 /// are zext) and LHS and RHS can be folded into Root.
11311 /// AllowSExt and AllowZExt define which form `ext` can take in this pattern.
11313 /// \note If the pattern can match with both zext and sext, the returned
11314 /// CombineResult will feature the zext result.
11316 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
11317 /// can be used to apply the pattern.
11318 static std::optional<CombineResult>
11319 canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
11320 const NodeExtensionHelper &RHS, bool AllowSExt,
11321 bool AllowZExt) {
11322 assert((AllowSExt || AllowZExt) && "Forgot to set what you want?");
11323 if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root))
11324 return std::nullopt;
11325 if (AllowZExt && LHS.SupportsZExt && RHS.SupportsZExt)
11326 return CombineResult(NodeExtensionHelper::getSameExtensionOpcode(
11327 Root->getOpcode(), /*IsSExt=*/false),
11328 Root, LHS, /*SExtLHS=*/false, RHS,
11329 /*SExtRHS=*/false);
11330 if (AllowSExt && LHS.SupportsSExt && RHS.SupportsSExt)
11331 return CombineResult(NodeExtensionHelper::getSameExtensionOpcode(
11332 Root->getOpcode(), /*IsSExt=*/true),
11333 Root, LHS, /*SExtLHS=*/true, RHS,
11334 /*SExtRHS=*/true);
11335 return std::nullopt;
11336 }
11338 /// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
11339 /// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
11340 /// are zext) and LHS and RHS can be folded into Root.
11342 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
11343 /// can be used to apply the pattern.
11344 static std::optional<CombineResult>
11345 canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
11346 const NodeExtensionHelper &RHS) {
11347 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true,
11348 /*AllowZExt=*/true);
11351 /// Check if \p Root follows a pattern Root(LHS, ext(RHS))
11353 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
11354 /// can be used to apply the pattern.
11355 static std::optional<CombineResult>
11356 canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
11357 const NodeExtensionHelper &RHS) {
11358 if (!RHS.areVLAndMaskCompatible(Root))
11359 return std::nullopt;
11361 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
11362 // splat?
11363 // Control this behavior behind an option (AllowSplatInVW_W) for testing
11364 // purposes.
11366 return CombineResult(
11367 NodeExtensionHelper::getWOpcode(Root->getOpcode(), /*IsSExt=*/false),
11368 Root, LHS, /*SExtLHS=*/std::nullopt, RHS, /*SExtRHS=*/false);
11369 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
11370 return CombineResult(
11371 NodeExtensionHelper::getWOpcode(Root->getOpcode(), /*IsSExt=*/true),
11372 Root, LHS, /*SExtLHS=*/std::nullopt, RHS, /*SExtRHS=*/true);
11373 return std::nullopt;
11376 /// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
11378 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
11379 /// can be used to apply the pattern.
11380 static std::optional<CombineResult>
11381 canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
11382 const NodeExtensionHelper &RHS) {
11383 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true,
11384 /*AllowZExt=*/false);
11387 /// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
11389 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
11390 /// can be used to apply the pattern.
11391 static std::optional<CombineResult>
11392 canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
11393 const NodeExtensionHelper &RHS) {
11394 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/false,
11395 /*AllowZExt=*/true);
11398 /// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
11400 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
11401 /// can be used to apply the pattern.
11402 static std::optional<CombineResult>
11403 canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
11404 const NodeExtensionHelper &RHS) {
11405 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
11406 return std::nullopt;
11407 if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root))
11408 return std::nullopt;
11409 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
11410 Root, LHS, /*SExtLHS=*/true, RHS, /*SExtRHS=*/false);
11413 SmallVector<NodeExtensionHelper::CombineToTry>
11414 NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
11415 SmallVector<CombineToTry> Strategies;
11416 switch (Root->getOpcode()) {
11417 case RISCVISD::ADD_VL:
11418 case RISCVISD::SUB_VL:
11419 // add|sub -> vwadd(u)|vwsub(u)
11420 Strategies.push_back(canFoldToVWWithSameExtension);
11421 // add|sub -> vwadd(u)_w|vwsub(u)_w
11422 Strategies.push_back(canFoldToVW_W);
11423 break;
11424 case RISCVISD::MUL_VL:
11425 // mul -> vwmul(u)
11426 Strategies.push_back(canFoldToVWWithSameExtension);
11427 // mul -> vwmulsu
11428 Strategies.push_back(canFoldToVW_SU);
11429 break;
11430 case RISCVISD::VWADD_W_VL:
11431 case RISCVISD::VWSUB_W_VL:
11432 // vwadd_w|vwsub_w -> vwadd|vwsub
11433 Strategies.push_back(canFoldToVWWithSEXT);
11434 break;
11435 case RISCVISD::VWADDU_W_VL:
11436 case RISCVISD::VWSUBU_W_VL:
11437 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
11438 Strategies.push_back(canFoldToVWWithZEXT);
11439 break;
11440 default:
11441 llvm_unreachable("Unexpected opcode");
11442 }
11443 return Strategies;
11444 }
11445 } // End anonymous namespace.
11447 /// Combine a binary operation to its equivalent VW or VW_W form.
11448 /// The supported combines are:
11449 /// add_vl -> vwadd(u) | vwadd(u)_w
11450 /// sub_vl -> vwsub(u) | vwsub(u)_w
11451 /// mul_vl -> vwmul(u) | vwmul_su
11452 /// vwadd_w(u) -> vwadd(u)
11453 /// vwsub_w(u) -> vwsub(u)
11454 static SDValue
11455 combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
11456 SelectionDAG &DAG = DCI.DAG;
11458 assert(NodeExtensionHelper::isSupportedRoot(N) &&
11459 "Shouldn't have called this method");
11460 SmallVector<SDNode *> Worklist;
11461 SmallSet<SDNode *, 8> Inserted;
11462 Worklist.push_back(N);
11463 Inserted.insert(N);
11464 SmallVector<CombineResult> CombinesToApply;
11466 while (!Worklist.empty()) {
11467 SDNode *Root = Worklist.pop_back_val();
11468 if (!NodeExtensionHelper::isSupportedRoot(Root))
11469 return SDValue();
11471 NodeExtensionHelper LHS(Root, 0, DAG);
11472 NodeExtensionHelper RHS(Root, 1, DAG);
11473 auto AppendUsersIfNeeded = [&Worklist,
11474 &Inserted](const NodeExtensionHelper &Op) {
11475 if (Op.needToPromoteOtherUsers()) {
11476 for (SDNode *TheUse : Op.OrigOperand->uses()) {
11477 if (Inserted.insert(TheUse).second)
11478 Worklist.push_back(TheUse);
11483 // Control the compile time by limiting the number of node we look at in
11485 if (Inserted.size() > ExtensionMaxWebSize)
11486 return SDValue();
11488 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
11489 NodeExtensionHelper::getSupportedFoldings(Root);
11491 assert(!FoldingStrategies.empty() && "Nothing to be folded");
11492 bool Matched = false;
11493 for (int Attempt = 0;
11494 (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
11495 ++Attempt) {
11497 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
11498 FoldingStrategies) {
11499 std::optional<CombineResult> Res = FoldingStrategy(Root, LHS, RHS);
11500 if (!Res)
11501 continue;
11502 CombinesToApply.push_back(*Res);
11503 // All the inputs that are extended need to be folded, otherwise
11504 // we would be leaving both the old input (since it may still be used)
11505 // and the new one around.
11506 if (Res->SExtLHS.has_value())
11507 AppendUsersIfNeeded(LHS);
11508 if (Res->SExtRHS.has_value())
11509 AppendUsersIfNeeded(RHS);
11510 Matched = true;
11511 break;
11512 }
11513 std::swap(LHS, RHS);
11514 }
11515 // Right now we do an all-or-nothing approach.
11516 if (!Matched)
11517 return SDValue();
11518 }
11519 // Store the value for the replacement of the input node separately.
11520 SDValue InputRootReplacement;
11521 // We do the RAUW after we materialize all the combines, because some replaced
11522 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
11523 // some of these nodes may appear in the NodeExtensionHelpers of some of the
11524 // yet-to-be-visited CombinesToApply roots.
11525 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
11526 ValuesToReplace.reserve(CombinesToApply.size());
11527 for (CombineResult Res : CombinesToApply) {
11528 SDValue NewValue = Res.materialize(DAG);
11529 if (!InputRootReplacement) {
11530 assert(Res.Root == N &&
11531 "First element is expected to be the current node");
11532 InputRootReplacement = NewValue;
11533 } else {
11534 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
11535 }
11536 }
11537 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
11538 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
11539 DCI.AddToWorklist(OldNewValues.second.getNode());
11541 return InputRootReplacement;
11544 // Helper function for performMemPairCombine.
11545 // Try to combine the memory loads/stores LSNode1 and LSNode2
11546 // into a single memory pair operation.
11547 static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
11548 LSBaseSDNode *LSNode2, SDValue BasePtr,
11549 uint64_t Imm) {
11550 SmallPtrSet<const SDNode *, 32> Visited;
11551 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
11553 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
11554 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
11555 return SDValue();
11557 MachineFunction &MF = DAG.getMachineFunction();
11558 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
11560 // The new operation has twice the width.
11561 MVT XLenVT = Subtarget.getXLenVT();
11562 EVT MemVT = LSNode1->getMemoryVT();
11563 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
11564 MachineMemOperand *MMO = LSNode1->getMemOperand();
11565 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
11566 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
11568 if (LSNode1->getOpcode() == ISD::LOAD) {
11569 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
11570 unsigned Opcode;
11571 if (MemVT == MVT::i32)
11572 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
11573 else
11574 Opcode = RISCVISD::TH_LDD;
11576 SDValue Res = DAG.getMemIntrinsicNode(
11577 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
11578 {LSNode1->getChain(), BasePtr,
11579 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
11580 NewMemVT, NewMMO);
11582 SDValue Node1 =
11583 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
11584 SDValue Node2 =
11585 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
11586 DAG.ReplaceAllUsesWith(LSNode1, Node1.getNode());
11587 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
11588 return Node1;
11589 } else {
11590 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
11592 SDValue Res = DAG.getMemIntrinsicNode(
11593 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
11594 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
11595 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
11598 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
11599 return Res;
11600 }
11601 }
11603 // Try to combine two adjacent loads/stores to a single pair instruction from
11604 // the XTHeadMemPair vendor extension.
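// For illustration (a sketch): two adjacent i64 loads at offsets 16 and 24
// from the same base can be merged into a single th.ldd that produces both
// values, provided the first offset satisfies the shifted 2-bit index
// encoding checked below.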
11605 static SDValue performMemPairCombine(SDNode *N,
11606 TargetLowering::DAGCombinerInfo &DCI) {
11607 SelectionDAG &DAG = DCI.DAG;
11608 MachineFunction &MF = DAG.getMachineFunction();
11609 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
11611 // Target does not support load/store pair.
11612 if (!Subtarget.hasVendorXTHeadMemPair())
11613 return SDValue();
11615 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
11616 EVT MemVT = LSNode1->getMemoryVT();
11617 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
11619 // No volatile, indexed or atomic loads/stores.
11620 if (!LSNode1->isSimple() || LSNode1->isIndexed())
11621 return SDValue();
11623 // Function to get a base + constant representation from a memory value.
11624 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
11625 if (Ptr->getOpcode() == ISD::ADD)
11626 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
11627 return {Ptr->getOperand(0), C1->getZExtValue()};
11629 return {Ptr, 0};
11630 };
11631 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
11633 SDValue Chain = N->getOperand(0);
11634 for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
11635 UI != UE; ++UI) {
11636 SDUse &Use = UI.getUse();
11637 if (Use.getUser() != N && Use.getResNo() == 0 &&
11638 Use.getUser()->getOpcode() == N->getOpcode()) {
11639 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
11641 // No volatile, indexed or atomic loads/stores.
11642 if (!LSNode2->isSimple() || LSNode2->isIndexed())
11643 continue;
11645 // Check if LSNode1 and LSNode2 have the same type and extension.
11646 if (LSNode1->getOpcode() == ISD::LOAD)
11647 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
11648 cast<LoadSDNode>(LSNode1)->getExtensionType())
11649 continue;
11651 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
11652 continue;
11654 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
11656 // Check if the base pointer is the same for both instructions.
11657 if (Base1 != Base2)
11658 continue;
11660 // Check if the offsets match the XTHeadMemPair encoding constraints.
11661 bool Valid = false;
11662 if (MemVT == MVT::i32) {
11663 // Check for adjacent i32 values and a 2-bit index.
11664 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
11665 Valid = true;
11666 } else if (MemVT == MVT::i64) {
11667 // Check for adjacent i64 values and a 2-bit index.
11668 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
11669 Valid = true;
11670 }
11672 if (!Valid)
11673 continue;
11675 // Try to combine.
11676 if (SDValue Res =
11677 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
11678 return Res;
11679 }
11680 }
11682 return SDValue();
11683 }
11685 // Fold
11686 // (fp_to_int (froundeven X)) -> fcvt X, rne
11687 // (fp_to_int (ftrunc X)) -> fcvt X, rtz
11688 // (fp_to_int (ffloor X)) -> fcvt X, rdn
11689 // (fp_to_int (fceil X)) -> fcvt X, rup
11690 // (fp_to_int (fround X)) -> fcvt X, rmm
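// For example (illustrative): on RV64 with F, (fp_to_sint (ffloor X)) for an
// f32 X can select to a single "fcvt.w.s a0, fa0, rdn" instead of a floor
// call followed by a separate conversion.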
11691 static SDValue performFP_TO_INTCombine(SDNode *N,
11692 TargetLowering::DAGCombinerInfo &DCI,
11693 const RISCVSubtarget &Subtarget) {
11694 SelectionDAG &DAG = DCI.DAG;
11695 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11696 MVT XLenVT = Subtarget.getXLenVT();
11697 SDLoc DL(N);
11698 SDValue Src = N->getOperand(0);
11700 // Don't do this for strict-fp Src.
11701 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
11702 return SDValue();
11704 // Ensure the FP type is legal.
11705 if (!TLI.isTypeLegal(Src.getValueType()))
11706 return SDValue();
11708 // Don't do this for f16 with Zfhmin and not Zfh.
11709 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
11710 return SDValue();
11712 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
11713 if (FRM == RISCVFPRndMode::Invalid)
11714 return SDValue();
11717 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
11718 EVT VT = N->getValueType(0);
11720 if (VT.isVector() && TLI.isTypeLegal(VT)) {
11721 MVT SrcVT = Src.getSimpleValueType();
11722 MVT SrcContainerVT = SrcVT;
11723 MVT ContainerVT = VT.getSimpleVT();
11724 SDValue XVal = Src.getOperand(0);
11726 // For widening and narrowing conversions we just combine it into a
11727 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
11728 // end up getting lowered to their appropriate pseudo instructions based on
11729 // their operand types
11730 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
11731 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
11732 return SDValue();
11734 // Make fixed-length vectors scalable first
11735 if (SrcVT.isFixedLengthVector()) {
11736 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
11737 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
11738 ContainerVT =
11739 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
11740 }
11742 auto [Mask, VL] =
11743 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
11744 SDValue FpToInt;
11746 if (FRM == RISCVFPRndMode::RTZ) {
11747 // Use the dedicated trunc static rounding mode if we're truncating so we
11748 // don't need to generate calls to fsrmi/fsrm
11749 unsigned Opc =
11750 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
11751 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
11752 } else {
11753 unsigned Opc =
11754 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
11755 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
11756 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
11757 }
11759 // If converted from fixed-length to scalable, convert back
11760 if (VT.isFixedLengthVector())
11761 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
11763 return FpToInt;
11764 }
11766 // Only handle XLen or i32 types. Other types narrower than XLen will
11767 // eventually be legalized to XLenVT.
11768 if (VT != MVT::i32 && VT != XLenVT)
11769 return SDValue();
11771 unsigned Opc;
11772 if (VT == XLenVT)
11773 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
11774 else
11775 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
11777 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
11778 DAG.getTargetConstant(FRM, DL, XLenVT));
11779 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
11783 // (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
11784 // (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
11785 // (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
11786 // (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
11787 // (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
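// For example (illustrative): for (fp_to_sint_sat i64 (ftrunc f64 X)) on
// RV64, fcvt.l.d already saturates out-of-range inputs, so only the nan case
// (X != X) needs the explicit select of zero emitted below.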
11788 static SDValue performFP_TO_INT_SATCombine(SDNode *N,
11789 TargetLowering::DAGCombinerInfo &DCI,
11790 const RISCVSubtarget &Subtarget) {
11791 SelectionDAG &DAG = DCI.DAG;
11792 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11793 MVT XLenVT = Subtarget.getXLenVT();
11795 // Only handle XLen types. Other types narrower than XLen will eventually be
11796 // legalized to XLenVT.
11797 EVT DstVT = N->getValueType(0);
11798 if (DstVT != XLenVT)
11799 return SDValue();
11801 SDValue Src = N->getOperand(0);
11803 // Don't do this for strict-fp Src.
11804 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
11805 return SDValue();
11807 // Ensure the FP type is also legal.
11808 if (!TLI.isTypeLegal(Src.getValueType()))
11809 return SDValue();
11811 // Don't do this for f16 with Zfhmin and not Zfh.
11812 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
11813 return SDValue();
11815 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
11817 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
11818 if (FRM == RISCVFPRndMode::Invalid)
11819 return SDValue();
11821 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
11823 unsigned Opc;
11824 if (SatVT == DstVT)
11825 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
11826 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
11827 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
11828 else
11829 return SDValue();
11830 // FIXME: Support other SatVTs by clamping before or after the conversion.
11832 Src = Src.getOperand(0);
11834 SDLoc DL(N);
11835 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
11836 DAG.getTargetConstant(FRM, DL, XLenVT));
11838 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
11839 // extend.
11840 if (Opc == RISCVISD::FCVT_WU_RV64)
11841 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
11843 // RISC-V FP-to-int conversions saturate to the destination register size, but
11844 // don't produce 0 for nan.
11845 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
11846 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
11849 // Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
11850 // smaller than XLenVT.
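// For example (illustrative): for an i16 X, bswap swaps the two bytes and
// bitreverse then reverses all 16 bits; the composition reverses the bits
// within each byte, which is exactly what one brev8 instruction computes.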
11851 static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
11852 const RISCVSubtarget &Subtarget) {
11853 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
11855 SDValue Src = N->getOperand(0);
11856 if (Src.getOpcode() != ISD::BSWAP)
11857 return SDValue();
11859 EVT VT = N->getValueType(0);
11860 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
11861 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
11862 return SDValue();
11864 SDLoc DL(N);
11865 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
11868 // Convert from one FMA opcode to another based on whether we are negating the
11869 // multiply result and/or the accumulator.
11870 // NOTE: Only supports RVV operations with VL.
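// For example (illustrative): negateFMAOpcode(RISCVISD::VFMADD_VL,
// /*NegMul=*/true, /*NegAcc=*/false) yields RISCVISD::VFNMSUB_VL, since
// negating the product of (a * b) + c gives -(a * b) + c.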
11871 static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
11872 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
11873 if (NegMul) {
11874 // clang-format off
11875 switch (Opcode) {
11876 default: llvm_unreachable("Unexpected opcode");
11877 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
11878 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
11879 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
11880 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
11881 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
11882 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
11883 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
11884 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
11885 }
11886 // clang-format on
11887 }
11889 // Negating the accumulator changes ADD<->SUB.
11890 if (NegAcc) {
11891 // clang-format off
11892 switch (Opcode) {
11893 default: llvm_unreachable("Unexpected opcode");
11894 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
11895 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
11896 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
11897 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
11898 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
11899 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
11900 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
11901 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
11902 }
11903 // clang-format on
11904 }
11906 return Opcode;
11907 }
11909 static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
11910 // Fold FNEG_VL into FMA opcodes.
11911 // The first operand of strict-fp is chain.
11912 unsigned Offset = N->isTargetStrictFPOpcode();
11913 SDValue A = N->getOperand(0 + Offset);
11914 SDValue B = N->getOperand(1 + Offset);
11915 SDValue C = N->getOperand(2 + Offset);
11916 SDValue Mask = N->getOperand(3 + Offset);
11917 SDValue VL = N->getOperand(4 + Offset);
11919 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
11920 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
11921 V.getOperand(2) == VL) {
11922 // Return the negated input.
11923 V = V.getOperand(0);
11924 return true;
11925 }
11927 return false;
11928 };
11930 bool NegA = invertIfNegative(A);
11931 bool NegB = invertIfNegative(B);
11932 bool NegC = invertIfNegative(C);
11934 // If no operands are negated, we're done.
11935 if (!NegA && !NegB && !NegC)
11936 return SDValue();
11938 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
11939 if (N->isTargetStrictFPOpcode())
11940 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
11941 {N->getOperand(0), A, B, C, Mask, VL});
11942 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
11943 VL);
11944 }
11946 static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG) {
11947 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
11948 return V;
11950 // FIXME: Ignore strict opcodes for now.
11951 if (N->isTargetStrictFPOpcode())
11952 return SDValue();
11954 // Try to form widening FMA.
11955 SDValue Op0 = N->getOperand(0);
11956 SDValue Op1 = N->getOperand(1);
11957 SDValue Mask = N->getOperand(3);
11958 SDValue VL = N->getOperand(4);
11960 if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
11961 Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
11962 return SDValue();
11964 // TODO: Refactor to handle more complex cases similar to
11965 // combineBinOp_VLToVWBinOp_VL.
11966 if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
11967 (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
11968 return SDValue();
11970 // Check the mask and VL are the same.
11971 if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
11972 Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
11973 return SDValue();
11975 unsigned NewOpc;
11976 switch (N->getOpcode()) {
11977 default:
11978 llvm_unreachable("Unexpected opcode");
11979 case RISCVISD::VFMADD_VL:
11980 NewOpc = RISCVISD::VFWMADD_VL;
11981 break;
11982 case RISCVISD::VFNMSUB_VL:
11983 NewOpc = RISCVISD::VFWNMSUB_VL;
11984 break;
11985 case RISCVISD::VFNMADD_VL:
11986 NewOpc = RISCVISD::VFWNMADD_VL;
11987 break;
11988 case RISCVISD::VFMSUB_VL:
11989 NewOpc = RISCVISD::VFWMSUB_VL;
11990 break;
11991 }
11993 Op0 = Op0.getOperand(0);
11994 Op1 = Op1.getOperand(0);
11996 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0), Op0, Op1,
11997 N->getOperand(2), Mask, VL);
12000 static SDValue performVFMUL_VLCombine(SDNode *N, SelectionDAG &DAG) {
12001 // FIXME: Ignore strict opcodes for now.
12002 assert(!N->isTargetStrictFPOpcode() && "Unexpected opcode");
12004 // Try to form widening multiply.
12005 SDValue Op0 = N->getOperand(0);
12006 SDValue Op1 = N->getOperand(1);
12007 SDValue Merge = N->getOperand(2);
12008 SDValue Mask = N->getOperand(3);
12009 SDValue VL = N->getOperand(4);
12011 if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
12012 Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
12013 return SDValue();
12015 // TODO: Refactor to handle more complex cases similar to
12016 // combineBinOp_VLToVWBinOp_VL.
12017 if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
12018 (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
12019 return SDValue();
12021 // Check the mask and VL are the same.
12022 if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
12023 Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
12024 return SDValue();
12026 Op0 = Op0.getOperand(0);
12027 Op1 = Op1.getOperand(0);
12029 return DAG.getNode(RISCVISD::VFWMUL_VL, SDLoc(N), N->getValueType(0), Op0,
12030 Op1, Merge, Mask, VL);
12033 static SDValue performFADDSUB_VLCombine(SDNode *N, SelectionDAG &DAG) {
12034 SDValue Op0 = N->getOperand(0);
12035 SDValue Op1 = N->getOperand(1);
12036 SDValue Merge = N->getOperand(2);
12037 SDValue Mask = N->getOperand(3);
12038 SDValue VL = N->getOperand(4);
12040 bool IsAdd = N->getOpcode() == RISCVISD::FADD_VL;
12042 // Look for foldable FP_EXTENDs.
12043 bool Op0IsExtend =
12044 Op0.getOpcode() == RISCVISD::FP_EXTEND_VL &&
12045 (Op0.hasOneUse() || (Op0 == Op1 && Op0->hasNUsesOfValue(2, 0)));
12046 bool Op1IsExtend =
12047 (Op0 == Op1 && Op0IsExtend) ||
12048 (Op1.getOpcode() == RISCVISD::FP_EXTEND_VL && Op1.hasOneUse());
12050 // Check the mask and VL.
12051 if (Op0IsExtend && (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL))
12052 Op0IsExtend = false;
12053 if (Op1IsExtend && (Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL))
12054 Op1IsExtend = false;
12057 if (!Op1IsExtend) {
12058 // Sub requires at least operand 1 to be an extend.
12059 if (!IsAdd)
12060 return SDValue();
12062 // Add is commutative; if the other operand is foldable, swap them.
12063 if (!Op0IsExtend)
12064 return SDValue();
12066 std::swap(Op0, Op1);
12067 std::swap(Op0IsExtend, Op1IsExtend);
12068 }
12070 // Op1 is a foldable extend. Op0 might be foldable.
12071 Op1 = Op1.getOperand(0);
12072 if (Op0IsExtend)
12073 Op0 = Op0.getOperand(0);
12075 unsigned Opc;
12076 if (IsAdd)
12077 Opc = Op0IsExtend ? RISCVISD::VFWADD_VL : RISCVISD::VFWADD_W_VL;
12078 else
12079 Opc = Op0IsExtend ? RISCVISD::VFWSUB_VL : RISCVISD::VFWSUB_W_VL;
12081 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Op0, Op1, Merge, Mask,
12082 VL);
12083 }
12085 static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
12086 const RISCVSubtarget &Subtarget) {
12087 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
12089 if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit())
12090 return SDValue();
12092 if (!isa<ConstantSDNode>(N->getOperand(1)))
12093 return SDValue();
12094 uint64_t ShAmt = N->getConstantOperandVal(1);
12095 if (ShAmt > 32)
12096 return SDValue();
12098 SDValue N0 = N->getOperand(0);
12100 // Combine (sra (sext_inreg (shl X, C1), i32), C2) ->
12101 // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of
12102 // SLLIW+SRAIW. SLLI+SRAI have compressed forms.
12103 if (ShAmt < 32 &&
12104 N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() &&
12105 cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 &&
12106 N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() &&
12107 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
12108 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
12109 if (LShAmt < 32) {
12110 SDLoc ShlDL(N0.getOperand(0));
12111 SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64,
12112 N0.getOperand(0).getOperand(0),
12113 DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64));
12114 SDLoc DL(N);
12115 return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl,
12116 DAG.getConstant(ShAmt + 32, DL, MVT::i64));
12117 }
12118 return SDValue();
12119 }
12120 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
12121 // FIXME: Should this be a generic combine? There's a similar combine on X86.
12123 // Also try these folds where an add or sub is in the middle.
12124 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1)), C)
12125 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X)), C)
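// Concretely (illustration only), with C == 3:
//   (sra (shl X, 32), 29)
// becomes (shl (sext_inreg X, i32), 3), i.e. a sext.w-style extend plus a
// small left shift instead of the 32-bit shift pair.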
12126 SDValue Shl;
12127 ConstantSDNode *AddC = nullptr;
12129 // We might have an ADD or SUB between the SRA and SHL.
12130 bool IsAdd = N0.getOpcode() == ISD::ADD;
12131 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
12132 // Other operand needs to be a constant we can modify.
12133 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
12134 if (!AddC)
12135 return SDValue();
12137 // AddC needs to have at least 32 trailing zeros.
12138 if (AddC->getAPIntValue().countr_zero() < 32)
12139 return SDValue();
12141 // All users should be a shift by constant less than or equal to 32. This
12142 // ensures we'll do this optimization for each of them to produce an
12143 // add/sub+sext_inreg they can all share.
12144 for (SDNode *U : N0->uses()) {
12145 if (U->getOpcode() != ISD::SRA ||
12146 !isa<ConstantSDNode>(U->getOperand(1)) ||
12147 cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() > 32)
12148 return SDValue();
12149 }
12151 Shl = N0.getOperand(IsAdd ? 0 : 1);
12152 } else {
12153 // Not an ADD or SUB.
12154 Shl = N0;
12155 }
12157 // Look for a shift left by 32.
12158 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
12159 Shl.getConstantOperandVal(1) != 32)
12160 return SDValue();
12162 // If we didn't look through an add/sub, then the shl should have one use.
12163 // If we did look through an add/sub, the sext_inreg we create is free so
12164 // we're only creating 2 new instructions. It's enough to only remove the
12165 // original sra+add/sub.
12166 if (!AddC && !Shl.hasOneUse())
12167 return SDValue();
12169 SDLoc DL(N);
12170 SDValue In = Shl.getOperand(0);
12172 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
12173 // constant.
12174 if (AddC) {
12175 SDValue ShiftedAddC =
12176 DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64);
12177 if (IsAdd)
12178 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
12179 else
12180 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
12181 }
12183 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
12184 DAG.getValueType(MVT::i32));
12185 if (ShAmt == 32)
12186 return SExt;
12188 return DAG.getNode(
12189 ISD::SHL, DL, MVT::i64, SExt,
12190 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
12191 }
12193 // Invert (and/or (setcc X, Y), (xor Z, 1)) to (or/and (setcc !cc X, Y), Z) if
12194 // the result is used as the condition of a br_cc or select_cc we can invert,
12195 // inverting the setcc is free, and Z is 0/1. Caller will invert the
12196 // br_cc/select_cc.
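// For example (illustrative), with Z known to be 0/1:
//   (and (seteq X, Y), (xor Z, 1))
// becomes (or (setne X, Y), Z), and the caller then inverts the br_cc or
// select_cc condition so the overall behavior is unchanged.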
12197 static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
12198 bool IsAnd = Cond.getOpcode() == ISD::AND;
12199 if (!IsAnd && Cond.getOpcode() != ISD::OR)
12200 return SDValue();
12202 if (!Cond.hasOneUse())
12203 return SDValue();
12205 SDValue Setcc = Cond.getOperand(0);
12206 SDValue Xor = Cond.getOperand(1);
12207 // Canonicalize setcc to LHS.
12208 if (Setcc.getOpcode() != ISD::SETCC)
12209 std::swap(Setcc, Xor);
12210 // LHS should be a setcc and RHS should be an xor.
12211 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
12212 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
12213 return SDValue();
12215 // If the condition is an And, SimplifyDemandedBits may have changed
12216 // (xor Z, 1) to (not Z).
12217 SDValue Xor1 = Xor.getOperand(1);
12218 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
12219 return SDValue();
12221 EVT VT = Cond.getValueType();
12222 SDValue Xor0 = Xor.getOperand(0);
12224 // The LHS of the xor needs to be 0/1.
12225 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
12226 if (!DAG.MaskedValueIsZero(Xor0, Mask))
12227 return SDValue();
12229 // We can only invert integer setccs.
12230 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
12231 if (!SetCCOpVT.isScalarInteger())
12232 return SDValue();
12234 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
12235 if (ISD::isIntEqualitySetCC(CCVal)) {
12236 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
12237 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
12238 Setcc.getOperand(1), CCVal);
12239 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
12240 // Invert (setlt 0, X) by converting to (setlt X, 1).
12241 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
12242 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
12243 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
12244 // Invert (setlt X, 1) by converting to (setlt 0, X).
12245 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
12246 DAG.getConstant(0, SDLoc(Setcc), VT),
12247 Setcc.getOperand(0), CCVal);
12248 } else
12249 return SDValue();
12251 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
12252 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
12253 }
12255 // Perform common combines for BR_CC and SELECT_CC conditions.
12256 static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
12257 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
12258 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
12260 // Since an arithmetic right shift always preserves the sign bit,
12261 // the shift can be omitted when comparing against zero.
12262 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
12263 // setge (sra X, N), 0 -> setge X, 0
12264 if (auto *RHSConst = dyn_cast<ConstantSDNode>(RHS.getNode())) {
12265 if ((CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
12266 LHS.getOpcode() == ISD::SRA && RHSConst->isZero()) {
12267 LHS = LHS.getOperand(0);
12268 return true;
12269 }
12270 }
12272 if (!ISD::isIntEqualitySetCC(CCVal))
12273 return false;
12275 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
12276 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
12277 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
12278 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
12279 // If we're looking for eq 0 instead of ne 0, we need to invert the
12281 bool Invert = CCVal == ISD::SETEQ;
12282 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
12283 if (Invert)
12284 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
12286 RHS = LHS.getOperand(1);
12287 LHS = LHS.getOperand(0);
12288 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
12290 CC = DAG.getCondCode(CCVal);
12291 return true;
12292 }
12294 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
12295 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
12296 RHS = LHS.getOperand(1);
12297 LHS = LHS.getOperand(0);
12298 return true;
12299 }
12301 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
12302 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
12303 LHS.getOperand(1).getOpcode() == ISD::Constant) {
12304 SDValue LHS0 = LHS.getOperand(0);
12305 if (LHS0.getOpcode() == ISD::AND &&
12306 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
12307 uint64_t Mask = LHS0.getConstantOperandVal(1);
12308 uint64_t ShAmt = LHS.getConstantOperandVal(1);
12309 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
12310 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
12311 CC = DAG.getCondCode(CCVal);
12313 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
12314 LHS = LHS0.getOperand(0);
12315 if (ShAmt != 0)
12316 LHS =
12317 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
12318 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
12319 return true;
12320 }
12321 }
12322 }
12324 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
12325 // This can occur when legalizing some floating point comparisons.
12326 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
12327 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
12328 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
12329 CC = DAG.getCondCode(CCVal);
12330 RHS = DAG.getConstant(0, DL, LHS.getValueType());
12331 return true;
12332 }
12334 if (isNullConstant(RHS)) {
12335 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
12336 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
12337 CC = DAG.getCondCode(CCVal);
12338 LHS = NewCond;
12339 return true;
12340 }
12341 }
12343 return false;
12344 }
12347 // (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
12348 // (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
12349 // (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
12350 // (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
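// For example (illustrative): (select C, (add Y, X), Y) only needs X to be
// conditional, so it is rewritten to (add Y, (select C, X, 0)) and the add
// itself executes unconditionally.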
12351 static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
12352 SDValue TrueVal, SDValue FalseVal,
12353 bool Swapped) {
12354 bool Commutative = true;
12355 switch (TrueVal.getOpcode()) {
12356 default:
12357 return SDValue();
12358 case ISD::SUB:
12359 Commutative = false;
12360 break;
12361 case ISD::ADD:
12362 case ISD::OR:
12363 case ISD::XOR:
12364 break;
12365 }
12367 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
12368 return SDValue();
12370 unsigned OpToFold;
12371 if (FalseVal == TrueVal.getOperand(0))
12372 OpToFold = 0;
12373 else if (Commutative && FalseVal == TrueVal.getOperand(1))
12374 OpToFold = 1;
12375 else
12376 return SDValue();
12378 EVT VT = N->getValueType(0);
12379 SDLoc DL(N);
12380 SDValue Zero = DAG.getConstant(0, DL, VT);
12381 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
12383 if (Swapped)
12384 std::swap(OtherOp, Zero);
12385 SDValue NewSel = DAG.getSelect(DL, VT, N->getOperand(0), OtherOp, Zero);
12386 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
12389 // This tries to get rid of the `select` and `icmp` that are used to handle
12390 // targets that do not support `cttz(0)`/`ctlz(0)`.
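// For example (illustrative): for i32, source such as
//   x == 0 ? 0 : __builtin_ctz(x)
// reaches here as (select (seteq x, 0), 0, (cttz x)); because cttz returns
// the bit width (32) on a zero input, this collapses to (and (cttz x), 31)
// with no select at all.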
12391 static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
12392 SDValue Cond = N->getOperand(0);
12394 // This represents either CTTZ or CTLZ instruction.
12395 SDValue CountZeroes;
12396 SDValue ValOnZero;
12399 if (Cond.getOpcode() != ISD::SETCC)
12400 return SDValue();
12402 if (!isNullConstant(Cond->getOperand(1)))
12403 return SDValue();
12405 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
12406 if (CCVal == ISD::CondCode::SETEQ) {
12407 CountZeroes = N->getOperand(2);
12408 ValOnZero = N->getOperand(1);
12409 } else if (CCVal == ISD::CondCode::SETNE) {
12410 CountZeroes = N->getOperand(1);
12411 ValOnZero = N->getOperand(2);
12412 } else {
12413 return SDValue();
12414 }
12416 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
12417 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
12418 CountZeroes = CountZeroes.getOperand(0);
12420 if (CountZeroes.getOpcode() != ISD::CTTZ &&
12421 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
12422 CountZeroes.getOpcode() != ISD::CTLZ &&
12423 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
12424 return SDValue();
12426 if (!isNullConstant(ValOnZero))
12427 return SDValue();
12429 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
12430 if (Cond->getOperand(0) != CountZeroesArgument)
12431 return SDValue();
12433 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
12434 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
12435 CountZeroes.getValueType(), CountZeroesArgument);
12436 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
12437 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
12438 CountZeroes.getValueType(), CountZeroesArgument);
12441 unsigned BitWidth = CountZeroes.getValueSizeInBits();
12442 SDValue BitWidthMinusOne =
12443 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
12445 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
12446 CountZeroes, BitWidthMinusOne);
12447 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
12450 static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
12451 const RISCVSubtarget &Subtarget) {
12452 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
12453 return Folded;
12455 if (Subtarget.hasShortForwardBranchOpt())
12456 return SDValue();
12458 SDValue TrueVal = N->getOperand(1);
12459 SDValue FalseVal = N->getOperand(2);
12460 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
12461 return V;
12462 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
12465 // If we're concatenating a series of vector loads like
12466 // concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
12467 // Then we can turn this into a strided load by widening the vector elements
12468 // vlse32 p, stride=n
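// For example (illustrative): four v4i8 loads at p, p+n, p+2*n, p+3*n become
// one vlse32 with stride n, each 32-bit lane holding one v4i8 chunk, and the
// result is bitcast back to the original fixed-length type.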
12469 static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
12470 const RISCVSubtarget &Subtarget,
12471 const RISCVTargetLowering &TLI) {
12472 SDLoc DL(N);
12473 EVT VT = N->getValueType(0);
12475 // Only perform this combine on legal MVTs.
12476 if (!TLI.isTypeLegal(VT))
12477 return SDValue();
12479 // TODO: Potentially extend this to scalable vectors
12480 if (VT.isScalableVector())
12481 return SDValue();
12483 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
12484 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
12485 !SDValue(BaseLd, 0).hasOneUse())
12486 return SDValue();
12488 EVT BaseLdVT = BaseLd->getValueType(0);
12489 SDValue BasePtr = BaseLd->getBasePtr();
12491 // Go through the loads and check that they're strided
12492 SDValue CurPtr = BasePtr;
12493 SDValue Stride;
12494 Align Align = BaseLd->getAlign();
12496 for (SDValue Op : N->ops().drop_front()) {
12497 auto *Ld = dyn_cast<LoadSDNode>(Op);
12498 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
12499 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
12500 Ld->getValueType(0) != BaseLdVT)
12501 return SDValue();
12503 SDValue Ptr = Ld->getBasePtr();
12504 // Check that each load's pointer is (add CurPtr, Stride)
12505 if (Ptr.getOpcode() != ISD::ADD || Ptr.getOperand(0) != CurPtr)
12506 return SDValue();
12507 SDValue Offset = Ptr.getOperand(1);
12508 if (!Stride)
12509 Stride = Offset;
12510 else if (Offset != Stride)
12511 return SDValue();
12513 // The common alignment is the most restrictive (smallest) of all the loads
12514 Align = std::min(Align, Ld->getAlign());
12516 CurPtr = Ptr;
12517 }
12519 // A special case is if the stride is exactly the width of one of the loads,
12520 // in which case it's contiguous and can be combined into a regular vle
12521 // without changing the element size
12522 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
12523 ConstStride &&
12524 ConstStride->getZExtValue() == BaseLdVT.getFixedSizeInBits() / 8) {
12525 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
12526 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(),
12527 VT.getStoreSize(), Align);
12528 // Can't do the combine if the load isn't naturally aligned with the element
12529 // type.
12530 if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(),
12531 DAG.getDataLayout(), VT, *MMO))
12532 return SDValue();
12534 SDValue WideLoad = DAG.getLoad(VT, DL, BaseLd->getChain(), BasePtr, MMO);
12535 for (SDValue Ld : N->ops())
12536 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), WideLoad);
12537 return WideLoad;
12538 }
12540 // Get the widened scalar type, e.g. v4i8 -> i64
12541 unsigned WideScalarBitWidth =
12542 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
12543 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
12545 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
12546 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
12547 if (!TLI.isTypeLegal(WideVecVT))
12548 return SDValue();
12550 // Check that the operation is legal
12551 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
12552 return SDValue();
12554 MVT ContainerVT = TLI.getContainerForFixedLengthVector(WideVecVT);
12555 SDValue VL =
12556 getDefaultVLOps(WideVecVT, ContainerVT, DL, DAG, Subtarget).second;
12557 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12558 SDValue IntID =
12559 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, Subtarget.getXLenVT());
12560 SDValue Ops[] = {BaseLd->getChain(),
12561 IntID,
12562 DAG.getUNDEF(ContainerVT),
12563 BasePtr,
12564 Stride,
12565 VL};
12567 uint64_t MemSize;
12568 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride))
12569 // total size = (elsize * n) + (stride - elsize) * (n-1)
12570 // = elsize + stride * (n-1)
12571 MemSize = WideScalarVT.getSizeInBits() +
12572 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
12573 else
12574 // If Stride isn't constant, then we can't know how much it will load.
12575 MemSize = MemoryLocation::UnknownSize;
12577 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
12578 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
12579 Align);
12581 SDValue StridedLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
12582 Ops, WideVecVT, MMO);
12583 for (SDValue Ld : N->ops())
12584 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
12586 // Note: Perform the bitcast before the convertFromScalableVector so we have
12587 // balanced pairs of convertFromScalable/convertToScalable
12588 SDValue Res = DAG.getBitcast(
12589 TLI.getContainerForFixedLengthVector(VT.getSimpleVT()), StridedLoad);
12590 return convertFromScalableVector(VT, Res, DAG, Subtarget);
12593 static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
12594 const RISCVSubtarget &Subtarget) {
12595 assert(N->getOpcode() == RISCVISD::ADD_VL);
12596 SDValue Addend = N->getOperand(0);
12597 SDValue MulOp = N->getOperand(1);
12598 SDValue AddMergeOp = N->getOperand(2);
12600 if (!AddMergeOp.isUndef())
12601 return SDValue();
12603 auto IsVWMulOpc = [](unsigned Opc) {
12604 switch (Opc) {
12605 case RISCVISD::VWMUL_VL:
12606 case RISCVISD::VWMULU_VL:
12607 case RISCVISD::VWMULSU_VL:
12608 return true;
12609 default:
12610 return false;
12611 }
12612 };
12614 if (!IsVWMulOpc(MulOp.getOpcode()))
12615 std::swap(Addend, MulOp);
12617 if (!IsVWMulOpc(MulOp.getOpcode()))
12618 return SDValue();
12620 SDValue MulMergeOp = MulOp.getOperand(2);
12622 if (!MulMergeOp.isUndef())
12623 return SDValue();
12625 SDValue AddMask = N->getOperand(3);
12626 SDValue AddVL = N->getOperand(4);
12627 SDValue MulMask = MulOp.getOperand(3);
12628 SDValue MulVL = MulOp.getOperand(4);
12630 if (AddMask != MulMask || AddVL != MulVL)
12631 return SDValue();
12633 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
12634 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
12635 "Unexpected opcode after VWMACC_VL");
12636 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
12637 "Unexpected opcode after VWMACC_VL!");
12638 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
12639 "Unexpected opcode after VWMUL_VL!");
12640 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
12641 "Unexpected opcode after VWMUL_VL!");
12643 SDLoc DL(N);
12644 EVT VT = N->getValueType(0);
12645 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
12646 AddVL};
12647 return DAG.getNode(Opc, DL, VT, Ops);
12648 }
12650 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
12651 DAGCombinerInfo &DCI) const {
12652 SelectionDAG &DAG = DCI.DAG;
12654 // Helper to call SimplifyDemandedBits on an operand of N where only some low
12655 // bits are demanded. N will be added to the Worklist if it was not deleted.
12656 // Caller should return SDValue(N, 0) if this returns true.
12657 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
12658 SDValue Op = N->getOperand(OpNo);
12659 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
12660 if (!SimplifyDemandedBits(Op, Mask, DCI))
12661 return false;
12663 if (N->getOpcode() != ISD::DELETED_NODE)
12664 DCI.AddToWorklist(N);
12665 return true;
12666 };
12668 switch (N->getOpcode()) {
12669 default:
12670 break;
12671 case RISCVISD::SplitF64: {
12672 SDValue Op0 = N->getOperand(0);
12673 // If the input to SplitF64 is just BuildPairF64 then the operation is
12674 // redundant. Instead, use BuildPairF64's operands directly.
12675 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
12676 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
12678 if (Op0->isUndef()) {
12679 SDValue Lo = DAG.getUNDEF(MVT::i32);
12680 SDValue Hi = DAG.getUNDEF(MVT::i32);
12681 return DCI.CombineTo(N, Lo, Hi);
12682 }
12684 SDLoc DL(N);
12686 // It's cheaper to materialise two 32-bit integers than to load a double
12687 // from the constant pool and transfer it to integer registers through the
12688 // stack.
12689 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
12690 APInt V = C->getValueAPF().bitcastToAPInt();
12691 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
12692 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
12693 return DCI.CombineTo(N, Lo, Hi);
12694 }
12696 // This is a target-specific version of a DAGCombine performed in
12697 // DAGCombiner::visitBITCAST. It performs the equivalent of:
12698 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
12699 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
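// Only the high word of the split f64 holds the sign bit, so, illustratively,
// (SplitF64 (fneg x)) becomes (Lo, (xor Hi, 0x80000000)) below.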
12700 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
12701 !Op0.getNode()->hasOneUse())
12702 break;
12703 SDValue NewSplitF64 =
12704 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
12705 Op0.getOperand(0));
12706 SDValue Lo = NewSplitF64.getValue(0);
12707 SDValue Hi = NewSplitF64.getValue(1);
12708 APInt SignBit = APInt::getSignMask(32);
12709 if (Op0.getOpcode() == ISD::FNEG) {
12710 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
12711 DAG.getConstant(SignBit, DL, MVT::i32));
12712 return DCI.CombineTo(N, Lo, NewHi);
12714 assert(Op0.getOpcode() == ISD::FABS);
12715 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
12716 DAG.getConstant(~SignBit, DL, MVT::i32));
12717 return DCI.CombineTo(N, Lo, NewHi);
12718 }
12719 case RISCVISD::SLLW:
12720 case RISCVISD::SRAW:
12721 case RISCVISD::SRLW:
12722 case RISCVISD::RORW:
12723 case RISCVISD::ROLW: {
12724 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
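// For instance (illustrative), a shift amount of (and y, 31) can have the AND
// removed here, because only the low 5 bits of the amount are demanded.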
12725 if (SimplifyDemandedLowBitsHelper(0, 32) ||
12726 SimplifyDemandedLowBitsHelper(1, 5))
12727 return SDValue(N, 0);
12729 break;
12730 }
12731 case RISCVISD::CLZW:
12732 case RISCVISD::CTZW: {
12733 // Only the lower 32 bits of the first operand are read
12734 if (SimplifyDemandedLowBitsHelper(0, 32))
12735 return SDValue(N, 0);
12736 break;
12737 }
12738 case RISCVISD::FMV_W_X_RV64: {
12739 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
12740 // conversion is unnecessary and can be replaced with the
12741 // FMV_X_ANYEXTW_RV64 operand.
12742 SDValue Op0 = N->getOperand(0);
12743 if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
12744 return Op0.getOperand(0);
12745 break;
12746 }
12747 case RISCVISD::FMV_X_ANYEXTH:
12748 case RISCVISD::FMV_X_ANYEXTW_RV64: {
12749 SDLoc DL(N);
12750 SDValue Op0 = N->getOperand(0);
12751 MVT VT = N->getSimpleValueType(0);
12752 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
12753 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
12754 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
12755 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
12756 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
12757 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
12758 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
12759 assert(Op0.getOperand(0).getValueType() == VT &&
12760 "Unexpected value type!");
12761 return Op0.getOperand(0);
12762 }
12764 // This is a target-specific version of a DAGCombine performed in
12765 // DAGCombiner::visitBITCAST. It performs the equivalent of:
12766 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
12767 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
12768 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
12769 !Op0.getNode()->hasOneUse())
12770 break;
12771 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
12772 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
12773 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
12774 if (Op0.getOpcode() == ISD::FNEG)
12775 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
12776 DAG.getConstant(SignBit, DL, VT));
12778 assert(Op0.getOpcode() == ISD::FABS);
12779 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
12780 DAG.getConstant(~SignBit, DL, VT));
12781 }
12782 case ISD::ADD:
12783 return performADDCombine(N, DAG, Subtarget);
12784 case ISD::SUB:
12785 return performSUBCombine(N, DAG, Subtarget);
12786 case ISD::AND:
12787 return performANDCombine(N, DCI, Subtarget);
12788 case ISD::OR:
12789 return performORCombine(N, DCI, Subtarget);
12790 case ISD::XOR:
12791 return performXORCombine(N, DAG, Subtarget);
12792 case ISD::FADD:
12793 case ISD::UMAX:
12794 case ISD::UMIN:
12795 case ISD::SMAX:
12796 case ISD::SMIN:
12797 case ISD::FMAXNUM:
12798 case ISD::FMINNUM:
12799 return combineBinOpToReduce(N, DAG, Subtarget);
12800 case ISD::SETCC:
12801 return performSETCCCombine(N, DAG, Subtarget);
12802 case ISD::SIGN_EXTEND_INREG:
12803 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
12804 case ISD::ZERO_EXTEND:
12805 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
12806 // type legalization. This is safe because fp_to_uint produces poison if
12807 // the result doesn't fit.
12808 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
12809 SDValue Src = N->getOperand(0);
12810 if (Src.getOpcode() == ISD::FP_TO_UINT &&
12811 isTypeLegal(Src.getOperand(0).getValueType()))
12812 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
12813 Src.getOperand(0));
12814 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
12815 isTypeLegal(Src.getOperand(1).getValueType())) {
12816 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
12817 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
12818 Src.getOperand(0), Src.getOperand(1));
12819 DCI.CombineTo(N, Res);
12820 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
12821 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
12822 return SDValue(N, 0); // Return N so it doesn't get rechecked.
12823 }
12824 }
12825 return SDValue();
12826 case ISD::TRUNCATE:
12827 return performTRUNCATECombine(N, DAG, Subtarget);
12828 case ISD::SELECT:
12829 return performSELECTCombine(N, DAG, Subtarget);
12830 case RISCVISD::CZERO_EQZ:
12831 case RISCVISD::CZERO_NEZ:
12832 // czero_eq X, (xor Y, 1) -> czero_ne X, Y if Y is 0 or 1.
12833 // czero_ne X, (xor Y, 1) -> czero_eq X, Y if Y is 0 or 1.
12834 if (N->getOperand(1).getOpcode() == ISD::XOR &&
12835 isOneConstant(N->getOperand(1).getOperand(1))) {
12836 SDValue Cond = N->getOperand(1).getOperand(0);
12837 APInt Mask = APInt::getBitsSetFrom(Cond.getValueSizeInBits(), 1);
12838 if (DAG.MaskedValueIsZero(Cond, Mask)) {
12839 unsigned NewOpc = N->getOpcode() == RISCVISD::CZERO_EQZ
12840 ? RISCVISD::CZERO_NEZ
12841 : RISCVISD::CZERO_EQZ;
12842 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0),
12843 N->getOperand(0), Cond);
12844 }
12845 }
12846 return SDValue();
12848 case RISCVISD::SELECT_CC: {
12850 SDValue LHS = N->getOperand(0);
12851 SDValue RHS = N->getOperand(1);
12852 SDValue CC = N->getOperand(2);
12853 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
12854 SDValue TrueV = N->getOperand(3);
12855 SDValue FalseV = N->getOperand(4);
12856 SDLoc DL(N);
12857 EVT VT = N->getValueType(0);
12859 // If the True and False values are the same, we don't need a select_cc.
12860 if (TrueV == FalseV)
12861 return TrueV;
12863 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
12864 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
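// Illustrative RV64 lowering for y = 3, z = 7:
//   srai a1, a0, 63   # a1 = (x < 0) ? -1 : 0
//   andi a1, a1, -4   # a1 = (x < 0) ? (3 - 7) : 0
//   addi a0, a1, 7    # a0 = (x < 0) ? 3 : 7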
12865 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
12866 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
12867 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
12868 if (CCVal == ISD::CondCode::SETGE)
12869 std::swap(TrueV, FalseV);
12871 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
12872 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
12873 // Only handle simm12; if a constant or the difference of the constants is
12874 // outside that range, leave it to later optimizations.
12875 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
12876 isInt<12>(TrueSImm - FalseSImm)) {
12877 SDValue SRA =
12878 DAG.getNode(ISD::SRA, DL, VT, LHS,
12879 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
12880 SDValue AND =
12881 DAG.getNode(ISD::AND, DL, VT, SRA,
12882 DAG.getConstant(TrueSImm - FalseSImm, DL, VT));
12883 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
12884 }
12886 if (CCVal == ISD::CondCode::SETGE)
12887 std::swap(TrueV, FalseV);
12888 }
12890 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
12891 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
12892 {LHS, RHS, CC, TrueV, FalseV});
12894 if (!Subtarget.hasShortForwardBranchOpt()) {
12895 // (select c, -1, y) -> -c | y
12896 if (isAllOnesConstant(TrueV)) {
12897 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
12898 SDValue Neg = DAG.getNegative(C, DL, VT);
12899 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
12900 }
12901 // (select c, y, -1) -> -!c | y
12902 if (isAllOnesConstant(FalseV)) {
12903 SDValue C =
12904 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
12905 SDValue Neg = DAG.getNegative(C, DL, VT);
12906 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
12907 }
12909 // (select c, 0, y) -> -!c & y
12910 if (isNullConstant(TrueV)) {
12911 SDValue C =
12912 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
12913 SDValue Neg = DAG.getNegative(C, DL, VT);
12914 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
12915 }
12916 // (select c, y, 0) -> -c & y
12917 if (isNullConstant(FalseV)) {
12918 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
12919 SDValue Neg = DAG.getNegative(C, DL, VT);
12920 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
12921 }
12922 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
12923 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
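// This holds for any x (illustrative): when x != 0 the setcc contributes 0 and
// the result is x; when x == 0 it contributes 1, producing the required 1.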
12924 if (((isOneConstant(FalseV) && LHS == TrueV &&
12925 CCVal == ISD::CondCode::SETNE) ||
12926 (isOneConstant(TrueV) && LHS == FalseV &&
12927 CCVal == ISD::CondCode::SETEQ)) &&
12928 isNullConstant(RHS)) {
12929 // LHS is used twice below, so freeze it to be safe.
12930 LHS = DAG.getFreeze(LHS);
12931 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
12932 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
12933 }
12934 }
12936 return SDValue();
12937 }
12938 case RISCVISD::BR_CC: {
12939 SDValue LHS = N->getOperand(1);
12940 SDValue RHS = N->getOperand(2);
12941 SDValue CC = N->getOperand(3);
12943 SDLoc DL(N);
12944 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
12945 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
12946 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
12948 return SDValue();
12949 }
12950 case ISD::BITREVERSE:
12951 return performBITREVERSECombine(N, DAG, Subtarget);
12952 case ISD::FP_TO_SINT:
12953 case ISD::FP_TO_UINT:
12954 return performFP_TO_INTCombine(N, DCI, Subtarget);
12955 case ISD::FP_TO_SINT_SAT:
12956 case ISD::FP_TO_UINT_SAT:
12957 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
12958 case ISD::FCOPYSIGN: {
12959 EVT VT = N->getValueType(0);
12960 if (!VT.isVector())
12961 break;
12962 // There is a form of VFSGNJ which injects the negated sign of its second
12963 // operand. Try and bubble any FNEG up after the extend/round to produce
12964 // this optimized pattern. Avoid modifying cases where FP_ROUND and
12965 // the FNEG have other uses.
12966 SDValue In2 = N->getOperand(1);
12967 // Avoid cases where the extend/round has multiple uses, as duplicating
12968 // those is typically more expensive than removing a fneg.
12969 if (!In2.hasOneUse())
12970 break;
12971 if (In2.getOpcode() != ISD::FP_EXTEND &&
12972 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
12973 break;
12974 In2 = In2.getOperand(0);
12975 if (In2.getOpcode() != ISD::FNEG)
12976 break;
12977 SDLoc DL(N);
12978 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
12979 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
12980 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
12981 }
12982 case ISD::MGATHER:
12983 case ISD::MSCATTER:
12984 case ISD::VP_GATHER:
12985 case ISD::VP_SCATTER: {
12986 if (!DCI.isBeforeLegalize())
12987 break;
12988 SDValue Index, ScaleOp;
12989 bool IsIndexSigned = false;
12990 if (const auto *VPGSN = dyn_cast<VPGatherScatterSDNode>(N)) {
12991 Index = VPGSN->getIndex();
12992 ScaleOp = VPGSN->getScale();
12993 IsIndexSigned = VPGSN->isIndexSigned();
12994 assert(!VPGSN->isIndexScaled() &&
12995 "Scaled gather/scatter should not be formed");
12997 const auto *MGSN = cast<MaskedGatherScatterSDNode>(N);
12998 Index = MGSN->getIndex();
12999 ScaleOp = MGSN->getScale();
13000 IsIndexSigned = MGSN->isIndexSigned();
13001 assert(!MGSN->isIndexScaled() &&
13002 "Scaled gather/scatter should not be formed");
13005 EVT IndexVT = Index.getValueType();
13006 MVT XLenVT = Subtarget.getXLenVT();
13007 // RISC-V indexed loads only support the "unsigned unscaled" addressing
13008 // mode, so anything else must be manually legalized.
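// For instance (illustrative), a signed i16 index vector is widened below with
// SIGN_EXTEND to XLenVT elements, after which treating it as unsigned is safe.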
13009 bool NeedsIdxLegalization =
13010 (IsIndexSigned && IndexVT.getVectorElementType().bitsLT(XLenVT));
13011 if (!NeedsIdxLegalization)
13012 break;
13014 SDLoc DL(N);
13016 // Any index legalization should first promote to XLenVT, so we don't lose
13017 // bits when scaling. This may create an illegal index type so we let
13018 // LLVM's legalization take care of the splitting.
13019 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
13020 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
13021 IndexVT = IndexVT.changeVectorElementType(XLenVT);
13022 Index = DAG.getNode(IsIndexSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
13023 DL, IndexVT, Index);
13024 }
13026 ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_SCALED;
13027 if (const auto *VPGN = dyn_cast<VPGatherSDNode>(N))
13028 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
13029 {VPGN->getChain(), VPGN->getBasePtr(), Index,
13030 ScaleOp, VPGN->getMask(),
13031 VPGN->getVectorLength()},
13032 VPGN->getMemOperand(), NewIndexTy);
13033 if (const auto *VPSN = dyn_cast<VPScatterSDNode>(N))
13034 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
13035 {VPSN->getChain(), VPSN->getValue(),
13036 VPSN->getBasePtr(), Index, ScaleOp,
13037 VPSN->getMask(), VPSN->getVectorLength()},
13038 VPSN->getMemOperand(), NewIndexTy);
13039 if (const auto *MGN = dyn_cast<MaskedGatherSDNode>(N))
13040 return DAG.getMaskedGather(
13041 N->getVTList(), MGN->getMemoryVT(), DL,
13042 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
13043 MGN->getBasePtr(), Index, ScaleOp},
13044 MGN->getMemOperand(), NewIndexTy, MGN->getExtensionType());
13045 const auto *MSN = cast<MaskedScatterSDNode>(N);
13046 return DAG.getMaskedScatter(
13047 N->getVTList(), MSN->getMemoryVT(), DL,
13048 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
13049 Index, ScaleOp},
13050 MSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore());
13051 }
13052 case RISCVISD::SRA_VL:
13053 case RISCVISD::SRL_VL:
13054 case RISCVISD::SHL_VL: {
13055 SDValue ShAmt = N->getOperand(1);
13056 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
13057 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
13058 SDLoc DL(N);
13059 SDValue VL = N->getOperand(3);
13060 EVT VT = N->getValueType(0);
13061 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
13062 ShAmt.getOperand(1), VL);
13063 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
13064 N->getOperand(2), N->getOperand(3), N->getOperand(4));
13065 }
13066 break;
13067 }
13068 case ISD::SRA:
13069 if (SDValue V = performSRACombine(N, DAG, Subtarget))
13070 return V;
13071 [[fallthrough]];
13072 case ISD::SRL:
13073 case ISD::SHL: {
13074 SDValue ShAmt = N->getOperand(1);
13075 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
13076 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
13077 SDLoc DL(N);
13078 EVT VT = N->getValueType(0);
13079 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
13080 ShAmt.getOperand(1),
13081 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
13082 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
13083 }
13084 break;
13085 }
13086 case RISCVISD::ADD_VL:
13087 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI))
13088 return V;
13089 return combineToVWMACC(N, DAG, Subtarget);
13090 case RISCVISD::SUB_VL:
13091 case RISCVISD::VWADD_W_VL:
13092 case RISCVISD::VWADDU_W_VL:
13093 case RISCVISD::VWSUB_W_VL:
13094 case RISCVISD::VWSUBU_W_VL:
13095 case RISCVISD::MUL_VL:
13096 return combineBinOp_VLToVWBinOp_VL(N, DCI);
13097 case RISCVISD::VFMADD_VL:
13098 case RISCVISD::VFNMADD_VL:
13099 case RISCVISD::VFMSUB_VL:
13100 case RISCVISD::VFNMSUB_VL:
13101 case RISCVISD::STRICT_VFMADD_VL:
13102 case RISCVISD::STRICT_VFNMADD_VL:
13103 case RISCVISD::STRICT_VFMSUB_VL:
13104 case RISCVISD::STRICT_VFNMSUB_VL:
13105 return performVFMADD_VLCombine(N, DAG);
13106 case RISCVISD::FMUL_VL:
13107 return performVFMUL_VLCombine(N, DAG);
13108 case RISCVISD::FADD_VL:
13109 case RISCVISD::FSUB_VL:
13110 return performFADDSUB_VLCombine(N, DAG);
13111 case ISD::LOAD:
13112 case ISD::STORE: {
13113 if (DCI.isAfterLegalizeDAG())
13114 if (SDValue V = performMemPairCombine(N, DCI))
13115 return V;
13117 if (N->getOpcode() != ISD::STORE)
13118 break;
13120 auto *Store = cast<StoreSDNode>(N);
13121 SDValue Chain = Store->getChain();
13122 EVT MemVT = Store->getMemoryVT();
13123 SDValue Val = Store->getValue();
13124 SDLoc DL(N);
13126 bool IsScalarizable =
13127 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
13128 Store->isSimple() &&
13129 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
13130 isPowerOf2_64(MemVT.getSizeInBits()) &&
13131 MemVT.getSizeInBits() <= Subtarget.getXLen();
13133 // If sufficiently aligned we can scalarize stores of constant vectors of
13134 // any power-of-two size up to XLen bits, provided that they aren't too
13135 // expensive to materialize.
13136 // vsetivli zero, 2, e8, m1, ta, ma
13137 // vmv.v.i v8, 4
13138 // vse8.v v8, (a0)
13139 // ->
13140 // li a1, 1028
13141 // sh a1, 0(a0)
13142 if (DCI.isBeforeLegalize() && IsScalarizable &&
13143 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
13144 // Get the constant vector bits
13145 APInt NewC(Val.getValueSizeInBits(), 0);
13146 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
13147 if (Val.getOperand(i).isUndef())
13148 continue;
13149 NewC.insertBits(Val.getConstantOperandAPInt(i),
13150 i * Val.getScalarValueSizeInBits());
13151 }
13152 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
13154 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(),
13155 Subtarget.getFeatureBits(), true) <= 2 &&
13156 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
13157 NewVT, *Store->getMemOperand())) {
13158 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
13159 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
13160 Store->getPointerInfo(), Store->getOriginalAlign(),
13161 Store->getMemOperand()->getFlags());
13162 }
13163 }
13165 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
13166 // vsetivli zero, 2, e16, m1, ta, ma
13167 // vle16.v v8, (a0)
13168 // vse16.v v8, (a1)
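// -> (illustrative scalarized form of the 32-bit copy above)
// lw a2, 0(a0)
// sw a2, 0(a1)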
13169 if (auto *L = dyn_cast<LoadSDNode>(Val);
13170 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
13171 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
13172 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
13173 L->getMemoryVT() == MemVT) {
13174 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
13175 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
13176 NewVT, *Store->getMemOperand()) &&
13177 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
13178 NewVT, *L->getMemOperand())) {
13179 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
13180 L->getPointerInfo(), L->getOriginalAlign(),
13181 L->getMemOperand()->getFlags());
13182 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
13183 Store->getPointerInfo(), Store->getOriginalAlign(),
13184 Store->getMemOperand()->getFlags());
13185 }
13186 }
13188 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
13189 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
13190 // any illegal types.
13191 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
13192 (DCI.isAfterLegalizeDAG() &&
13193 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13194 isNullConstant(Val.getOperand(1)))) {
13195 SDValue Src = Val.getOperand(0);
13196 MVT VecVT = Src.getSimpleValueType();
13197 // VecVT should be scalable and memory VT should match the element type.
13198 if (VecVT.isScalableVector() &&
13199 MemVT == VecVT.getVectorElementType()) {
13201 MVT MaskVT = getMaskTypeFor(VecVT);
13202 return DAG.getStoreVP(
13203 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
13204 DAG.getConstant(1, DL, MaskVT),
13205 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
13206 Store->getMemOperand(), Store->getAddressingMode(),
13207 Store->isTruncatingStore(), /*IsCompress*/ false);
13208 }
13209 }
13211 break;
13212 }
13213 case ISD::SPLAT_VECTOR: {
13214 EVT VT = N->getValueType(0);
13215 // Only perform this combine on legal MVT types.
13216 if (!isTypeLegal(VT))
13217 break;
13218 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
13219 DAG, Subtarget))
13220 return Gather;
13221 break;
13222 }
13223 case ISD::CONCAT_VECTORS:
13224 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
13225 return V;
13226 break;
13227 case RISCVISD::VMV_V_X_VL: {
13228 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
13229 // scalar input.
13230 unsigned ScalarSize = N->getOperand(1).getValueSizeInBits();
13231 unsigned EltWidth = N->getValueType(0).getScalarSizeInBits();
13232 if (ScalarSize > EltWidth && N->getOperand(0).isUndef())
13233 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
13234 return SDValue(N, 0);
13236 break;
13237 }
13238 case RISCVISD::VFMV_S_F_VL: {
13239 SDValue Src = N->getOperand(1);
13240 // Try to remove vector->scalar->vector if the scalar->vector is inserting
13241 // into an undef vector.
13242 // TODO: Could use a vslide or vmv.v.v for non-undef.
13243 if (N->getOperand(0).isUndef() &&
13244 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13245 isNullConstant(Src.getOperand(1)) &&
13246 Src.getOperand(0).getValueType().isScalableVector()) {
13247 EVT VT = N->getValueType(0);
13248 EVT SrcVT = Src.getOperand(0).getValueType();
13249 assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
13250 // Widths match, just return the original vector.
13251 if (SrcVT == VT)
13252 return Src.getOperand(0);
13253 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
13254 }
13255 break;
13256 }
13257 case ISD::INTRINSIC_VOID:
13258 case ISD::INTRINSIC_W_CHAIN:
13259 case ISD::INTRINSIC_WO_CHAIN: {
13260 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
13261 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
13262 switch (IntNo) {
13263 // By default we do not combine any intrinsic.
13264 default:
13265 return SDValue();
13266 case Intrinsic::riscv_vcpop:
13267 case Intrinsic::riscv_vcpop_mask:
13268 case Intrinsic::riscv_vfirst:
13269 case Intrinsic::riscv_vfirst_mask: {
13270 SDValue VL = N->getOperand(2);
13271 if (IntNo == Intrinsic::riscv_vcpop_mask ||
13272 IntNo == Intrinsic::riscv_vfirst_mask)
13273 VL = N->getOperand(3);
13274 if (!isNullConstant(VL))
13275 return SDValue();
13276 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
13277 SDLoc DL(N);
13278 EVT VT = N->getValueType(0);
13279 if (IntNo == Intrinsic::riscv_vfirst ||
13280 IntNo == Intrinsic::riscv_vfirst_mask)
13281 return DAG.getConstant(-1, DL, VT);
13282 return DAG.getConstant(0, DL, VT);
13283 }
13284 case Intrinsic::riscv_vloxei:
13285 case Intrinsic::riscv_vloxei_mask:
13286 case Intrinsic::riscv_vluxei:
13287 case Intrinsic::riscv_vluxei_mask:
13288 case Intrinsic::riscv_vsoxei:
13289 case Intrinsic::riscv_vsoxei_mask:
13290 case Intrinsic::riscv_vsuxei:
13291 case Intrinsic::riscv_vsuxei_mask:
13292 if (SDValue V = narrowIndex(N->getOperand(4), DAG)) {
13293 SmallVector<SDValue, 8> Ops(N->ops());
13294 Ops[4] = V;
13295 const auto *MemSD = cast<MemIntrinsicSDNode>(N);
13296 return DAG.getMemIntrinsicNode(N->getOpcode(), SDLoc(N), N->getVTList(),
13297 Ops, MemSD->getMemoryVT(),
13298 MemSD->getMemOperand());
13299 }
13300 return SDValue();
13301 }
13302 }
13303 case ISD::BITCAST: {
13304 assert(Subtarget.useRVVForFixedLengthVectors());
13305 SDValue N0 = N->getOperand(0);
13306 EVT VT = N->getValueType(0);
13307 EVT SrcVT = N0.getValueType();
13308 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
13309 // type, widen both sides to avoid a trip through memory.
13310 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
13311 VT.isScalarInteger()) {
13312 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
13313 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
13314 Ops[0] = N0;
13315 SDLoc DL(N);
13316 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
13317 N0 = DAG.getBitcast(MVT::i8, N0);
13318 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
13319 }
13320 break;
13321 }
13322 }
13324 return SDValue();
13325 }
13328 bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
13329 EVT XVT, unsigned KeptBits) const {
13330 // For vectors, we don't have a preference.
13331 if (XVT.isVector())
13332 return false;
13334 if (XVT != MVT::i32 && XVT != MVT::i64)
13335 return false;
13337 // We can use sext.w for RV64 or an srai 31 on RV32.
13338 if (KeptBits == 32 || KeptBits == 64)
13339 return true;
13341 // With Zbb we can use sext.h/sext.b.
13342 return Subtarget.hasStdExtZbb() &&
13343 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
13344 KeptBits == 16);
13345 }
13347 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
13348 const SDNode *N, CombineLevel Level) const {
13349 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
13350 N->getOpcode() == ISD::SRL) &&
13351 "Expected shift op");
13353 // The following folds are only desirable if `(OP _, c1 << c2)` can be
13354 // materialised in fewer instructions than `(OP _, c1)`:
13356 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
13357 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
13358 SDValue N0 = N->getOperand(0);
13359 EVT Ty = N0.getValueType();
13360 if (Ty.isScalarInteger() &&
13361 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
13362 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13363 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
13364 if (C1 && C2) {
13365 const APInt &C1Int = C1->getAPIntValue();
13366 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
13368 // We can materialise `c1 << c2` into an add immediate, so it's "free",
13369 // and the combine should happen, to potentially allow further combines
13370 // later.
13371 if (ShiftedC1Int.getSignificantBits() <= 64 &&
13372 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
13373 return true;
13375 // We can materialise `c1` in an add immediate, so it's "free", and the
13376 // combine should be prevented.
13377 if (C1Int.getSignificantBits() <= 64 &&
13378 isLegalAddImmediate(C1Int.getSExtValue()))
13379 return false;
13381 // Neither constant will fit into an immediate, so find materialisation
13382 // costs.
13383 int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
13384 Subtarget.getFeatureBits(),
13385 /*CompressionCost*/true);
13386 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
13387 ShiftedC1Int, Ty.getSizeInBits(), Subtarget.getFeatureBits(),
13388 /*CompressionCost*/true);
13390 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
13391 // combine should be prevented.
13392 if (C1Cost < ShiftedC1Cost)
13393 return false;
13394 }
13396 return true;
13397 }
13399 bool RISCVTargetLowering::targetShrinkDemandedConstant(
13400 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
13401 TargetLoweringOpt &TLO) const {
13402 // Delay this optimization as late as possible.
13403 if (!TLO.LegalOps)
13404 return false;
13406 EVT VT = Op.getValueType();
13407 if (VT.isVector())
13408 return false;
13410 unsigned Opcode = Op.getOpcode();
13411 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
13414 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
13415 if (!C)
13416 return false;
13418 const APInt &Mask = C->getAPIntValue();
13420 // Clear all non-demanded bits initially.
13421 APInt ShrunkMask = Mask & DemandedBits;
13423 // Try to make a smaller immediate by setting undemanded bits.
13425 APInt ExpandedMask = Mask | ~DemandedBits;
13427 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
13428 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
13430 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
13431 if (NewMask == Mask)
13432 return true;
13433 SDLoc DL(Op);
13434 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
13435 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
13436 Op.getOperand(0), NewC);
13437 return TLO.CombineTo(Op, NewOp);
13438 };
13440 // If the shrunk mask fits in sign extended 12 bits, let the target
13441 // independent code apply it.
13442 if (ShrunkMask.isSignedIntN(12))
13443 return false;
13445 // And has a few special cases for zext.
13446 if (Opcode == ISD::AND) {
13447 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
13448 // otherwise use SLLI + SRLI.
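// For instance (illustrative), on RV64 (and x, 0x2FFFF) with only the low 16
// bits demanded can be shrunk to (and x, 0xffff) and selected as zext.h.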
13449 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
13450 if (IsLegalMask(NewMask))
13451 return UseMask(NewMask);
13453 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
13454 if (VT == MVT::i64) {
13455 APInt NewMask = APInt(64, 0xffffffff);
13456 if (IsLegalMask(NewMask))
13457 return UseMask(NewMask);
13458 }
13459 }
13461 // For the remaining optimizations, we need to be able to make a negative
13462 // number through a combination of mask and undemanded bits.
13463 if (!ExpandedMask.isNegative())
13464 return false;
13466 // What is the fewest number of bits we need to represent the negative number.
13467 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
13469 // Try to make a 12 bit negative immediate. If that fails try to make a 32
13470 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
13471 // If we can't create a simm12, we shouldn't change opaque constants.
13472 APInt NewMask = ShrunkMask;
13473 if (MinSignedBits <= 12)
13474 NewMask.setBitsFrom(11);
13475 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
13476 NewMask.setBitsFrom(31);
13477 else
13478 return false;
13480 // Check that our new mask is a subset of the demanded mask.
13481 assert(IsLegalMask(NewMask));
13482 return UseMask(NewMask);
13483 }
13485 static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
13486 static const uint64_t GREVMasks[] = {
13487 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
13488 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
13490 for (unsigned Stage = 0; Stage != 6; ++Stage) {
13491 unsigned Shift = 1 << Stage;
13492 if (ShAmt & Shift) {
13493 uint64_t Mask = GREVMasks[Stage];
13494 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
13495 if (IsGORC)
13496 Res |= x;
13497 x = Res;
13498 }
13499 }
13501 return x;
13502 }
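// Illustrative values: computeGREVOrGORC(0x01, 7, /*IsGORC=*/false) == 0x80,
// i.e. brev8 reverses the bits within each byte, while
// computeGREVOrGORC(0x01, 7, /*IsGORC=*/true) == 0xFF, i.e. orc.b smears any
// set bit across its byte.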
13504 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
13505 KnownBits &Known,
13506 const APInt &DemandedElts,
13507 const SelectionDAG &DAG,
13508 unsigned Depth) const {
13509 unsigned BitWidth = Known.getBitWidth();
13510 unsigned Opc = Op.getOpcode();
13511 assert((Opc >= ISD::BUILTIN_OP_END ||
13512 Opc == ISD::INTRINSIC_WO_CHAIN ||
13513 Opc == ISD::INTRINSIC_W_CHAIN ||
13514 Opc == ISD::INTRINSIC_VOID) &&
13515 "Should use MaskedValueIsZero if you don't know whether Op"
13516 " is a target node!");
13521 case RISCVISD::SELECT_CC: {
13522 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
13523 // If we don't know any bits, early out.
13524 if (Known.isUnknown())
13525 break;
13526 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
13528 // Only known if known in both the LHS and RHS.
13529 Known = Known.intersectWith(Known2);
13530 break;
13531 }
13532 case RISCVISD::CZERO_EQZ:
13533 case RISCVISD::CZERO_NEZ:
13534 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
13535 // Result is either all zero or operand 0. We can propagate zeros, but not
13536 // ones.
13537 Known.One.clearAllBits();
13538 break;
13539 case RISCVISD::REMUW: {
13540 KnownBits Known2;
13541 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
13542 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
13543 // We only care about the lower 32 bits.
13544 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
13545 // Restore the original width by sign extending.
13546 Known = Known.sext(BitWidth);
13547 break;
13548 }
13549 case RISCVISD::DIVUW: {
13550 KnownBits Known2;
13551 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
13552 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
13553 // We only care about the lower 32 bits.
13554 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
13555 // Restore the original width by sign extending.
13556 Known = Known.sext(BitWidth);
13557 break;
13558 }
13559 case RISCVISD::CTZW: {
13560 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
13561 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
13562 unsigned LowBits = llvm::bit_width(PossibleTZ);
13563 Known.Zero.setBitsFrom(LowBits);
13564 break;
13565 }
13566 case RISCVISD::CLZW: {
13567 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
13568 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
13569 unsigned LowBits = llvm::bit_width(PossibleLZ);
13570 Known.Zero.setBitsFrom(LowBits);
13571 break;
13572 }
13573 case RISCVISD::BREV8:
13574 case RISCVISD::ORC_B: {
13575 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
13576 // control value of 7 is equivalent to brev8 and orc.b.
13577 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
13578 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
13579 // To compute zeros, we need to invert the value and invert it back after.
13580 Known.Zero =
13581 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
13582 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
13583 break;
13584 }
13585 case RISCVISD::READ_VLENB: {
13586 // We can use the minimum and maximum VLEN values to bound VLENB. We
13587 // know VLEN must be a power of two.
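// Illustrative: with a fixed VLEN of 128 (min == max), VLENB is exactly 16,
// so bit 4 becomes known one and all other bits become known zero.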
13588 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
13589 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
13590 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
13591 Known.Zero.setLowBits(Log2_32(MinVLenB));
13592 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
13593 if (MaxVLenB == MinVLenB)
13594 Known.One.setBit(Log2_32(MinVLenB));
13595 break;
13596 }
13597 case RISCVISD::FPCLASS: {
13598 // fclass will only set one of the low 10 bits.
13599 Known.Zero.setBitsFrom(10);
13600 break;
13601 }
13602 case ISD::INTRINSIC_W_CHAIN:
13603 case ISD::INTRINSIC_WO_CHAIN: {
13604 unsigned IntNo =
13605 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
13606 switch (IntNo) {
13607 default:
13608 // We can't do anything for most intrinsics.
13609 break;
13610 case Intrinsic::riscv_vsetvli:
13611 case Intrinsic::riscv_vsetvlimax:
13612 // Assume that VL output is <= 65536.
13613 // TODO: Take SEW and LMUL into account.
13614 if (BitWidth > 17)
13615 Known.Zero.setBitsFrom(17);
13616 break;
13617 }
13618 break;
13619 }
13620 }
13621 }
13623 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
13624 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
13625 unsigned Depth) const {
13626 switch (Op.getOpcode()) {
13627 default:
13628 break;
13629 case RISCVISD::SELECT_CC: {
13630 unsigned Tmp =
13631 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
13632 if (Tmp == 1) return 1; // Early out.
13633 unsigned Tmp2 =
13634 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
13635 return std::min(Tmp, Tmp2);
13636 }
13637 case RISCVISD::CZERO_EQZ:
13638 case RISCVISD::CZERO_NEZ:
13639 // Output is either all zero or operand 0. We can propagate sign bit count
13640 // from operand 0.
13641 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
13642 case RISCVISD::ABSW: {
13643 // We expand this at isel to negw+max. The result will have 33 sign bits
13644 // if the input has at least 33 sign bits.
13645 unsigned Tmp =
13646 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
13647 if (Tmp < 33) return 1;
13648 return 33;
13649 }
13650 case RISCVISD::SLLW:
13651 case RISCVISD::SRAW:
13652 case RISCVISD::SRLW:
13653 case RISCVISD::DIVW:
13654 case RISCVISD::DIVUW:
13655 case RISCVISD::REMUW:
13656 case RISCVISD::ROLW:
13657 case RISCVISD::RORW:
13658 case RISCVISD::FCVT_W_RV64:
13659 case RISCVISD::FCVT_WU_RV64:
13660 case RISCVISD::STRICT_FCVT_W_RV64:
13661 case RISCVISD::STRICT_FCVT_WU_RV64:
13662 // TODO: As the result is sign-extended, this is conservatively correct. A
13663 // more precise answer could be calculated for SRAW depending on known
13664 // bits in the shift amount.
13665 return 33;
13666 case RISCVISD::VMV_X_S: {
13667 // The number of sign bits of the scalar result is computed by obtaining the
13668 // element type of the input vector operand, subtracting its width from the
13669 // XLEN, and then adding one (sign bit within the element type). If the
13670 // element type is wider than XLen, the least-significant XLEN bits are
13671 // taken.
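// Illustrative: extracting element 0 of an nxv4i16 vector on RV64 yields
// 64 - 16 + 1 = 49 known sign bits.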
13672 unsigned XLen = Subtarget.getXLen();
13673 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
13674 if (EltBits <= XLen)
13675 return XLen - EltBits + 1;
13676 break;
13677 }
13678 case ISD::INTRINSIC_W_CHAIN: {
13679 unsigned IntNo = Op.getConstantOperandVal(1);
13680 switch (IntNo) {
13681 default:
13682 break;
13683 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
13684 case Intrinsic::riscv_masked_atomicrmw_add_i64:
13685 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
13686 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
13687 case Intrinsic::riscv_masked_atomicrmw_max_i64:
13688 case Intrinsic::riscv_masked_atomicrmw_min_i64:
13689 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
13690 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
13691 case Intrinsic::riscv_masked_cmpxchg_i64:
13692 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
13693 // narrow atomic operation. These are implemented using atomic
13694 // operations at the minimum supported atomicrmw/cmpxchg width whose
13695 // result is then sign extended to XLEN. With +A, the minimum width is
13696 // 32 for both RV64 and RV32.
13697 assert(Subtarget.getXLen() == 64);
13698 assert(getMinCmpXchgSizeInBits() == 32);
13699 assert(Subtarget.hasStdExtA());
13700 return 33;
13701 }
13702 break;
13703 }
13704 }
13706 return 1;
13707 }
13708 const Constant *
13709 RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
13710 assert(Ld && "Unexpected null LoadSDNode");
13711 if (!ISD::isNormalLoad(Ld))
13712 return nullptr;
13714 SDValue Ptr = Ld->getBasePtr();
13716 // Only constant pools with no offset are supported.
13717 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
13718 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
13719 if (!CNode || CNode->isMachineConstantPoolEntry() ||
13720 CNode->getOffset() != 0)
13721 return nullptr;
13723 return CNode;
13724 };
13726 // Simple case, LLA.
13727 if (Ptr.getOpcode() == RISCVISD::LLA) {
13728 auto *CNode = GetSupportedConstantPool(Ptr);
13729 if (!CNode || CNode->getTargetFlags() != 0)
13730 return nullptr;
13732 return CNode->getConstVal();
13733 }
13735 // Look for a HI and ADD_LO pair.
13736 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
13737 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
13738 return nullptr;
13740 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
13741 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
13743 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
13744 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
13745 return nullptr;
13747 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
13748 return nullptr;
13750 return CNodeLo->getConstVal();
13751 }
13753 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
13754 MachineBasicBlock *BB) {
13755 assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");
13757 // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
13758 // Should the count have wrapped while it was being read, we need to try
13759 // again.
13761 // read:
13762 // rdcycleh x3 # load high word of cycle
13763 // rdcycle x2 # load low word of cycle
13764 // rdcycleh x4 # load high word of cycle
13765 // bne x3, x4, read # check if high word reads match, otherwise try again
13768 MachineFunction &MF = *BB->getParent();
13769 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13770 MachineFunction::iterator It = ++BB->getIterator();
13772 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
13773 MF.insert(It, LoopMBB);
13775 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
13776 MF.insert(It, DoneMBB);
13778 // Transfer the remainder of BB and its successor edges to DoneMBB.
13779 DoneMBB->splice(DoneMBB->begin(), BB,
13780 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13781 DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
13783 BB->addSuccessor(LoopMBB);
13785 MachineRegisterInfo &RegInfo = MF.getRegInfo();
13786 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
13787 Register LoReg = MI.getOperand(0).getReg();
13788 Register HiReg = MI.getOperand(1).getReg();
13789 DebugLoc DL = MI.getDebugLoc();
13791 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
13792 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
13793 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
13794 .addReg(RISCV::X0);
13795 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
13796 .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
13797 .addReg(RISCV::X0);
13798 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
13799 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
13800 .addReg(RISCV::X0);
13802 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
13803 .addReg(HiReg)
13804 .addReg(ReadAgainReg)
13805 .addMBB(LoopMBB);
13807 LoopMBB->addSuccessor(LoopMBB);
13808 LoopMBB->addSuccessor(DoneMBB);
13810 MI.eraseFromParent();
13812 return DoneMBB;
13813 }
13815 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
13816 MachineBasicBlock *BB,
13817 const RISCVSubtarget &Subtarget) {
13818 assert((MI.getOpcode() == RISCV::SplitF64Pseudo ||
13819 MI.getOpcode() == RISCV::SplitF64Pseudo_INX) &&
13820 "Unexpected instruction");
13822 MachineFunction &MF = *BB->getParent();
13823 DebugLoc DL = MI.getDebugLoc();
13824 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
13825 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
13826 Register LoReg = MI.getOperand(0).getReg();
13827 Register HiReg = MI.getOperand(1).getReg();
13828 Register SrcReg = MI.getOperand(2).getReg();
13830 const TargetRegisterClass *SrcRC = MI.getOpcode() == RISCV::SplitF64Pseudo_INX
13831 ? &RISCV::GPRPF64RegClass
13832 : &RISCV::FPR64RegClass;
13833 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
13835 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
13836 RI, Register());
13837 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
13838 MachineMemOperand *MMOLo =
13839 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
13840 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
13841 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
13842 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
13843 .addFrameIndex(FI)
13844 .addImm(0)
13845 .addMemOperand(MMOLo);
13846 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
13847 .addFrameIndex(FI)
13848 .addImm(4)
13849 .addMemOperand(MMOHi);
13850 MI.eraseFromParent(); // The pseudo instruction is gone now.
13851 return BB;
13852 }
13854 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
13855 MachineBasicBlock *BB,
13856 const RISCVSubtarget &Subtarget) {
13857 assert((MI.getOpcode() == RISCV::BuildPairF64Pseudo ||
13858 MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX) &&
13859 "Unexpected instruction");
13861 MachineFunction &MF = *BB->getParent();
13862 DebugLoc DL = MI.getDebugLoc();
13863 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
13864 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
13865 Register DstReg = MI.getOperand(0).getReg();
13866 Register LoReg = MI.getOperand(1).getReg();
13867 Register HiReg = MI.getOperand(2).getReg();
13869 const TargetRegisterClass *DstRC =
13870 MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX ? &RISCV::GPRPF64RegClass
13871 : &RISCV::FPR64RegClass;
13872 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
13874 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
13875 MachineMemOperand *MMOLo =
13876 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
13877 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
13878 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
13879 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
13880 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
13881 .addFrameIndex(FI)
13882 .addImm(0)
13883 .addMemOperand(MMOLo);
13884 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
13885 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
13886 .addFrameIndex(FI)
13887 .addImm(4)
13888 .addMemOperand(MMOHi);
13889 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
13890 MI.eraseFromParent(); // The pseudo instruction is gone now.
13891 return BB;
13892 }
13894 static bool isSelectPseudo(MachineInstr &MI) {
13895 switch (MI.getOpcode()) {
13896 default:
13897 return false;
13898 case RISCV::Select_GPR_Using_CC_GPR:
13899 case RISCV::Select_FPR16_Using_CC_GPR:
13900 case RISCV::Select_FPR16INX_Using_CC_GPR:
13901 case RISCV::Select_FPR32_Using_CC_GPR:
13902 case RISCV::Select_FPR32INX_Using_CC_GPR:
13903 case RISCV::Select_FPR64_Using_CC_GPR:
13904 case RISCV::Select_FPR64INX_Using_CC_GPR:
13905 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
13906 return true;
13907 }
13908 }
13910 static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
13911 unsigned RelOpcode, unsigned EqOpcode,
13912 const RISCVSubtarget &Subtarget) {
13913 DebugLoc DL = MI.getDebugLoc();
13914 Register DstReg = MI.getOperand(0).getReg();
13915 Register Src1Reg = MI.getOperand(1).getReg();
13916 Register Src2Reg = MI.getOperand(2).getReg();
13917 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
13918 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
13919 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
13921 // Save the current FFLAGS.
13922 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
13924 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
13925 .addReg(Src1Reg)
13926 .addReg(Src2Reg);
13927 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
13928 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
13930 // Restore the FFLAGS.
13931 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
13932 .addReg(SavedFFlags, RegState::Kill);
13934 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
13935 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
13936 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
13937 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
13938 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
13939 MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);
13941 // Erase the pseudoinstruction.
13942 MI.eraseFromParent();
13943 return BB;
13944 }
13946 static MachineBasicBlock *
13947 EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
13948 MachineBasicBlock *ThisMBB,
13949 const RISCVSubtarget &Subtarget) {
13950 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)
13951 // Without this, custom-inserter would have generated:
13963 // A: X = ...; Y = ...
13964 // B: empty
13965 // C: Z = PHI [X, A], [Y, B]
13966 // D: empty
13967 // E: PHI [X, C], [Z, D]
13969 // If we lower both Select_FPRX_ in a single step, we can instead generate:
13981 // A: X = ...; Y = ...
13982 // D: empty
13983 // E: PHI [X, A], [X, C], [Y, D]
13985 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
13986 const DebugLoc &DL = First.getDebugLoc();
13987 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
13988 MachineFunction *F = ThisMBB->getParent();
13989 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
13990 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
13991 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13992 MachineFunction::iterator It = ++ThisMBB->getIterator();
13993 F->insert(It, FirstMBB);
13994 F->insert(It, SecondMBB);
13995 F->insert(It, SinkMBB);
13997 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
13998 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
13999 std::next(MachineBasicBlock::iterator(First)),
14001 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
14003 // Fallthrough block for ThisMBB.
14004 ThisMBB->addSuccessor(FirstMBB);
14005 // Fallthrough block for FirstMBB.
14006 FirstMBB->addSuccessor(SecondMBB);
14007 ThisMBB->addSuccessor(SinkMBB);
14008 FirstMBB->addSuccessor(SinkMBB);
14009 // This is fallthrough.
14010 SecondMBB->addSuccessor(SinkMBB);
14012 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
14013 Register FLHS = First.getOperand(1).getReg();
14014 Register FRHS = First.getOperand(2).getReg();
14015 // Insert appropriate branch.
14016 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
14017 .addReg(FLHS)
14018 .addReg(FRHS)
14019 .addMBB(SinkMBB);
14021 Register SLHS = Second.getOperand(1).getReg();
14022 Register SRHS = Second.getOperand(2).getReg();
14023 Register Op1Reg4 = First.getOperand(4).getReg();
14024 Register Op1Reg5 = First.getOperand(5).getReg();
14026 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
14027 // Insert appropriate branch.
14028 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
14029 .addReg(SLHS)
14030 .addReg(SRHS)
14031 .addMBB(SinkMBB);
14033 Register DestReg = Second.getOperand(0).getReg();
14034 Register Op2Reg4 = Second.getOperand(4).getReg();
14035 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
14036 .addReg(Op2Reg4)
14037 .addMBB(ThisMBB)
14038 .addReg(Op1Reg4)
14039 .addMBB(FirstMBB)
14040 .addReg(Op1Reg5)
14041 .addMBB(SecondMBB);
14043 // Now remove the Select_FPRX_s.
14044 First.eraseFromParent();
14045 Second.eraseFromParent();
14047 return SinkMBB;
14048 }
14049 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
14050 MachineBasicBlock *BB,
14051 const RISCVSubtarget &Subtarget) {
14052 // To "insert" Select_* instructions, we actually have to insert the triangle
14053 // control-flow pattern. The incoming instructions know the destination vreg
14054 // to set, the condition code register to branch on, the true/false values to
14055 // select between, and the condcode to use to select the appropriate branch.
14057 // We produce the following control flow:
14064 // When we find a sequence of selects we attempt to optimize their emission
14065 // by sharing the control flow. Currently we only handle cases where we have
14066 // multiple selects with the exact same condition (same LHS, RHS and CC).
14067 // The selects may be interleaved with other instructions if the other
14068 // instructions meet some requirements we deem safe:
14069 // - They are not pseudo instructions.
14070 // - They are debug instructions. Otherwise,
14071 // - They do not have side-effects, do not access memory and their inputs do
14072 // not depend on the results of the select pseudo-instructions.
14073 // The TrueV/FalseV operands of the selects cannot depend on the result of
14074 // previous selects in the sequence.
14075 // These conditions could be further relaxed. See the X86 target for a
14076 // related approach and more information.
14078 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
14079 // is checked here and handled by a separate function -
14080 // EmitLoweredCascadedSelect.
14081 Register LHS = MI.getOperand(1).getReg();
14082 Register RHS = MI.getOperand(2).getReg();
14083 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
14085 SmallVector<MachineInstr *, 4> SelectDebugValues;
14086 SmallSet<Register, 4> SelectDests;
14087 SelectDests.insert(MI.getOperand(0).getReg());
14089 MachineInstr *LastSelectPseudo = &MI;
14090 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
14091 if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR && Next != BB->end() &&
14092 Next->getOpcode() == MI.getOpcode() &&
14093 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
14094 Next->getOperand(5).isKill()) {
14095 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
14096 }
14098 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
14099 SequenceMBBI != E; ++SequenceMBBI) {
14100 if (SequenceMBBI->isDebugInstr())
14101 continue;
14102 if (isSelectPseudo(*SequenceMBBI)) {
14103 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
14104 SequenceMBBI->getOperand(2).getReg() != RHS ||
14105 SequenceMBBI->getOperand(3).getImm() != CC ||
14106 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
14107 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
14108 break;
14109 LastSelectPseudo = &*SequenceMBBI;
14110 SequenceMBBI->collectDebugValues(SelectDebugValues);
14111 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
14112 continue;
14113 }
14114 if (SequenceMBBI->hasUnmodeledSideEffects() ||
14115 SequenceMBBI->mayLoadOrStore() ||
14116 SequenceMBBI->usesCustomInsertionHook())
14117 break;
14118 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
14119 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
14120 }))
14121 break;
14122 }
14124 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
14125 const BasicBlock *LLVM_BB = BB->getBasicBlock();
14126 DebugLoc DL = MI.getDebugLoc();
14127 MachineFunction::iterator I = ++BB->getIterator();
14129 MachineBasicBlock *HeadMBB = BB;
14130 MachineFunction *F = BB->getParent();
14131 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
14132 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
14134 F->insert(I, IfFalseMBB);
14135 F->insert(I, TailMBB);
14137 // Transfer debug instructions associated with the selects to TailMBB.
14138 for (MachineInstr *DebugInstr : SelectDebugValues) {
14139 TailMBB->push_back(DebugInstr->removeFromParent());
14140 }
14142 // Move all instructions after the sequence to TailMBB.
14143 TailMBB->splice(TailMBB->end(), HeadMBB,
14144 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
14145 // Update machine-CFG edges by transferring all successors of the current
14146 // block to the new block which will contain the Phi nodes for the selects.
14147 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
14148 // Set the successors for HeadMBB.
14149 HeadMBB->addSuccessor(IfFalseMBB);
14150 HeadMBB->addSuccessor(TailMBB);
14152 // Insert appropriate branch.
14153 BuildMI(HeadMBB, DL, TII.getBrCond(CC))
14154 .addReg(LHS)
14155 .addReg(RHS)
14156 .addMBB(TailMBB);
14158 // IfFalseMBB just falls through to TailMBB.
14159 IfFalseMBB->addSuccessor(TailMBB);
14161 // Create PHIs for all of the select pseudo-instructions.
14162 auto SelectMBBI = MI.getIterator();
14163 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
14164 auto InsertionPoint = TailMBB->begin();
14165 while (SelectMBBI != SelectEnd) {
14166 auto Next = std::next(SelectMBBI);
14167 if (isSelectPseudo(*SelectMBBI)) {
14168 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
14169 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
14170 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
14171 .addReg(SelectMBBI->getOperand(4).getReg())
14173 .addReg(SelectMBBI->getOperand(5).getReg())
14174 .addMBB(IfFalseMBB);
14175 SelectMBBI->eraseFromParent();
14176 }
14177 SelectMBBI = Next;
14178 }
14180 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
14181 return TailMBB;
14182 }
14184 static MachineBasicBlock *emitVFCVT_RM(MachineInstr &MI, MachineBasicBlock *BB,
14185 unsigned Opcode) {
14186 DebugLoc DL = MI.getDebugLoc();
14188 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
14190 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
14191 Register SavedFRM = MRI.createVirtualRegister(&RISCV::GPRRegClass);
14193 assert(MI.getNumOperands() == 8 || MI.getNumOperands() == 7);
14194 unsigned FRMIdx = MI.getNumOperands() == 8 ? 4 : 3;
14196 // Update FRM and save the old value.
14197 BuildMI(*BB, MI, DL, TII.get(RISCV::SwapFRMImm), SavedFRM)
14198 .addImm(MI.getOperand(FRMIdx).getImm());
14200 // Emit an VFCVT with the FRM == DYN
14201 auto MIB = BuildMI(*BB, MI, DL, TII.get(Opcode));
14203 for (unsigned I = 0; I < MI.getNumOperands(); I++)
14204 if (I != FRMIdx)
14205 MIB = MIB.add(MI.getOperand(I));
14207 MIB = MIB.add(MachineOperand::CreateImm(7)); // frm = DYN
14209 MIB.add(MachineOperand::CreateReg(RISCV::FRM,
14210 /*IsDef*/ false,
14211 /*IsImp*/ true));
14213 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
14214 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
14217 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFRM))
14218 .addReg(SavedFRM, RegState::Kill);
14220 // Erase the pseudoinstruction.
14221 MI.eraseFromParent();
14223 return BB;
14224 }
14225 static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
14226 MachineBasicBlock *BB,
14227 unsigned CVTXOpc,
14228 unsigned CVTFOpc) {
14229 DebugLoc DL = MI.getDebugLoc();
14231 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
14233 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
14234 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
14236 // Save the old value of FFLAGS.
14237 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
14239 assert(MI.getNumOperands() == 7);
14241 // Emit a VFCVT_X_F
14242 const TargetRegisterInfo *TRI =
14243 BB->getParent()->getSubtarget().getRegisterInfo();
14244 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
14245 Register Tmp = MRI.createVirtualRegister(RC);
14246 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
14247 .add(MI.getOperand(1))
14248 .add(MI.getOperand(2))
14249 .add(MI.getOperand(3))
14250 .add(MachineOperand::CreateImm(7)) // frm = DYN
14251 .add(MI.getOperand(4))
14252 .add(MI.getOperand(5))
14253 .add(MI.getOperand(6))
14254 .add(MachineOperand::CreateReg(RISCV::FRM,
14255 /*IsDef*/ false,
14256 /*IsImp*/ true));
14258 // Emit a VFCVT_F_X
14259 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
14260 .add(MI.getOperand(0))
14261 .add(MI.getOperand(1))
14262 .addReg(Tmp)
14263 .add(MI.getOperand(3))
14264 .add(MachineOperand::CreateImm(7)) // frm = DYN
14265 .add(MI.getOperand(4))
14266 .add(MI.getOperand(5))
14267 .add(MI.getOperand(6))
14268 .add(MachineOperand::CreateReg(RISCV::FRM,
14269 /*IsDef*/ false,
14270 /*IsImp*/ true));
14273 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
14274 .addReg(SavedFFLAGS, RegState::Kill);
14276 // Erase the pseudoinstruction.
14277 MI.eraseFromParent();
14279 return BB;
14280 }
14281 static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
14282 const RISCVSubtarget &Subtarget) {
14283 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
14284 const TargetRegisterClass *RC;
14285 switch (MI.getOpcode()) {
14286 default:
14287 llvm_unreachable("Unexpected opcode");
14288 case RISCV::PseudoFROUND_H:
14289 CmpOpc = RISCV::FLT_H;
14290 F2IOpc = RISCV::FCVT_W_H;
14291 I2FOpc = RISCV::FCVT_H_W;
14292 FSGNJOpc = RISCV::FSGNJ_H;
14293 FSGNJXOpc = RISCV::FSGNJX_H;
14294 RC = &RISCV::FPR16RegClass;
14295 break;
14296 case RISCV::PseudoFROUND_H_INX:
14297 CmpOpc = RISCV::FLT_H_INX;
14298 F2IOpc = RISCV::FCVT_W_H_INX;
14299 I2FOpc = RISCV::FCVT_H_W_INX;
14300 FSGNJOpc = RISCV::FSGNJ_H_INX;
14301 FSGNJXOpc = RISCV::FSGNJX_H_INX;
14302 RC = &RISCV::GPRF16RegClass;
14303 break;
14304 case RISCV::PseudoFROUND_S:
14305 CmpOpc = RISCV::FLT_S;
14306 F2IOpc = RISCV::FCVT_W_S;
14307 I2FOpc = RISCV::FCVT_S_W;
14308 FSGNJOpc = RISCV::FSGNJ_S;
14309 FSGNJXOpc = RISCV::FSGNJX_S;
14310 RC = &RISCV::FPR32RegClass;
14311 break;
14312 case RISCV::PseudoFROUND_S_INX:
14313 CmpOpc = RISCV::FLT_S_INX;
14314 F2IOpc = RISCV::FCVT_W_S_INX;
14315 I2FOpc = RISCV::FCVT_S_W_INX;
14316 FSGNJOpc = RISCV::FSGNJ_S_INX;
14317 FSGNJXOpc = RISCV::FSGNJX_S_INX;
14318 RC = &RISCV::GPRF32RegClass;
14320 case RISCV::PseudoFROUND_D:
14321 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
14322 CmpOpc = RISCV::FLT_D;
14323 F2IOpc = RISCV::FCVT_L_D;
14324 I2FOpc = RISCV::FCVT_D_L;
14325 FSGNJOpc = RISCV::FSGNJ_D;
14326 FSGNJXOpc = RISCV::FSGNJX_D;
14327 RC = &RISCV::FPR64RegClass;
14329 case RISCV::PseudoFROUND_D_INX:
14330 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
14331 CmpOpc = RISCV::FLT_D_INX;
14332 F2IOpc = RISCV::FCVT_L_D_INX;
14333 I2FOpc = RISCV::FCVT_D_L_INX;
14334 FSGNJOpc = RISCV::FSGNJ_D_INX;
14335 FSGNJXOpc = RISCV::FSGNJX_D_INX;
14336 RC = &RISCV::GPRRegClass;
  const BasicBlock *BB = MBB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction::iterator I = ++MBB->getIterator();

  MachineFunction *F = MBB->getParent();
  MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
  MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);

  F->insert(I, CvtMBB);
  F->insert(I, DoneMBB);
  // Move all instructions after the sequence to DoneMBB.
  DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
                  MBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the PHI node for the result.
  DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
  // Set the successors for MBB.
  MBB->addSuccessor(CvtMBB);
  MBB->addSuccessor(DoneMBB);

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  Register MaxReg = MI.getOperand(2).getReg();
  int64_t FRM = MI.getOperand(3).getImm();

  const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  Register FabsReg = MRI.createVirtualRegister(RC);
  BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);

  // Compare the FP value to the max value.
  Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
  auto MIB =
      BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
    MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);

  // If abs(Src) >= Max (or Src is NaN), the compare produces 0; branch
  // straight to DoneMBB and return Src unchanged.
  BuildMI(MBB, DL, TII.get(RISCV::BEQ))
      .addReg(CmpReg)
      .addReg(RISCV::X0)
      .addMBB(DoneMBB);

  CvtMBB->addSuccessor(DoneMBB);

  // Convert to integer.
  Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
  MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
    MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);

  // Convert back to FP.
  Register I2FReg = MRI.createVirtualRegister(RC);
  MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
    MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);

  // Restore the sign bit.
  Register CvtReg = MRI.createVirtualRegister(RC);
  BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);

  // Merge the results.
  BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
      .addReg(SrcReg)
      .addMBB(MBB)
      .addReg(CvtReg)
      .addMBB(CvtMBB);

  MI.eraseFromParent();
  return DoneMBB;
}
MachineBasicBlock *
RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                 MachineBasicBlock *BB) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instr type to insert");
  case RISCV::ReadCycleWide:
    assert(!Subtarget.is64Bit() &&
           "ReadCycleWide is only to be used on riscv32");
    return emitReadCycleWidePseudo(MI, BB);
  case RISCV::Select_GPR_Using_CC_GPR:
  case RISCV::Select_FPR16_Using_CC_GPR:
  case RISCV::Select_FPR16INX_Using_CC_GPR:
  case RISCV::Select_FPR32_Using_CC_GPR:
  case RISCV::Select_FPR32INX_Using_CC_GPR:
  case RISCV::Select_FPR64_Using_CC_GPR:
  case RISCV::Select_FPR64INX_Using_CC_GPR:
  case RISCV::Select_FPR64IN32X_Using_CC_GPR:
    return emitSelectPseudo(MI, BB, Subtarget);
  case RISCV::BuildPairF64Pseudo:
  case RISCV::BuildPairF64Pseudo_INX:
    return emitBuildPairF64Pseudo(MI, BB, Subtarget);
  case RISCV::SplitF64Pseudo:
  case RISCV::SplitF64Pseudo_INX:
    return emitSplitF64Pseudo(MI, BB, Subtarget);
  case RISCV::PseudoQuietFLE_H:
    return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
  case RISCV::PseudoQuietFLE_H_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
  case RISCV::PseudoQuietFLT_H:
    return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
  case RISCV::PseudoQuietFLT_H_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
  case RISCV::PseudoQuietFLE_S:
    return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
  case RISCV::PseudoQuietFLE_S_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
  case RISCV::PseudoQuietFLT_S:
    return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
  case RISCV::PseudoQuietFLT_S_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
  case RISCV::PseudoQuietFLE_D:
    return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
  case RISCV::PseudoQuietFLE_D_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
  case RISCV::PseudoQuietFLE_D_IN32X:
    return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
                         Subtarget);
  case RISCV::PseudoQuietFLT_D:
    return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
  case RISCV::PseudoQuietFLT_D_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
  case RISCV::PseudoQuietFLT_D_IN32X:
    return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
                         Subtarget);

#define PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, LMUL)                             \
  case RISCV::RMOpc##_##LMUL:                                                  \
    return emitVFCVT_RM(MI, BB, RISCV::Opc##_##LMUL);                          \
  case RISCV::RMOpc##_##LMUL##_MASK:                                           \
    return emitVFCVT_RM(MI, BB, RISCV::Opc##_##LMUL##_MASK);

#define PseudoVFCVT_RM_CASE(RMOpc, Opc)                                        \
  PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M1)                                     \
  PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M2)                                     \
  PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M4)                                     \
  PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF2)                                    \
  PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF4)

#define PseudoVFCVT_RM_CASE_M8(RMOpc, Opc)                                     \
  PseudoVFCVT_RM_CASE(RMOpc, Opc)                                              \
  PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M8)

#define PseudoVFCVT_RM_CASE_MF8(RMOpc, Opc)                                    \
  PseudoVFCVT_RM_CASE(RMOpc, Opc)                                              \
  PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF8)

  // VFCVT
  PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_X_F_V, PseudoVFCVT_X_F_V)
  PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_XU_F_V, PseudoVFCVT_XU_F_V)
  PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_F_XU_V, PseudoVFCVT_F_XU_V)
  PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_F_X_V, PseudoVFCVT_F_X_V)

  // VFWCVT
  PseudoVFCVT_RM_CASE(PseudoVFWCVT_RM_XU_F_V, PseudoVFWCVT_XU_F_V);
  PseudoVFCVT_RM_CASE(PseudoVFWCVT_RM_X_F_V, PseudoVFWCVT_X_F_V);

  // VFNCVT
  PseudoVFCVT_RM_CASE_MF8(PseudoVFNCVT_RM_XU_F_W, PseudoVFNCVT_XU_F_W);
  PseudoVFCVT_RM_CASE_MF8(PseudoVFNCVT_RM_X_F_W, PseudoVFNCVT_X_F_W);
  PseudoVFCVT_RM_CASE(PseudoVFNCVT_RM_F_XU_W, PseudoVFNCVT_F_XU_W);
  PseudoVFCVT_RM_CASE(PseudoVFNCVT_RM_F_X_W, PseudoVFNCVT_F_X_W);

  case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK,
                                     RISCV::PseudoVFCVT_F_X_V_M1_MASK);
  case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK,
                                     RISCV::PseudoVFCVT_F_X_V_M2_MASK);
  case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK,
                                     RISCV::PseudoVFCVT_F_X_V_M4_MASK);
  case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK,
                                     RISCV::PseudoVFCVT_F_X_V_M8_MASK);
  case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK,
                                     RISCV::PseudoVFCVT_F_X_V_MF2_MASK);
  case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK,
                                     RISCV::PseudoVFCVT_F_X_V_MF4_MASK);
  case RISCV::PseudoFROUND_H:
  case RISCV::PseudoFROUND_H_INX:
  case RISCV::PseudoFROUND_S:
  case RISCV::PseudoFROUND_S_INX:
  case RISCV::PseudoFROUND_D:
  case RISCV::PseudoFROUND_D_INX:
  case RISCV::PseudoFROUND_D_IN32X:
    return emitFROUND(MI, BB, Subtarget);
  }
}
// Returns the index of the rounding mode immediate value if any, otherwise
// returns std::nullopt.
static std::optional<unsigned> getRoundModeIdx(const MachineInstr &MI) {
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (!RISCVII::hasRoundModeOp(TSFlags))
    return std::nullopt;

  // The operand order:
  // -------------------------------------
  // | n-1 (if any) | n-2 | n-3 | n-4 |
  // | policy       | sew | vl  | rm  |
  // -------------------------------------
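  // For example (illustrative): an instruction with 7 explicit operands whose
  // last operand is a policy operand has its rm immediate at index
  // 7 - 1 - 3 == 3; without a policy operand the rm sits at index n - 3.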
  return MI.getNumExplicitOperands() - RISCVII::hasVecPolicyOp(TSFlags) - 3;
}
void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
                                                        SDNode *Node) const {
  // Add FRM dependency to vector floating-point instructions with dynamic
  // rounding mode.
  if (auto RoundModeIdx = getRoundModeIdx(MI)) {
    unsigned FRMImm = MI.getOperand(*RoundModeIdx).getImm();
    if (FRMImm == RISCVFPRndMode::DYN && !MI.readsRegister(RISCV::FRM)) {
      MI.addOperand(MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false,
                                              /*isImp*/ true));
    }
  }

  // Add FRM dependency to any instructions with dynamic rounding mode.
  unsigned Opc = MI.getOpcode();
  auto Idx = RISCV::getNamedOperandIdx(Opc, RISCV::OpName::frm);
  if (Idx < 0)
    return;
  if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
    return;
  // If the instruction already reads FRM, don't add another read.
  if (MI.readsRegister(RISCV::FRM))
    return;
  MI.addOperand(
      MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
}
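// Sketch of the effect on a scalar op (illustrative MIR, not actual output):
//   %2:fpr32 = FADD_S %0, %1, 7
// becomes, after this hook runs,
//   %2:fpr32 = FADD_S %0, %1, 7, implicit $frm
// making the dependency on the current rounding mode visible to later passes.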
// Calling Convention Implementation.
// The expectations for frontend ABI lowering vary from target to target.
// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
// details, but this is a longer term goal. For now, we simply try to keep the
// role of the frontend as simple and well-defined as possible. The rules can
// be summarised as:
// * Never split up large scalar arguments. We handle them here.
// * If a hardfloat calling convention is being used, and the struct may be
// passed in a pair of registers (fp+fp, int+fp), and both registers are
// available, then pass as two separate arguments. If either the GPRs or FPRs
// are exhausted, then pass according to the rule below.
// * If a struct could never be passed in registers or directly in a stack
// slot (as it is larger than 2*XLEN and the floating point rules don't
// apply), then pass it using a pointer with the byval attribute.
// * If a struct is less than 2*XLEN, then coerce to either a two-element
// word-sized array or a 2*XLEN scalar (depending on alignment).
// * The frontend can determine whether a struct is returned by reference or
// not based on its size and fields. If it will be returned by reference, the
// frontend must modify the prototype so a pointer with the sret annotation is
// passed as the first argument. This is not necessary for large scalar
// returns.
// * Struct return values and varargs should be coerced to structs containing
// register-size fields in the same situations they would be for fixed
// arguments.
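// For example (illustrative, assuming the ILP32D ABI with argument registers
// available): struct S { double d; int i; }; is passed as two separate
// arguments, d in fa0 and i in a0. Once the FPRs or GPRs are exhausted, the
// struct is instead coerced and passed per the rules above.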
static const MCPhysReg ArgGPRs[] = {
    RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
    RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
};
static const MCPhysReg ArgFPR16s[] = {
    RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
    RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
};
static const MCPhysReg ArgFPR32s[] = {
    RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
    RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
};
static const MCPhysReg ArgFPR64s[] = {
    RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
    RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
};
// This is an interim calling convention and it may be changed in the future.
static const MCPhysReg ArgVRs[] = {
    RISCV::V8,  RISCV::V9,  RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
    RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
    RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2,  RISCV::V10M2, RISCV::V12M2,
                                     RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
                                     RISCV::V20M2, RISCV::V22M2};
static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
                                     RISCV::V20M4};
static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
// Pass a 2*XLEN argument that has been split into two XLEN values through
// registers or the stack as necessary.
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
                                ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
                                MVT ValVT2, MVT LocVT2,
                                ISD::ArgFlagsTy ArgFlags2) {
  unsigned XLenInBytes = XLen / 8;
  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // At least one half can be passed via register.
    State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
                                     VA1.getLocVT(), CCValAssign::Full));
  } else {
    // Both halves must be passed on the stack, with proper alignment.
    Align StackAlign =
        std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
    State.addLoc(
        CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
                            State.AllocateStack(XLenInBytes, StackAlign),
                            VA1.getLocVT(), CCValAssign::Full));
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
        LocVT2, CCValAssign::Full));
    return false;
  }

  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // The second half can also be passed via register.
    State.addLoc(
        CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
  } else {
    // The second half is passed via the stack, without additional alignment.
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
        LocVT2, CCValAssign::Full));
  }

  return false;
}
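// For example (illustrative): on RV32, an i64 argument arriving when only a7
// remains free gets its first half in a7 and its second half in the first
// stack slot; with no GPRs free, both halves go on the stack with the
// argument's original alignment.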
static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo,
                               std::optional<unsigned> FirstMaskArgument,
                               CCState &State, const RISCVTargetLowering &TLI) {
  const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
  if (RC == &RISCV::VRRegClass) {
    // Assign the first mask argument to V0.
    // This is an interim calling convention and it may be changed in the
    // future.
    if (FirstMaskArgument && ValNo == *FirstMaskArgument)
      return State.AllocateReg(RISCV::V0);
    return State.AllocateReg(ArgVRs);
  }
  if (RC == &RISCV::VRM2RegClass)
    return State.AllocateReg(ArgVRM2s);
  if (RC == &RISCV::VRM4RegClass)
    return State.AllocateReg(ArgVRM4s);
  if (RC == &RISCV::VRM8RegClass)
    return State.AllocateReg(ArgVRM8s);
  llvm_unreachable("Unhandled register class for ValueType");
}
// Implements the RISC-V calling convention. Returns true upon failure.
bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
                     MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
                     ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
                     bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
                     std::optional<unsigned> FirstMaskArgument) {
  unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
  assert(XLen == 32 || XLen == 64);
  MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;

  // Static chain parameter must not be passed in normal argument registers,
  // so we assign t2 for it as done in GCC's __builtin_call_with_static_chain
  if (ArgFlags.isNest()) {
    if (unsigned Reg = State.AllocateReg(RISCV::X7)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  // Any return value split into more than two values can't be returned
  // directly. Vectors are returned via the available vector registers.
  if (!LocVT.isVector() && IsRet && ValNo > 1)
    return true;

  // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a
  // variadic argument, or if no F16/F32 argument registers are available.
  bool UseGPRForF16_F32 = true;
  // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
  // variadic argument, or if no F64 argument registers are available.
  bool UseGPRForF64 = true;

  switch (ABI) {
  default:
    llvm_unreachable("Unexpected ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_LP64:
    break;
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_LP64F:
    UseGPRForF16_F32 = !IsFixed;
    break;
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64D:
    UseGPRForF16_F32 = !IsFixed;
    UseGPRForF64 = !IsFixed;
    break;
  }

  // FPR16, FPR32, and FPR64 alias each other.
  if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) {
    UseGPRForF16_F32 = true;
    UseGPRForF64 = true;
  }

  // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
  // similar local variables rather than directly checking against the target
  // ABI.

  if (UseGPRForF16_F32 &&
      (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::BCvt;
  } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
    LocVT = MVT::i64;
    LocInfo = CCValAssign::BCvt;
  }

  // If this is a variadic argument, the RISC-V calling convention requires
  // that it is assigned an 'even' or 'aligned' register if it has 8-byte
  // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
  // be used regardless of whether the original argument was split during
  // legalisation or not. The argument will not be passed by registers if the
  // original type is larger than 2*XLEN, so the register alignment rule does
  // not apply.
  unsigned TwoXLenInBytes = (2 * XLen) / 8;
  if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
      DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
    unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
    // Skip 'odd' register if necessary.
    if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
      State.AllocateReg(ArgGPRs);
  }
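  // For example (illustrative): on RV32, a variadic double reaching here with
  // a1 as the next free GPR skips a1 and is assigned the aligned pair a2/a3.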
  SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
  SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
      State.getPendingArgFlags();

  assert(PendingLocs.size() == PendingArgFlags.size() &&
         "PendingLocs and PendingArgFlags out of sync");

  // Handle passing f64 on RV32D with a soft float ABI or when floating point
  // registers are exhausted.
  if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
    assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
           "Can't lower f64 if it is split");
    // Depending on available argument GPRs, f64 may be passed in a pair of
    // GPRs, split between a GPR and the stack, or passed completely on the
    // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
    // cases.
    Register Reg = State.AllocateReg(ArgGPRs);
    LocVT = MVT::i32;
    if (!Reg) {
      unsigned StackOffset = State.AllocateStack(8, Align(8));
      State.addLoc(
          CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
      return false;
    }
    if (!State.AllocateReg(ArgGPRs))
      State.AllocateStack(4, Align(4));
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }
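  // For example (illustrative): with a0-a6 already taken, such an f64 gets
  // its low half in a7 and its high half in the first stack slot; with all
  // GPRs taken, it occupies an 8-byte aligned stack slot instead.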
  // Fixed-length vectors are located in the corresponding scalable-vector
  // container types.
  if (ValVT.isFixedLengthVector())
    LocVT = TLI.getContainerForFixedLengthVector(LocVT);

  // Split arguments might be passed indirectly, so keep track of the pending
  // values. Split vectors are passed via a mix of registers and indirectly, so
  // treat them as we would any other argument.
  if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
    LocVT = XLenVT;
    LocInfo = CCValAssign::Indirect;
    PendingLocs.push_back(
        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
    PendingArgFlags.push_back(ArgFlags);
    if (!ArgFlags.isSplitEnd()) {
      return false;
    }
  }

  // If the split argument only had two elements, it should be passed directly
  // in registers or on the stack.
  if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
      PendingLocs.size() <= 2) {
    assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
    // Apply the normal calling convention rules to the first half of the
    // split argument.
    CCValAssign VA = PendingLocs[0];
    ISD::ArgFlagsTy AF = PendingArgFlags[0];
    PendingLocs.clear();
    PendingArgFlags.clear();
    return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
                               ArgFlags);
  }

  // Allocate to a register if possible, or else a stack slot.
  Register Reg;
  unsigned StoreSizeBytes = XLen / 8;
  Align StackAlign = Align(XLen / 8);

  if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32)
    Reg = State.AllocateReg(ArgFPR16s);
  else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
    Reg = State.AllocateReg(ArgFPR32s);
  else if (ValVT == MVT::f64 && !UseGPRForF64)
    Reg = State.AllocateReg(ArgFPR64s);
  else if (ValVT.isVector()) {
    Reg = allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI);
    if (!Reg) {
      // For return values, the vector must be passed fully via registers or
      // via the stack.
      // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
      // but we're using all of them.
      if (IsRet)
        return true;
      // Try using a GPR to pass the address
      if ((Reg = State.AllocateReg(ArgGPRs))) {
        LocVT = XLenVT;
        LocInfo = CCValAssign::Indirect;
      } else if (ValVT.isScalableVector()) {
        LocVT = XLenVT;
        LocInfo = CCValAssign::Indirect;
      } else {
        // Pass fixed-length vectors on the stack.
        LocVT = ValVT;
        StoreSizeBytes = ValVT.getStoreSize();
        // Align vectors to their element sizes, being careful for vXi1
        // vectors.
        StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
      }
    }
  } else {
    Reg = State.AllocateReg(ArgGPRs);
  }

  unsigned StackOffset =
      Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);

  // If we reach this point and PendingLocs is non-empty, we must be at the
  // end of a split argument that must be passed indirectly.
  if (!PendingLocs.empty()) {
    assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
    assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");

    for (auto &It : PendingLocs) {
      if (Reg)
        It.convertToReg(Reg);
      else
        It.convertToMem(StackOffset);
      State.addLoc(It);
    }
    PendingLocs.clear();
    PendingArgFlags.clear();
    return false;
  }

  assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
          (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
         "Expected an XLenVT or vector types at this stage");

  if (Reg) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // When a scalar floating-point value is passed on the stack, no
  // bit-conversion is needed.
  if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) {
    assert(!ValVT.isVector());
    LocVT = ValVT;
    LocInfo = CCValAssign::Full;
  }
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
  return false;
}
template <typename ArgTy>
static std::optional<unsigned> preAssignMask(const ArgTy &Args) {
  for (const auto &ArgIdx : enumerate(Args)) {
    MVT ArgVT = ArgIdx.value().VT;
    if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
      return ArgIdx.index();
  }
  return std::nullopt;
}
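// For example (illustrative): for f(vint32m1_t v, vbool32_t m) this returns
// index 1, so allocateRVVReg above pre-assigns the mask argument m to v0.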
void RISCVTargetLowering::analyzeInputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
    RISCVCCAssignFn Fn) const {
  unsigned NumArgs = Ins.size();
  FunctionType *FType = MF.getFunction().getFunctionType();

  std::optional<unsigned> FirstMaskArgument;
  if (Subtarget.hasVInstructions())
    FirstMaskArgument = preAssignMask(Ins);

  for (unsigned i = 0; i != NumArgs; ++i) {
    MVT ArgVT = Ins[i].VT;
    ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;

    Type *ArgTy = nullptr;
    if (IsRet)
      ArgTy = FType->getReturnType();
    else if (Ins[i].isOrigArg())
      ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());

    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
    if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
           ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
           FirstMaskArgument)) {
      LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << '\n');
      llvm_unreachable(nullptr);
    }
  }
}
void RISCVTargetLowering::analyzeOutputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
    CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
  unsigned NumArgs = Outs.size();

  std::optional<unsigned> FirstMaskArgument;
  if (Subtarget.hasVInstructions())
    FirstMaskArgument = preAssignMask(Outs);

  for (unsigned i = 0; i != NumArgs; i++) {
    MVT ArgVT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;

    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
    if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
           ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
           FirstMaskArgument)) {
      LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << "\n");
      llvm_unreachable(nullptr);
    }
  }
}
// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
// values.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
                                   const CCValAssign &VA, const SDLoc &DL,
                                   const RISCVSubtarget &Subtarget) {
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
      Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
    break;
  case CCValAssign::BCvt:
    if (VA.getLocVT().isInteger() &&
        (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
      Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
    else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
      Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
    else
      Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
    break;
  }
  return Val;
}
// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
                                const CCValAssign &VA, const SDLoc &DL,
                                const ISD::InputArg &In,
                                const RISCVTargetLowering &TLI) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  EVT LocVT = VA.getLocVT();
  SDValue Val;
  const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
  Register VReg = RegInfo.createVirtualRegister(RC);
  RegInfo.addLiveIn(VA.getLocReg(), VReg);
  Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);

  // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
  if (In.isOrigArg()) {
    Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
    if (OrigArg->getType()->isIntegerTy()) {
      unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
      // An input zero extended from i31 can also be considered sign extended.
      if ((BitWidth <= 32 && In.Flags.isSExt()) ||
          (BitWidth < 32 && In.Flags.isZExt())) {
        RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
        RVFI->addSExt32Register(VReg);
      }
    }
  }

  if (VA.getLocInfo() == CCValAssign::Indirect)
    return Val;

  return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
}
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
                                   const CCValAssign &VA, const SDLoc &DL,
                                   const RISCVSubtarget &Subtarget) {
  EVT LocVT = VA.getLocVT();

  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
      Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
    break;
  case CCValAssign::BCvt:
    if (VA.getLocVT().isInteger() &&
        (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
      Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val);
    else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
      Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
    else
      Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
    break;
  }
  return Val;
}
// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
                                const CCValAssign &VA, const SDLoc &DL) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT LocVT = VA.getLocVT();
  EVT ValVT = VA.getValVT();
  EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
  if (ValVT.isScalableVector()) {
    // When the value is a scalable vector, we save the pointer which points to
    // the scalable vector value in the stack. The ValVT will be the pointer
    // type, instead of the scalable vector type.
    ValVT = LocVT;
  }
  int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
                                 /*IsImmutable=*/true);
  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
  SDValue Val;

  ISD::LoadExtType ExtType;
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
  case CCValAssign::Indirect:
  case CCValAssign::BCvt:
    ExtType = ISD::NON_EXTLOAD;
    break;
  }
  Val = DAG.getExtLoad(
      ExtType, DL, LocVT, Chain, FIN,
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
  return Val;
}
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
                                       const CCValAssign &VA, const SDLoc &DL) {
  assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
         "Unexpected VA");
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  if (VA.isMemLoc()) {
    // f64 is passed on the stack.
    int FI =
        MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*IsImmutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    return DAG.getLoad(MVT::f64, DL, Chain, FIN,
                       MachinePointerInfo::getFixedStack(MF, FI));
  }

  assert(VA.isRegLoc() && "Expected register VA assignment");

  Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
  SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
  SDValue Hi;
  if (VA.getLocReg() == RISCV::X17) {
    // Second half of f64 is passed on the stack.
    int FI = MFI.CreateFixedObject(4, 0, /*IsImmutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
                     MachinePointerInfo::getFixedStack(MF, FI));
  } else {
    // Second half of f64 is passed in another GPR.
    Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
    RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
    Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
  }
  return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}
// FastCC has less than 1% performance improvement for some particular
// benchmark. But theoretically, it may have benefit for some cases.
bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
                            unsigned ValNo, MVT ValVT, MVT LocVT,
                            CCValAssign::LocInfo LocInfo,
                            ISD::ArgFlagsTy ArgFlags, CCState &State,
                            bool IsFixed, bool IsRet, Type *OrigTy,
                            const RISCVTargetLowering &TLI,
                            std::optional<unsigned> FirstMaskArgument) {

  // X5 and X6 might be used for save-restore libcall.
  static const MCPhysReg GPRList[] = {
      RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
      RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7,  RISCV::X28,
      RISCV::X29, RISCV::X30, RISCV::X31};

  if (LocVT == MVT::i32 || LocVT == MVT::i64) {
    if (unsigned Reg = State.AllocateReg(GPRList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  const RISCVSubtarget &Subtarget = TLI.getSubtarget();

  if (LocVT == MVT::f16 &&
      (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) {
    static const MCPhysReg FPR16List[] = {
        RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
        RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H,  RISCV::F1_H,
        RISCV::F2_H,  RISCV::F3_H,  RISCV::F4_H,  RISCV::F5_H,  RISCV::F6_H,
        RISCV::F7_H,  RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
    if (unsigned Reg = State.AllocateReg(FPR16List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
    static const MCPhysReg FPR32List[] = {
        RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
        RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F,  RISCV::F1_F,
        RISCV::F2_F,  RISCV::F3_F,  RISCV::F4_F,  RISCV::F5_F,  RISCV::F6_F,
        RISCV::F7_F,  RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
    if (unsigned Reg = State.AllocateReg(FPR32List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
    static const MCPhysReg FPR64List[] = {
        RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
        RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D,  RISCV::F1_D,
        RISCV::F2_D,  RISCV::F3_D,  RISCV::F4_D,  RISCV::F5_D,  RISCV::F6_D,
        RISCV::F7_D,  RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
    if (unsigned Reg = State.AllocateReg(FPR64List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  // Check if there is an available GPR before hitting the stack.
  if ((LocVT == MVT::f16 &&
       (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) ||
      (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
      (LocVT == MVT::f64 && Subtarget.is64Bit() &&
       Subtarget.hasStdExtZdinx())) {
    if (unsigned Reg = State.AllocateReg(GPRList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f16) {
    unsigned Offset2 = State.AllocateStack(2, Align(2));
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo));
    return false;
  }

  if (LocVT == MVT::i32 || LocVT == MVT::f32) {
    unsigned Offset4 = State.AllocateStack(4, Align(4));
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
    return false;
  }

  if (LocVT == MVT::i64 || LocVT == MVT::f64) {
    unsigned Offset5 = State.AllocateStack(8, Align(8));
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
    return false;
  }

  if (LocVT.isVector()) {
    if (unsigned Reg =
            allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI)) {
      // Fixed-length vectors are located in the corresponding scalable-vector
      // container types.
      if (ValVT.isFixedLengthVector())
        LocVT = TLI.getContainerForFixedLengthVector(LocVT);
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    } else {
      // Try and pass the address via a "fast" GPR.
      if (unsigned GPRReg = State.AllocateReg(GPRList)) {
        LocInfo = CCValAssign::Indirect;
        LocVT = TLI.getSubtarget().getXLenVT();
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
      } else if (ValVT.isFixedLengthVector()) {
        auto StackAlign =
            MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
        unsigned StackOffset =
            State.AllocateStack(ValVT.getStoreSize(), StackAlign);
        State.addLoc(
            CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
      } else {
        // Can't pass scalable vectors on the stack.
        return true;
      }
    }

    return false;
  }

  return true; // CC didn't match.
}
bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
                         CCValAssign::LocInfo LocInfo,
                         ISD::ArgFlagsTy ArgFlags, CCState &State) {
  if (ArgFlags.isNest()) {
    report_fatal_error(
        "Attribute 'nest' is not supported in GHC calling convention");
  }

  static const MCPhysReg GPRList[] = {
      RISCV::X9,  RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
      RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};

  if (LocVT == MVT::i32 || LocVT == MVT::i64) {
    // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
    //                        s1    s2  s3  s4  s5  s6  s7  s8  s9  s10 s11
    if (unsigned Reg = State.AllocateReg(GPRList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  const RISCVSubtarget &Subtarget =
      State.getMachineFunction().getSubtarget<RISCVSubtarget>();

  if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
    // Pass in STG registers: F1, ..., F6
    //                        fs0 ... fs5
    static const MCPhysReg FPR32List[] = {RISCV::F8_F,  RISCV::F9_F,
                                          RISCV::F18_F, RISCV::F19_F,
                                          RISCV::F20_F, RISCV::F21_F};
    if (unsigned Reg = State.AllocateReg(FPR32List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
    // Pass in STG registers: D1, ..., D6
    //                        fs6 ... fs11
    static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
                                          RISCV::F24_D, RISCV::F25_D,
                                          RISCV::F26_D, RISCV::F27_D};
    if (unsigned Reg = State.AllocateReg(FPR64List)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
      (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
       Subtarget.is64Bit())) {
    if (unsigned Reg = State.AllocateReg(GPRList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  report_fatal_error("No registers left in GHC calling convention");
}
// Transform physical registers into virtual registers.
SDValue RISCVTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  MachineFunction &MF = DAG.getMachineFunction();

  switch (CallConv) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  case CallingConv::GHC:
    if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
      report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
                         "(Zdinx/D) instruction set extensions");
  }

  const Function &Func = MF.getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.arg_empty())
      report_fatal_error(
          "Functions with the interrupt attribute cannot have arguments!");

    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
      report_fatal_error(
          "Function interrupt attribute argument not supported!");
  }

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();
  unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with vargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC)
    CCInfo.AnalyzeFormalArguments(Ins, RISCV::CC_RISCV_GHC);
  else
    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
                     CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
                                                   : RISCV::CC_RISCV);
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    // Passing f64 on RV32D with a soft float ABI must be handled as a special
    // case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
      ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
    else if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[i], *this);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference (e.g. i128
      // on RV32), we need to load all parts of it here (using the same
      // address). Vectors may be partly split to registers and partly to the
      // stack, in which case the base address is partly offset and subsequent
      // stores are relative to that.
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      unsigned ArgIndex = Ins[i].OrigArgIndex;
      unsigned ArgPartOffset = Ins[i].PartOffset;
      assert(VA.getValVT().isVector() || ArgPartOffset == 0);
      while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
        if (PartVA.getValVT().isScalableVector())
          Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++i;
      }
      continue;
    }
    InVals.push_back(ArgValue);
  }

  if (any_of(ArgLocs,
             [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
    MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &RISCV::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

    // Offset of the first variable argument from stack pointer, and size of
    // the vararg save area. For now, the varargs save area is either zero or
    // large enough to hold a0-a7.
    int VaArgOffset, VarArgsSaveSize;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (ArgRegs.size() == Idx) {
      VaArgOffset = CCInfo.getStackSize();
      VarArgsSaveSize = 0;
    } else {
      VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }

    // Record the frame index of the first variable argument
    // which is a value necessary to VASTART.
    int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
    RVFI->setVarArgsFrameIndex(FI);

    // If saving an odd number of registers then create an extra stack slot to
    // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
    // offsets to even-numbered registers remain 2*XLEN-aligned.
    if (Idx % 2) {
      MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
      VarArgsSaveSize += XLenInBytes;
    }
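    // For example (illustrative): an RV32 variadic function whose fixed
    // arguments occupy a0-a4 saves a5-a7 (Idx == 5, so 12 bytes); since Idx
    // is odd, one extra 4-byte slot keeps the save area 2*XLEN aligned.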
    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size();
         ++I, VaArgOffset += XLenInBytes) {
      const Register Reg = RegInfo.createVirtualRegister(RC);
      RegInfo.addLiveIn(ArgRegs[I], Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
                                   MachinePointerInfo::getFixedStack(MF, FI));
      cast<StoreSDNode>(Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
      OutChains.push_back(Store);
    }
    RVFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}
/// isEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization.
/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
bool RISCVTargetLowering::isEligibleForTailCallOptimization(
    CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
    const SmallVector<CCValAssign, 16> &ArgLocs) const {

  auto CalleeCC = CLI.CallConv;
  auto &Outs = CLI.Outs;
  auto &Caller = MF.getFunction();
  auto CallerCC = Caller.getCallingConv();

  // Exception-handling functions need a special set of instructions to
  // indicate a return to the hardware. Tail-calling another function would
  // probably break this.
  // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
  // should be expanded as new function attributes are introduced.
  if (Caller.hasFnAttribute("interrupt"))
    return false;

  // Do not tail call opt if the stack is used to pass parameters.
  if (CCInfo.getStackSize() != 0)
    return false;

  // Do not tail call opt if any parameters need to be passed indirectly.
  // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
  // passed indirectly. So the address of the value will be passed in a
  // register, or if not available, then the address is put on the stack. In
  // order to pass indirectly, space on the stack often needs to be allocated
  // in order to store the value. In this case the CCInfo.getStackSize() != 0
  // check is not enough and we need to check if any CCValAssign ArgLocs
  // are passed CCValAssign::Indirect.
  for (auto &VA : ArgLocs)
    if (VA.getLocInfo() == CCValAssign::Indirect)
      return false;

  // Do not tail call opt if either caller or callee uses struct return
  // semantics.
  auto IsCallerStructRet = Caller.hasStructRetAttr();
  auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
  if (IsCallerStructRet || IsCalleeStructRet)
    return false;

  // The callee has to preserve all registers the caller needs to preserve.
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (CalleeCC != CallerCC) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
      return false;
  }

  // Byval parameters hand the function a pointer directly into the stack area
  // we want to reuse during a tail call. Working around this *is* possible
  // but less efficient and uglier in LowerCall.
  for (auto &Arg : Outs)
    if (Arg.Flags.isByVal())
      return false;

  return true;
}
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
  return DAG.getDataLayout().getPrefTypeAlign(
      VT.getTypeForEVT(*DAG.getContext()));
}
// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes.
SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                       SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();

  MachineFunction &MF = DAG.getMachineFunction();

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC)
    ArgCCInfo.AnalyzeCallOperands(Outs, RISCV::CC_RISCV_GHC);
  else
    analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
                      CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
                                                    : RISCV::CC_RISCV);

  // Check if it's really possible to do a tail call.
  if (IsTailCall)
    IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);

  if (IsTailCall)
    ++NumTailCalls;
  else if (CLI.CB && CLI.CB->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getStackSize();

  // Create local copies for byval args
  SmallVector<SDValue, 8> ByValArgs;
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    if (!Flags.isByVal())
      continue;

    SDValue Arg = OutVals[i];
    unsigned Size = Flags.getByValSize();
    Align Alignment = Flags.getNonZeroByValAlign();

    int FI =
        MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
    SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
    SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);

    Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
                          /*IsVolatile=*/false,
                          /*AlwaysInline=*/false, IsTailCall,
                          MachinePointerInfo(), MachinePointerInfo());
    ByValArgs.push_back(FIPtr);
  }

  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
  // Copy argument values to their designated locations.
  SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // Handle passing f64 on RV32D with a soft float ABI as a special case.
    bool IsF64OnRV32DSoftABI =
        VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
    if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
      SDValue SplitF64 = DAG.getNode(
          RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);

      Register RegLo = VA.getLocReg();
      RegsToPass.push_back(std::make_pair(RegLo, Lo));

      if (RegLo == RISCV::X17) {
        // Second half of f64 is passed on the stack.
        // Work out the address of the stack slot.
        if (!StackPtr.getNode())
          StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
        // Emit the store.
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
      } else {
        // Second half of f64 is passed in another GPR.
        assert(RegLo < RISCV::X31 && "Invalid register pair");
        Register RegHigh = RegLo + 1;
        RegsToPass.push_back(std::make_pair(RegHigh, Hi));
      }
      continue;
    }

    // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
    // as any other MemLoc.

    // Promote the value if needed.
    // For now, only handle fully promoted and indirect arguments.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      Align StackAlign =
          std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
                   getPrefTypeAlign(ArgValue.getValueType(), DAG));
      TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
      // If the original argument was split (e.g. i128), we need
      // to store the required parts of it here (and pass just one address).
      // Vectors may be partly split to registers and partly to the stack, in
      // which case the base address is partly offset and subsequent stores are
      // relative to that.
      unsigned ArgIndex = Outs[i].OrigArgIndex;
      unsigned ArgPartOffset = Outs[i].PartOffset;
      assert(VA.getValVT().isVector() || ArgPartOffset == 0);
      // Calculate the total size to store. We don't have access to what we're
      // actually storing other than performing the loop and collecting the
      // info.
      SmallVector<std::pair<SDValue, SDValue>> Parts;
      while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[i + 1];
        unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
        EVT PartVT = PartValue.getValueType();
        if (PartVT.isScalableVector())
          Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
        StoredSize += PartVT.getStoreSize();
        StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
        Parts.push_back(std::make_pair(PartValue, Offset));
        ++i;
      }
      SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                       MachinePointerInfo::getFixedStack(MF, FI)));
      for (const auto &Part : Parts) {
        SDValue PartValue = Part.first;
        SDValue PartOffset = Part.second;
        SDValue Address =
            DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
                         MachinePointerInfo::getFixedStack(MF, FI)));
      }
      ArgValue = SpillSlot;
    } else {
      ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
    }

    // Use local copy if it is a byval arg.
    if (Flags.isByVal())
      ArgValue = ByValArgs[j++];

    if (VA.isRegLoc()) {
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");
      assert(!IsTailCall && "Tail call not allowed if stack is used "
                            "for passing parameters");

      // Work out the address of the stack slot.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
      SDValue Address =
          DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));

      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
    }
  }
15742 // Join the stores, which are independent of one another.
15743 if (!MemOpChains.empty())
15744 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
15748 // Build a sequence of copy-to-reg nodes, chained and glued together.
15749 for (auto &Reg : RegsToPass) {
15750 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
15751 Glue = Chain.getValue(1);
15754 // Validate that none of the argument registers have been marked as
15755 // reserved, if so report an error. Do the same for the return address if this
15756 // is not a tailcall.
15757 validateCCReservedRegs(RegsToPass, MF);
15759 MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
15760 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
15762 "Return address register required, but has been reserved."});
15764 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
15765 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
15766 // split it and then direct call can be matched by PseudoCALL.
15767 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
15768 const GlobalValue *GV = S->getGlobal();
15770 unsigned OpFlags = RISCVII::MO_CALL;
15771 if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
15772 OpFlags = RISCVII::MO_PLT;
15774 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
15775 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
15776 unsigned OpFlags = RISCVII::MO_CALL;
15778 if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
15780 OpFlags = RISCVII::MO_PLT;
15782 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
15785 // The first call operand is the chain and the second is the target address.
15786 SmallVector<SDValue, 8> Ops;
15787 Ops.push_back(Chain);
15788 Ops.push_back(Callee);
15790 // Add argument registers to the end of the list so that they are
15791 // known live into the call.
15792 for (auto &Reg : RegsToPass)
15793 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
15795 if (!IsTailCall) {
15796 // Add a register mask operand representing the call-preserved registers.
15797 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
15798 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
15799 assert(Mask && "Missing call preserved mask for calling convention");
15800 Ops.push_back(DAG.getRegisterMask(Mask));
15801 }
15803 // Glue the call to the argument copies, if any.
15804 if (Glue.getNode())
15805 Ops.push_back(Glue);
15807 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
15808 "Unexpected CFI type for a direct call");
15810 // Emit the call.
15811 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
15813 if (IsTailCall) {
15814 MF.getFrameInfo().setHasTailCall();
15815 SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
15816 if (CLI.CFIType)
15817 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
15818 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
15819 return Ret;
15820 }
15822 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
15823 if (CLI.CFIType)
15824 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
15825 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
15826 Glue = Chain.getValue(1);
15828 // Mark the end of the call, which is glued to the call itself.
15829 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
15830 Glue = Chain.getValue(1);
15832 // Assign locations to each value returned by this call.
15833 SmallVector<CCValAssign, 16> RVLocs;
15834 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
15835 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV);
15837 // Copy all of the result registers out of their specified physreg.
15838 for (auto &VA : RVLocs) {
15839 // Copy the value out.
15840 SDValue RetValue =
15841 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
15842 // Glue the RetValue to the end of the call sequence
15843 Chain = RetValue.getValue(1);
15844 Glue = RetValue.getValue(2);
15846 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
15847 assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
15848 SDValue RetValue2 =
15849 DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
15850 Chain = RetValue2.getValue(1);
15851 Glue = RetValue2.getValue(2);
15852 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
15853 RetValue2);
15854 } else
15856 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
15858 InVals.push_back(RetValue);
15859 }
15861 return Chain;
15862 }
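// Illustrative sketch (not a verbatim dump) of the DAG built above for a
// simple non-tail call `int r = f(a);` on RV64:
//   t0: ch = callseq_start t(-1), 0, 0
//   t1: ch,glue = CopyToReg t0, Register:i64 $x10, %a
//   t2: ch,glue = RISCVISD::CALL t1, TargetGlobalAddress:i64<@f>,
//                 Register:i64 $x10, RegisterMask, t1:1
//   t3: ch,glue = callseq_end t2, 0, 0, t2:1
//   t4: i64,ch,glue = CopyFromReg t3, Register:i64 $x10, t3:1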
15864 bool RISCVTargetLowering::CanLowerReturn(
15865 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
15866 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
15867 SmallVector<CCValAssign, 16> RVLocs;
15868 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
15870 std::optional<unsigned> FirstMaskArgument;
15871 if (Subtarget.hasVInstructions())
15872 FirstMaskArgument = preAssignMask(Outs);
15874 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
15875 MVT VT = Outs[i].VT;
15876 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
15877 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
15878 if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
15879 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
15880 *this, FirstMaskArgument))
15881 return false;
15882 }
15883 return true;
15884 }
15886 SDValue
15887 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
15888 bool IsVarArg,
15889 const SmallVectorImpl<ISD::OutputArg> &Outs,
15890 const SmallVectorImpl<SDValue> &OutVals,
15891 const SDLoc &DL, SelectionDAG &DAG) const {
15892 MachineFunction &MF = DAG.getMachineFunction();
15893 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
15895 // Stores the assignment of the return value to a location.
15896 SmallVector<CCValAssign, 16> RVLocs;
15898 // Info about the registers and stack slot.
15899 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
15900 *DAG.getContext());
15902 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
15903 nullptr, RISCV::CC_RISCV);
15905 if (CallConv == CallingConv::GHC && !RVLocs.empty())
15906 report_fatal_error("GHC functions return void only");
15908 SDValue Glue;
15909 SmallVector<SDValue, 4> RetOps(1, Chain);
15911 // Copy the result values into the output registers.
15912 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
15913 SDValue Val = OutVals[i];
15914 CCValAssign &VA = RVLocs[i];
15915 assert(VA.isRegLoc() && "Can only return in registers!");
15917 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
15918 // Handle returning f64 on RV32D with a soft float ABI.
15919 assert(VA.isRegLoc() && "Expected return via registers");
15920 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
15921 DAG.getVTList(MVT::i32, MVT::i32), Val);
15922 SDValue Lo = SplitF64.getValue(0);
15923 SDValue Hi = SplitF64.getValue(1);
15924 Register RegLo = VA.getLocReg();
15925 assert(RegLo < RISCV::X31 && "Invalid register pair");
15926 Register RegHi = RegLo + 1;
15928 if (STI.isRegisterReservedByUser(RegLo) ||
15929 STI.isRegisterReservedByUser(RegHi))
15930 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
15931 MF.getFunction(),
15932 "Return value register required, but has been reserved."});
15934 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
15935 Glue = Chain.getValue(1);
15936 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
15937 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
15938 Glue = Chain.getValue(1);
15939 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
15940 } else {
15941 // Handle a 'normal' return.
15942 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
15943 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
15945 if (STI.isRegisterReservedByUser(VA.getLocReg()))
15946 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
15947 MF.getFunction(),
15948 "Return value register required, but has been reserved."});
15950 // Guarantee that all emitted copies are stuck together.
15951 Glue = Chain.getValue(1);
15952 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
15953 }
15954 }
15956 RetOps[0] = Chain; // Update chain.
15958 // Add the glue node if we have it.
15959 if (Glue.getNode()) {
15960 RetOps.push_back(Glue);
15961 }
15963 if (any_of(RVLocs,
15964 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
15965 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
15967 unsigned RetOpc = RISCVISD::RET_GLUE;
15968 // Interrupt service routines use different return instructions.
15969 const Function &Func = DAG.getMachineFunction().getFunction();
15970 if (Func.hasFnAttribute("interrupt")) {
15971 if (!Func.getReturnType()->isVoidTy())
15972 report_fatal_error(
15973 "Functions with the interrupt attribute must have void return type!");
15975 MachineFunction &MF = DAG.getMachineFunction();
15976 StringRef Kind =
15977 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
15979 if (Kind == "supervisor")
15980 RetOpc = RISCVISD::SRET_GLUE;
15981 else
15982 RetOpc = RISCVISD::MRET_GLUE;
15983 }
15985 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
15986 }
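// For example (illustrative): a handler declared as
//   __attribute__((interrupt("machine"))) void isr(void) {}
// carries "interrupt"="machine" on the IR function, so the code above picks
// RISCVISD::MRET_GLUE and the return is emitted as `mret` instead of `ret`;
// "supervisor" selects `sret` in the same way.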
15988 void RISCVTargetLowering::validateCCReservedRegs(
15989 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
15990 MachineFunction &MF) const {
15991 const Function &F = MF.getFunction();
15992 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
15994 if (llvm::any_of(Regs, [&STI](auto Reg) {
15995 return STI.isRegisterReservedByUser(Reg.first);
15996 }))
15997 F.getContext().diagnose(DiagnosticInfoUnsupported{
15998 F, "Argument register required, but has been reserved."});
15999 }
16001 // Check if the result of the node is only used as a return value, as
16002 // otherwise we can't perform a tail-call.
16003 bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
16004 if (N->getNumValues() != 1)
16005 return false;
16006 if (!N->hasNUsesOfValue(1, 0))
16007 return false;
16009 SDNode *Copy = *N->use_begin();
16011 if (Copy->getOpcode() == ISD::BITCAST) {
16012 return isUsedByReturnOnly(Copy, Chain);
16013 }
16015 // TODO: Handle additional opcodes in order to support tail-calling libcalls
16016 // with soft float ABIs.
16017 if (Copy->getOpcode() != ISD::CopyToReg) {
16018 return false;
16019 }
16021 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
16022 // isn't safe to perform a tail call.
16023 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
16024 return false;
16026 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
16027 bool HasRet = false;
16028 for (SDNode *Node : Copy->uses()) {
16029 if (Node->getOpcode() != RISCVISD::RET_GLUE)
16030 return false;
16031 HasRet = true;
16032 }
16034 if (!HasRet)
16035 return false;
16036 Chain = Copy->getOperand(0);
16037 return true;
16038 }
16040 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
16041 return CI->isTailCall();
16042 }
16044 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
16045 #define NODE_NAME_CASE(NODE) \
16046 case RISCVISD::NODE: \
16047 return "RISCVISD::" #NODE;
16048 // clang-format off
16049 switch ((RISCVISD::NodeType)Opcode) {
16050 case RISCVISD::FIRST_NUMBER:
16051 break;
16052 NODE_NAME_CASE(RET_GLUE)
16053 NODE_NAME_CASE(SRET_GLUE)
16054 NODE_NAME_CASE(MRET_GLUE)
16055 NODE_NAME_CASE(CALL)
16056 NODE_NAME_CASE(SELECT_CC)
16057 NODE_NAME_CASE(BR_CC)
16058 NODE_NAME_CASE(BuildPairF64)
16059 NODE_NAME_CASE(SplitF64)
16060 NODE_NAME_CASE(TAIL)
16061 NODE_NAME_CASE(ADD_LO)
16062 NODE_NAME_CASE(HI)
16063 NODE_NAME_CASE(LLA)
16064 NODE_NAME_CASE(LGA)
16065 NODE_NAME_CASE(ADD_TPREL)
16066 NODE_NAME_CASE(LA_TLS_IE)
16067 NODE_NAME_CASE(LA_TLS_GD)
16068 NODE_NAME_CASE(MULHSU)
16069 NODE_NAME_CASE(SLLW)
16070 NODE_NAME_CASE(SRAW)
16071 NODE_NAME_CASE(SRLW)
16072 NODE_NAME_CASE(DIVW)
16073 NODE_NAME_CASE(DIVUW)
16074 NODE_NAME_CASE(REMUW)
16075 NODE_NAME_CASE(ROLW)
16076 NODE_NAME_CASE(RORW)
16077 NODE_NAME_CASE(CLZW)
16078 NODE_NAME_CASE(CTZW)
16079 NODE_NAME_CASE(ABSW)
16080 NODE_NAME_CASE(FMV_H_X)
16081 NODE_NAME_CASE(FMV_X_ANYEXTH)
16082 NODE_NAME_CASE(FMV_X_SIGNEXTH)
16083 NODE_NAME_CASE(FMV_W_X_RV64)
16084 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
16085 NODE_NAME_CASE(FCVT_X)
16086 NODE_NAME_CASE(FCVT_XU)
16087 NODE_NAME_CASE(FCVT_W_RV64)
16088 NODE_NAME_CASE(FCVT_WU_RV64)
16089 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
16090 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
16091 NODE_NAME_CASE(FP_ROUND_BF16)
16092 NODE_NAME_CASE(FP_EXTEND_BF16)
16093 NODE_NAME_CASE(FROUND)
16094 NODE_NAME_CASE(FPCLASS)
16095 NODE_NAME_CASE(FMAX)
16096 NODE_NAME_CASE(FMIN)
16097 NODE_NAME_CASE(READ_CYCLE_WIDE)
16098 NODE_NAME_CASE(BREV8)
16099 NODE_NAME_CASE(ORC_B)
16100 NODE_NAME_CASE(ZIP)
16101 NODE_NAME_CASE(UNZIP)
16102 NODE_NAME_CASE(CLMUL)
16103 NODE_NAME_CASE(CLMULH)
16104 NODE_NAME_CASE(CLMULR)
16105 NODE_NAME_CASE(SHA256SIG0)
16106 NODE_NAME_CASE(SHA256SIG1)
16107 NODE_NAME_CASE(SHA256SUM0)
16108 NODE_NAME_CASE(SHA256SUM1)
16109 NODE_NAME_CASE(SM4KS)
16110 NODE_NAME_CASE(SM4ED)
16111 NODE_NAME_CASE(SM3P0)
16112 NODE_NAME_CASE(SM3P1)
16113 NODE_NAME_CASE(TH_LWD)
16114 NODE_NAME_CASE(TH_LWUD)
16115 NODE_NAME_CASE(TH_LDD)
16116 NODE_NAME_CASE(TH_SWD)
16117 NODE_NAME_CASE(TH_SDD)
16118 NODE_NAME_CASE(VMV_V_V_VL)
16119 NODE_NAME_CASE(VMV_V_X_VL)
16120 NODE_NAME_CASE(VFMV_V_F_VL)
16121 NODE_NAME_CASE(VMV_X_S)
16122 NODE_NAME_CASE(VMV_S_X_VL)
16123 NODE_NAME_CASE(VFMV_S_F_VL)
16124 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
16125 NODE_NAME_CASE(READ_VLENB)
16126 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
16127 NODE_NAME_CASE(VSLIDEUP_VL)
16128 NODE_NAME_CASE(VSLIDE1UP_VL)
16129 NODE_NAME_CASE(VSLIDEDOWN_VL)
16130 NODE_NAME_CASE(VSLIDE1DOWN_VL)
16131 NODE_NAME_CASE(VFSLIDE1UP_VL)
16132 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
16133 NODE_NAME_CASE(VID_VL)
16134 NODE_NAME_CASE(VFNCVT_ROD_VL)
16135 NODE_NAME_CASE(VECREDUCE_ADD_VL)
16136 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
16137 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
16138 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
16139 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
16140 NODE_NAME_CASE(VECREDUCE_AND_VL)
16141 NODE_NAME_CASE(VECREDUCE_OR_VL)
16142 NODE_NAME_CASE(VECREDUCE_XOR_VL)
16143 NODE_NAME_CASE(VECREDUCE_FADD_VL)
16144 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
16145 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
16146 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
16147 NODE_NAME_CASE(ADD_VL)
16148 NODE_NAME_CASE(AND_VL)
16149 NODE_NAME_CASE(MUL_VL)
16150 NODE_NAME_CASE(OR_VL)
16151 NODE_NAME_CASE(SDIV_VL)
16152 NODE_NAME_CASE(SHL_VL)
16153 NODE_NAME_CASE(SREM_VL)
16154 NODE_NAME_CASE(SRA_VL)
16155 NODE_NAME_CASE(SRL_VL)
16156 NODE_NAME_CASE(SUB_VL)
16157 NODE_NAME_CASE(UDIV_VL)
16158 NODE_NAME_CASE(UREM_VL)
16159 NODE_NAME_CASE(XOR_VL)
16160 NODE_NAME_CASE(SADDSAT_VL)
16161 NODE_NAME_CASE(UADDSAT_VL)
16162 NODE_NAME_CASE(SSUBSAT_VL)
16163 NODE_NAME_CASE(USUBSAT_VL)
16164 NODE_NAME_CASE(FADD_VL)
16165 NODE_NAME_CASE(FSUB_VL)
16166 NODE_NAME_CASE(FMUL_VL)
16167 NODE_NAME_CASE(FDIV_VL)
16168 NODE_NAME_CASE(FNEG_VL)
16169 NODE_NAME_CASE(FABS_VL)
16170 NODE_NAME_CASE(FSQRT_VL)
16171 NODE_NAME_CASE(FCLASS_VL)
16172 NODE_NAME_CASE(VFMADD_VL)
16173 NODE_NAME_CASE(VFNMADD_VL)
16174 NODE_NAME_CASE(VFMSUB_VL)
16175 NODE_NAME_CASE(VFNMSUB_VL)
16176 NODE_NAME_CASE(VFWMADD_VL)
16177 NODE_NAME_CASE(VFWNMADD_VL)
16178 NODE_NAME_CASE(VFWMSUB_VL)
16179 NODE_NAME_CASE(VFWNMSUB_VL)
16180 NODE_NAME_CASE(FCOPYSIGN_VL)
16181 NODE_NAME_CASE(SMIN_VL)
16182 NODE_NAME_CASE(SMAX_VL)
16183 NODE_NAME_CASE(UMIN_VL)
16184 NODE_NAME_CASE(UMAX_VL)
16185 NODE_NAME_CASE(BITREVERSE_VL)
16186 NODE_NAME_CASE(BSWAP_VL)
16187 NODE_NAME_CASE(CTLZ_VL)
16188 NODE_NAME_CASE(CTTZ_VL)
16189 NODE_NAME_CASE(CTPOP_VL)
16190 NODE_NAME_CASE(FMINNUM_VL)
16191 NODE_NAME_CASE(FMAXNUM_VL)
16192 NODE_NAME_CASE(MULHS_VL)
16193 NODE_NAME_CASE(MULHU_VL)
16194 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
16195 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
16196 NODE_NAME_CASE(VFCVT_RM_X_F_VL)
16197 NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
16198 NODE_NAME_CASE(VFCVT_X_F_VL)
16199 NODE_NAME_CASE(VFCVT_XU_F_VL)
16200 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
16201 NODE_NAME_CASE(SINT_TO_FP_VL)
16202 NODE_NAME_CASE(UINT_TO_FP_VL)
16203 NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
16204 NODE_NAME_CASE(VFCVT_RM_F_X_VL)
16205 NODE_NAME_CASE(FP_EXTEND_VL)
16206 NODE_NAME_CASE(FP_ROUND_VL)
16207 NODE_NAME_CASE(STRICT_FADD_VL)
16208 NODE_NAME_CASE(STRICT_FSUB_VL)
16209 NODE_NAME_CASE(STRICT_FMUL_VL)
16210 NODE_NAME_CASE(STRICT_FDIV_VL)
16211 NODE_NAME_CASE(STRICT_FSQRT_VL)
16212 NODE_NAME_CASE(STRICT_VFMADD_VL)
16213 NODE_NAME_CASE(STRICT_VFNMADD_VL)
16214 NODE_NAME_CASE(STRICT_VFMSUB_VL)
16215 NODE_NAME_CASE(STRICT_VFNMSUB_VL)
16216 NODE_NAME_CASE(STRICT_FP_ROUND_VL)
16217 NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
16218 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
16219 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
16220 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
16221 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
16222 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
16223 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
16224 NODE_NAME_CASE(STRICT_FSETCC_VL)
16225 NODE_NAME_CASE(STRICT_FSETCCS_VL)
16226 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
16227 NODE_NAME_CASE(VWMUL_VL)
16228 NODE_NAME_CASE(VWMULU_VL)
16229 NODE_NAME_CASE(VWMULSU_VL)
16230 NODE_NAME_CASE(VWADD_VL)
16231 NODE_NAME_CASE(VWADDU_VL)
16232 NODE_NAME_CASE(VWSUB_VL)
16233 NODE_NAME_CASE(VWSUBU_VL)
16234 NODE_NAME_CASE(VWADD_W_VL)
16235 NODE_NAME_CASE(VWADDU_W_VL)
16236 NODE_NAME_CASE(VWSUB_W_VL)
16237 NODE_NAME_CASE(VWSUBU_W_VL)
16238 NODE_NAME_CASE(VFWMUL_VL)
16239 NODE_NAME_CASE(VFWADD_VL)
16240 NODE_NAME_CASE(VFWSUB_VL)
16241 NODE_NAME_CASE(VFWADD_W_VL)
16242 NODE_NAME_CASE(VFWSUB_W_VL)
16243 NODE_NAME_CASE(VWMACC_VL)
16244 NODE_NAME_CASE(VWMACCU_VL)
16245 NODE_NAME_CASE(VWMACCSU_VL)
16246 NODE_NAME_CASE(VNSRL_VL)
16247 NODE_NAME_CASE(SETCC_VL)
16248 NODE_NAME_CASE(VSELECT_VL)
16249 NODE_NAME_CASE(VP_MERGE_VL)
16250 NODE_NAME_CASE(VMAND_VL)
16251 NODE_NAME_CASE(VMOR_VL)
16252 NODE_NAME_CASE(VMXOR_VL)
16253 NODE_NAME_CASE(VMCLR_VL)
16254 NODE_NAME_CASE(VMSET_VL)
16255 NODE_NAME_CASE(VRGATHER_VX_VL)
16256 NODE_NAME_CASE(VRGATHER_VV_VL)
16257 NODE_NAME_CASE(VRGATHEREI16_VV_VL)
16258 NODE_NAME_CASE(VSEXT_VL)
16259 NODE_NAME_CASE(VZEXT_VL)
16260 NODE_NAME_CASE(VCPOP_VL)
16261 NODE_NAME_CASE(VFIRST_VL)
16262 NODE_NAME_CASE(READ_CSR)
16263 NODE_NAME_CASE(WRITE_CSR)
16264 NODE_NAME_CASE(SWAP_CSR)
16265 NODE_NAME_CASE(CZERO_EQZ)
16266 NODE_NAME_CASE(CZERO_NEZ)
16267 }
16268 return nullptr;
16269 }
16270 #undef NODE_NAME_CASE
16273 /// getConstraintType - Given a constraint letter, return the type of
16274 /// constraint it is for this target.
16275 RISCVTargetLowering::ConstraintType
16276 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
16277 if (Constraint.size() == 1) {
16278 switch (Constraint[0]) {
16279 default:
16280 break;
16281 case 'f':
16282 return C_RegisterClass;
16283 case 'I':
16284 case 'J':
16285 case 'K':
16286 return C_Immediate;
16287 case 'A':
16288 return C_Memory;
16289 case 'S': // A symbolic address
16290 return C_Other;
16291 }
16292 } else {
16293 if (Constraint == "vr" || Constraint == "vm")
16294 return C_RegisterClass;
16295 }
16296 return TargetLowering::getConstraintType(Constraint);
16297 }
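// Example (illustrative) of the constraint letters classified above:
//   int res;
//   asm ("addi %0, %1, %2" : "=r"(res) : "r"(x), "I"(12));
// 'I' is classified as C_Immediate (a simm12), 'A' as C_Memory, and the
// multi-letter "vr"/"vm" constraints select RVV register classes.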
16299 std::pair<unsigned, const TargetRegisterClass *>
16300 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
16301 StringRef Constraint,
16302 MVT VT) const {
16303 // First, see if this is a constraint that directly corresponds to a RISC-V
16304 // register class.
16305 if (Constraint.size() == 1) {
16306 switch (Constraint[0]) {
16307 case 'r':
16308 // TODO: Support fixed vectors up to XLen for P extension?
16309 if (VT.isVector())
16310 break;
16311 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
16312 case 'f':
16313 if (Subtarget.hasStdExtZfhOrZfhmin() && VT == MVT::f16)
16314 return std::make_pair(0U, &RISCV::FPR16RegClass);
16315 if (Subtarget.hasStdExtF() && VT == MVT::f32)
16316 return std::make_pair(0U, &RISCV::FPR32RegClass);
16317 if (Subtarget.hasStdExtD() && VT == MVT::f64)
16318 return std::make_pair(0U, &RISCV::FPR64RegClass);
16319 break;
16320 default:
16321 break;
16322 }
16323 } else if (Constraint == "vr") {
16324 for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
16325 &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
16326 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
16327 return std::make_pair(0U, RC);
16328 }
16329 } else if (Constraint == "vm") {
16330 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
16331 return std::make_pair(0U, &RISCV::VMV0RegClass);
16332 }
16334 // Clang will correctly decode the usage of register name aliases into their
16335 // official names. However, other frontends like `rustc` do not. This allows
16336 // users of these frontends to use the ABI names for registers in LLVM-style
16337 // register constraints.
16338 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
16339 .Case("{zero}", RISCV::X0)
16340 .Case("{ra}", RISCV::X1)
16341 .Case("{sp}", RISCV::X2)
16342 .Case("{gp}", RISCV::X3)
16343 .Case("{tp}", RISCV::X4)
16344 .Case("{t0}", RISCV::X5)
16345 .Case("{t1}", RISCV::X6)
16346 .Case("{t2}", RISCV::X7)
16347 .Cases("{s0}", "{fp}", RISCV::X8)
16348 .Case("{s1}", RISCV::X9)
16349 .Case("{a0}", RISCV::X10)
16350 .Case("{a1}", RISCV::X11)
16351 .Case("{a2}", RISCV::X12)
16352 .Case("{a3}", RISCV::X13)
16353 .Case("{a4}", RISCV::X14)
16354 .Case("{a5}", RISCV::X15)
16355 .Case("{a6}", RISCV::X16)
16356 .Case("{a7}", RISCV::X17)
16357 .Case("{s2}", RISCV::X18)
16358 .Case("{s3}", RISCV::X19)
16359 .Case("{s4}", RISCV::X20)
16360 .Case("{s5}", RISCV::X21)
16361 .Case("{s6}", RISCV::X22)
16362 .Case("{s7}", RISCV::X23)
16363 .Case("{s8}", RISCV::X24)
16364 .Case("{s9}", RISCV::X25)
16365 .Case("{s10}", RISCV::X26)
16366 .Case("{s11}", RISCV::X27)
16367 .Case("{t3}", RISCV::X28)
16368 .Case("{t4}", RISCV::X29)
16369 .Case("{t5}", RISCV::X30)
16370 .Case("{t6}", RISCV::X31)
16371 .Default(RISCV::NoRegister);
16372 if (XRegFromAlias != RISCV::NoRegister)
16373 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
16375 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
16376 // TableGen record rather than the AsmName to choose registers for InlineAsm
16377 // constraints, plus we want to match those names to the widest floating point
16378 // register type available, manually select floating point registers here.
16380 // The second case is the ABI name of the register, so that frontends can also
16381 // use the ABI names in register constraint lists.
16382 if (Subtarget.hasStdExtF()) {
16383 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
16384 .Cases("{f0}", "{ft0}", RISCV::F0_F)
16385 .Cases("{f1}", "{ft1}", RISCV::F1_F)
16386 .Cases("{f2}", "{ft2}", RISCV::F2_F)
16387 .Cases("{f3}", "{ft3}", RISCV::F3_F)
16388 .Cases("{f4}", "{ft4}", RISCV::F4_F)
16389 .Cases("{f5}", "{ft5}", RISCV::F5_F)
16390 .Cases("{f6}", "{ft6}", RISCV::F6_F)
16391 .Cases("{f7}", "{ft7}", RISCV::F7_F)
16392 .Cases("{f8}", "{fs0}", RISCV::F8_F)
16393 .Cases("{f9}", "{fs1}", RISCV::F9_F)
16394 .Cases("{f10}", "{fa0}", RISCV::F10_F)
16395 .Cases("{f11}", "{fa1}", RISCV::F11_F)
16396 .Cases("{f12}", "{fa2}", RISCV::F12_F)
16397 .Cases("{f13}", "{fa3}", RISCV::F13_F)
16398 .Cases("{f14}", "{fa4}", RISCV::F14_F)
16399 .Cases("{f15}", "{fa5}", RISCV::F15_F)
16400 .Cases("{f16}", "{fa6}", RISCV::F16_F)
16401 .Cases("{f17}", "{fa7}", RISCV::F17_F)
16402 .Cases("{f18}", "{fs2}", RISCV::F18_F)
16403 .Cases("{f19}", "{fs3}", RISCV::F19_F)
16404 .Cases("{f20}", "{fs4}", RISCV::F20_F)
16405 .Cases("{f21}", "{fs5}", RISCV::F21_F)
16406 .Cases("{f22}", "{fs6}", RISCV::F22_F)
16407 .Cases("{f23}", "{fs7}", RISCV::F23_F)
16408 .Cases("{f24}", "{fs8}", RISCV::F24_F)
16409 .Cases("{f25}", "{fs9}", RISCV::F25_F)
16410 .Cases("{f26}", "{fs10}", RISCV::F26_F)
16411 .Cases("{f27}", "{fs11}", RISCV::F27_F)
16412 .Cases("{f28}", "{ft8}", RISCV::F28_F)
16413 .Cases("{f29}", "{ft9}", RISCV::F29_F)
16414 .Cases("{f30}", "{ft10}", RISCV::F30_F)
16415 .Cases("{f31}", "{ft11}", RISCV::F31_F)
16416 .Default(RISCV::NoRegister);
16417 if (FReg != RISCV::NoRegister) {
16418 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
16419 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
16420 unsigned RegNo = FReg - RISCV::F0_F;
16421 unsigned DReg = RISCV::F0_D + RegNo;
16422 return std::make_pair(DReg, &RISCV::FPR64RegClass);
16423 }
16424 if (VT == MVT::f32 || VT == MVT::Other)
16425 return std::make_pair(FReg, &RISCV::FPR32RegClass);
16426 if (Subtarget.hasStdExtZfhOrZfhmin() && VT == MVT::f16) {
16427 unsigned RegNo = FReg - RISCV::F0_F;
16428 unsigned HReg = RISCV::F0_H + RegNo;
16429 return std::make_pair(HReg, &RISCV::FPR16RegClass);
16430 }
16431 }
16432 }
16434 if (Subtarget.hasVInstructions()) {
16435 Register VReg = StringSwitch<Register>(Constraint.lower())
16436 .Case("{v0}", RISCV::V0)
16437 .Case("{v1}", RISCV::V1)
16438 .Case("{v2}", RISCV::V2)
16439 .Case("{v3}", RISCV::V3)
16440 .Case("{v4}", RISCV::V4)
16441 .Case("{v5}", RISCV::V5)
16442 .Case("{v6}", RISCV::V6)
16443 .Case("{v7}", RISCV::V7)
16444 .Case("{v8}", RISCV::V8)
16445 .Case("{v9}", RISCV::V9)
16446 .Case("{v10}", RISCV::V10)
16447 .Case("{v11}", RISCV::V11)
16448 .Case("{v12}", RISCV::V12)
16449 .Case("{v13}", RISCV::V13)
16450 .Case("{v14}", RISCV::V14)
16451 .Case("{v15}", RISCV::V15)
16452 .Case("{v16}", RISCV::V16)
16453 .Case("{v17}", RISCV::V17)
16454 .Case("{v18}", RISCV::V18)
16455 .Case("{v19}", RISCV::V19)
16456 .Case("{v20}", RISCV::V20)
16457 .Case("{v21}", RISCV::V21)
16458 .Case("{v22}", RISCV::V22)
16459 .Case("{v23}", RISCV::V23)
16460 .Case("{v24}", RISCV::V24)
16461 .Case("{v25}", RISCV::V25)
16462 .Case("{v26}", RISCV::V26)
16463 .Case("{v27}", RISCV::V27)
16464 .Case("{v28}", RISCV::V28)
16465 .Case("{v29}", RISCV::V29)
16466 .Case("{v30}", RISCV::V30)
16467 .Case("{v31}", RISCV::V31)
16468 .Default(RISCV::NoRegister);
16469 if (VReg != RISCV::NoRegister) {
16470 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
16471 return std::make_pair(VReg, &RISCV::VMRegClass);
16472 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
16473 return std::make_pair(VReg, &RISCV::VRRegClass);
16474 for (const auto *RC :
16475 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
16476 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
16477 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
16478 return std::make_pair(VReg, RC);
16479 }
16480 }
16481 }
16482 }
16484 std::pair<Register, const TargetRegisterClass *> Res =
16485 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
16487 // If we picked one of the Zfinx register classes, remap it to the GPR class.
16488 // FIXME: When Zfinx is supported in CodeGen this will need to take the
16489 // Subtarget into account.
16490 if (Res.second == &RISCV::GPRF16RegClass ||
16491 Res.second == &RISCV::GPRF32RegClass ||
16492 Res.second == &RISCV::GPRPF64RegClass)
16493 return std::make_pair(Res.first, &RISCV::GPRRegClass);
16495 return Res;
16496 }
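// Example (illustrative): because of the alias table above, both
//   register long a asm("a0");
//   register long b asm("x10");
// pin to RISCV::X10, and with the D extension "{fa0}" resolves to F10_D in
// FPR64, the widest floating-point register class available.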
16498 unsigned
16499 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
16500 // Currently only support length 1 constraints.
16501 if (ConstraintCode.size() == 1) {
16502 switch (ConstraintCode[0]) {
16503 case 'A':
16504 return InlineAsm::Constraint_A;
16505 default:
16506 break;
16507 }
16508 }
16510 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
16511 }
16513 void RISCVTargetLowering::LowerAsmOperandForConstraint(
16514 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
16515 SelectionDAG &DAG) const {
16516 // Currently only support length 1 constraints.
16517 if (Constraint.length() == 1) {
16518 switch (Constraint[0]) {
16519 case 'I':
16520 // Validate & create a 12-bit signed immediate operand.
16521 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
16522 uint64_t CVal = C->getSExtValue();
16523 if (isInt<12>(CVal))
16524 Ops.push_back(
16525 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
16526 }
16527 return;
16528 case 'J':
16529 // Validate & create an integer zero operand.
16530 if (isNullConstant(Op))
16531 Ops.push_back(
16532 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
16533 return;
16534 case 'K':
16535 // Validate & create a 5-bit unsigned immediate operand.
16536 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
16537 uint64_t CVal = C->getZExtValue();
16538 if (isUInt<5>(CVal))
16539 Ops.push_back(
16540 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
16541 }
16542 return;
16543 case 'S':
16544 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
16545 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
16546 GA->getValueType(0)));
16547 } else if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
16548 Ops.push_back(DAG.getTargetBlockAddress(BA->getBlockAddress(),
16549 BA->getValueType(0)));
16550 }
16551 return;
16552 default:
16553 break;
16554 }
16555 }
16556 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
16557 }
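// Example (illustrative) of the 'K' path above:
//   asm volatile ("csrrwi zero, 0x300, %0" :: "K"(5));
// 5 passes the isUInt<5> check and becomes a target constant; a value such
// as 40 fails validation and the constraint is rejected.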
16559 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
16560 Instruction *Inst,
16561 AtomicOrdering Ord) const {
16562 if (Subtarget.hasStdExtZtso()) {
16563 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
16564 return Builder.CreateFence(Ord);
16565 return nullptr;
16566 }
16568 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
16569 return Builder.CreateFence(Ord);
16570 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
16571 return Builder.CreateFence(AtomicOrdering::Release);
16572 return nullptr;
16573 }
16575 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
16576 Instruction *Inst,
16577 AtomicOrdering Ord) const {
16578 if (Subtarget.hasStdExtZtso())
16579 return nullptr;
16581 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
16582 return Builder.CreateFence(AtomicOrdering::Acquire);
16583 if (Subtarget.enableSeqCstTrailingFence() && isa<StoreInst>(Inst) &&
16584 Ord == AtomicOrdering::SequentiallyConsistent)
16585 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
16586 return nullptr;
16587 }
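// Illustrative RVWMO lowering produced by the two hooks above (no Ztso):
//   load seq_cst   -> fence rw,rw ; lw ; fence r,rw
//   load acquire   -> lw ; fence r,rw
//   store release  -> fence rw,w ; sw
// and, when enableSeqCstTrailingFence() is set,
//   store seq_cst  -> fence rw,w ; sw ; fence rw,rw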
16589 TargetLowering::AtomicExpansionKind
16590 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
16591 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
16592 // point operations can't be used in an lr/sc sequence without breaking the
16593 // forward-progress guarantee.
16594 if (AI->isFloatingPointOperation() ||
16595 AI->getOperation() == AtomicRMWInst::UIncWrap ||
16596 AI->getOperation() == AtomicRMWInst::UDecWrap)
16597 return AtomicExpansionKind::CmpXChg;
16599 // Don't expand forced atomics, we want to have __sync libcalls instead.
16600 if (Subtarget.hasForcedAtomics())
16601 return AtomicExpansionKind::None;
16603 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
16604 if (Size == 8 || Size == 16)
16605 return AtomicExpansionKind::MaskedIntrinsic;
16606 return AtomicExpansionKind::None;
16607 }
16609 static Intrinsic::ID
16610 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
16611 if (XLen == 32) {
16612 switch (BinOp) {
16613 default:
16614 llvm_unreachable("Unexpected AtomicRMW BinOp");
16615 case AtomicRMWInst::Xchg:
16616 return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
16617 case AtomicRMWInst::Add:
16618 return Intrinsic::riscv_masked_atomicrmw_add_i32;
16619 case AtomicRMWInst::Sub:
16620 return Intrinsic::riscv_masked_atomicrmw_sub_i32;
16621 case AtomicRMWInst::Nand:
16622 return Intrinsic::riscv_masked_atomicrmw_nand_i32;
16623 case AtomicRMWInst::Max:
16624 return Intrinsic::riscv_masked_atomicrmw_max_i32;
16625 case AtomicRMWInst::Min:
16626 return Intrinsic::riscv_masked_atomicrmw_min_i32;
16627 case AtomicRMWInst::UMax:
16628 return Intrinsic::riscv_masked_atomicrmw_umax_i32;
16629 case AtomicRMWInst::UMin:
16630 return Intrinsic::riscv_masked_atomicrmw_umin_i32;
16631 }
16632 }
16634 if (XLen == 64) {
16635 switch (BinOp) {
16636 default:
16637 llvm_unreachable("Unexpected AtomicRMW BinOp");
16638 case AtomicRMWInst::Xchg:
16639 return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
16640 case AtomicRMWInst::Add:
16641 return Intrinsic::riscv_masked_atomicrmw_add_i64;
16642 case AtomicRMWInst::Sub:
16643 return Intrinsic::riscv_masked_atomicrmw_sub_i64;
16644 case AtomicRMWInst::Nand:
16645 return Intrinsic::riscv_masked_atomicrmw_nand_i64;
16646 case AtomicRMWInst::Max:
16647 return Intrinsic::riscv_masked_atomicrmw_max_i64;
16648 case AtomicRMWInst::Min:
16649 return Intrinsic::riscv_masked_atomicrmw_min_i64;
16650 case AtomicRMWInst::UMax:
16651 return Intrinsic::riscv_masked_atomicrmw_umax_i64;
16652 case AtomicRMWInst::UMin:
16653 return Intrinsic::riscv_masked_atomicrmw_umin_i64;
16654 }
16655 }
16657 llvm_unreachable("Unexpected XLen\n");
16658 }
16660 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
16661 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
16662 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
16663 unsigned XLen = Subtarget.getXLen();
16664 Value *Ordering =
16665 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
16666 Type *Tys[] = {AlignedAddr->getType()};
16667 Function *LrwOpScwLoop = Intrinsic::getDeclaration(
16668 AI->getModule(),
16669 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
16671 if (XLen == 64) {
16672 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
16673 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
16674 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
16675 }
16677 Value *Result;
16679 // Must pass the shift amount needed to sign extend the loaded value prior
16680 // to performing a signed comparison for min/max. ShiftAmt is the number of
16681 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
16682 // is the number of bits to left+right shift the value in order to
16683 // sign-extend.
16684 if (AI->getOperation() == AtomicRMWInst::Min ||
16685 AI->getOperation() == AtomicRMWInst::Max) {
16686 const DataLayout &DL = AI->getModule()->getDataLayout();
16687 unsigned ValWidth =
16688 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
16689 Value *SextShamt =
16690 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
16691 Result = Builder.CreateCall(LrwOpScwLoop,
16692 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
16693 } else {
16694 Result =
16695 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
16696 }
16698 if (XLen == 64)
16699 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
16700 return Result;
16701 }
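// Illustrative expansion (a sketch; value names invented): after
// AtomicExpand narrows an `atomicrmw add i8` to its containing aligned
// 32-bit word, the code above emits something like
//   %res = call i32 @llvm.riscv.masked.atomicrmw.add.i32(
//              ptr %aligned, i32 %shifted.incr, i32 %mask, i32 %ordering)
// which is later emitted as an LR.W/SC.W retry loop over the aligned word.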
16703 TargetLowering::AtomicExpansionKind
16704 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
16705 AtomicCmpXchgInst *CI) const {
16706 // Don't expand forced atomics, we want to have __sync libcalls instead.
16707 if (Subtarget.hasForcedAtomics())
16708 return AtomicExpansionKind::None;
16710 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
16711 if (Size == 8 || Size == 16)
16712 return AtomicExpansionKind::MaskedIntrinsic;
16713 return AtomicExpansionKind::None;
16714 }
16716 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
16717 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
16718 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
16719 unsigned XLen = Subtarget.getXLen();
16720 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
16721 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
16722 if (XLen == 64) {
16723 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
16724 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
16725 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
16726 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
16727 }
16728 Type *Tys[] = {AlignedAddr->getType()};
16729 Function *MaskedCmpXchg =
16730 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
16731 Value *Result = Builder.CreateCall(
16732 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
16733 if (XLen == 64)
16734 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
16735 return Result;
16736 }
16738 bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT IndexVT,
16739 EVT DataVT) const {
16743 bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
16744 EVT VT) const {
16745 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
16746 return false;
16748 switch (FPVT.getSimpleVT().SimpleTy) {
16749 case MVT::f16:
16750 return Subtarget.hasStdExtZfhOrZfhmin();
16751 case MVT::f32:
16752 return Subtarget.hasStdExtF();
16753 case MVT::f64:
16754 return Subtarget.hasStdExtD();
16755 default:
16756 return false;
16757 }
16758 }
16760 unsigned RISCVTargetLowering::getJumpTableEncoding() const {
16761 // If we are using the small code model, we can reduce size of jump table
16762 // entry to 4 bytes.
16763 if (Subtarget.is64Bit() && !isPositionIndependent() &&
16764 getTargetMachine().getCodeModel() == CodeModel::Small) {
16765 return MachineJumpTableInfo::EK_Custom32;
16766 }
16767 return TargetLowering::getJumpTableEncoding();
16768 }
16770 const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
16771 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
16772 unsigned uid, MCContext &Ctx) const {
16773 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
16774 getTargetMachine().getCodeModel() == CodeModel::Small);
16775 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
16776 }
16778 bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
16779 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
16780 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
16781 // a power of two as well.
16782 // FIXME: This doesn't work for zve32, but that's already broken
16783 // elsewhere for the same reason.
16784 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
16785 static_assert(RISCV::RVVBitsPerBlock == 64,
16786 "RVVBitsPerBlock changed, audit needed");
16790 bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
16791 SDValue &Offset,
16792 ISD::MemIndexedMode &AM,
16793 bool &IsInc,
16794 SelectionDAG &DAG) const {
16795 // Target does not support indexed loads.
16796 if (!Subtarget.hasVendorXTHeadMemIdx())
16797 return false;
16799 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
16800 return false;
16802 Base = Op->getOperand(0);
16803 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
16804 int64_t RHSC = RHS->getSExtValue();
16805 if (Op->getOpcode() == ISD::SUB)
16806 RHSC = -(uint64_t)RHSC;
16808 // The constants that can be encoded in the THeadMemIdx instructions
16809 // are of the form (sign_extend(imm5) << imm2).
16810 bool isLegalIndexedOffset = false;
16811 for (unsigned i = 0; i < 4; i++)
16812 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
16813 isLegalIndexedOffset = true;
16814 break;
16815 }
16817 if (!isLegalIndexedOffset)
16818 return false;
16820 IsInc = (Op->getOpcode() == ISD::ADD);
16821 Offset = Op->getOperand(1);
16822 return true;
16823 }
16825 return false;
16826 }
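// Illustrative offsets for the (sign_extend(imm5) << imm2) check above:
// 16 (= 8 << 1), 32 (= 8 << 2) and 120 (= 15 << 3) are encodable, while 121
// is not, since no shift amount in [0, 3] leaves a simm5 multiple.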
16828 bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
16829 SDValue &Offset,
16830 ISD::MemIndexedMode &AM,
16831 SelectionDAG &DAG) const {
16832 EVT VT;
16833 SDValue Ptr;
16834 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
16835 VT = LD->getMemoryVT();
16836 Ptr = LD->getBasePtr();
16837 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
16838 VT = ST->getMemoryVT();
16839 Ptr = ST->getBasePtr();
16840 } else
16841 return false;
16843 bool IsInc;
16844 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG))
16845 return false;
16847 AM = IsInc ? ISD::PRE_INC : ISD::PRE_DEC;
16848 return true;
16849 }
16851 bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
16852 SDValue &Base,
16853 SDValue &Offset,
16854 ISD::MemIndexedMode &AM,
16855 SelectionDAG &DAG) const {
16856 EVT VT;
16857 SDValue Ptr;
16858 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
16859 VT = LD->getMemoryVT();
16860 Ptr = LD->getBasePtr();
16861 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
16862 VT = ST->getMemoryVT();
16863 Ptr = ST->getBasePtr();
16864 } else
16865 return false;
16867 bool IsInc;
16868 if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG))
16869 return false;
16870 // Post-indexing updates the base, so it's not a valid transform
16871 // if that's not the same as the load's pointer.
16872 if (Ptr != Base)
16873 return false;
16875 AM = IsInc ? ISD::POST_INC : ISD::POST_DEC;
16876 return true;
16877 }
16879 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
16880 EVT VT) const {
16881 EVT SVT = VT.getScalarType();
16883 if (!SVT.isSimple())
16884 return false;
16886 switch (SVT.getSimpleVT().SimpleTy) {
16887 case MVT::f16:
16888 return VT.isVector() ? Subtarget.hasVInstructionsF16()
16889 : Subtarget.hasStdExtZfhOrZhinx();
16890 case MVT::f32:
16891 return Subtarget.hasStdExtFOrZfinx();
16892 case MVT::f64:
16893 return Subtarget.hasStdExtDOrZdinx();
16894 default:
16895 break;
16896 }
16898 return false;
16899 }
16901 Register RISCVTargetLowering::getExceptionPointerRegister(
16902 const Constant *PersonalityFn) const {
16903 return RISCV::X10;
16904 }
16906 Register RISCVTargetLowering::getExceptionSelectorRegister(
16907 const Constant *PersonalityFn) const {
16908 return RISCV::X11;
16909 }
16911 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
16912 // Return false to suppress the unnecessary extensions if the LibCall
16913 // arguments or return value is a float narrower than XLEN on a soft FP ABI.
16914 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
16915 Type.getSizeInBits() < Subtarget.getXLen()))
16916 return false;
16918 return true;
16919 }
16921 bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
16922 if (Subtarget.is64Bit() && Type == MVT::i32)
16923 return true;
16925 return IsSigned;
16926 }
16928 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
16929 SDValue C) const {
16930 // Check integral scalar types.
16931 const bool HasExtMOrZmmul =
16932 Subtarget.hasStdExtM() || Subtarget.hasStdExtZmmul();
16933 if (!VT.isScalarInteger())
16934 return false;
16936 // Omit the optimization if the subtarget has the M extension and the data
16937 // size exceeds XLen.
16938 if (HasExtMOrZmmul && VT.getSizeInBits() > Subtarget.getXLen())
16939 return false;
16941 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
16942 // Break the MUL to a SLLI and an ADD/SUB.
16943 const APInt &Imm = ConstNode->getAPIntValue();
16944 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
16945 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
16946 return true;
16948 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
16949 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
16950 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
16951 (Imm - 8).isPowerOf2()))
16952 return true;
16954 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
16955 // a pair of LUI/ADDI.
16956 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
16957 ConstNode->hasOneUse()) {
16958 APInt ImmS = Imm.ashr(Imm.countr_zero());
16959 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
16960 (1 - ImmS).isPowerOf2())
16961 return true;
16962 }
16963 }
16965 return false;
16966 }
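// Illustrative decompositions enabled by returning true above (RV64 sketch):
//   x * 9    -> slli t0, x, 3  ; add x, t0, x      ((Imm - 1) is 2^3)
//   x * 4100 -> slli t0, x, 12 ; sh2add x, x, t0   (Zba, (Imm - 4) is 2^12)
// trading one MUL for a two-instruction shift/add sequence.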
16968 bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
16969 SDValue ConstNode) const {
16970 // Let the DAGCombiner decide for vectors.
16971 EVT VT = AddNode.getValueType();
16972 if (VT.isVector())
16973 return true;
16975 // Let the DAGCombiner decide for larger types.
16976 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
16977 return true;
16979 // It is worse if c1 is simm12 while c1*c2 is not.
16980 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
16981 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
16982 const APInt &C1 = C1Node->getAPIntValue();
16983 const APInt &C2 = C2Node->getAPIntValue();
16984 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
16985 return false;
16987 // Default to true and let the DAGCombiner decide.
16988 return true;
16989 }
16991 bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
16992 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
16993 unsigned *Fast) const {
16994 if (!VT.isVector()) {
16995 if (Fast)
16996 *Fast = Subtarget.enableUnalignedScalarMem();
16997 return Subtarget.enableUnalignedScalarMem();
16998 }
17000 // All vector implementations must support element alignment.
17001 EVT ElemVT = VT.getVectorElementType();
17002 if (Alignment >= ElemVT.getStoreSize()) {
17003 if (Fast)
17004 *Fast = 1;
17005 return true;
17006 }
17008 // Note: We lower an unmasked unaligned vector access to an equally sized
17009 // e8 element type access. Given this, we effectively support all unmasked
17010 // misaligned accesses. TODO: Work through the codegen implications of
17011 // allowing such accesses to be formed, and considered fast.
17012 if (Fast)
17013 *Fast = Subtarget.enableUnalignedVectorMem();
17014 return Subtarget.enableUnalignedVectorMem();
17015 }
17017 bool RISCVTargetLowering::splitValueIntoRegisterParts(
17018 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
17019 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
17020 bool IsABIRegCopy = CC.has_value();
17021 EVT ValueVT = Val.getValueType();
17022 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
17023 PartVT == MVT::f32) {
17024 // Cast the [b]f16 to i16, extend to i32, pad the high bits with ones to
17025 // make a NaN float, and cast to f32.
17026 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
17027 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
17028 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
17029 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
17030 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
17031 Parts[0] = Val;
17032 return true;
17033 }
17035 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
17036 LLVMContext &Context = *DAG.getContext();
17037 EVT ValueEltVT = ValueVT.getVectorElementType();
17038 EVT PartEltVT = PartVT.getVectorElementType();
17039 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
17040 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
17041 if (PartVTBitSize % ValueVTBitSize == 0) {
17042 assert(PartVTBitSize >= ValueVTBitSize);
17043 // If the element types are different, bitcast to the same element type of
17044 // PartVT.
17045 // For example, to copy a <vscale x 1 x i8> value to
17046 // <vscale x 4 x i16>,
17047 // we first widen <vscale x 1 x i8> to <vscale x 8 x i8> by inserting a
17048 // subvector, and can then bitcast to <vscale x 4 x i16>.
17049 if (ValueEltVT != PartEltVT) {
17050 if (PartVTBitSize > ValueVTBitSize) {
17051 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
17052 assert(Count != 0 && "The number of elements should not be zero.");
17053 EVT SameEltTypeVT =
17054 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
17055 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
17056 DAG.getUNDEF(SameEltTypeVT), Val,
17057 DAG.getVectorIdxConstant(0, DL));
17058 }
17059 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
17060 } else {
17061 Val =
17062 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
17063 Val, DAG.getVectorIdxConstant(0, DL));
17064 }
17065 Parts[0] = Val;
17066 return true;
17067 }
17068 }
17070 return false;
17071 }
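// Illustrative ABI copy (sketch): passing a half 1.0 (bits 0x3C00) in an
// f32 register takes the path above and produces the NaN-boxed pattern
// 0xFFFF3C00, matching the psABI rule that narrower FP values are passed
// in wider FP registers with the upper bits all ones.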
17072 SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
17073 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
17074 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
17075 bool IsABIRegCopy = CC.has_value();
17076 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
17077 PartVT == MVT::f32) {
17078 SDValue Val = Parts[0];
17080 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
17081 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
17082 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
17083 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
17084 return Val;
17085 }
17087 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
17088 LLVMContext &Context = *DAG.getContext();
17089 SDValue Val = Parts[0];
17090 EVT ValueEltVT = ValueVT.getVectorElementType();
17091 EVT PartEltVT = PartVT.getVectorElementType();
17092 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
17093 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
17094 if (PartVTBitSize % ValueVTBitSize == 0) {
17095 assert(PartVTBitSize >= ValueVTBitSize);
17096 EVT SameEltTypeVT = ValueVT;
17097 // If the element types are different, convert it to the same element type
17098 // as ValueVT.
17099 // For example, to copy a <vscale x 1 x i8> value out of
17100 // <vscale x 4 x i16>,
17101 // we first bitcast <vscale x 4 x i16> to <vscale x 8 x i8>,
17102 // and can then extract the <vscale x 1 x i8> subvector.
17103 if (ValueEltVT != PartEltVT) {
17104 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
17105 assert(Count != 0 && "The number of elements should not be zero.");
17106 SameEltTypeVT =
17107 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
17108 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
17109 }
17110 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
17111 DAG.getVectorIdxConstant(0, DL));
17112 return Val;
17113 }
17114 }
17116 return SDValue();
17117 }
17118 bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
17119 // When aggressively optimizing for code size, we prefer to use a div
17120 // instruction, as it is usually smaller than the alternative sequence.
17121 // TODO: Add vector division?
17122 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
17123 return OptSize && !VT.isVector();
17124 }
17126 bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
17127 // Scalarizing zero_ext and sign_ext might stop matching to widening
17128 // instructions in some cases.
17129 unsigned Opc = N->getOpcode();
17130 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
17131 return false;
17132 return true;
17133 }
17135 static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
17136 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
17137 Function *ThreadPointerFunc =
17138 Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
17139 return IRB.CreatePointerCast(
17140 IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
17141 IRB.CreateCall(ThreadPointerFunc), Offset),
17142 IRB.getInt8PtrTy()->getPointerTo(0));
17143 }
17145 Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
17146 // Fuchsia provides a fixed TLS slot for the stack cookie.
17147 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
17148 if (Subtarget.isTargetFuchsia())
17149 return useTpOffset(IRB, -0x10);
17151 return TargetLowering::getIRStackGuard(IRB);
17152 }
17154 bool RISCVTargetLowering::isLegalInterleavedAccessType(
17155 VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
17156 const DataLayout &DL) const {
17157 EVT VT = getValueType(DL, VTy);
17158 // Don't lower vlseg/vsseg for vector types that can't be split.
17159 if (!isTypeLegal(VT))
17160 return false;
17162 if (!isLegalElementTypeForRVV(VT.getScalarType()) ||
17163 !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
17164 Alignment))
17165 return false;
17167 MVT ContainerVT = VT.getSimpleVT();
17169 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
17170 if (!Subtarget.useRVVForFixedLengthVectors())
17171 return false;
17172 // Sometimes the interleaved access pass picks up splats as interleaves of
17173 // one element. Don't lower these.
17174 if (FVTy->getNumElements() < 2)
17175 return false;
17177 ContainerVT = getContainerForFixedLengthVector(VT.getSimpleVT());
17178 }
17180 // Need to make sure that EMUL * NFIELDS ≤ 8
17181 auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
17182 if (Fractional)
17183 return true;
17184 return Factor * LMUL <= 8;
17185 }
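// For example (illustrative): a <vscale x 4 x i32> field has LMUL = 2, so a
// factor-4 segment access needs EMUL * NFIELDS = 2 * 4 = 8 and is accepted,
// while factor 5 (2 * 5 = 10 registers) is rejected.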
17187 bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
17188 Align Alignment) const {
17189 if (!Subtarget.hasVInstructions())
17190 return false;
17192 // Only support fixed vectors if we know the minimum vector size.
17193 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
17194 return false;
17196 EVT ScalarType = DataType.getScalarType();
17197 if (!isLegalElementTypeForRVV(ScalarType))
17198 return false;
17200 if (!Subtarget.enableUnalignedVectorMem() &&
17201 Alignment < ScalarType.getStoreSize())
17202 return false;
17204 return true;
17205 }
17207 static const Intrinsic::ID FixedVlsegIntrIds[] = {
17208 Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
17209 Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
17210 Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
17211 Intrinsic::riscv_seg8_load};
17213 /// Lower an interleaved load into a vlsegN intrinsic.
17215 /// E.g. Lower an interleaved load (Factor = 2):
17216 /// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
17217 /// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
17218 /// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
17220 /// Into:
17221 /// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
17222 /// %ptr, i64 4)
17223 /// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
17224 /// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
17225 bool RISCVTargetLowering::lowerInterleavedLoad(
17226 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
17227 ArrayRef<unsigned> Indices, unsigned Factor) const {
17228 IRBuilder<> Builder(LI);
17230 auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
17231 if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
17232 LI->getPointerAddressSpace(),
17233 LI->getModule()->getDataLayout()))
17234 return false;
17236 auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
17238 Function *VlsegNFunc =
17239 Intrinsic::getDeclaration(LI->getModule(), FixedVlsegIntrIds[Factor - 2],
17240 {VTy, LI->getPointerOperandType(), XLenTy});
17242 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
17244 CallInst *VlsegN =
17245 Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL});
17247 for (unsigned i = 0; i < Shuffles.size(); i++) {
17248 Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
17249 Shuffles[i]->replaceAllUsesWith(SubVec);
17250 }
17252 return true;
17253 }
17255 static const Intrinsic::ID FixedVssegIntrIds[] = {
17256 Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
17257 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
17258 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
17259 Intrinsic::riscv_seg8_store};
17261 /// Lower an interleaved store into a vssegN intrinsic.
17263 /// E.g. Lower an interleaved store (Factor = 3):
17264 /// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
17265 /// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
17266 /// store <12 x i32> %i.vec, <12 x i32>* %ptr
17268 /// Into:
17269 /// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
17270 /// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
17271 /// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
17272 /// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
17273 /// %ptr, i32 4)
17274 ///
17275 /// Note that the new shufflevectors will be removed and we'll only generate one
17276 /// vsseg3 instruction in CodeGen.
17277 bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
17278 ShuffleVectorInst *SVI,
17279 unsigned Factor) const {
17280 IRBuilder<> Builder(SI);
17281 auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
17282 // Given SVI : <n*factor x ty>, then VTy : <n x ty>
17283 auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
17284 ShuffleVTy->getNumElements() / Factor);
17285 if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
17286 SI->getPointerAddressSpace(),
17287 SI->getModule()->getDataLayout()))
17288 return false;
17290 auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
17292 Function *VssegNFunc =
17293 Intrinsic::getDeclaration(SI->getModule(), FixedVssegIntrIds[Factor - 2],
17294 {VTy, SI->getPointerOperandType(), XLenTy});
17296 auto Mask = SVI->getShuffleMask();
17297 SmallVector<Value *, 10> Ops;
17299 for (unsigned i = 0; i < Factor; i++) {
17300 Value *Shuffle = Builder.CreateShuffleVector(
17301 SVI->getOperand(0), SVI->getOperand(1),
17302 createSequentialMask(Mask[i], VTy->getNumElements(), 0));
17303 Ops.push_back(Shuffle);
17304 }
17305 // This VL should be OK (should be executable in one vsseg instruction,
17306 // potentially under larger LMULs) because we checked that the fixed vector
17307 // type fits in isLegalInterleavedAccessType.
17308 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
17309 Ops.append({SI->getPointerOperand(), VL});
17311 Builder.CreateCall(VssegNFunc, Ops);
17313 return true;
17314 }
17316 bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
17317 LoadInst *LI) const {
17318 assert(LI->isSimple());
17319 IRBuilder<> Builder(LI);
17321 // Only deinterleave2 supported at present.
17322 if (DI->getIntrinsicID() != Intrinsic::experimental_vector_deinterleave2)
17323 return false;
17325 unsigned Factor = 2;
17327 VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
17328 VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));
17330 if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
17331 LI->getPointerAddressSpace(),
17332 LI->getModule()->getDataLayout()))
17333 return false;
17335 Function *VlsegNFunc;
17336 Value *VL;
17337 Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
17338 SmallVector<Value *, 10> Ops;
17340 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
17341 VlsegNFunc = Intrinsic::getDeclaration(
17342 LI->getModule(), FixedVlsegIntrIds[Factor - 2],
17343 {ResVTy, LI->getPointerOperandType(), XLenTy});
17344 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
17346 static const Intrinsic::ID IntrIds[] = {
17347 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
17348 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
17349 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
17350 Intrinsic::riscv_vlseg8};
17352 VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2],
17353 {ResVTy, XLenTy});
17354 VL = Constant::getAllOnesValue(XLenTy);
17355 Ops.append(Factor, PoisonValue::get(ResVTy));
17356 }
17358 Ops.append({LI->getPointerOperand(), VL});
17360 Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops);
17361 DI->replaceAllUsesWith(Vlseg);
17363 return true;
17364 }
17366 bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
17367 StoreInst *SI) const {
17368 assert(SI->isSimple());
17369 IRBuilder<> Builder(SI);
17371 // Only interleave2 supported at present.
17372 if (II->getIntrinsicID() != Intrinsic::experimental_vector_interleave2)
17373 return false;
17375 unsigned Factor = 2;
17377 VectorType *VTy = cast<VectorType>(II->getType());
17378 VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());
17380 if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
17381 SI->getPointerAddressSpace(),
17382 SI->getModule()->getDataLayout()))
17383 return false;
17385 Function *VssegNFunc;
17386 Value *VL;
17387 Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
17389 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
17390 VssegNFunc = Intrinsic::getDeclaration(
17391 SI->getModule(), FixedVssegIntrIds[Factor - 2],
17392 {InVTy, SI->getPointerOperandType(), XLenTy});
17393 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
17395 static const Intrinsic::ID IntrIds[] = {
17396 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
17397 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
17398 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
17399 Intrinsic::riscv_vsseg8};
17401 VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2],
17402 {InVTy, XLenTy});
17403 VL = Constant::getAllOnesValue(XLenTy);
17404 }
17406 Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),
17407 SI->getPointerOperand(), VL});
17409 return true;
17410 }
17412 MachineInstr *
17413 RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
17414 MachineBasicBlock::instr_iterator &MBBI,
17415 const TargetInstrInfo *TII) const {
17416 assert(MBBI->isCall() && MBBI->getCFIType() &&
17417 "Invalid call instruction for a KCFI check");
17418 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
17419 MBBI->getOpcode()));
17421 MachineOperand &Target = MBBI->getOperand(0);
17422 Target.setIsRenamable(false);
17424 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
17425 .addReg(Target.getReg())
17426 .addImm(MBBI->getCFIType())
17427 .getInstr();
17428 }
17430 #define GET_REGISTER_MATCHER
17431 #include "RISCVGenAsmMatcher.inc"
17433 Register
17434 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
17435 const MachineFunction &MF) const {
17436 Register Reg = MatchRegisterAltName(RegName);
17437 if (Reg == RISCV::NoRegister)
17438 Reg = MatchRegisterName(RegName);
17439 if (Reg == RISCV::NoRegister)
17440 report_fatal_error(
17441 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
17442 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
17443 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
17444 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
17445 StringRef(RegName) + "\"."));
17446 return Reg;
17447 }
17449 MachineMemOperand::Flags
17450 RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
17451 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
17453 if (NontemporalInfo == nullptr)
17454 return MachineMemOperand::MONone;
17456 // 1 -> default value, treated the same as __RISCV_NTLH_ALL
17457 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
17458 // 3 -> __RISCV_NTLH_ALL_PRIVATE
17459 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
17460 // 5 -> __RISCV_NTLH_ALL
17461 int NontemporalLevel = 5;
17462 const MDNode *RISCVNontemporalInfo =
17463 I.getMetadata("riscv-nontemporal-domain");
17464 if (RISCVNontemporalInfo != nullptr)
17465 NontemporalLevel =
17466 cast<ConstantInt>(
17467 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
17468 ->getValue())
17469 ->getZExtValue();
17471 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
17472 "RISC-V target doesn't support this non-temporal domain.");
17474 NontemporalLevel -= 2;
17475 MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
17476 if (NontemporalLevel & 0b1)
17477 Flags |= MONontemporalBit0;
17478 if (NontemporalLevel & 0b10)
17479 Flags |= MONontemporalBit1;
17481 return Flags;
17482 }
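// Example (illustrative) of the metadata decoded above:
//   store i32 %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1
//   !0 = !{i32 1}
//   !1 = !{i32 3}   ; __RISCV_NTLH_ALL_PRIVATE
// yields NontemporalLevel 3, so only MONontemporalBit0 is set on the MMO.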
17484 MachineMemOperand::Flags
17485 RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
17487 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
17488 MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
17489 TargetFlags |= (NodeFlags & MONontemporalBit0);
17490 TargetFlags |= (NodeFlags & MONontemporalBit1);
17492 return TargetFlags;
17495 bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
17496 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
17497 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
17498 }
17500 namespace llvm::RISCVVIntrinsicsTable {
17502 #define GET_RISCVVIntrinsicsTable_IMPL
17503 #include "RISCVGenSearchableTables.inc"
17505 } // namespace llvm::RISCVVIntrinsicsTable