1 //===-- ARMInstrMVE.td - MVE support for ARM ---------------*- tablegen -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file describes the ARM MVE instruction set.
11 //===----------------------------------------------------------------------===//
// Operand classes for MVE "expand immediate" operands: an immediate byte
// logically shifted left by <shift> bits. Restored the closing braces that
// were lost from each class body.
class ExpandImmAsmOp<string shift> : AsmOperandClass {
  let Name = !strconcat("ExpandImm", shift);
  let PredicateMethod = !strconcat("isExpImm<", shift, ">");
  let RenderMethod = "addImmOperands";
}

// As above, but the immediate is bitwise-inverted before use, additionally
// parameterised by the element size in bits.
class InvertedExpandImmAsmOp<string shift, string size> : AsmOperandClass {
  let Name = !strconcat("InvertedExpandImm", shift, "_", size);
  let PredicateMethod = !strconcat("isInvertedExpImm<", shift, ",", size, ">");
  let RenderMethod = "addImmOperands";
}

class ExpandImm<string shift> : Operand<i32> {
  let ParserMatchClass = ExpandImmAsmOp<shift>;
  let EncoderMethod = !strconcat("getExpandedImmOpValue<",shift,",false>");
  let DecoderMethod = !strconcat("DecodeExpandedImmOperand<",shift,">");
  let PrintMethod = "printExpandedImmOperand";
}

class InvertedExpandImm<string shift, string size> : Operand<i32> {
  let ParserMatchClass = InvertedExpandImmAsmOp<shift, size>;
  let EncoderMethod = !strconcat("getExpandedImmOpValue<",shift,",true>");
  let PrintMethod = "printExpandedImmOperand";
  // No decoder method needed, because this operand type is only used
  // by aliases (VAND and VORN)
}
// Concrete expand-immediate operands, one per byte position (shift of
// 0, 8, 16 or 24 bits) ...
def expzero00 : ExpandImm<"0">;
def expzero08 : ExpandImm<"8">;
def expzero16 : ExpandImm<"16">;
def expzero24 : ExpandImm<"24">;
// ... and the inverted variants for 16-bit and 32-bit element sizes.
def expzero00inv16 : InvertedExpandImm<"0", "16">;
def expzero08inv16 : InvertedExpandImm<"8", "16">;
def expzero00inv32 : InvertedExpandImm<"0", "32">;
def expzero08inv32 : InvertedExpandImm<"8", "32">;
def expzero16inv32 : InvertedExpandImm<"16", "32">;
def expzero24inv32 : InvertedExpandImm<"24", "32">;
// The VPT mask operand: parsed with the same operand class as an IT mask,
// but with its own printer/encoder/decoder. Restored the lost closing brace.
def vpt_mask : Operand<i32> {
  let PrintMethod = "printVPTMask";
  let ParserMatchClass = it_mask_asmoperand;
  let EncoderMethod = "getVPTMaskOpValue";
  let DecoderMethod = "DecodeVPTMaskOperand";
}
// VPT/VCMP restricted predicate for sign invariant types
def pred_restricted_i_asmoperand : AsmOperandClass {
  let Name = "CondCodeRestrictedI";
  let RenderMethod = "addITCondCodeOperands";
  let PredicateMethod = "isITCondCodeRestrictedI";
  let ParserMethod = "parseITCondCode";
  let DiagnosticString = "condition code for sign-independent integer "#
                         "comparison must be EQ or NE";
}

// VPT/VCMP restricted predicate for signed types
def pred_restricted_s_asmoperand : AsmOperandClass {
  let Name = "CondCodeRestrictedS";
  let RenderMethod = "addITCondCodeOperands";
  let PredicateMethod = "isITCondCodeRestrictedS";
  let ParserMethod = "parseITCondCode";
  let DiagnosticString = "condition code for signed integer "#
                         "comparison must be EQ, NE, LT, GT, LE or GE";
}

// VPT/VCMP restricted predicate for unsigned types
def pred_restricted_u_asmoperand : AsmOperandClass {
  let Name = "CondCodeRestrictedU";
  let RenderMethod = "addITCondCodeOperands";
  let PredicateMethod = "isITCondCodeRestrictedU";
  let ParserMethod = "parseITCondCode";
  let DiagnosticString = "condition code for unsigned integer "#
                         "comparison must be EQ, NE, HS or HI";
}

// VPT/VCMP restricted predicate for floating point
def pred_restricted_fp_asmoperand : AsmOperandClass {
  let Name = "CondCodeRestrictedFP";
  let RenderMethod = "addITCondCodeOperands";
  let PredicateMethod = "isITCondCodeRestrictedFP";
  let ParserMethod = "parseITCondCode";
  let DiagnosticString = "condition code for floating-point "#
                         "comparison must be EQ, NE, LT, GT, LE or GE";
}
// Common base for the mandatory VCMP/VPT condition-code operands; the four
// concrete defs below pair each restricted asmoperand class with its
// matching decoder. Restored the lost closing braces.
class VCMPPredicateOperand : Operand<i32>;

def pred_basic_i : VCMPPredicateOperand {
  let PrintMethod = "printMandatoryRestrictedPredicateOperand";
  let ParserMatchClass = pred_restricted_i_asmoperand;
  let DecoderMethod = "DecodeRestrictedIPredicateOperand";
  let EncoderMethod = "getRestrictedCondCodeOpValue";
}

def pred_basic_u : VCMPPredicateOperand {
  let PrintMethod = "printMandatoryRestrictedPredicateOperand";
  let ParserMatchClass = pred_restricted_u_asmoperand;
  let DecoderMethod = "DecodeRestrictedUPredicateOperand";
  let EncoderMethod = "getRestrictedCondCodeOpValue";
}

def pred_basic_s : VCMPPredicateOperand {
  let PrintMethod = "printMandatoryRestrictedPredicateOperand";
  let ParserMatchClass = pred_restricted_s_asmoperand;
  let DecoderMethod = "DecodeRestrictedSPredicateOperand";
  let EncoderMethod = "getRestrictedCondCodeOpValue";
}

def pred_basic_fp : VCMPPredicateOperand {
  let PrintMethod = "printMandatoryRestrictedPredicateOperand";
  let ParserMatchClass = pred_restricted_fp_asmoperand;
  let DecoderMethod = "DecodeRestrictedFPPredicateOperand";
  let EncoderMethod = "getRestrictedCondCodeOpValue";
}
// Register list operands for interleaving load/stores
def VecList2QAsmOperand : AsmOperandClass {
  let Name = "VecListTwoMQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addMVEVecListOperands";
  let DiagnosticString = "operand must be a list of two consecutive "#
                         "q-registers in range [q0,q7]";
}

def VecList2Q : RegisterOperand<QQPR, "printMVEVectorListTwoQ"> {
  let ParserMatchClass = VecList2QAsmOperand;
  let PrintMethod = "printMVEVectorList<2>";
}

def VecList4QAsmOperand : AsmOperandClass {
  let Name = "VecListFourMQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addMVEVecListOperands";
  let DiagnosticString = "operand must be a list of four consecutive "#
                         "q-registers in range [q0,q7]";
}

def VecList4Q : RegisterOperand<QQQQPR, "printMVEVectorListFourQ"> {
  let ParserMatchClass = VecList4QAsmOperand;
  let PrintMethod = "printMVEVectorList<4>";
}
// taddrmode_imm7 := reg[r0-r7] +/- (imm7 << shift)
class TMemImm7ShiftOffsetAsmOperand<int shift> : AsmOperandClass {
  let Name = "TMemImm7Shift"#shift#"Offset";
  let PredicateMethod = "isMemImm7ShiftedOffset<"#shift#",ARM::tGPRRegClassID>";
  let RenderMethod = "addMemImmOffsetOperands";
}

class taddrmode_imm7<int shift> : MemOperand,
                   ComplexPattern<i32, 2, "SelectTAddrModeImm7<"#shift#">", []> {
  let ParserMatchClass = TMemImm7ShiftOffsetAsmOperand<shift>;
  // They are printed the same way as the T2 imm8 version
  let PrintMethod = "printT2AddrModeImm8Operand<false>";
  // This can also be the same as the T2 version.
  let EncoderMethod = "getT2AddrModeImmOpValue<7,"#shift#">";
  let DecoderMethod = "DecodeTAddrModeImm7<"#shift#">";
  let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
}
// t2addrmode_imm7 := reg +/- (imm7)
class MemImm7ShiftOffsetAsmOperand<int shift> : AsmOperandClass {
  let Name = "MemImm7Shift"#shift#"Offset";
  let PredicateMethod = "isMemImm7ShiftedOffset<" # shift #
                        ",ARM::GPRnopcRegClassID>";
  let RenderMethod = "addMemImmOffsetOperands";
}

def MemImm7Shift0OffsetAsmOperand : MemImm7ShiftOffsetAsmOperand<0>;
def MemImm7Shift1OffsetAsmOperand : MemImm7ShiftOffsetAsmOperand<1>;
def MemImm7Shift2OffsetAsmOperand : MemImm7ShiftOffsetAsmOperand<2>;

class T2AddrMode_Imm7<int shift> : MemOperand,
      ComplexPattern<i32, 2, "SelectT2AddrModeImm7<"#shift#">", []> {
  let EncoderMethod = "getT2AddrModeImmOpValue<7,"#shift#">";
  let DecoderMethod = "DecodeT2AddrModeImm7<"#shift#", 0>";
  let ParserMatchClass =
    !cast<AsmOperandClass>("MemImm7Shift"#shift#"OffsetAsmOperand");
  let MIOperandInfo = (ops GPRnopc:$base, i32imm:$offsimm);
}
// Plain offset form: only the printer differs from T2AddrMode_Imm7.
class t2addrmode_imm7<int shift> : T2AddrMode_Imm7<shift> {
  // They are printed the same way as the imm8 version
  let PrintMethod = "printT2AddrModeImm8Operand<false>";
}

// Writeback (pre-indexed) variant, restricted to rGPR bases.
class MemImm7ShiftOffsetWBAsmOperand<int shift> : AsmOperandClass {
  let Name = "MemImm7Shift"#shift#"OffsetWB";
  let PredicateMethod = "isMemImm7ShiftedOffset<" # shift #
                        ",ARM::rGPRRegClassID>";
  let RenderMethod = "addMemImmOffsetOperands";
}

def MemImm7Shift0OffsetWBAsmOperand : MemImm7ShiftOffsetWBAsmOperand<0>;
def MemImm7Shift1OffsetWBAsmOperand : MemImm7ShiftOffsetWBAsmOperand<1>;
def MemImm7Shift2OffsetWBAsmOperand : MemImm7ShiftOffsetWBAsmOperand<2>;

class t2addrmode_imm7_pre<int shift> : T2AddrMode_Imm7<shift> {
  // They are printed the same way as the imm8 version
  let PrintMethod = "printT2AddrModeImm8Operand<true>";
  let ParserMatchClass =
    !cast<AsmOperandClass>("MemImm7Shift"#shift#"OffsetWBAsmOperand");
  let DecoderMethod = "DecodeT2AddrModeImm7<"#shift#", 1>";
  let MIOperandInfo = (ops rGPR:$base, i32imm:$offsim);
}
// Bare shifted-imm7 offset operand (for post-indexed forms), plus its
// asmoperand classes. Restored the lost closing brace of t2am_imm7_offset.
class t2am_imm7shiftOffsetAsmOperand<int shift>
  : AsmOperandClass { let Name = "Imm7Shift"#shift; }
def t2am_imm7shift0OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<0>;
def t2am_imm7shift1OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<1>;
def t2am_imm7shift2OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<2>;

class t2am_imm7_offset<int shift> : MemOperand,
      ComplexPattern<i32, 1, "SelectT2AddrModeImm7Offset<"#shift#">",
                     [], [SDNPWantRoot]> {
  // They are printed the same way as the imm8 version
  let PrintMethod = "printT2AddrModeImm8OffsetOperand";
  let ParserMatchClass =
    !cast<AsmOperandClass>("t2am_imm7shift"#shift#"OffsetAsmOperand");
  let EncoderMethod = "getT2ScaledImmOpValue<7,"#shift#">";
  let DecoderMethod = "DecodeT2Imm7<"#shift#">";
}
// Operands for gather/scatter loads of the form [Rbase, Qoffsets]
class MemRegRQOffsetAsmOperand<int shift> : AsmOperandClass {
  let Name = "MemRegRQS"#shift#"Offset";
  let PredicateMethod = "isMemRegRQOffset<"#shift#">";
  let RenderMethod = "addMemRegRQOffsetOperands";
}

def MemRegRQS0OffsetAsmOperand : MemRegRQOffsetAsmOperand<0>;
def MemRegRQS1OffsetAsmOperand : MemRegRQOffsetAsmOperand<1>;
def MemRegRQS2OffsetAsmOperand : MemRegRQOffsetAsmOperand<2>;
def MemRegRQS3OffsetAsmOperand : MemRegRQOffsetAsmOperand<3>;

// mve_addr_rq_shift := reg + vreg{ << UXTW #shift}
class mve_addr_rq_shift<int shift> : MemOperand {
  let EncoderMethod = "getMveAddrModeRQOpValue";
  let PrintMethod = "printMveAddrModeRQOperand<"#shift#">";
  let ParserMatchClass =
    !cast<AsmOperandClass>("MemRegRQS"#shift#"OffsetAsmOperand");
  let DecoderMethod = "DecodeMveAddrModeRQ";
  let MIOperandInfo = (ops GPRnopc:$base, MQPR:$offsreg);
}
// Q-register base with a scaled imm7 offset, for vector-base gather/scatter.
class MemRegQOffsetAsmOperand<int shift> : AsmOperandClass {
  let Name = "MemRegQS"#shift#"Offset";
  let PredicateMethod = "isMemRegQOffset<"#shift#">";
  let RenderMethod = "addMemImmOffsetOperands";
}

def MemRegQS2OffsetAsmOperand : MemRegQOffsetAsmOperand<2>;
def MemRegQS3OffsetAsmOperand : MemRegQOffsetAsmOperand<3>;

// mve_addr_q_shift := vreg {+ #imm7s2/4}
class mve_addr_q_shift<int shift> : MemOperand {
  let EncoderMethod = "getMveAddrModeQOpValue<"#shift#">";
  // Can be printed same way as other reg + imm operands
  let PrintMethod = "printT2AddrModeImm8Operand<false>";
  let ParserMatchClass =
    !cast<AsmOperandClass>("MemRegQS"#shift#"OffsetAsmOperand");
  let DecoderMethod = "DecodeMveAddrModeQ<"#shift#">";
  let MIOperandInfo = (ops MQPR:$base, i32imm:$imm);
}
// A family of classes wrapping up information about the vector types
// used by MVE.
class MVEVectorVTInfo<ValueType vec, ValueType dblvec, ValueType pred,
                      bits<2> size, string suffixletter, bit unsigned> {
  // The LLVM ValueType representing the vector, so we can use it in
  // ISel patterns. (Restored: this field was lost in extraction, along
  // with "Size" below and the closing brace.)
  ValueType Vec = vec;

  // The LLVM ValueType representing a vector with elements double the size
  // of those in Vec, so we can use it in ISel patterns. It is up to the
  // invoker of this class to ensure that this is a correct choice.
  ValueType DblVec = dblvec;

  // An LLVM ValueType representing a corresponding vector of
  // predicate bits, for use in ISel patterns that handle an IR
  // intrinsic describing the predicated form of the instruction.
  //
  // Usually, for a vector of N things, this will be vNi1. But for
  // vectors of 2 values, we make an exception, and use v4i1 instead
  // of v2i1. Rationale: MVE codegen doesn't support doing all the
  // auxiliary operations on v2i1 (vector shuffles etc), and also,
  // there's no MVE compare instruction that will _generate_ v2i1
  // directly.
  ValueType Pred = pred;

  // The most common representation of the vector element size in MVE
  // instruction encodings: a 2-bit value V representing an (8<<V)-bit
  // vector element. Required by the LaneBits computation below.
  bits<2> Size = size;

  // For vectors explicitly mentioning a signedness of integers: 0 for
  // signed and 1 for unsigned. For anything else, undefined.
  bit Unsigned = unsigned;

  // The number of bits in a vector element, in integer form.
  int LaneBits = !shl(8, Size);

  // The suffix used in assembly language on an instruction operating
  // on this lane if it only cares about number of bits.
  string BitsSuffix = !if(!eq(suffixletter, "p"),
                          !if(!eq(unsigned, 0b0), "8", "16"),
                          !cast<string>(LaneBits));

  // The suffix used on an instruction that mentions the whole type.
  string Suffix = suffixletter ## BitsSuffix;

  // The letter part of the suffix only.
  string SuffixLetter = suffixletter;
}
// Integer vector types that don't treat signed and unsigned differently.
def MVE_v16i8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, 0b00, "i", ?>;
def MVE_v8i16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, 0b01, "i", ?>;
def MVE_v4i32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, 0b10, "i", ?>;
def MVE_v2i64 : MVEVectorVTInfo<v2i64, ?, v4i1, 0b11, "i", ?>;
// Explicitly signed and unsigned integer vectors. They map to the
// same set of LLVM ValueTypes as above, but are represented
// differently in assembly and instruction encodings.
def MVE_v16s8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, 0b00, "s", 0b0>;
def MVE_v8s16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, 0b01, "s", 0b0>;
def MVE_v4s32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, 0b10, "s", 0b0>;
def MVE_v2s64 : MVEVectorVTInfo<v2i64, ?, v4i1, 0b11, "s", 0b0>;
def MVE_v16u8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, 0b00, "u", 0b1>;
def MVE_v8u16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, 0b01, "u", 0b1>;
def MVE_v4u32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, 0b10, "u", 0b1>;
def MVE_v2u64 : MVEVectorVTInfo<v2i64, ?, v4i1, 0b11, "u", 0b1>;
// Floating-point vector types.
def MVE_v8f16 : MVEVectorVTInfo<v8f16, v4f32, v8i1, 0b01, "f", ?>;
def MVE_v4f32 : MVEVectorVTInfo<v4f32, v2f64, v4i1, 0b10, "f", ?>;
def MVE_v2f64 : MVEVectorVTInfo<v2f64, ?, v4i1, 0b11, "f", ?>;
// Polynomial vector types.
def MVE_v16p8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, 0b11, "p", 0b0>;
def MVE_v8p16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, 0b11, "p", 0b1>;
// --------- Start of base classes for the instructions themselves

// Root class for all MVE instructions: a 4-byte Thumb2 encoding in the
// "MVE" decoder namespace. Restored the "pattern>," continuation (the
// visible base-class argument list ended mid-call) and the closing brace.
class MVE_MI<dag oops, dag iops, InstrItinClass itin, string asm,
             string ops, string cstr, list<dag> pattern>
  : Thumb2XI<oops, iops, AddrModeNone, 4, itin, !strconcat(asm, "\t", ops), cstr,
             pattern>,
    Requires<[HasMVEInt]> {
  // NOTE(review): upstream also sets the instruction domain here
  // (let D = MVEDomain;) -- confirm against the original file.
  let D = MVEDomain;
  let DecoderNamespace = "MVE";
}
// MVE_p is used for most predicated instructions, to add the cluster
// of input operands that provides the VPT suffix (none, T or E) and
// the input predicate register.
class MVE_p<dag oops, dag iops, InstrItinClass itin, string iname,
            string suffix, string ops, vpred_ops vpred, string cstr,
            list<dag> pattern=[]>
  : MVE_MI<oops, !con(iops, (ins vpred:$vp)), itin,
           // If the instruction has a suffix, like vadd.f32, then the
           // VPT predication suffix goes before the dot, so the full
           // name has to be "vadd${vp}.f32".
           !strconcat(iname, "${vp}",
                      !if(!eq(suffix, ""), "", !strconcat(".", suffix))),
           ops, !strconcat(cstr, vpred.vpred_constraint), pattern> {
  let Inst{31-29} = 0b111;
  let Inst{27-26} = 0b11;
}
// Floating-point flavour of MVE_p: identical, but requires the MVE FP
// extension instead of just MVE integer. Restored the lost closing brace.
class MVE_f<dag oops, dag iops, InstrItinClass itin, string iname,
            string suffix, string ops, vpred_ops vpred, string cstr,
            list<dag> pattern=[]>
  : MVE_p<oops, iops, itin, iname, suffix, ops, vpred, cstr, pattern> {
  let Predicates = [HasMVEFloat];
}
// Like MVE_MI but based on Thumb2I (carries an IT predicate operand) and
// additionally requiring v8.1-M mainline. Restored the "pattern>,"
// continuation and the closing brace lost in extraction.
class MVE_MI_with_pred<dag oops, dag iops, InstrItinClass itin, string asm,
                       string ops, string cstr, list<dag> pattern>
  : Thumb2I<oops, iops, AddrModeNone, 4, itin, asm, !strconcat("\t", ops), cstr,
            pattern>,
    Requires<[HasV8_1MMainline, HasMVEInt]> {
  // NOTE(review): upstream also sets "let D = MVEDomain;" here -- confirm.
  let D = MVEDomain;
  let DecoderNamespace = "MVE";
}
// Base class for VMOV-to/from-lane instructions: builds the ".suffix\tops"
// assembly string. The visible parameter list and base-class call were both
// unterminated; restored "list<dag> pattern=[]>", "cstr, pattern>," and the
// closing brace.
class MVE_VMOV_lane_base<dag oops, dag iops, InstrItinClass itin, string asm,
                         string suffix, string ops, string cstr,
                         list<dag> pattern=[]>
  : Thumb2I<oops, iops, AddrModeNone, 4, itin, asm,
            !if(!eq(suffix, ""), "", "." # suffix) # "\t" # ops,
            cstr, pattern>,
    Requires<[HasV8_1MMainline, HasMVEInt]> {
  // NOTE(review): upstream also sets "let D = MVEDomain;" here -- confirm.
  let D = MVEDomain;
  let DecoderNamespace = "MVE";
}
// Common base for the MVE scalar (GPR) shift instructions: fixes the
// top twelve opcode bits. Restored the lost closing brace.
class MVE_ScalarShift<string iname, dag oops, dag iops, string asm, string cstr,
                      list<dag> pattern=[]>
  : MVE_MI_with_pred<oops, iops, NoItinerary, iname, asm, cstr, pattern> {
  let Inst{31-20} = 0b111010100101;
  // NOTE(review): extraction dropped line(s) here -- upstream additionally
  // marks these instructions valid for tail predication; confirm.
}
// Scalar shift writing a single GPR result. Restored the "bits<4> RdaDest;"
// field (required by the visible Inst{19-16} assignment) and the brace.
class MVE_ScalarShiftSingleReg<string iname, dag iops, string asm, string cstr,
                               list<dag> pattern=[]>
  : MVE_ScalarShift<iname, (outs rGPR:$RdaDest), iops, asm, cstr, pattern> {
  bits<4> RdaDest;

  let Inst{19-16} = RdaDest{3-0};
}
// Single-register scalar shift by immediate; the pattern maps the matching
// int_arm_mve_<iname> intrinsic. Restored the "[(set rGPR:$RdaDest," head of
// the visibly dangling pattern, the "bits<5> imm;" field required by the
// Inst{14-12}/Inst{7-6} assignments, and the closing brace.
class MVE_ScalarShiftSRegImm<string iname, bits<2> op5_4>
  : MVE_ScalarShiftSingleReg<iname, (ins rGPR:$RdaSrc, long_shift:$imm),
                             "$RdaSrc, $imm", "$RdaDest = $RdaSrc",
                             [(set rGPR:$RdaDest,
                                 (i32 (!cast<Intrinsic>("int_arm_mve_" # iname)
                                         (i32 rGPR:$RdaSrc), (i32 imm:$imm))))]> {
  bits<5> imm;

  // NOTE(review): check whether a "let Inst{15} = ..." line was lost here.
  let Inst{14-12} = imm{4-2};
  let Inst{11-8} = 0b1111;
  let Inst{7-6} = imm{1-0};
  let Inst{5-4} = op5_4{1-0};
  let Inst{3-0} = 0b1111;
}
// Saturating/rounding scalar shifts by immediate; op5_4 selects the
// operation (no ISel patterns beyond the intrinsic one in the base class).
def MVE_SQSHL : MVE_ScalarShiftSRegImm<"sqshl", 0b11>;
def MVE_SRSHR : MVE_ScalarShiftSRegImm<"srshr", 0b10>;
def MVE_UQSHL : MVE_ScalarShiftSRegImm<"uqshl", 0b00>;
def MVE_URSHR : MVE_ScalarShiftSRegImm<"urshr", 0b01>;
// Single-register scalar shift by register amount. Restored the
// "[(set rGPR:$RdaDest," pattern head, the "bits<4> Rm;" field required by
// the visible Inst{15-12} assignment, and the closing brace.
class MVE_ScalarShiftSRegReg<string iname, bits<2> op5_4>
  : MVE_ScalarShiftSingleReg<iname, (ins rGPR:$RdaSrc, rGPR:$Rm),
                             "$RdaSrc, $Rm", "$RdaDest = $RdaSrc",
                             [(set rGPR:$RdaDest,
                                 (i32 (!cast<Intrinsic>("int_arm_mve_" # iname)
                                         (i32 rGPR:$RdaSrc), (i32 rGPR:$Rm))))]> {
  bits<4> Rm;

  let Inst{15-12} = Rm{3-0};
  let Inst{11-8} = 0b1111;
  let Inst{7-6} = 0b00;
  let Inst{5-4} = op5_4{1-0};
  let Inst{3-0} = 0b1101;

  let Unpredictable{8-6} = 0b111;
}
// Saturating+rounding scalar shifts by register.
def MVE_SQRSHR : MVE_ScalarShiftSRegReg<"sqrshr", 0b10>;
def MVE_UQRSHL : MVE_ScalarShiftSRegReg<"uqrshl", 0b00>;
// Scalar shift producing a 64-bit result in an even/odd GPR pair.
// Restored the RdaLo/RdaHi field declarations (required by the visible
// Inst{19-17}/Inst{11-9} assignments) and the closing brace.
class MVE_ScalarShiftDoubleReg<string iname, dag iops, string asm,
                               string cstr, list<dag> pattern=[]>
  : MVE_ScalarShift<iname, (outs tGPREven:$RdaLo, tGPROdd:$RdaHi),
                    iops, asm, cstr, pattern> {
  bits<4> RdaLo;
  bits<4> RdaHi;

  let Inst{19-17} = RdaLo{3-1};
  let Inst{11-9} = RdaHi{3-1};
}
// Double-register scalar shift by immediate. Restored the "pattern> {"
// continuation (the visible base-class call was unterminated), the
// "bits<5> imm;" field required by the imm{...} assignments, and the brace.
class MVE_ScalarShiftDRegImm<string iname, bits<2> op5_4, bit op16,
                             list<dag> pattern=[]>
  : MVE_ScalarShiftDoubleReg<
      iname, (ins tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, long_shift:$imm),
      "$RdaLo, $RdaHi, $imm", "$RdaLo = $RdaLo_src,$RdaHi = $RdaHi_src",
      pattern> {
  bits<5> imm;

  // NOTE(review): op16 wiring reconstructed from the parameter name;
  // confirm the exact dropped "let Inst{16}"/"let Inst{15}" lines upstream.
  let Inst{16} = op16;
  let Inst{14-12} = imm{4-2};
  let Inst{7-6} = imm{1-0};
  let Inst{5-4} = op5_4{1-0};
  let Inst{3-0} = 0b1111;
}
// Double-register scalar shift by register amount; base for the plain and
// saturating forms. Restored the "pattern> {" continuation and the
// "bits<4> Rm;" field required by the visible Inst{15-12} assignment.
class MVE_ScalarShiftDRegRegBase<string iname, dag iops, string asm,
                                 bit op5, bit op16, list<dag> pattern=[]>
  : MVE_ScalarShiftDoubleReg<
      iname, iops, asm, "@earlyclobber $RdaHi,@earlyclobber $RdaLo,"
                        "$RdaLo = $RdaLo_src,$RdaHi = $RdaHi_src",
      pattern> {
  bits<4> Rm;

  // NOTE(review): op16/op5 wiring reconstructed from the parameter names;
  // confirm the exact dropped encoding lines against upstream.
  let Inst{16} = op16;
  let Inst{15-12} = Rm{3-0};
  let Inst{5} = op5;
  let Inst{3-0} = 0b1101;

  // Custom decoder method because of the following overlapping encodings:
  // SQRSHRL and SQRSHR
  // UQRSHLL and UQRSHL
  let DecoderMethod = "DecodeMVEOverlappingLongShift";
}
// Plain (non-saturating-select) double-register shift by register.
// Restored the lost closing brace.
class MVE_ScalarShiftDRegReg<string iname, bit op5, list<dag> pattern=[]>
  : MVE_ScalarShiftDRegRegBase<
      iname, (ins tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, rGPR:$Rm),
      "$RdaLo, $RdaHi, $Rm", op5, 0b0, pattern> {
  // NOTE(review): extraction may have dropped an encoding "let" here
  // (upstream distinguishes this form from the saturating one); confirm.
}
// Double-register shift by register with an explicit saturation-size
// operand ($sat). Restored the lost closing brace.
class MVE_ScalarShiftDRegRegWithSat<string iname, bit op5, list<dag> pattern=[]>
  : MVE_ScalarShiftDRegRegBase<
      iname, (ins tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, rGPR:$Rm, saturateop:$sat),
      "$RdaLo, $RdaHi, $sat, $Rm", op5, 0b1, pattern> {
  // NOTE(review): extraction appears to have dropped the "sat" field
  // declaration and its encoding "let"; restore from upstream.
}
// 64-bit (GPR-pair) long shifts: register and immediate forms of ASRL and
// LSLL, plus immediate-only LSRL, with ISel patterns mapping the generic
// ARMasrl/ARMlsll/ARMlsrl nodes onto them.
def MVE_ASRLr : MVE_ScalarShiftDRegReg<"asrl", 0b1, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
                                                     (ARMasrl tGPREven:$RdaLo_src,
                                                     tGPROdd:$RdaHi_src, rGPR:$Rm))]>;
def MVE_ASRLi : MVE_ScalarShiftDRegImm<"asrl", 0b10, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
                                                         (ARMasrl tGPREven:$RdaLo_src,
                                                         tGPROdd:$RdaHi_src, (i32 long_shift:$imm)))]>;
def MVE_LSLLr : MVE_ScalarShiftDRegReg<"lsll", 0b0, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
                                                     (ARMlsll tGPREven:$RdaLo_src,
                                                     tGPROdd:$RdaHi_src, rGPR:$Rm))]>;
def MVE_LSLLi : MVE_ScalarShiftDRegImm<"lsll", 0b00, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
                                                         (ARMlsll tGPREven:$RdaLo_src,
                                                         tGPROdd:$RdaHi_src, (i32 long_shift:$imm)))]>;
def MVE_LSRL : MVE_ScalarShiftDRegImm<"lsrl", 0b01, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
                                                        (ARMlsrl tGPREven:$RdaLo_src,
                                                        tGPROdd:$RdaHi_src, (i32 long_shift:$imm)))]>;
// Saturating/rounding long shifts; no generic-node ISel patterns here.
def MVE_SQRSHRL : MVE_ScalarShiftDRegRegWithSat<"sqrshrl", 0b1>;
def MVE_SQSHLL : MVE_ScalarShiftDRegImm<"sqshll", 0b11, 0b1>;
def MVE_SRSHRL : MVE_ScalarShiftDRegImm<"srshrl", 0b10, 0b1>;
def MVE_UQRSHLL : MVE_ScalarShiftDRegRegWithSat<"uqrshll", 0b0>;
def MVE_UQSHLL : MVE_ScalarShiftDRegImm<"uqshll", 0b00, 0b1>;
def MVE_URSHRL : MVE_ScalarShiftDRegImm<"urshrl", 0b01, 0b1>;
// start of mve_rDest instructions

// Base class for MVE instructions whose destination is a GPR rather than
// a vector register. Restored the lost closing brace.
class MVE_rDest<dag oops, dag iops, InstrItinClass itin,
                string iname, string suffix,
                string ops, string cstr, list<dag> pattern=[]>
  // Always use vpred_n and not vpred_r: with the output register being
  // a GPR and not a vector register, there can't be any question of
  // what to put in its inactive lanes.
  : MVE_p<oops, iops, itin, iname, suffix, ops, vpred_n, cstr, pattern> {
  let Inst{25-23} = 0b101;
  let Inst{11-9} = 0b111;
  // NOTE(review): extraction may have dropped a further encoding "let"
  // here (e.g. a fixed low bit); confirm against upstream.
}
// VABAV: absolute-difference and accumulate across vector into a GPR.
// U presumably selects signed/unsigned and size the element width (name-
// derived; confirm).
// NOTE(review): this class lost lines in extraction -- the "bits<N>"
// declarations for Qm/Qn/Rda referenced below, several "let Inst{...}"
// assignments (including the U bit) and the closing brace are missing;
// restore from upstream before use.
class MVE_VABAV<string suffix, bit U, bits<2> size>
  : MVE_rDest<(outs rGPR:$Rda), (ins rGPR:$Rda_src, MQPR:$Qn, MQPR:$Qm),
              NoItinerary, "vabav", suffix, "$Rda, $Qn, $Qm", "$Rda = $Rda_src",
  let Inst{21-20} = size{1-0};
  let Inst{19-17} = Qn{2-0};
  let Inst{15-12} = Rda{3-0};
  let Inst{3-1} = Qm{2-0};
// Wraps MVE_VABAV with ISel patterns mapping the plain and predicated
// int_arm_mve_vabav intrinsics onto it (predicated form adds ARMVCCThen
// plus the mask operand).
// NOTE(review): some intrinsic-argument lines (and the closing braces of
// the Pat, the "let" region and the multiclass) were lost in extraction;
// restore from upstream.
multiclass MVE_VABAV_m<MVEVectorVTInfo VTI> {
  def "" : MVE_VABAV<VTI.Suffix, VTI.Unsigned, VTI.Size>;
  defvar Inst = !cast<Instruction>(NAME);
  let Predicates = [HasMVEInt] in {
    // Unpredicated intrinsic -> instruction.
    def : Pat<(i32 (int_arm_mve_vabav
                      (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
              (i32 (Inst (i32 rGPR:$Rda_src),
                         (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm)))>;
    // Predicated intrinsic -> instruction under a "then" VPT block.
    def : Pat<(i32 (int_arm_mve_vabav_predicated
                      (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
                      (VTI.Pred VCCR:$mask))),
              (i32 (Inst (i32 rGPR:$Rda_src),
                         (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
                         ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
// VABAV instantiations for all signed and unsigned element widths.
defm MVE_VABAVs8 : MVE_VABAV_m<MVE_v16s8>;
defm MVE_VABAVs16 : MVE_VABAV_m<MVE_v8s16>;
defm MVE_VABAVs32 : MVE_VABAV_m<MVE_v4s32>;
defm MVE_VABAVu8 : MVE_VABAV_m<MVE_v16u8>;
defm MVE_VABAVu16 : MVE_VABAV_m<MVE_v8u16>;
defm MVE_VABAVu32 : MVE_VABAV_m<MVE_v4u32>;
// VADDV/VADDVA: add all vector lanes into a GPR (A presumably selects the
// accumulating form, U signed/unsigned -- name-derived; confirm).
// NOTE(review): the "bits<N>" declarations for Rda/Qm referenced below,
// the "let"s wiring A and U, and the closing brace were lost in
// extraction; restore from upstream before use.
class MVE_VADDV<string iname, string suffix, dag iops, string cstr,
                bit A, bit U, bits<2> size, list<dag> pattern=[]>
  : MVE_rDest<(outs tGPREven:$Rda), iops, NoItinerary,
              iname, suffix, "$Rda, $Qm", cstr, pattern> {
  let Inst{22-20} = 0b111;
  let Inst{19-18} = size{1-0};
  let Inst{17-16} = 0b01;
  let Inst{15-13} = Rda{3-1};
  let Inst{8-6} = 0b100;
  let Inst{3-1} = Qm{2-0};
// Instantiates the accumulating (vaddva) and non-accumulating (vaddv)
// forms of VADDV. Restored the no_acc operand line "(ins MQPR:$Qm), "","
// (grounded by the parallel acc def) and the closing brace.
multiclass MVE_VADDV_A<string suffix, bit U, bits<2> size,
                       list<dag> pattern=[]> {
  def acc : MVE_VADDV<"vaddva", suffix,
                      (ins tGPREven:$Rda_src, MQPR:$Qm), "$Rda = $Rda_src",
                      0b1, U, size, pattern>;
  def no_acc : MVE_VADDV<"vaddv", suffix,
                         (ins MQPR:$Qm), "",
                         0b0, U, size, pattern>;
}
// VADDV instantiations for all signed and unsigned element widths.
defm MVE_VADDVs8 : MVE_VADDV_A<"s8", 0b0, 0b00>;
defm MVE_VADDVs16 : MVE_VADDV_A<"s16", 0b0, 0b01>;
defm MVE_VADDVs32 : MVE_VADDV_A<"s32", 0b0, 0b10>;
defm MVE_VADDVu8 : MVE_VADDV_A<"u8", 0b1, 0b00>;
defm MVE_VADDVu16 : MVE_VADDV_A<"u16", 0b1, 0b01>;
defm MVE_VADDVu32 : MVE_VADDV_A<"u32", 0b1, 0b10>;
// Select generic vecreduce_add (and add-of-reduction) onto the unsigned
// VADDV forms; sign is irrelevant for a pure lane sum. Restored the lost
// closing brace of the "let Predicates" region.
let Predicates = [HasMVEInt] in {
  def : Pat<(i32 (vecreduce_add (v4i32 MQPR:$src))), (i32 (MVE_VADDVu32no_acc $src))>;
  def : Pat<(i32 (vecreduce_add (v8i16 MQPR:$src))), (i32 (MVE_VADDVu16no_acc $src))>;
  def : Pat<(i32 (vecreduce_add (v16i8 MQPR:$src))), (i32 (MVE_VADDVu8no_acc $src))>;
  def : Pat<(i32 (add (i32 (vecreduce_add (v4i32 MQPR:$src1))), (i32 tGPR:$src2))),
            (i32 (MVE_VADDVu32acc $src2, $src1))>;
  def : Pat<(i32 (add (i32 (vecreduce_add (v8i16 MQPR:$src1))), (i32 tGPR:$src2))),
            (i32 (MVE_VADDVu16acc $src2, $src1))>;
  def : Pat<(i32 (add (i32 (vecreduce_add (v16i8 MQPR:$src1))), (i32 tGPR:$src2))),
            (i32 (MVE_VADDVu8acc $src2, $src1))>;
}
// VADDLV/VADDLVA: add all lanes into a 64-bit value held in an even/odd
// GPR pair (A presumably the accumulating form, U signed/unsigned --
// name-derived; confirm).
// NOTE(review): the "bits<N>" declarations for RdaLo/RdaHi/Qm referenced
// below, the "let"s wiring A and U, and the closing brace were lost in
// extraction; restore from upstream before use.
class MVE_VADDLV<string iname, string suffix, dag iops, string cstr,
                 bit A, bit U, list<dag> pattern=[]>
  : MVE_rDest<(outs tGPREven:$RdaLo, tGPROdd:$RdaHi), iops, NoItinerary, iname,
              suffix, "$RdaLo, $RdaHi, $Qm", cstr, pattern> {
  let Inst{22-20} = RdaHi{3-1};
  let Inst{19-18} = 0b10;
  let Inst{17-16} = 0b01;
  let Inst{15-13} = RdaLo{3-1};
  let Inst{8-6} = 0b100;
  let Inst{3-1} = Qm{2-0};
// Instantiates the accumulating (vaddlva) and non-accumulating (vaddlv)
// forms of VADDLV. Restored the argument-list continuations of both defs
// (grounded by the parallel MVE_VADDV_A structure) and the closing brace.
multiclass MVE_VADDLV_A<string suffix, bit U, list<dag> pattern=[]> {
  def acc : MVE_VADDLV<"vaddlva", suffix,
                       (ins tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, MQPR:$Qm),
                       "$RdaLo = $RdaLo_src,$RdaHi = $RdaHi_src",
                       0b1, U, pattern>;
  def no_acc : MVE_VADDLV<"vaddlv", suffix,
                          (ins MQPR:$Qm), "",
                          0b0, U, pattern>;
}

// Only 32-bit elements exist in the long-accumulate form.
defm MVE_VADDLVs32 : MVE_VADDLV_A<"s32", 0b0>;
defm MVE_VADDLVu32 : MVE_VADDLV_A<"u32", 0b1>;
// Shared class for VMINNMV/VMAXNMV/VMINNMAV/VMAXNMAV: floating-point
// min/max across a vector into a GPR; sz presumably selects f16 vs f32
// and bit_17/bit_7 the variant (name-derived; confirm).
// NOTE(review): the "bits<N>" declarations for RdaDest/Qm referenced
// below, some "let Inst{...}" lines (including sz and bit_7 wiring) and
// the closing brace were lost in extraction; restore from upstream.
class MVE_VMINMAXNMV<string iname, string suffix, bit sz,
                     bit bit_17, bit bit_7, list<dag> pattern=[]>
  : MVE_rDest<(outs rGPR:$RdaDest), (ins rGPR:$RdaSrc, MQPR:$Qm),
              NoItinerary, iname, suffix, "$RdaSrc, $Qm",
              "$RdaDest = $RdaSrc", pattern> {
  let Inst{22-20} = 0b110;
  let Inst{19-18} = 0b11;
  let Inst{17} = bit_17;
  let Inst{15-12} = RdaDest{3-0};
  let Inst{6-5} = 0b00;
  let Inst{3-1} = Qm{2-0};
  // Floating-point operation, so require the MVE FP extension.
  let Predicates = [HasMVEFloat];
// f32/f16 instantiations of the NaN-propagating min/max-across-vector
// (bit_17=1) and the absolute variants (bit_17=0). Restored the two lost
// closing braces.
multiclass MVE_VMINMAXNMV_fty<string iname, bit bit_7, list<dag> pattern=[]> {
  def f32 : MVE_VMINMAXNMV<iname, "f32", 0b0, 0b1, bit_7, pattern>;
  def f16 : MVE_VMINMAXNMV<iname, "f16", 0b1, 0b1, bit_7, pattern>;
}

defm MVE_VMINNMV : MVE_VMINMAXNMV_fty<"vminnmv", 0b1>;
defm MVE_VMAXNMV : MVE_VMINMAXNMV_fty<"vmaxnmv", 0b0>;

multiclass MVE_VMINMAXNMAV_fty<string iname, bit bit_7, list<dag> pattern=[]> {
  def f32 : MVE_VMINMAXNMV<iname, "f32", 0b0, 0b0, bit_7, pattern>;
  def f16 : MVE_VMINMAXNMV<iname, "f16", 0b1, 0b0, bit_7, pattern>;
}

defm MVE_VMINNMAV : MVE_VMINMAXNMAV_fty<"vminnmav", 0b1>;
defm MVE_VMAXNMAV : MVE_VMINMAXNMAV_fty<"vmaxnmav", 0b0>;
// Integer min/max across a vector into a GPR (VMINV/VMAXV and the
// absolute VMINAV/VMAXAV forms below).
// NOTE(review): the "bits<N>" declarations for RdaDest/Qm referenced
// below, some "let Inst{...}" lines (including U and bit_7 wiring) and
// the closing brace were lost in extraction; restore from upstream.
class MVE_VMINMAXV<string iname, string suffix, bit U, bits<2> size,
                   bit bit_17, bit bit_7, list<dag> pattern=[]>
  : MVE_rDest<(outs rGPR:$RdaDest), (ins rGPR:$RdaSrc, MQPR:$Qm), NoItinerary,
              iname, suffix, "$RdaSrc, $Qm", "$RdaDest = $RdaSrc", pattern> {
  let Inst{22-20} = 0b110;
  let Inst{19-18} = size{1-0};
  let Inst{17} = bit_17;
  let Inst{15-12} = RdaDest{3-0};
  let Inst{6-5} = 0b00;
  let Inst{3-1} = Qm{2-0};
// One VMINMAXV instruction plus the pattern mapping its intrinsic onto it.
// Restored the "bit_17, bit_7>;" argument continuation (the visible call
// ended mid-argument-list; these are the remaining class parameters) and
// the closing brace.
multiclass MVE_VMINMAXV_p<string iname, bit bit_17, bit bit_7,
                          MVEVectorVTInfo VTI, Intrinsic intr> {
  def "": MVE_VMINMAXV<iname, VTI.Suffix, VTI.Unsigned, VTI.Size,
                       bit_17, bit_7>;
  defvar Inst = !cast<Instruction>(NAME);

  let Predicates = [HasMVEInt] in
  def _pat : Pat<(i32 (intr (i32 rGPR:$prev), (VTI.Vec MQPR:$vec))),
                 (i32 (Inst (i32 rGPR:$prev), (VTI.Vec MQPR:$vec)))>;
}
// Instantiates VMINMAXV_p across all signed and unsigned element widths,
// choosing the signed or unsigned intrinsic accordingly. Restored the
// lost closing brace of the multiclass.
multiclass MVE_VMINMAXV_ty<string iname, bit bit_7,
                           Intrinsic intr_s, Intrinsic intr_u> {
  defm s8 : MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v16s8, intr_s>;
  defm s16: MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v8s16, intr_s>;
  defm s32: MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v4s32, intr_s>;
  defm u8 : MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v16u8, intr_u>;
  defm u16: MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v8u16, intr_u>;
  defm u32: MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v4u32, intr_u>;
}

defm MVE_VMINV : MVE_VMINMAXV_ty<
  "vminv", 0b1, int_arm_mve_minv_s, int_arm_mve_minv_u>;
defm MVE_VMAXV : MVE_VMINMAXV_ty<
  "vmaxv", 0b0, int_arm_mve_maxv_s, int_arm_mve_maxv_u>;
// Select generic vecreduce_{s,u}{min,max} onto VMINV/VMAXV, seeding the
// scalar operand with the identity element for each operation (type min
// for smax, 0 for umax, type max for smin, all-ones for umin). Restored
// the lost closing brace of the "let Predicates" region.
let Predicates = [HasMVEInt] in {
  def : Pat<(i32 (vecreduce_smax (v16i8 MQPR:$src))),
            (i32 (MVE_VMAXVs8 (t2MVNi (i32 127)), $src))>;
  def : Pat<(i32 (vecreduce_smax (v8i16 MQPR:$src))),
            (i32 (MVE_VMAXVs16 (t2MOVi32imm (i32 -32768)), $src))>;
  def : Pat<(i32 (vecreduce_smax (v4i32 MQPR:$src))),
            (i32 (MVE_VMAXVs32 (t2MOVi (i32 -2147483648)), $src))>;
  def : Pat<(i32 (vecreduce_umax (v16i8 MQPR:$src))),
            (i32 (MVE_VMAXVu8 (t2MOVi (i32 0)), $src))>;
  def : Pat<(i32 (vecreduce_umax (v8i16 MQPR:$src))),
            (i32 (MVE_VMAXVu16 (t2MOVi (i32 0)), $src))>;
  def : Pat<(i32 (vecreduce_umax (v4i32 MQPR:$src))),
            (i32 (MVE_VMAXVu32 (t2MOVi (i32 0)), $src))>;

  def : Pat<(i32 (vecreduce_smin (v16i8 MQPR:$src))),
            (i32 (MVE_VMINVs8 (t2MOVi (i32 127)), $src))>;
  def : Pat<(i32 (vecreduce_smin (v8i16 MQPR:$src))),
            (i32 (MVE_VMINVs16 (t2MOVi16 (i32 32767)), $src))>;
  def : Pat<(i32 (vecreduce_smin (v4i32 MQPR:$src))),
            (i32 (MVE_VMINVs32 (t2MVNi (i32 -2147483648)), $src))>;
  def : Pat<(i32 (vecreduce_umin (v16i8 MQPR:$src))),
            (i32 (MVE_VMINVu8 (t2MOVi (i32 255)), $src))>;
  def : Pat<(i32 (vecreduce_umin (v8i16 MQPR:$src))),
            (i32 (MVE_VMINVu16 (t2MOVi16 (i32 65535)), $src))>;
  def : Pat<(i32 (vecreduce_umin (v4i32 MQPR:$src))),
            (i32 (MVE_VMINVu32 (t2MOVi (i32 4294967295)), $src))>;
}
// Absolute min/max across vector (VMINAV/VMAXAV): signed element types
// only, since the absolute value of an unsigned lane is itself. Restored
// the lost closing brace of the multiclass.
multiclass MVE_VMINMAXAV_ty<string iname, bit bit_7, list<dag> pattern=[]> {
  def s8 : MVE_VMINMAXV<iname, "s8", 0b0, 0b00, 0b0, bit_7>;
  def s16 : MVE_VMINMAXV<iname, "s16", 0b0, 0b01, 0b0, bit_7>;
  def s32 : MVE_VMINMAXV<iname, "s32", 0b0, 0b10, 0b0, bit_7>;
}

defm MVE_VMINAV : MVE_VMINMAXAV_ty<"vminav", 0b1>;
defm MVE_VMAXAV : MVE_VMINMAXAV_ty<"vmaxav", 0b0>;
// Shared class for VMLADAV/VMLSDAV: dot-product style multiply-and-
// accumulate/subtract across vector into a GPR. The A/X/bit_* parameters
// presumably select accumulate, exchange and operation variants (name-
// derived; confirm).
// NOTE(review): the "bits<N>" declarations for Qn/Qm/RdaDest referenced
// below, several "let Inst{...}" assignments (A, X, bit_8, bit_0, sz
// wiring) and the closing brace were lost in extraction; restore from
// upstream before use.
class MVE_VMLAMLSDAV<string iname, string suffix, dag iops, string cstr,
                     bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0>
  : MVE_rDest<(outs tGPREven:$RdaDest), iops, NoItinerary, iname, suffix,
              "$RdaDest, $Qn, $Qm", cstr, []> {
  let Inst{28} = bit_28;
  let Inst{22-20} = 0b111;
  let Inst{19-17} = Qn{2-0};
  let Inst{15-13} = RdaDest{3-1};
  let Inst{7-6} = 0b00;
  let Inst{3-1} = Qm{2-0};
// Emits the non-accumulating and accumulating forms of one VMLADAV/VMLSDAV
// variant, plus patterns mapping the int_arm_mve_vmldava intrinsic (plain
// and predicated, zero and live accumulator) onto them.
// NOTE(review): extraction lost lines here -- at least one intrinsic
// argument line after each "int_arm_mve_vmldava[_predicated]" (before the
// visible "(i32 bit_0)" lines) and the closing braces of the "let" region
// and the multiclass; restore from upstream before use.
multiclass MVE_VMLAMLSDAV_A<string iname, string x, MVEVectorVTInfo VTI,
                            bit sz, bit bit_28, bit X, bit bit_8, bit bit_0> {
  def ""#x#VTI.Suffix : MVE_VMLAMLSDAV<iname # x, VTI.Suffix,
                                       (ins MQPR:$Qn, MQPR:$Qm), "",
                                       sz, bit_28, 0b0, X, bit_8, bit_0>;
  def "a"#x#VTI.Suffix : MVE_VMLAMLSDAV<iname # "a" # x, VTI.Suffix,
                                        (ins tGPREven:$RdaSrc, MQPR:$Qn, MQPR:$Qm),
                                        "$RdaDest = $RdaSrc",
                                        sz, bit_28, 0b1, X, bit_8, bit_0>;
  let Predicates = [HasMVEInt] in {
    // Unpredicated, zero accumulator -> non-accumulating instruction.
    def : Pat<(i32 (int_arm_mve_vmldava
                      (i32 bit_0) /* subtract */,
                      (i32 X) /* exchange */,
                      (i32 0) /* accumulator */,
                      (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
              (i32 (!cast<Instruction>(NAME # x # VTI.Suffix)
                      (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm)))>;
    // Predicated, zero accumulator.
    def : Pat<(i32 (int_arm_mve_vmldava_predicated
                      (i32 bit_0) /* subtract */,
                      (i32 X) /* exchange */,
                      (i32 0) /* accumulator */,
                      (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
                      (VTI.Pred VCCR:$mask))),
              (i32 (!cast<Instruction>(NAME # x # VTI.Suffix)
                      (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
                      ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
    // Unpredicated, live accumulator -> accumulating instruction.
    def : Pat<(i32 (int_arm_mve_vmldava
                      (i32 bit_0) /* subtract */,
                      (i32 X) /* exchange */,
                      (i32 tGPREven:$RdaSrc),
                      (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
              (i32 (!cast<Instruction>(NAME # "a" # x # VTI.Suffix)
                      (i32 tGPREven:$RdaSrc),
                      (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm)))>;
    // Predicated, live accumulator.
    def : Pat<(i32 (int_arm_mve_vmldava_predicated
                      (i32 bit_0) /* subtract */,
                      (i32 X) /* exchange */,
                      (i32 tGPREven:$RdaSrc),
                      (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
                      (VTI.Pred VCCR:$mask))),
              (i32 (!cast<Instruction>(NAME # "a" # x # VTI.Suffix)
                      (i32 tGPREven:$RdaSrc),
                      (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
                      ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
// Adds the plain and exchanged ("x") variants on top of MVE_VMLAMLSDAV_A.
906 multiclass MVE_VMLAMLSDAV_AX<string iname, MVEVectorVTInfo VTI, bit sz,
907 bit bit_28, bit bit_8, bit bit_0> {
908 defm "" : MVE_VMLAMLSDAV_A<iname, "", VTI, sz, bit_28,
910 defm "" : MVE_VMLAMLSDAV_A<iname, "x", VTI, sz, bit_28,
// vmladav: the signed type gets all A/X variants; the unsigned type gets
// only the non-exchanged forms, distinguished by bit_28 = 0b1.
914 multiclass MVE_VMLADAV_multi<MVEVectorVTInfo SVTI, MVEVectorVTInfo UVTI,
916 defm "" : MVE_VMLAMLSDAV_AX<"vmladav", SVTI,
917 sz, 0b0, bit_8, 0b0>;
918 defm "" : MVE_VMLAMLSDAV_A<"vmladav", "", UVTI,
919 sz, 0b1, 0b0, bit_8, 0b0>;
// vmlsdav (subtract flavour, bit_0 = 0b1): signed types only.
922 multiclass MVE_VMLSDAV_multi<MVEVectorVTInfo VTI, bit sz, bit bit_28> {
923 defm "" : MVE_VMLAMLSDAV_AX<"vmlsdav", VTI,
924 sz, bit_28, 0b0, 0b1>;
// Instantiations for the three element sizes (8/16/32-bit lanes).
927 defm MVE_VMLADAV : MVE_VMLADAV_multi<MVE_v16s8, MVE_v16u8, 0b0, 0b1>;
928 defm MVE_VMLADAV : MVE_VMLADAV_multi<MVE_v8s16, MVE_v8u16, 0b0, 0b0>;
929 defm MVE_VMLADAV : MVE_VMLADAV_multi<MVE_v4s32, MVE_v4u32, 0b1, 0b0>;
931 defm MVE_VMLSDAV : MVE_VMLSDAV_multi<MVE_v16s8, 0b0, 0b1>;
932 defm MVE_VMLSDAV : MVE_VMLSDAV_multi<MVE_v8s16, 0b0, 0b0>;
933 defm MVE_VMLSDAV : MVE_VMLSDAV_multi<MVE_v4s32, 0b1, 0b0>;
935 // vmlav aliases vmladav
936 foreach acc = ["", "a"] in {
937 foreach suffix = ["s8", "s16", "s32", "u8", "u16", "u32"] in {
938 def : MVEInstAlias<"vmlav"#acc#"${vp}."#suffix#"\t$RdaDest, $Qn, $Qm",
939 (!cast<Instruction>("MVE_VMLADAV"#acc#suffix)
940 tGPREven:$RdaDest, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
944 // Base class for VMLALDAV and VMLSLDAV, VRMLALDAVH, VRMLSLDAVH
// Long (64-bit) multiply-accumulate across vector: the result is split over
// an even/odd GPR pair ($RdaLoDest, $RdaHiDest). Parameters mirror
// MVE_VMLAMLSDAV above (sz, bit_28, A = accumulate, X = exchange, plus two
// opcode bits).
945 class MVE_VMLALDAVBase<string iname, string suffix, dag iops, string cstr,
946 bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0,
947 list<dag> pattern=[]>
948 : MVE_rDest<(outs tGPREven:$RdaLoDest, tGPROdd:$RdaHiDest), iops, NoItinerary,
949 iname, suffix, "$RdaLoDest, $RdaHiDest, $Qn, $Qm", cstr, pattern> {
955 let Inst{28} = bit_28;
// Unlike the single-register variant, bits 22-20 here encode the high
// destination register rather than a fixed pattern.
956 let Inst{22-20} = RdaHiDest{3-1};
957 let Inst{19-17} = Qn{2-0};
959 let Inst{15-13} = RdaLoDest{3-1};
962 let Inst{7-6} = 0b00;
964 let Inst{3-1} = Qm{2-0};
// Non-accumulating and accumulating ("a") forms; the accumulating form ties
// both halves of the 64-bit accumulator to its inputs.
968 multiclass MVE_VMLALDAVBase_A<string iname, string x, string suffix,
969 bit sz, bit bit_28, bit X, bit bit_8, bit bit_0,
970 list<dag> pattern=[]> {
971 def ""#x#suffix : MVE_VMLALDAVBase<
972 iname # x, suffix, (ins MQPR:$Qn, MQPR:$Qm), "",
973 sz, bit_28, 0b0, X, bit_8, bit_0, pattern>;
974 def "a"#x#suffix : MVE_VMLALDAVBase<
975 iname # "a" # x, suffix,
976 (ins tGPREven:$RdaLoSrc, tGPROdd:$RdaHiSrc, MQPR:$Qn, MQPR:$Qm),
977 "$RdaLoDest = $RdaLoSrc,$RdaHiDest = $RdaHiSrc",
978 sz, bit_28, 0b1, X, bit_8, bit_0, pattern>;
// Adds the plain and exchanged ("x") variants.
982 multiclass MVE_VMLALDAVBase_AX<string iname, string suffix, bit sz, bit bit_28,
983 bit bit_8, bit bit_0, list<dag> pattern=[]> {
984 defm "" : MVE_VMLALDAVBase_A<iname, "", suffix, sz,
985 bit_28, 0b0, bit_8, bit_0, pattern>;
986 defm "" : MVE_VMLALDAVBase_A<iname, "x", suffix, sz,
987 bit_28, 0b1, bit_8, bit_0, pattern>;
// vrmlaldavh: signed suffix gets all variants, unsigned gets only the
// non-exchanged forms (distinguished by bit_28 = 0b1, as for vmladav).
990 multiclass MVE_VRMLALDAVH_multi<string suffix, list<dag> pattern=[]> {
991 defm "" : MVE_VMLALDAVBase_AX<"vrmlaldavh", "s"#suffix,
992 0b0, 0b0, 0b1, 0b0, pattern>;
993 defm "" : MVE_VMLALDAVBase_A<"vrmlaldavh", "", "u"#suffix,
994 0b0, 0b1, 0b0, 0b1, 0b0, pattern>;
997 defm MVE_VRMLALDAVH : MVE_VRMLALDAVH_multi<"32">;
999 // vrmlalvh aliases for vrmlaldavh
1000 def : MVEInstAlias<"vrmlalvh${vp}.s32\t$RdaLo, $RdaHi, $Qn, $Qm",
1002 tGPREven:$RdaLo, tGPROdd:$RdaHi,
1003 MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
1004 def : MVEInstAlias<"vrmlalvha${vp}.s32\t$RdaLo, $RdaHi, $Qn, $Qm",
1006 tGPREven:$RdaLo, tGPROdd:$RdaHi,
1007 MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
1008 def : MVEInstAlias<"vrmlalvh${vp}.u32\t$RdaLo, $RdaHi, $Qn, $Qm",
1010 tGPREven:$RdaLo, tGPROdd:$RdaHi,
1011 MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
1012 def : MVEInstAlias<"vrmlalvha${vp}.u32\t$RdaLo, $RdaHi, $Qn, $Qm",
1014 tGPREven:$RdaLo, tGPROdd:$RdaHi,
1015 MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
// vmlaldav: same signed/unsigned split, parameterized on element size.
1017 multiclass MVE_VMLALDAV_multi<string suffix, bit sz, list<dag> pattern=[]> {
1018 defm "" : MVE_VMLALDAVBase_AX<"vmlaldav", "s"#suffix, sz, 0b0, 0b0, 0b0, pattern>;
1019 defm "" : MVE_VMLALDAVBase_A<"vmlaldav", "", "u"#suffix,
1020 sz, 0b1, 0b0, 0b0, 0b0, pattern>;
1023 defm MVE_VMLALDAV : MVE_VMLALDAV_multi<"16", 0b0>;
1024 defm MVE_VMLALDAV : MVE_VMLALDAV_multi<"32", 0b1>;
1026 // vmlalv aliases vmlaldav
1027 foreach acc = ["", "a"] in {
1028 foreach suffix = ["s16", "s32", "u16", "u32"] in {
1029 def : MVEInstAlias<"vmlalv" # acc # "${vp}." # suffix #
1030 "\t$RdaLoDest, $RdaHiDest, $Qn, $Qm",
1031 (!cast<Instruction>("MVE_VMLALDAV"#acc#suffix)
1032 tGPREven:$RdaLoDest, tGPROdd:$RdaHiDest,
1033 MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
// Subtracting long variants (bit_0 = 0b1); shared by vmlsldav and
// vrmlsldavh, signed only.
1037 multiclass MVE_VMLSLDAV_multi<string iname, string suffix, bit sz,
1038 bit bit_28, list<dag> pattern=[]> {
1039 defm "" : MVE_VMLALDAVBase_AX<iname, suffix, sz, bit_28, 0b0, 0b1, pattern>;
1042 defm MVE_VMLSLDAV : MVE_VMLSLDAV_multi<"vmlsldav", "s16", 0b0, 0b0>;
1043 defm MVE_VMLSLDAV : MVE_VMLSLDAV_multi<"vmlsldav", "s32", 0b1, 0b0>;
1044 defm MVE_VRMLSLDAVH : MVE_VMLSLDAV_multi<"vrmlsldavh", "s32", 0b0, 0b1>;
1046 // end of mve_rDest instructions
1048 // start of mve_comp instructions
// Common shape for three-Q-register "comp" instructions: $Qd = op($Qn, $Qm)
// with register-style predication (vpred_r). Encodes the split register
// numbers of all three Q operands.
1050 class MVE_comp<InstrItinClass itin, string iname, string suffix,
1051 string cstr, list<dag> pattern=[]>
1052 : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), itin, iname, suffix,
1053 "$Qd, $Qn, $Qm", vpred_r, cstr, pattern> {
1058 let Inst{22} = Qd{3};
1059 let Inst{19-17} = Qn{2-0};
1061 let Inst{15-13} = Qd{2-0};
1063 let Inst{10-9} = 0b11;
1064 let Inst{7} = Qn{3};
1065 let Inst{5} = Qm{3};
1066 let Inst{3-1} = Qm{2-0};
// vmaxnm/vminnm (IEEE-style float min/max): sz selects f16 vs f32 and
// bit 21 distinguishes min from max. Requires the MVE float subtarget.
1070 class MVE_VMINMAXNM<string iname, string suffix, bit sz, bit bit_21,
1071 list<dag> pattern=[]>
1072 : MVE_comp<NoItinerary, iname, suffix, "", pattern> {
1075 let Inst{25-24} = 0b11;
1077 let Inst{21} = bit_21;
1084 let Predicates = [HasMVEFloat];
// vmaxnm definitions (bit_21 = 0b0) for both float element sizes.
1087 def MVE_VMAXNMf32 : MVE_VMINMAXNM<"vmaxnm", "f32", 0b0, 0b0>;
1088 def MVE_VMAXNMf16 : MVE_VMINMAXNM<"vmaxnm", "f16", 0b1, 0b0>;
1090 let Predicates = [HasMVEFloat] in {
// Unpredicated selection from the generic fmaxnum node...
1091 def : Pat<(v4f32 (fmaxnum (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
1092 (v4f32 (MVE_VMAXNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
1093 def : Pat<(v8f16 (fmaxnum (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
1094 (v8f16 (MVE_VMAXNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
// ...and predicated selection from the MVE intrinsic (the (i32 0)
// operand matches the intrinsic's unsigned flag, unused for floats).
1095 def : Pat<(v4f32 (int_arm_mve_max_predicated (v4f32 MQPR:$val1), (v4f32 MQPR:$val2), (i32 0),
1096 (v4i1 VCCR:$mask), (v4f32 MQPR:$inactive))),
1097 (v4f32 (MVE_VMAXNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2),
1098 ARMVCCThen, (v4i1 VCCR:$mask),
1099 (v4f32 MQPR:$inactive)))>;
1100 def : Pat<(v8f16 (int_arm_mve_max_predicated (v8f16 MQPR:$val1), (v8f16 MQPR:$val2), (i32 0),
1101 (v8i1 VCCR:$mask), (v8f16 MQPR:$inactive))),
1102 (v8f16 (MVE_VMAXNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2),
1103 ARMVCCThen, (v8i1 VCCR:$mask),
1104 (v8f16 MQPR:$inactive)))>;
// vminnm definitions (bit_21 = 0b1) with the mirror-image patterns.
1107 def MVE_VMINNMf32 : MVE_VMINMAXNM<"vminnm", "f32", 0b0, 0b1>;
1108 def MVE_VMINNMf16 : MVE_VMINMAXNM<"vminnm", "f16", 0b1, 0b1>;
1110 let Predicates = [HasMVEFloat] in {
1111 def : Pat<(v4f32 (fminnum (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
1112 (v4f32 (MVE_VMINNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
1113 def : Pat<(v8f16 (fminnum (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
1114 (v8f16 (MVE_VMINNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
1115 def : Pat<(v4f32 (int_arm_mve_min_predicated (v4f32 MQPR:$val1), (v4f32 MQPR:$val2),
1116 (i32 0), (v4i1 VCCR:$mask), (v4f32 MQPR:$inactive))),
1117 (v4f32 (MVE_VMINNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2),
1118 ARMVCCThen, (v4i1 VCCR:$mask),
1119 (v4f32 MQPR:$inactive)))>;
1120 def : Pat<(v8f16 (int_arm_mve_min_predicated (v8f16 MQPR:$val1), (v8f16 MQPR:$val2),
1121 (i32 0), (v8i1 VCCR:$mask), (v8f16 MQPR:$inactive))),
1122 (v8f16 (MVE_VMINNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2),
1123 ARMVCCThen, (v8i1 VCCR:$mask),
1124 (v8f16 MQPR:$inactive)))>;
// Integer vmin/vmax. U is the signed/unsigned bit, size the 2-bit element
// size, bit_4 selects min (0b1 below) vs max (0b0 below).
1128 class MVE_VMINMAX<string iname, string suffix, bit U, bits<2> size,
1129 bit bit_4, list<dag> pattern=[]>
1130 : MVE_comp<NoItinerary, iname, suffix, "", pattern> {
1133 let Inst{25-24} = 0b11;
1135 let Inst{21-20} = size{1-0};
1139 let Inst{4} = bit_4;
// Safe to use inside tail-predicated loops.
1140 let validForTailPredication = 1;
// Defines the instruction and its selection patterns: the generic
// umin/umax/smin/smax node for the unpredicated form, the MVE predicated
// intrinsic (which carries an explicit unsigned flag) for the other.
1143 multiclass MVE_VMINMAX_m<string iname, bit bit_4, MVEVectorVTInfo VTI,
1144 SDNode unpred_op, Intrinsic pred_int> {
1145 def "" : MVE_VMINMAX<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, bit_4>;
1146 defvar Inst = !cast<Instruction>(NAME);
1148 let Predicates = [HasMVEInt] in {
1149 // Unpredicated min/max
1150 def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
1151 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
1153 // Predicated min/max
1154 def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
1155 (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
1156 (VTI.Vec MQPR:$inactive))),
1157 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
1158 ARMVCCThen, (VTI.Pred VCCR:$mask),
1159 (VTI.Vec MQPR:$inactive)))>;
// Convenience wrappers choosing the signed or unsigned generic node from
// the type info, then one instantiation per element type.
1163 multiclass MVE_VMAX<MVEVectorVTInfo VTI>
1164 : MVE_VMINMAX_m<"vmax", 0b0, VTI, !if(VTI.Unsigned, umax, smax), int_arm_mve_max_predicated>;
1165 multiclass MVE_VMIN<MVEVectorVTInfo VTI>
1166 : MVE_VMINMAX_m<"vmin", 0b1, VTI, !if(VTI.Unsigned, umin, smin), int_arm_mve_min_predicated>;
1168 defm MVE_VMINs8 : MVE_VMIN<MVE_v16s8>;
1169 defm MVE_VMINs16 : MVE_VMIN<MVE_v8s16>;
1170 defm MVE_VMINs32 : MVE_VMIN<MVE_v4s32>;
1171 defm MVE_VMINu8 : MVE_VMIN<MVE_v16u8>;
1172 defm MVE_VMINu16 : MVE_VMIN<MVE_v8u16>;
1173 defm MVE_VMINu32 : MVE_VMIN<MVE_v4u32>;
1175 defm MVE_VMAXs8 : MVE_VMAX<MVE_v16s8>;
1176 defm MVE_VMAXs16 : MVE_VMAX<MVE_v8s16>;
1177 defm MVE_VMAXs32 : MVE_VMAX<MVE_v4s32>;
1178 defm MVE_VMAXu8 : MVE_VMAX<MVE_v16u8>;
1179 defm MVE_VMAXu16 : MVE_VMAX<MVE_v8u16>;
1180 defm MVE_VMAXu32 : MVE_VMAX<MVE_v4u32>;
1182 // end of mve_comp instructions
1184 // start of mve_bit instructions
// Common base for the bitwise instructions: places Qd and Qm register
// numbers; subclasses add the remaining opcode bits (and Qn when present).
1186 class MVE_bit_arith<dag oops, dag iops, string iname, string suffix,
1187 string ops, string cstr, list<dag> pattern=[]>
1188 : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred_r, cstr, pattern> {
1192 let Inst{22} = Qd{3};
1193 let Inst{15-13} = Qd{2-0};
1194 let Inst{5} = Qm{3};
1195 let Inst{3-1} = Qm{2-0};
// Register-form vbic ($Qd = $Qn AND NOT $Qm, per the patterns further
// below). Note the empty suffix: the mnemonic takes no type suffix itself;
// typed spellings are provided as aliases later in this file.
1198 def MVE_VBIC : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm),
1199 "vbic", "", "$Qd, $Qn, $Qm", ""> {
1203 let Inst{25-23} = 0b110;
1204 let Inst{21-20} = 0b01;
1205 let Inst{19-17} = Qn{2-0};
1207 let Inst{12-8} = 0b00001;
1208 let Inst{7} = Qn{3};
1212 let validForTailPredication = 1;
// vrev64/vrev32/vrev16: reverse elements within 64/32/16-bit containers.
// bit_8_7 distinguishes the container width; size is the element size.
1215 class MVE_VREV<string iname, string suffix, bits<2> size, bits<2> bit_8_7, string cstr="">
1216 : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm), iname,
1217 suffix, "$Qd, $Qm", cstr> {
1220 let Inst{25-23} = 0b111;
1221 let Inst{21-20} = 0b11;
1222 let Inst{19-18} = size;
1223 let Inst{17-16} = 0b00;
1224 let Inst{12-9} = 0b0000;
1225 let Inst{8-7} = bit_8_7;
// The vrev64 forms mark $Qd earlyclobber (it must not overlap the source);
// the narrower-container forms do not.
1231 def MVE_VREV64_8 : MVE_VREV<"vrev64", "8", 0b00, 0b00, "@earlyclobber $Qd">;
1232 def MVE_VREV64_16 : MVE_VREV<"vrev64", "16", 0b01, 0b00, "@earlyclobber $Qd">;
1233 def MVE_VREV64_32 : MVE_VREV<"vrev64", "32", 0b10, 0b00, "@earlyclobber $Qd">;
1235 def MVE_VREV32_8 : MVE_VREV<"vrev32", "8", 0b00, 0b01>;
1236 def MVE_VREV32_16 : MVE_VREV<"vrev32", "16", 0b01, 0b01>;
1238 def MVE_VREV16_8 : MVE_VREV<"vrev16", "8", 0b00, 0b10>;
// Byte-swap of 16/32-bit lanes maps onto byte-granularity vrev.
1240 let Predicates = [HasMVEInt] in {
1241 def : Pat<(v8i16 (bswap (v8i16 MQPR:$src))),
1242 (v8i16 (MVE_VREV16_8 (v8i16 MQPR:$src)))>;
1243 def : Pat<(v4i32 (bswap (v4i32 MQPR:$src))),
1244 (v4i32 (MVE_VREV32_8 (v4i32 MQPR:$src)))>;
// Generic ARMvrev* nodes, for integer and float element types alike.
1247 let Predicates = [HasMVEInt] in {
1248 def : Pat<(v4i32 (ARMvrev64 (v4i32 MQPR:$src))),
1249 (v4i32 (MVE_VREV64_32 (v4i32 MQPR:$src)))>;
1250 def : Pat<(v8i16 (ARMvrev64 (v8i16 MQPR:$src))),
1251 (v8i16 (MVE_VREV64_16 (v8i16 MQPR:$src)))>;
1252 def : Pat<(v16i8 (ARMvrev64 (v16i8 MQPR:$src))),
1253 (v16i8 (MVE_VREV64_8 (v16i8 MQPR:$src)))>;
1255 def : Pat<(v8i16 (ARMvrev32 (v8i16 MQPR:$src))),
1256 (v8i16 (MVE_VREV32_16 (v8i16 MQPR:$src)))>;
1257 def : Pat<(v16i8 (ARMvrev32 (v16i8 MQPR:$src))),
1258 (v16i8 (MVE_VREV32_8 (v16i8 MQPR:$src)))>;
1260 def : Pat<(v16i8 (ARMvrev16 (v16i8 MQPR:$src))),
1261 (v16i8 (MVE_VREV16_8 (v16i8 MQPR:$src)))>;
1263 def : Pat<(v4f32 (ARMvrev64 (v4f32 MQPR:$src))),
1264 (v4f32 (MVE_VREV64_32 (v4f32 MQPR:$src)))>;
1265 def : Pat<(v8f16 (ARMvrev64 (v8f16 MQPR:$src))),
1266 (v8f16 (MVE_VREV64_16 (v8f16 MQPR:$src)))>;
1267 def : Pat<(v8f16 (ARMvrev32 (v8f16 MQPR:$src))),
1268 (v8f16 (MVE_VREV32_16 (v8f16 MQPR:$src)))>;
// Register-form vmvn (bitwise NOT of $Qm); selected from the vnotq node
// for every 128-bit integer element type.
1271 def MVE_VMVN : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm),
1272 "vmvn", "", "$Qd, $Qm", ""> {
1274 let Inst{25-23} = 0b111;
1275 let Inst{21-16} = 0b110000;
1276 let Inst{12-6} = 0b0010111;
1279 let validForTailPredication = 1;
1282 let Predicates = [HasMVEInt] in {
1283 def : Pat<(v16i8 (vnotq (v16i8 MQPR:$val1))),
1284 (v16i8 (MVE_VMVN (v16i8 MQPR:$val1)))>;
1285 def : Pat<(v8i16 (vnotq (v8i16 MQPR:$val1))),
1286 (v8i16 (MVE_VMVN (v8i16 MQPR:$val1)))>;
1287 def : Pat<(v4i32 (vnotq (v4i32 MQPR:$val1))),
1288 (v4i32 (MVE_VMVN (v4i32 MQPR:$val1)))>;
1289 def : Pat<(v2i64 (vnotq (v2i64 MQPR:$val1))),
1290 (v2i64 (MVE_VMVN (v2i64 MQPR:$val1)))>;
// Three-register bitwise ops (veor/vorn/vorr/vand): share one encoding
// shape, distinguished only by bits 21-20 and bit 28.
1293 class MVE_bit_ops<string iname, bits<2> bit_21_20, bit bit_28>
1294 : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm),
1295 iname, "", "$Qd, $Qn, $Qm", ""> {
1298 let Inst{28} = bit_28;
1299 let Inst{25-23} = 0b110;
1300 let Inst{21-20} = bit_21_20;
1301 let Inst{19-17} = Qn{2-0};
1303 let Inst{12-8} = 0b00001;
1304 let Inst{7} = Qn{3};
1308 let validForTailPredication = 1;
1311 def MVE_VEOR : MVE_bit_ops<"veor", 0b00, 0b1>;
1312 def MVE_VORN : MVE_bit_ops<"vorn", 0b11, 0b0>;
1313 def MVE_VORR : MVE_bit_ops<"vorr", 0b10, 0b0>;
1314 def MVE_VAND : MVE_bit_ops<"vand", 0b00, 0b0>;
1316 // add ignored suffixes as aliases
// The bitwise ops are type-agnostic; accept any lane-type suffix the
// programmer writes and map it to the untyped instruction.
1318 foreach s=["s8", "s16", "s32", "u8", "u16", "u32", "i8", "i16", "i32", "f16", "f32"] in {
1319 def : MVEInstAlias<"vbic${vp}." # s # "\t$QdSrc, $QnSrc, $QmSrc",
1320 (MVE_VBIC MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
1321 def : MVEInstAlias<"veor${vp}." # s # "\t$QdSrc, $QnSrc, $QmSrc",
1322 (MVE_VEOR MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
1323 def : MVEInstAlias<"vorn${vp}." # s # "\t$QdSrc, $QnSrc, $QmSrc",
1324 (MVE_VORN MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
1325 def : MVEInstAlias<"vorr${vp}." # s # "\t$QdSrc, $QnSrc, $QmSrc",
1326 (MVE_VORR MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
1327 def : MVEInstAlias<"vand${vp}." # s # "\t$QdSrc, $QnSrc, $QmSrc",
1328 (MVE_VAND MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
// Selection patterns for the plain bitwise ops: generic and/or/xor node
// unpredicated, MVE intrinsic predicated.
1331 multiclass MVE_bit_op<MVEVectorVTInfo VTI, SDNode unpred_op, Intrinsic pred_int, MVE_bit_ops instruction> {
1332 let Predicates = [HasMVEInt] in {
1333 // Unpredicated operation
1334 def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
1335 (VTI.Vec (instruction (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
1336 // Predicated operation
1337 def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
1338 (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
1339 (VTI.Vec (instruction
1340 (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
1341 ARMVCCThen, (VTI.Pred VCCR:$mask),
1342 (VTI.Vec MQPR:$inactive)))>;
1346 defm : MVE_bit_op<MVE_v16i8, and, int_arm_mve_and_predicated, MVE_VAND>;
1347 defm : MVE_bit_op<MVE_v8i16, and, int_arm_mve_and_predicated, MVE_VAND>;
1348 defm : MVE_bit_op<MVE_v4i32, and, int_arm_mve_and_predicated, MVE_VAND>;
1349 defm : MVE_bit_op<MVE_v2i64, and, int_arm_mve_and_predicated, MVE_VAND>;
1351 defm : MVE_bit_op<MVE_v16i8, or, int_arm_mve_orr_predicated, MVE_VORR>;
1352 defm : MVE_bit_op<MVE_v8i16, or, int_arm_mve_orr_predicated, MVE_VORR>;
1353 defm : MVE_bit_op<MVE_v4i32, or, int_arm_mve_orr_predicated, MVE_VORR>;
1354 defm : MVE_bit_op<MVE_v2i64, or, int_arm_mve_orr_predicated, MVE_VORR>;
1356 defm : MVE_bit_op<MVE_v16i8, xor, int_arm_mve_eor_predicated, MVE_VEOR>;
1357 defm : MVE_bit_op<MVE_v8i16, xor, int_arm_mve_eor_predicated, MVE_VEOR>;
1358 defm : MVE_bit_op<MVE_v4i32, xor, int_arm_mve_eor_predicated, MVE_VEOR>;
1359 defm : MVE_bit_op<MVE_v2i64, xor, int_arm_mve_eor_predicated, MVE_VEOR>;
// Same shape for vbic/vorn: the unpredicated pattern matches the generic
// op applied to an inverted (vnotq) second operand; the predicated
// intrinsic already has inversion semantics built in.
1361 multiclass MVE_bit_op_with_inv<MVEVectorVTInfo VTI, SDNode unpred_op, Intrinsic pred_int, MVE_bit_ops instruction> {
1362 let Predicates = [HasMVEInt] in {
1363 // Unpredicated operation
1364 def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (vnotq (VTI.Vec MQPR:$Qn)))),
1365 (VTI.Vec (instruction (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
1366 // Predicated operation
1367 def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
1368 (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
1369 (VTI.Vec (instruction
1370 (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
1371 ARMVCCThen, (VTI.Pred VCCR:$mask),
1372 (VTI.Vec MQPR:$inactive)))>;
1376 defm : MVE_bit_op_with_inv<MVE_v16i8, and, int_arm_mve_bic_predicated, MVE_VBIC>;
1377 defm : MVE_bit_op_with_inv<MVE_v8i16, and, int_arm_mve_bic_predicated, MVE_VBIC>;
1378 defm : MVE_bit_op_with_inv<MVE_v4i32, and, int_arm_mve_bic_predicated, MVE_VBIC>;
1379 defm : MVE_bit_op_with_inv<MVE_v2i64, and, int_arm_mve_bic_predicated, MVE_VBIC>;
1381 defm : MVE_bit_op_with_inv<MVE_v16i8, or, int_arm_mve_orn_predicated, MVE_VORN>;
1382 defm : MVE_bit_op_with_inv<MVE_v8i16, or, int_arm_mve_orn_predicated, MVE_VORN>;
1383 defm : MVE_bit_op_with_inv<MVE_v4i32, or, int_arm_mve_orn_predicated, MVE_VORN>;
1384 defm : MVE_bit_op_with_inv<MVE_v2i64, or, int_arm_mve_orn_predicated, MVE_VORN>;
// Immediate-form bitwise ops ($Qd op= #imm): the 8 immediate bits are
// scattered across the word (28, 18-16, 3-0) and 'cmode' selects how the
// immediate is expanded. Destination is read-modify-write ($Qd = $Qd_src).
1386 class MVE_bit_cmode<string iname, string suffix, bits<4> cmode, dag inOps>
1387 : MVE_p<(outs MQPR:$Qd), inOps, NoItinerary,
1388 iname, suffix, "$Qd, $imm", vpred_n, "$Qd = $Qd_src"> {
1392 let Inst{28} = imm{7};
1393 let Inst{27-23} = 0b11111;
1394 let Inst{22} = Qd{3};
1395 let Inst{21-19} = 0b000;
1396 let Inst{18-16} = imm{6-4};
1397 let Inst{15-13} = Qd{2-0};
1399 let Inst{11-8} = cmode;
1400 let Inst{7-6} = 0b01;
1402 let Inst{3-0} = imm{3-0};
// Immediate vorr, one def per (element size, byte position) combination,
// each pairing a cmode value with the matching ExpandImm operand class.
1405 class MVE_VORR<string suffix, bits<4> cmode, ExpandImm imm_type>
1406 : MVE_bit_cmode<"vorr", suffix, cmode, (ins MQPR:$Qd_src, imm_type:$imm)> {
1408 let validForTailPredication = 1;
1411 def MVE_VORRIZ0v4i32 : MVE_VORR<"i32", 0b0001, expzero00>;
1412 def MVE_VORRIZ0v8i16 : MVE_VORR<"i16", 0b1001, expzero00>;
1413 def MVE_VORRIZ8v4i32 : MVE_VORR<"i32", 0b0011, expzero08>;
1414 def MVE_VORRIZ8v8i16 : MVE_VORR<"i16", 0b1011, expzero08>;
1415 def MVE_VORRIZ16v4i32 : MVE_VORR<"i32", 0b0101, expzero16>;
1416 def MVE_VORRIZ24v4i32 : MVE_VORR<"i32", 0b0111, expzero24>;
// Immediate vorn has no hardware encoding: accept it as an asm pseudo with
// an inverted-immediate operand (see InvertedExpandImm in the file header).
1418 def MVE_VORNIZ0v4i32 : MVEAsmPseudo<"vorn${vp}.i32\t$Qd, $imm",
1419 (ins MQPR:$Qd_src, expzero00inv32:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
1420 def MVE_VORNIZ0v8i16 : MVEAsmPseudo<"vorn${vp}.i16\t$Qd, $imm",
1421 (ins MQPR:$Qd_src, expzero00inv16:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
1422 def MVE_VORNIZ8v4i32 : MVEAsmPseudo<"vorn${vp}.i32\t$Qd, $imm",
1423 (ins MQPR:$Qd_src, expzero08inv32:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
1424 def MVE_VORNIZ8v8i16 : MVEAsmPseudo<"vorn${vp}.i16\t$Qd, $imm",
1425 (ins MQPR:$Qd_src, expzero08inv16:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
1426 def MVE_VORNIZ16v4i32 : MVEAsmPseudo<"vorn${vp}.i32\t$Qd, $imm",
1427 (ins MQPR:$Qd_src, expzero16inv32:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
1428 def MVE_VORNIZ24v4i32 : MVEAsmPseudo<"vorn${vp}.i32\t$Qd, $imm",
1429 (ins MQPR:$Qd_src, expzero24inv32:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
// vmov Qd, Qm is accepted as vorr with both sources equal.
1431 def MVE_VMOV : MVEInstAlias<"vmov${vp}\t$Qd, $Qm",
1432 (MVE_VORR MQPR:$Qd, MQPR:$Qm, MQPR:$Qm, vpred_r:$vp)>;
// Immediate vbic: same structure as immediate vorr above.
1434 class MVE_VBIC<string suffix, bits<4> cmode, ExpandImm imm_type>
1435 : MVE_bit_cmode<"vbic", suffix, cmode, (ins MQPR:$Qd_src, imm_type:$imm)> {
1437 let validForTailPredication = 1;
1440 def MVE_VBICIZ0v4i32 : MVE_VBIC<"i32", 0b0001, expzero00>;
1441 def MVE_VBICIZ0v8i16 : MVE_VBIC<"i16", 0b1001, expzero00>;
1442 def MVE_VBICIZ8v4i32 : MVE_VBIC<"i32", 0b0011, expzero08>;
1443 def MVE_VBICIZ8v8i16 : MVE_VBIC<"i16", 0b1011, expzero08>;
1444 def MVE_VBICIZ16v4i32 : MVE_VBIC<"i32", 0b0101, expzero16>;
1445 def MVE_VBICIZ24v4i32 : MVE_VBIC<"i32", 0b0111, expzero24>;
// Immediate vand: asm pseudos with inverted immediates, mirroring vorn.
1447 def MVE_VANDIZ0v4i32 : MVEAsmPseudo<"vand${vp}.i32\t$Qda, $imm",
1448 (ins MQPR:$Qda_src, expzero00inv32:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
1449 def MVE_VANDIZ0v8i16 : MVEAsmPseudo<"vand${vp}.i16\t$Qda, $imm",
1450 (ins MQPR:$Qda_src, expzero00inv16:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
1451 def MVE_VANDIZ8v4i32 : MVEAsmPseudo<"vand${vp}.i32\t$Qda, $imm",
1452 (ins MQPR:$Qda_src, expzero08inv32:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
1453 def MVE_VANDIZ8v8i16 : MVEAsmPseudo<"vand${vp}.i16\t$Qda, $imm",
1454 (ins MQPR:$Qda_src, expzero08inv16:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
1455 def MVE_VANDIZ16v4i32 : MVEAsmPseudo<"vand${vp}.i32\t$Qda, $imm",
1456 (ins MQPR:$Qda_src, expzero16inv32:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
1457 def MVE_VANDIZ24v4i32 : MVEAsmPseudo<"vand${vp}.i32\t$Qda, $imm",
1458 (ins MQPR:$Qda_src, expzero24inv32:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
// Direction descriptor for scalar<->lane vmov: carries the operand lists
// and asm string for each direction (fields declared on elided lines).
1460 class MVE_VMOV_lane_direction {
1467 def MVE_VMOV_from_lane : MVE_VMOV_lane_direction {
1469 let oops = (outs rGPR:$Rt);
1470 let iops = (ins MQPR:$Qd);
1471 let ops = "$Rt, $Qd$Idx";
1474 def MVE_VMOV_to_lane : MVE_VMOV_lane_direction {
1476 let oops = (outs MQPR:$Qd);
1477 let iops = (ins MQPR:$Qd_src, rGPR:$Rt);
1478 let ops = "$Qd$Idx, $Rt";
// Inserting preserves the other lanes, so the vector is read-modify-write.
1479 let cstr = "$Qd = $Qd_src";
// Common encoding for both directions; the index operand is appended to the
// direction's input list. Subclasses scatter the lane index bits.
1482 class MVE_VMOV_lane<string suffix, bit U, dag indexop,
1483 MVE_VMOV_lane_direction dir>
1484 : MVE_VMOV_lane_base<dir.oops, !con(dir.iops, indexop), NoItinerary,
1485 "vmov", suffix, dir.ops, dir.cstr, []> {
1489 let Inst{31-24} = 0b11101110;
1491 let Inst{20} = dir.bit_20;
1492 let Inst{19-17} = Qd{2-0};
1493 let Inst{15-12} = Rt{3-0};
1494 let Inst{11-8} = 0b1011;
1495 let Inst{7} = Qd{3};
1496 let Inst{4-0} = 0b10000;
// 32-bit lanes: 2-bit index split over bits 16 and 21; no signed/unsigned
// distinction (U=0), available from the base FP register set (V8.1-M).
1499 class MVE_VMOV_lane_32<MVE_VMOV_lane_direction dir>
1500 : MVE_VMOV_lane<"32", 0b0, (ins MVEVectorIndex<4>:$Idx), dir> {
1503 let Inst{6-5} = 0b00;
1504 let Inst{16} = Idx{1};
1505 let Inst{21} = Idx{0};
1507 let Predicates = [HasFPRegsV8_1M];
// 16-bit lanes: 3-bit index over bits 16, 21, 6.
1510 class MVE_VMOV_lane_16<string suffix, bit U, MVE_VMOV_lane_direction dir>
1511 : MVE_VMOV_lane<suffix, U, (ins MVEVectorIndex<8>:$Idx), dir> {
1515 let Inst{16} = Idx{2};
1516 let Inst{21} = Idx{1};
1517 let Inst{6} = Idx{0};
// 8-bit lanes: 4-bit index over bits 16, 21, 6, 5.
1520 class MVE_VMOV_lane_8<string suffix, bit U, MVE_VMOV_lane_direction dir>
1521 : MVE_VMOV_lane<suffix, U, (ins MVEVectorIndex<16>:$Idx), dir> {
1524 let Inst{16} = Idx{3};
1525 let Inst{21} = Idx{2};
1526 let Inst{6} = Idx{1};
1527 let Inst{5} = Idx{0};
// Extracting a narrow lane needs s/u (extend) variants; inserting does not.
1530 def MVE_VMOV_from_lane_32 : MVE_VMOV_lane_32< MVE_VMOV_from_lane>;
1531 def MVE_VMOV_to_lane_32 : MVE_VMOV_lane_32< MVE_VMOV_to_lane>;
1532 def MVE_VMOV_from_lane_s16 : MVE_VMOV_lane_16<"s16", 0b0, MVE_VMOV_from_lane>;
1533 def MVE_VMOV_from_lane_u16 : MVE_VMOV_lane_16<"u16", 0b1, MVE_VMOV_from_lane>;
1534 def MVE_VMOV_to_lane_16 : MVE_VMOV_lane_16< "16", 0b0, MVE_VMOV_to_lane>;
1535 def MVE_VMOV_from_lane_s8 : MVE_VMOV_lane_8 < "s8", 0b0, MVE_VMOV_from_lane>;
1536 def MVE_VMOV_from_lane_u8 : MVE_VMOV_lane_8 < "u8", 0b1, MVE_VMOV_from_lane>;
1537 def MVE_VMOV_to_lane_8 : MVE_VMOV_lane_8 < "8", 0b0, MVE_VMOV_to_lane>;
// Insert/extract-element selection: lane moves through GPRs where an
// instruction exists, subregister copies where one does not.
1539 let Predicates = [HasMVEInt] in {
// f64 lanes have no lane-move instruction; go via D subregisters.
1540 def : Pat<(extractelt (v2f64 MQPR:$src), imm:$lane),
1541 (f64 (EXTRACT_SUBREG MQPR:$src, (DSubReg_f64_reg imm:$lane)))>;
1542 def : Pat<(insertelt (v2f64 MQPR:$src1), DPR:$src2, imm:$lane),
1543 (INSERT_SUBREG (v2f64 (COPY_TO_REGCLASS MQPR:$src1, MQPR)), DPR:$src2, (DSubReg_f64_reg imm:$lane))>;
// 32-bit integer extract via an S subregister; insert via vmov-to-lane.
1545 def : Pat<(extractelt (v4i32 MQPR:$src), imm:$lane),
1547 (i32 (EXTRACT_SUBREG MQPR:$src, (SSubReg_f32_reg imm:$lane))), rGPR)>;
1548 def : Pat<(insertelt (v4i32 MQPR:$src1), rGPR:$src2, imm:$lane),
1549 (MVE_VMOV_to_lane_32 MQPR:$src1, rGPR:$src2, imm:$lane)>;
1551 def : Pat<(vector_insert (v16i8 MQPR:$src1), rGPR:$src2, imm:$lane),
1552 (MVE_VMOV_to_lane_8 MQPR:$src1, rGPR:$src2, imm:$lane)>;
1553 def : Pat<(vector_insert (v8i16 MQPR:$src1), rGPR:$src2, imm:$lane),
1554 (MVE_VMOV_to_lane_16 MQPR:$src1, rGPR:$src2, imm:$lane)>;
// Narrow-lane extracts: ARMvgetlanes/-u select the sign/zero-extending
// from-lane instruction.
1556 def : Pat<(ARMvgetlanes (v16i8 MQPR:$src), imm:$lane),
1557 (MVE_VMOV_from_lane_s8 MQPR:$src, imm:$lane)>;
1558 def : Pat<(ARMvgetlanes (v8i16 MQPR:$src), imm:$lane),
1559 (MVE_VMOV_from_lane_s16 MQPR:$src, imm:$lane)>;
1560 def : Pat<(ARMvgetlaneu (v16i8 MQPR:$src), imm:$lane),
1561 (MVE_VMOV_from_lane_u8 MQPR:$src, imm:$lane)>;
1562 def : Pat<(ARMvgetlaneu (v8i16 MQPR:$src), imm:$lane),
1563 (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane)>;
// scalar_to_vector: insert into lane 0 of an undefined vector.
1565 def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
1566 (MVE_VMOV_to_lane_8 (v16i8 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
1567 def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
1568 (MVE_VMOV_to_lane_16 (v8i16 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
1569 def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
1570 (MVE_VMOV_to_lane_32 (v4i32 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
1572 // Floating point patterns, still enabled under HasMVEInt
1573 def : Pat<(extractelt (v4f32 MQPR:$src), imm:$lane),
1574 (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG MQPR:$src, (SSubReg_f32_reg imm:$lane))), SPR)>;
1575 def : Pat<(insertelt (v4f32 MQPR:$src1), (f32 SPR:$src2), imm:$lane),
1576 (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS MQPR:$src1, MQPR)), SPR:$src2, (SSubReg_f32_reg imm:$lane))>;
// f16 inserts go through a GPR; even-lane extracts are a plain S-subreg
// read, odd lanes additionally need VMOVH to reach the low half.
1578 def : Pat<(insertelt (v8f16 MQPR:$src1), HPR:$src2, imm:$lane),
1579 (MVE_VMOV_to_lane_16 MQPR:$src1, (COPY_TO_REGCLASS HPR:$src2, rGPR), imm:$lane)>;
1580 def : Pat<(extractelt (v8f16 MQPR:$src), imm_even:$lane),
1581 (EXTRACT_SUBREG MQPR:$src, (SSubReg_f16_reg imm_even:$lane))>;
1582 def : Pat<(extractelt (v8f16 MQPR:$src), imm_odd:$lane),
1584 (VMOVH (EXTRACT_SUBREG MQPR:$src, (SSubReg_f16_reg imm_odd:$lane))),
1587 def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
1588 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
1589 def : Pat<(v4f32 (scalar_to_vector GPR:$src)),
1590 (MVE_VMOV_to_lane_32 (v4f32 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
1591 def : Pat<(v8f16 (scalar_to_vector HPR:$src)),
1592 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), HPR:$src, ssub_0)>;
1593 def : Pat<(v8f16 (scalar_to_vector GPR:$src)),
1594 (MVE_VMOV_to_lane_16 (v8f16 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
1597 // end of mve_bit instructions
1599 // start of MVE Integer instructions
// Common shape for two-operand integer ops with a 2-bit element size:
// $Qd = op($Qn, $Qm), register-style predication (vpred_r).
1601 class MVE_int<string iname, string suffix, bits<2> size, list<dag> pattern=[]>
1602 : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary,
1603 iname, suffix, "$Qd, $Qn, $Qm", vpred_r, "", pattern> {
1608 let Inst{22} = Qd{3};
1609 let Inst{21-20} = size;
1610 let Inst{19-17} = Qn{2-0};
1611 let Inst{15-13} = Qd{2-0};
1612 let Inst{7} = Qn{3};
1614 let Inst{5} = Qm{3};
1615 let Inst{3-1} = Qm{2-0};
// vmul encoding; usable under tail predication.
1618 class MVE_VMULt1<string iname, string suffix, bits<2> size,
1619 list<dag> pattern=[]>
1620 : MVE_int<iname, suffix, size, pattern> {
1623 let Inst{25-23} = 0b110;
1625 let Inst{12-8} = 0b01001;
1628 let validForTailPredication = 1;
// Instruction plus patterns: generic 'mul' node unpredicated,
// int_arm_mve_mul_predicated otherwise.
1631 multiclass MVE_VMUL_m<string iname, MVEVectorVTInfo VTI,
1632 SDNode unpred_op, Intrinsic pred_int> {
1633 def "" : MVE_VMULt1<iname, VTI.Suffix, VTI.Size>;
1634 defvar Inst = !cast<Instruction>(NAME);
1636 let Predicates = [HasMVEInt] in {
1637 // Unpredicated multiply
1638 def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
1639 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
1641 // Predicated multiply
1642 def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
1643 (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
1644 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
1645 ARMVCCThen, (VTI.Pred VCCR:$mask),
1646 (VTI.Vec MQPR:$inactive)))>;
1650 multiclass MVE_VMUL<MVEVectorVTInfo VTI>
1651 : MVE_VMUL_m<"vmul", VTI, mul, int_arm_mve_mul_predicated>;
1653 defm MVE_VMULi8 : MVE_VMUL<MVE_v16i8>;
1654 defm MVE_VMULi16 : MVE_VMUL<MVE_v8i16>;
1655 defm MVE_VMULi32 : MVE_VMUL<MVE_v4i32>;
// Saturating doubling multiply-high; the 'rounding' bit (Inst{28})
// selects vqrdmulh over vqdmulh.
1657 class MVE_VQxDMULH_Base<string iname, string suffix, bits<2> size, bit rounding,
1658 list<dag> pattern=[]>
1659 : MVE_int<iname, suffix, size, pattern> {
1661 let Inst{28} = rounding;
1662 let Inst{25-23} = 0b110;
1664 let Inst{12-8} = 0b01011;
// Both the unpredicated and predicated forms are selected from MVE
// intrinsics (passed in by MVE_VQxDMULH below).
1669 multiclass MVE_VQxDMULH_m<string iname, MVEVectorVTInfo VTI,
1670 SDNode unpred_op, Intrinsic pred_int,
1672 def "" : MVE_VQxDMULH_Base<iname, VTI.Suffix, VTI.Size, rounding>;
1673 defvar Inst = !cast<Instruction>(NAME);
1675 let Predicates = [HasMVEInt] in {
1676 // Unpredicated multiply
1677 def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
1678 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
1680 // Predicated multiply
1681 def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
1682 (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
1683 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
1684 ARMVCCThen, (VTI.Pred VCCR:$mask),
1685 (VTI.Vec MQPR:$inactive)))>;
// Picks the rounding or non-rounding intrinsic pair from the flag.
1689 multiclass MVE_VQxDMULH<string iname, MVEVectorVTInfo VTI, bit rounding>
1690 : MVE_VQxDMULH_m<iname, VTI, !if(rounding, int_arm_mve_vqrdmulh,
1691 int_arm_mve_vqdmulh),
1692 !if(rounding, int_arm_mve_qrdmulh_predicated,
1693 int_arm_mve_qdmulh_predicated),
// Signed element types only, per the instantiations below.
1696 defm MVE_VQDMULHi8 : MVE_VQxDMULH<"vqdmulh", MVE_v16s8, 0b0>;
1697 defm MVE_VQDMULHi16 : MVE_VQxDMULH<"vqdmulh", MVE_v8s16, 0b0>;
1698 defm MVE_VQDMULHi32 : MVE_VQxDMULH<"vqdmulh", MVE_v4s32, 0b0>;
1700 defm MVE_VQRDMULHi8 : MVE_VQxDMULH<"vqrdmulh", MVE_v16s8, 0b1>;
1701 defm MVE_VQRDMULHi16 : MVE_VQxDMULH<"vqrdmulh", MVE_v8s16, 0b1>;
1702 defm MVE_VQRDMULHi32 : MVE_VQxDMULH<"vqrdmulh", MVE_v4s32, 0b1>;
// Integer vadd/vsub; the 'subtract' bit (Inst{28}) distinguishes them.
1704 class MVE_VADDSUB<string iname, string suffix, bits<2> size, bit subtract,
1705 list<dag> pattern=[]>
1706 : MVE_int<iname, suffix, size, pattern> {
1708 let Inst{28} = subtract;
1709 let Inst{25-23} = 0b110;
1711 let Inst{12-8} = 0b01000;
1714 let validForTailPredication = 1;
// Instruction plus patterns: generic add/sub node unpredicated, MVE
// intrinsic predicated — same structure as MVE_VMUL_m above.
1717 multiclass MVE_VADDSUB_m<string iname, MVEVectorVTInfo VTI, bit subtract,
1718 SDNode unpred_op, Intrinsic pred_int> {
1719 def "" : MVE_VADDSUB<iname, VTI.Suffix, VTI.Size, subtract>;
1720 defvar Inst = !cast<Instruction>(NAME);
1722 let Predicates = [HasMVEInt] in {
1723 // Unpredicated add/subtract
1724 def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
1725 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
1727 // Predicated add/subtract
1728 def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
1729 (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
1730 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
1731 ARMVCCThen, (VTI.Pred VCCR:$mask),
1732 (VTI.Vec MQPR:$inactive)))>;
1736 multiclass MVE_VADD<MVEVectorVTInfo VTI>
1737 : MVE_VADDSUB_m<"vadd", VTI, 0b0, add, int_arm_mve_add_predicated>;
1738 multiclass MVE_VSUB<MVEVectorVTInfo VTI>
1739 : MVE_VADDSUB_m<"vsub", VTI, 0b1, sub, int_arm_mve_sub_predicated>;
1741 defm MVE_VADDi8 : MVE_VADD<MVE_v16i8>;
1742 defm MVE_VADDi16 : MVE_VADD<MVE_v8i16>;
1743 defm MVE_VADDi32 : MVE_VADD<MVE_v4i32>;
1745 defm MVE_VSUBi8 : MVE_VSUB<MVE_v16i8>;
1746 defm MVE_VSUBi16 : MVE_VSUB<MVE_v8i16>;
1747 defm MVE_VSUBi32 : MVE_VSUB<MVE_v4i32>;
// Saturating vector add/subtract (VQADD/VQSUB). A single encoding class is
// shared by both mnemonics; bit 9 selects subtract and bit 28 (via U,
// elsewhere) distinguishes signed/unsigned saturation.
// NOTE(review): several encoding lines appear to be missing from this
// extraction (e.g. the U and size bit assignments) — verify against upstream.
1749 class MVE_VQADDSUB<string iname, string suffix, bit U, bit subtract,
1751 : MVE_int<iname, suffix, size, []> {
1754 let Inst{25-23} = 0b110;
1756 let Inst{12-10} = 0b000;
1757 let Inst{9} = subtract;
1761 let validForTailPredication = 1;
// Thin aliases fixing the mnemonic and the subtract bit.
1764 class MVE_VQADD_<string suffix, bit U, bits<2> size>
1765 : MVE_VQADDSUB<"vqadd", suffix, U, 0b0, size>;
1766 class MVE_VQSUB_<string suffix, bit U, bits<2> size>
1767 : MVE_VQADDSUB<"vqsub", suffix, U, 0b1, size>;
// VQADD: instruction plus patterns. The unpredicated pattern maps a generic
// saturating-add node (saddsat/uaddsat); the predicated pattern maps the MVE
// intrinsic, which additionally carries the signedness as an i32 operand.
1769 multiclass MVE_VQADD_m<MVEVectorVTInfo VTI,
1770 SDNode unpred_op, Intrinsic pred_int> {
1771 def "" : MVE_VQADD_<VTI.Suffix, VTI.Unsigned, VTI.Size>;
1772 defvar Inst = !cast<Instruction>(NAME);
1774 let Predicates = [HasMVEInt] in {
1775 // Unpredicated saturating add
1776 def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
1777 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
1779 // Predicated saturating add
1780 def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
1781 (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
1782 (VTI.Vec MQPR:$inactive))),
1783 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
1784 ARMVCCThen, (VTI.Pred VCCR:$mask),
1785 (VTI.Vec MQPR:$inactive)))>;
1789 multiclass MVE_VQADD<MVEVectorVTInfo VTI, SDNode unpred_op>
1790 : MVE_VQADD_m<VTI, unpred_op, int_arm_mve_qadd_predicated>;
// Signed forms use saddsat, unsigned forms uaddsat.
1792 defm MVE_VQADDs8  : MVE_VQADD<MVE_v16s8, saddsat>;
1793 defm MVE_VQADDs16 : MVE_VQADD<MVE_v8s16, saddsat>;
1794 defm MVE_VQADDs32 : MVE_VQADD<MVE_v4s32, saddsat>;
1795 defm MVE_VQADDu8  : MVE_VQADD<MVE_v16u8, uaddsat>;
1796 defm MVE_VQADDu16 : MVE_VQADD<MVE_v8u16, uaddsat>;
1797 defm MVE_VQADDu32 : MVE_VQADD<MVE_v4u32, uaddsat>;
// VQSUB: mirror of MVE_VQADD_m for saturating subtract.
1799 multiclass MVE_VQSUB_m<MVEVectorVTInfo VTI,
1800 SDNode unpred_op, Intrinsic pred_int> {
1801 def "" : MVE_VQSUB_<VTI.Suffix, VTI.Unsigned, VTI.Size>;
1802 defvar Inst = !cast<Instruction>(NAME);
1804 let Predicates = [HasMVEInt] in {
1805 // Unpredicated saturating subtract
1806 def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
1807 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
1809 // Predicated saturating subtract
1810 def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
1811 (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
1812 (VTI.Vec MQPR:$inactive))),
1813 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
1814 ARMVCCThen, (VTI.Pred VCCR:$mask),
1815 (VTI.Vec MQPR:$inactive)))>;
1819 multiclass MVE_VQSUB<MVEVectorVTInfo VTI, SDNode unpred_op>
1820 : MVE_VQSUB_m<VTI, unpred_op, int_arm_mve_qsub_predicated>;
1822 defm MVE_VQSUBs8  : MVE_VQSUB<MVE_v16s8, ssubsat>;
1823 defm MVE_VQSUBs16 : MVE_VQSUB<MVE_v8s16, ssubsat>;
1824 defm MVE_VQSUBs32 : MVE_VQSUB<MVE_v4s32, ssubsat>;
1825 defm MVE_VQSUBu8  : MVE_VQSUB<MVE_v16u8, usubsat>;
1826 defm MVE_VQSUBu16 : MVE_VQSUB<MVE_v8u16, usubsat>;
1827 defm MVE_VQSUBu32 : MVE_VQSUB<MVE_v4u32, usubsat>;
// VABD: vector absolute difference. Both the unpredicated and predicated
// forms are selected from MVE intrinsics only (there is no generic DAG node
// used here); the intrinsics carry signedness as an extra i32 operand which
// the instruction encodes via U, so it is dropped from the instruction DAG.
1829 class MVE_VABD_int<string suffix, bit U, bits<2> size,
1830 list<dag> pattern=[]>
1831 : MVE_int<"vabd", suffix, size, pattern> {
1834 let Inst{25-23} = 0b110;
1836 let Inst{12-8} = 0b00111;
1839 let validForTailPredication = 1;
1842 multiclass MVE_VABD_m<MVEVectorVTInfo VTI,
1843 Intrinsic unpred_int, Intrinsic pred_int> {
1844 def "" : MVE_VABD_int<VTI.Suffix, VTI.Unsigned, VTI.Size>;
1845 defvar Inst = !cast<Instruction>(NAME);
1847 let Predicates = [HasMVEInt] in {
1848 // Unpredicated absolute difference
1849 def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
1850 (i32 VTI.Unsigned))),
1851 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
1853 // Predicated absolute difference
1854 def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
1855 (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
1856 (VTI.Vec MQPR:$inactive))),
1857 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
1858 ARMVCCThen, (VTI.Pred VCCR:$mask),
1859 (VTI.Vec MQPR:$inactive)))>;
1863 multiclass MVE_VABD<MVEVectorVTInfo VTI>
1864 : MVE_VABD_m<VTI, int_arm_mve_vabd, int_arm_mve_abd_predicated>;
1866 defm MVE_VABDs8  : MVE_VABD<MVE_v16s8>;
1867 defm MVE_VABDs16 : MVE_VABD<MVE_v8s16>;
1868 defm MVE_VABDs32 : MVE_VABD<MVE_v4s32>;
1869 defm MVE_VABDu8  : MVE_VABD<MVE_v16u8>;
1870 defm MVE_VABDu16 : MVE_VABD<MVE_v8u16>;
1871 defm MVE_VABDu32 : MVE_VABD<MVE_v4u32>;
// VRHADD: rounding halving add ((a + b + 1) >> 1 per the mnemonic; selected
// purely from the MVE vrhadd intrinsics, same structure as VABD above).
1873 class MVE_VRHADD_Base<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
1874 : MVE_int<"vrhadd", suffix, size, pattern> {
1877 let Inst{25-23} = 0b110;
1879 let Inst{12-8} = 0b00001;
1882 let validForTailPredication = 1;
1885 multiclass MVE_VRHADD_m<MVEVectorVTInfo VTI,
1886 SDNode unpred_op, Intrinsic pred_int> {
1887 def "" : MVE_VRHADD_Base<VTI.Suffix, VTI.Unsigned, VTI.Size>;
1888 defvar Inst = !cast<Instruction>(NAME);
1890 let Predicates = [HasMVEInt] in {
1891 // Unpredicated rounding add-with-divide-by-two
1892 def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
1893 (i32 VTI.Unsigned))),
1894 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
1896 // Predicated add-with-divide-by-two
1897 def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
1898 (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
1899 (VTI.Vec MQPR:$inactive))),
1900 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
1901 ARMVCCThen, (VTI.Pred VCCR:$mask),
1902 (VTI.Vec MQPR:$inactive)))>;
1906 multiclass MVE_VRHADD<MVEVectorVTInfo VTI>
1907 : MVE_VRHADD_m<VTI, int_arm_mve_vrhadd, int_arm_mve_rhadd_predicated>;
1909 defm MVE_VRHADDs8  : MVE_VRHADD<MVE_v16s8>;
1910 defm MVE_VRHADDs16 : MVE_VRHADD<MVE_v8s16>;
1911 defm MVE_VRHADDs32 : MVE_VRHADD<MVE_v4s32>;
1912 defm MVE_VRHADDu8  : MVE_VRHADD<MVE_v16u8>;
1913 defm MVE_VRHADDu16 : MVE_VRHADD<MVE_v8u16>;
1914 defm MVE_VRHADDu32 : MVE_VRHADD<MVE_v4u32>;
// VHADD/VHSUB: halving add/subtract ((a op b) >> 1 per the mnemonic). One
// shared encoding class; bit 9 selects subtract.
1916 class MVE_VHADDSUB<string iname, string suffix, bit U, bit subtract,
1917 bits<2> size, list<dag> pattern=[]>
1918 : MVE_int<iname, suffix, size, pattern> {
1921 let Inst{25-23} = 0b110;
1923 let Inst{12-10} = 0b000;
1924 let Inst{9} = subtract;
1928 let validForTailPredication = 1;
1931 class MVE_VHADD_<string suffix, bit U, bits<2> size,
1932 list<dag> pattern=[]>
1933 : MVE_VHADDSUB<"vhadd", suffix, U, 0b0, size, pattern>;
1934 class MVE_VHSUB_<string suffix, bit U, bits<2> size,
1935 list<dag> pattern=[]>
1936 : MVE_VHADDSUB<"vhsub", suffix, U, 0b1, size, pattern>;
// VHADD instruction + patterns; the intrinsics carry signedness as an i32
// operand (encoded via U on the instruction).
1938 multiclass MVE_VHADD_m<MVEVectorVTInfo VTI,
1939 SDNode unpred_op, Intrinsic pred_int> {
1940 def "" : MVE_VHADD_<VTI.Suffix, VTI.Unsigned, VTI.Size>;
1941 defvar Inst = !cast<Instruction>(NAME);
1943 let Predicates = [HasMVEInt] in {
1944 // Unpredicated add-and-divide-by-two
1945 def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), (i32 VTI.Unsigned))),
1946 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
1948 // Predicated add-and-divide-by-two
1949 def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), (i32 VTI.Unsigned),
1950 (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
1951 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
1952 ARMVCCThen, (VTI.Pred VCCR:$mask),
1953 (VTI.Vec MQPR:$inactive)))>;
1957 multiclass MVE_VHADD<MVEVectorVTInfo VTI>
1958 : MVE_VHADD_m<VTI, int_arm_mve_vhadd, int_arm_mve_hadd_predicated>;
1960 defm MVE_VHADDs8  : MVE_VHADD<MVE_v16s8>;
1961 defm MVE_VHADDs16 : MVE_VHADD<MVE_v8s16>;
1962 defm MVE_VHADDs32 : MVE_VHADD<MVE_v4s32>;
1963 defm MVE_VHADDu8  : MVE_VHADD<MVE_v16u8>;
1964 defm MVE_VHADDu16 : MVE_VHADD<MVE_v8u16>;
1965 defm MVE_VHADDu32 : MVE_VHADD<MVE_v4u32>;
// VHSUB: mirror of MVE_VHADD_m for halving subtract.
1967 multiclass MVE_VHSUB_m<MVEVectorVTInfo VTI,
1968 SDNode unpred_op, Intrinsic pred_int> {
1969 def "" : MVE_VHSUB_<VTI.Suffix, VTI.Unsigned, VTI.Size>;
1970 defvar Inst = !cast<Instruction>(NAME);
1972 let Predicates = [HasMVEInt] in {
1973 // Unpredicated subtract-and-divide-by-two
1974 def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
1975 (i32 VTI.Unsigned))),
1976 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
1978 // Predicated subtract-and-divide-by-two
1979 def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
1980 (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
1981 (VTI.Vec MQPR:$inactive))),
1982 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
1983 ARMVCCThen, (VTI.Pred VCCR:$mask),
1984 (VTI.Vec MQPR:$inactive)))>;
1988 multiclass MVE_VHSUB<MVEVectorVTInfo VTI>
1989 : MVE_VHSUB_m<VTI, int_arm_mve_vhsub, int_arm_mve_hsub_predicated>;
1991 defm MVE_VHSUBs8  : MVE_VHSUB<MVE_v16s8>;
1992 defm MVE_VHSUBs16 : MVE_VHSUB<MVE_v8s16>;
1993 defm MVE_VHSUBs32 : MVE_VHSUB<MVE_v4s32>;
1994 defm MVE_VHSUBu8  : MVE_VHSUB<MVE_v16u8>;
1995 defm MVE_VHSUBu16 : MVE_VHSUB<MVE_v8u16>;
1996 defm MVE_VHSUBu32 : MVE_VHSUB<MVE_v4u32>;
// VDUP: broadcast a GPR value into every lane of a vector register. The B/E
// bits select the lane width (see the 32/16/8 defs below).
1998 class MVE_VDUP<string suffix, bit B, bit E, list<dag> pattern=[]>
1999 : MVE_p<(outs MQPR:$Qd), (ins rGPR:$Rt), NoItinerary,
2000 "vdup", suffix, "$Qd, $Rt", vpred_r, "", pattern> {
2005 let Inst{25-23} = 0b101;
2007 let Inst{21-20} = 0b10;
2008 let Inst{19-17} = Qd{2-0};
2010 let Inst{15-12} = Rt;
2011 let Inst{11-8} = 0b1011;
2012 let Inst{7} = Qd{3};
2015 let Inst{4-0} = 0b10000;
2016 let validForTailPredication = 1;
2019 def MVE_VDUP32 : MVE_VDUP<"32", 0b0, 0b0>;
2020 def MVE_VDUP16 : MVE_VDUP<"16", 0b0, 0b1>;
2021 def MVE_VDUP8  : MVE_VDUP<"8",  0b1, 0b0>;
// Selection patterns: ARMvdup broadcasts a scalar; ARMvduplane broadcasts an
// existing vector lane, which is lowered as extract-to-GPR then VDUP.
2023 let Predicates = [HasMVEInt] in {
2024 def : Pat<(v16i8 (ARMvdup (i32 rGPR:$elem))),
2025 (MVE_VDUP8  rGPR:$elem)>;
2026 def : Pat<(v8i16 (ARMvdup (i32 rGPR:$elem))),
2027 (MVE_VDUP16 rGPR:$elem)>;
2028 def : Pat<(v4i32 (ARMvdup (i32 rGPR:$elem))),
2029 (MVE_VDUP32 rGPR:$elem)>;
2031 def : Pat<(v4i32 (ARMvduplane (v4i32 MQPR:$src), imm:$lane)),
2032 (MVE_VDUP32 (MVE_VMOV_from_lane_32 MQPR:$src, imm:$lane))>;
2033 // For the 16-bit and 8-bit vduplanes we don't care about the signedness
2034 // of the lane move operation as we only want the lowest 8/16 bits anyway.
2035 def : Pat<(v8i16 (ARMvduplane (v8i16 MQPR:$src), imm:$lane)),
2036 (MVE_VDUP16 (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane))>;
2037 def : Pat<(v16i8 (ARMvduplane (v16i8 MQPR:$src), imm:$lane)),
2038 (MVE_VDUP8 (MVE_VMOV_from_lane_u8 MQPR:$src, imm:$lane))>;
// Float broadcasts reuse the integer VDUPs: the FP scalar is first copied
// into a GPR (bit pattern unchanged), then broadcast.
2040 def : Pat<(v4f32 (ARMvdup (f32 SPR:$elem))),
2041 (v4f32 (MVE_VDUP32 (i32 (COPY_TO_REGCLASS (f32 SPR:$elem), rGPR))))>;
2042 def : Pat<(v8f16 (ARMvdup (f16 HPR:$elem))),
2043 (v8f16 (MVE_VDUP16 (i32 (COPY_TO_REGCLASS (f16 HPR:$elem), rGPR))))>;
2045 def : Pat<(v4f32 (ARMvduplane (v4f32 MQPR:$src), imm:$lane)),
2046 (MVE_VDUP32 (MVE_VMOV_from_lane_32 MQPR:$src, imm:$lane))>;
2047 def : Pat<(v8f16 (ARMvduplane (v8f16 MQPR:$src), imm:$lane)),
2048 (MVE_VDUP16 (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane))>;
// Base class for one-source integer ops (Qd <- op Qm): wires the Qd/Qm
// register fields and the size field into the encoding.
2052 class MVEIntSingleSrc<string iname, string suffix, bits<2> size,
2053 list<dag> pattern=[]>
2054 : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qm), NoItinerary,
2055 iname, suffix, "$Qd, $Qm", vpred_r, "", pattern> {
2059 let Inst{22} = Qd{3};
2060 let Inst{19-18} = size{1-0};
2061 let Inst{15-13} = Qd{2-0};
2062 let Inst{5} = Qm{3};
2063 let Inst{3-1} = Qm{2-0};
// VCLS/VCLZ: count leading sign bits / leading zeros; bit 7 selects VCLZ.
2066 class MVE_VCLSCLZ<string iname, string suffix, bits<2> size,
2067 bit count_zeroes, list<dag> pattern=[]>
2068 : MVEIntSingleSrc<iname, suffix, size, pattern> {
2071 let Inst{25-23} = 0b111;
2072 let Inst{21-20} = 0b11;
2073 let Inst{17-16} = 0b00;
2074 let Inst{12-8} = 0b00100;
2075 let Inst{7} = count_zeroes;
2079 let validForTailPredication = 1;
2082 def MVE_VCLSs8  : MVE_VCLSCLZ<"vcls", "s8",  0b00, 0b0>;
2083 def MVE_VCLSs16 : MVE_VCLSCLZ<"vcls", "s16", 0b01, 0b0>;
2084 def MVE_VCLSs32 : MVE_VCLSCLZ<"vcls", "s32", 0b10, 0b0>;
2086 def MVE_VCLZs8  : MVE_VCLSCLZ<"vclz", "i8",  0b00, 0b1>;
2087 def MVE_VCLZs16 : MVE_VCLSCLZ<"vclz", "i16", 0b01, 0b1>;
2088 def MVE_VCLZs32 : MVE_VCLSCLZ<"vclz", "i32", 0b10, 0b1>;
// Map the generic ctlz node onto VCLZ for each lane width.
2090 let Predicates = [HasMVEInt] in {
2091 def : Pat<(v16i8 ( ctlz (v16i8 MQPR:$val1))),
2092 (v16i8 ( MVE_VCLZs8 (v16i8 MQPR:$val1)))>;
2093 def : Pat<(v4i32 ( ctlz (v4i32 MQPR:$val1))),
2094 (v4i32 ( MVE_VCLZs32 (v4i32 MQPR:$val1)))>;
2095 def : Pat<(v8i16 ( ctlz (v8i16 MQPR:$val1))),
2096 (v8i16 ( MVE_VCLZs16 (v8i16 MQPR:$val1)))>;
// VABS/VNEG (integer): bit 7 selects negate.
2099 class MVE_VABSNEG_int<string iname, string suffix, bits<2> size, bit negate,
2100 list<dag> pattern=[]>
2101 : MVEIntSingleSrc<iname, suffix, size, pattern> {
2104 let Inst{25-23} = 0b111;
2105 let Inst{21-20} = 0b11;
2106 let Inst{17-16} = 0b01;
2107 let Inst{12-8} = 0b00011;
2108 let Inst{7} = negate;
2112 let validForTailPredication = 1;
2115 def MVE_VABSs8  : MVE_VABSNEG_int<"vabs", "s8",  0b00, 0b0>;
2116 def MVE_VABSs16 : MVE_VABSNEG_int<"vabs", "s16", 0b01, 0b0>;
2117 def MVE_VABSs32 : MVE_VABSNEG_int<"vabs", "s32", 0b10, 0b0>;
// Map the generic abs node onto VABS.
2119 let Predicates = [HasMVEInt] in {
2120 def : Pat<(v16i8 (abs (v16i8 MQPR:$v))),
2121 (v16i8 (MVE_VABSs8 $v))>;
2122 def : Pat<(v8i16 (abs (v8i16 MQPR:$v))),
2123 (v8i16 (MVE_VABSs16 $v))>;
2124 def : Pat<(v4i32 (abs (v4i32 MQPR:$v))),
2125 (v4i32 (MVE_VABSs32 $v))>;
2128 def MVE_VNEGs8  : MVE_VABSNEG_int<"vneg", "s8",  0b00, 0b1>;
2129 def MVE_VNEGs16 : MVE_VABSNEG_int<"vneg", "s16", 0b01, 0b1>;
2130 def MVE_VNEGs32 : MVE_VABSNEG_int<"vneg", "s32", 0b10, 0b1>;
// Map the ARM-specific vnegq node onto VNEG.
2132 let Predicates = [HasMVEInt] in {
2133 def : Pat<(v16i8 (vnegq (v16i8 MQPR:$v))),
2134 (v16i8 (MVE_VNEGs8 $v))>;
2135 def : Pat<(v8i16 (vnegq (v8i16 MQPR:$v))),
2136 (v8i16 (MVE_VNEGs16 $v))>;
2137 def : Pat<(v4i32 (vnegq (v4i32 MQPR:$v))),
2138 (v4i32 (MVE_VNEGs32 $v))>;
// VQABS/VQNEG: saturating absolute value / negate; bit 7 selects negate.
2141 class MVE_VQABSNEG<string iname, string suffix, bits<2> size,
2142 bit negate, list<dag> pattern=[]>
2143 : MVEIntSingleSrc<iname, suffix, size, pattern> {
2146 let Inst{25-23} = 0b111;
2147 let Inst{21-20} = 0b11;
2148 let Inst{17-16} = 0b00;
2149 let Inst{12-8} = 0b00111;
2150 let Inst{7} = negate;
2154 let validForTailPredication = 1;
2157 def MVE_VQABSs8  : MVE_VQABSNEG<"vqabs", "s8",  0b00, 0b0>;
2158 def MVE_VQABSs16 : MVE_VQABSNEG<"vqabs", "s16", 0b01, 0b0>;
2159 def MVE_VQABSs32 : MVE_VQABSNEG<"vqabs", "s32", 0b10, 0b0>;
2161 def MVE_VQNEGs8  : MVE_VQABSNEG<"vqneg", "s8",  0b00, 0b1>;
2162 def MVE_VQNEGs16 : MVE_VQABSNEG<"vqneg", "s16", 0b01, 0b1>;
2163 def MVE_VQNEGs32 : MVE_VQABSNEG<"vqneg", "s32", 0b10, 0b1>;
2165 // int_min/int_max: vector containing INT_MIN/INT_MAX VTI.Size times
2166 // zero_vec: v4i32-initialized zero vector, potentially wrapped in a bitconvert
// Recognizes open-coded saturating abs/neg select trees and folds each into a
// single VQABS/VQNEG instruction. The immediates passed at the instantiations
// below are the NEON/MVE modified-immediate encodings of the per-lane
// INT_MIN/INT_MAX splat constants, not the constants themselves.
2167 multiclass vqabsneg_pattern<MVEVectorVTInfo VTI, dag int_min, dag int_max,
2168 dag zero_vec, MVE_VQABSNEG vqabs_instruction,
2169 MVE_VQABSNEG vqneg_instruction> {
2170 let Predicates = [HasMVEInt] in {
2171 // The below tree can be replaced by a vqabs instruction, as it represents
2172 // the following vectorized expression (r being the value in $reg):
2173 // r > 0 ? r : (r == INT_MIN ? INT_MAX : -r)
2174 def : Pat<(VTI.Vec (vselect
2175 (VTI.Pred (ARMvcmpz (VTI.Vec MQPR:$reg), ARMCCgt)),
2176 (VTI.Vec MQPR:$reg),
2178 (VTI.Pred (ARMvcmp (VTI.Vec MQPR:$reg), int_min, ARMCCeq)),
2180 (sub (VTI.Vec zero_vec), (VTI.Vec MQPR:$reg)))))),
2181 (VTI.Vec (vqabs_instruction (VTI.Vec MQPR:$reg)))>;
2182 // Similarly, this tree represents vqneg, i.e. the following vectorized expression:
2183 // r == INT_MIN ? INT_MAX : -r
2184 def : Pat<(VTI.Vec (vselect
2185 (VTI.Pred (ARMvcmp (VTI.Vec MQPR:$reg), int_min, ARMCCeq)),
2187 (sub (VTI.Vec zero_vec), (VTI.Vec MQPR:$reg)))),
2188 (VTI.Vec (vqneg_instruction (VTI.Vec MQPR:$reg)))>;
// Instantiations per lane width; the zero vector is built as v4i32 and
// bitconverted where the element type differs.
2192 defm MVE_VQABSNEG_Ps8 : vqabsneg_pattern<MVE_v16i8,
2193 (v16i8 (ARMvmovImm (i32 3712))),
2194 (v16i8 (ARMvmovImm (i32 3711))),
2195 (bitconvert (v4i32 (ARMvmovImm (i32 0)))),
2196 MVE_VQABSs8, MVE_VQNEGs8>;
2197 defm MVE_VQABSNEG_Ps16 : vqabsneg_pattern<MVE_v8i16,
2198 (v8i16 (ARMvmovImm (i32 2688))),
2199 (v8i16 (ARMvmvnImm (i32 2688))),
2200 (bitconvert (v4i32 (ARMvmovImm (i32 0)))),
2201 MVE_VQABSs16, MVE_VQNEGs16>;
2202 defm MVE_VQABSNEG_Ps32 : vqabsneg_pattern<MVE_v4i32,
2203 (v4i32 (ARMvmovImm (i32 1664))),
2204 (v4i32 (ARMvmvnImm (i32 1664))),
2205 (ARMvmovImm (i32 0)),
2206 MVE_VQABSs32, MVE_VQNEGs32>;
// Modified-immediate moves (VMOV.imm / VMVN.imm): build a vector from an
// 8-bit immediate expanded by the cmode/op encoding. The immediate's bits are
// scattered across Inst{28}, Inst{18-16} and Inst{3-0}.
2208 class MVE_mod_imm<string iname, string suffix, bits<4> cmode, bit op,
2209 dag iops, list<dag> pattern=[]>
2210 : MVE_p<(outs MQPR:$Qd), iops, NoItinerary, iname, suffix, "$Qd, $imm",
2211 vpred_r, "", pattern> {
2215 let Inst{28} = imm{7};
2216 let Inst{25-23} = 0b111;
2217 let Inst{22} = Qd{3};
2218 let Inst{21-19} = 0b000;
2219 let Inst{18-16} = imm{6-4};
2220 let Inst{15-13} = Qd{2-0};
2222 let Inst{11-8} = cmode{3-0};
2223 let Inst{7-6} = 0b01;
2226 let Inst{3-0} = imm{3-0};
2228 let DecoderMethod = "DecodeMVEModImmInstruction";
2229 let validForTailPredication = 1;
// Immediate moves are rematerializable (and the VMOVs as cheap as a move),
// so the register allocator may recompute them instead of spilling.
// '?' cmode bits come from the immediate operand itself (see the Inst{9} /
// Inst{11-8} overrides in the individual defs).
2232 let isReMaterializable = 1 in {
2233 let isAsCheapAsAMove = 1 in {
2234 def MVE_VMOVimmi8  : MVE_mod_imm<"vmov", "i8",  {1,1,1,0}, 0b0, (ins nImmSplatI8:$imm)>;
2235 def MVE_VMOVimmi16 : MVE_mod_imm<"vmov", "i16", {1,0,?,0}, 0b0, (ins nImmSplatI16:$imm)> {
2236 let Inst{9} = imm{9};
2238 def MVE_VMOVimmi32 : MVE_mod_imm<"vmov", "i32", {?,?,?,?}, 0b0, (ins nImmVMOVI32:$imm)> {
2239 let Inst{11-8} = imm{11-8};
2241 def MVE_VMOVimmi64 : MVE_mod_imm<"vmov", "i64", {1,1,1,0}, 0b1, (ins nImmSplatI64:$imm)>;
2242 def MVE_VMOVimmf32 : MVE_mod_imm<"vmov", "f32", {1,1,1,1}, 0b0, (ins nImmVMOVF32:$imm)>;
2243 } // let isAsCheapAsAMove = 1
2245 def MVE_VMVNimmi16 : MVE_mod_imm<"vmvn", "i16", {1,0,?,0}, 0b1, (ins nImmSplatI16:$imm)> {
2246 let Inst{9} = imm{9};
2248 def MVE_VMVNimmi32 : MVE_mod_imm<"vmvn", "i32", {?,?,?,?}, 0b1, (ins nImmVMOVI32:$imm)> {
2249 let Inst{11-8} = imm{11-8};
2251 } // let isReMaterializable = 1
// Selection patterns: splat-constant DAG nodes onto the immediate moves.
2253 let Predicates = [HasMVEInt] in {
2254 def : Pat<(v16i8 (ARMvmovImm timm:$simm)),
2255 (v16i8 (MVE_VMOVimmi8  nImmSplatI8:$simm))>;
2256 def : Pat<(v8i16 (ARMvmovImm timm:$simm)),
2257 (v8i16 (MVE_VMOVimmi16 nImmSplatI16:$simm))>;
2258 def : Pat<(v4i32 (ARMvmovImm timm:$simm)),
2259 (v4i32 (MVE_VMOVimmi32 nImmVMOVI32:$simm))>;
2261 def : Pat<(v8i16 (ARMvmvnImm timm:$simm)),
2262 (v8i16 (MVE_VMVNimmi16 nImmSplatI16:$simm))>;
2263 def : Pat<(v4i32 (ARMvmvnImm timm:$simm)),
2264 (v4i32 (MVE_VMVNimmi32 nImmVMOVI32:$simm))>;
2266 def : Pat<(v4f32 (ARMvmovFPImm timm:$simm)),
2267 (v4f32 (MVE_VMOVimmf32 nImmVMOVF32:$simm))>;
// VMAXA/VMINA: absolute max/min accumulate — Qd is both source and
// destination ("$Qd = $Qd_src" tie); bit 12 selects MINA vs MAXA.
2270 class MVE_VMINMAXA<string iname, string suffix, bits<2> size,
2271 bit bit_12, list<dag> pattern=[]>
2272 : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm),
2273 NoItinerary, iname, suffix, "$Qd, $Qm", vpred_n, "$Qd = $Qd_src",
2279 let Inst{25-23} = 0b100;
2280 let Inst{22} = Qd{3};
2281 let Inst{21-20} = 0b11;
2282 let Inst{19-18} = size;
2283 let Inst{17-16} = 0b11;
2284 let Inst{15-13} = Qd{2-0};
2285 let Inst{12} = bit_12;
2286 let Inst{11-6} = 0b111010;
2287 let Inst{5} = Qm{3};
2289 let Inst{3-1} = Qm{2-0};
2291 let validForTailPredication = 1;
2294 def MVE_VMAXAs8  : MVE_VMINMAXA<"vmaxa", "s8",  0b00, 0b0>;
2295 def MVE_VMAXAs16 : MVE_VMINMAXA<"vmaxa", "s16", 0b01, 0b0>;
2296 def MVE_VMAXAs32 : MVE_VMINMAXA<"vmaxa", "s32", 0b10, 0b0>;
2298 def MVE_VMINAs8  : MVE_VMINMAXA<"vmina", "s8",  0b00, 0b1>;
2299 def MVE_VMINAs16 : MVE_VMINMAXA<"vmina", "s16", 0b01, 0b1>;
2300 def MVE_VMINAs32 : MVE_VMINMAXA<"vmina", "s32", 0b10, 0b1>;
2302 // end of MVE Integer instructions
2304 // start of mve_imm_shift instructions
// VSHLC: whole-vector shift left with carry through a GPR. RdmSrc supplies
// the bits shifted in and RdmDest receives the bits shifted out; both are
// tied to the same register, as is Qd to QdSrc.
2306 def MVE_VSHLC : MVE_p<(outs rGPR:$RdmDest, MQPR:$Qd),
2307 (ins MQPR:$QdSrc, rGPR:$RdmSrc, long_shift:$imm),
2308 NoItinerary, "vshlc", "", "$QdSrc, $RdmSrc, $imm",
2309 vpred_n, "$RdmDest = $RdmSrc,$Qd = $QdSrc"> {
2315 let Inst{25-23} = 0b101;
2316 let Inst{22} = Qd{3};
2318 let Inst{20-16} = imm{4-0};
2319 let Inst{15-13} = Qd{2-0};
2320 let Inst{12-4} = 0b011111100;
2321 let Inst{3-0} = RdmDest{3-0};
// Common base for immediate-shift instructions: wires Qd/Qm register fields
// into the encoding; subclasses add the opcode and immediate fields.
2324 class MVE_shift_imm<dag oops, dag iops, string iname, string suffix,
2325 string ops, vpred_ops vpred, string cstr,
2326 list<dag> pattern=[]>
2327 : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, pattern> {
2331 let Inst{22} = Qd{3};
2332 let Inst{15-13} = Qd{2-0};
2333 let Inst{5} = Qm{3};
2334 let Inst{3-1} = Qm{2-0};
// VMOVL: widen the bottom ('b') or top ('t') half of a vector with sign or
// zero extension (U bit).
2337 class MVE_VMOVL<string iname, string suffix, bits<2> sz, bit U,
2338 list<dag> pattern=[]>
2339 : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm),
2340 iname, suffix, "$Qd, $Qm", vpred_r, "",
2343 let Inst{25-23} = 0b101;
2345 let Inst{20-19} = sz{1-0};
2346 let Inst{18-16} = 0b000;
2347 let Inst{11-6} = 0b111101;
// Emits the bottom-half ('bh') and top-half ('th') variants of a VMOVL.
2352 multiclass MVE_VMOVL_shift_half<string iname, string suffix, bits<2> sz, bit U,
2353 list<dag> pattern=[]> {
2354 def bh : MVE_VMOVL<!strconcat(iname, "b"), suffix, sz, U, pattern> {
2357 def th : MVE_VMOVL<!strconcat(iname, "t"), suffix, sz, U, pattern> {
2362 defm MVE_VMOVLs8  : MVE_VMOVL_shift_half<"vmovl", "s8",  0b01, 0b0>;
2363 defm MVE_VMOVLu8  : MVE_VMOVL_shift_half<"vmovl", "u8",  0b01, 0b1>;
2364 defm MVE_VMOVLs16 : MVE_VMOVL_shift_half<"vmovl", "s16", 0b10, 0b0>;
2365 defm MVE_VMOVLu16 : MVE_VMOVL_shift_half<"vmovl", "u16", 0b10, 0b1>;
// sext_inreg is matched to the signed bottom-half moves; an in-register
// extend from i8 to i32 goes through two chained VMOVLs.
2367 let Predicates = [HasMVEInt] in {
2368 def : Pat<(sext_inreg (v4i32 MQPR:$src), v4i16),
2369 (MVE_VMOVLs16bh MQPR:$src)>;
2370 def : Pat<(sext_inreg (v8i16 MQPR:$src), v8i8),
2371 (MVE_VMOVLs8bh MQPR:$src)>;
2372 def : Pat<(sext_inreg (v4i32 MQPR:$src), v4i8),
2373 (MVE_VMOVLs16bh (MVE_VMOVLs8bh MQPR:$src))>;
// The AND masks below are modified-immediate encodings of the lane masks
// (zero-extend in place), matched to the unsigned bottom-half moves.
2375 // zext_inreg 16 -> 32
2376 def : Pat<(and (v4i32 MQPR:$src), (v4i32 (ARMvmovImm (i32 0xCFF)))),
2377 (MVE_VMOVLu16bh MQPR:$src)>;
2378 // zext_inreg 8 -> 16
2379 def : Pat<(and (v8i16 MQPR:$src), (v8i16 (ARMvmovImm (i32 0x8FF)))),
2380 (MVE_VMOVLu8bh MQPR:$src)>;
// VSHLL: widening shift left by immediate.
2384 class MVE_VSHLL_imm<string iname, string suffix, bit U, bit th,
2385 Operand immtype, list<dag> pattern=[]>
2386 : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm, immtype:$imm),
2387 iname, suffix, "$Qd, $Qm, $imm", vpred_r, "", pattern> {
2389 let Inst{25-23} = 0b101;
2392 let Inst{11-6} = 0b111101;
2396 // For the MVE_VSHLL_patterns multiclass to refer to
2397 Operand immediateType = immtype;
2400 // The immediate VSHLL instructions accept shift counts from 1 up to
2401 // the lane width (8 or 16), but the full-width shifts have an
2402 // entirely separate encoding, given below with 'lw' in the name.
// 8-bit lanes: shift count 1..7 encoded in Inst{18-16} with a fixed 0b01
// marker in Inst{20-19}.
2404 class MVE_VSHLL_imm8<string iname, string suffix,
2405 bit U, bit th, list<dag> pattern=[]>
2406 : MVE_VSHLL_imm<iname, suffix, U, th, mve_shift_imm1_7, pattern> {
2408 let Inst{20-19} = 0b01;
2409 let Inst{18-16} = imm;
// 16-bit lanes: shift count 1..15 encoded in Inst{19-16}.
2412 class MVE_VSHLL_imm16<string iname, string suffix,
2413 bit U, bit th, list<dag> pattern=[]>
2414 : MVE_VSHLL_imm<iname, suffix, U, th, mve_shift_imm1_15, pattern> {
2417 let Inst{19-16} = imm;
2420 def MVE_VSHLL_imms8bh  : MVE_VSHLL_imm8 <"vshllb", "s8", 0b0, 0b0>;
2421 def MVE_VSHLL_imms8th  : MVE_VSHLL_imm8 <"vshllt", "s8", 0b0, 0b1>;
2422 def MVE_VSHLL_immu8bh  : MVE_VSHLL_imm8 <"vshllb", "u8", 0b1, 0b0>;
2423 def MVE_VSHLL_immu8th  : MVE_VSHLL_imm8 <"vshllt", "u8", 0b1, 0b1>;
2424 def MVE_VSHLL_imms16bh : MVE_VSHLL_imm16<"vshllb", "s16", 0b0, 0b0>;
2425 def MVE_VSHLL_imms16th : MVE_VSHLL_imm16<"vshllt", "s16", 0b0, 0b1>;
2426 def MVE_VSHLL_immu16bh : MVE_VSHLL_imm16<"vshllb", "u16", 0b1, 0b0>;
2427 def MVE_VSHLL_immu16th : MVE_VSHLL_imm16<"vshllt", "u16", 0b1, 0b1>;
// Full-lane-width shifts (vshll ..., #8 / #16): separate encoding with the
// shift amount baked into the assembly string rather than an operand.
2429 class MVE_VSHLL_by_lane_width<string iname, string suffix, bits<2> size,
2430 bit U, string ops, list<dag> pattern=[]>
2431 : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm),
2432 iname, suffix, ops, vpred_r, "", pattern> {
2434 let Inst{25-23} = 0b100;
2435 let Inst{21-20} = 0b11;
2436 let Inst{19-18} = size{1-0};
2437 let Inst{17-16} = 0b01;
2438 let Inst{11-6} = 0b111000;
2443 multiclass MVE_VSHLL_lw<string iname, string suffix, bits<2> sz, bit U,
2444 string ops, list<dag> pattern=[]> {
2445 def bh : MVE_VSHLL_by_lane_width<iname#"b", suffix, sz, U, ops, pattern> {
2448 def th : MVE_VSHLL_by_lane_width<iname#"t", suffix, sz, U, ops, pattern> {
2453 defm MVE_VSHLL_lws8  : MVE_VSHLL_lw<"vshll", "s8",  0b00, 0b0, "$Qd, $Qm, #8">;
2454 defm MVE_VSHLL_lws16 : MVE_VSHLL_lw<"vshll", "s16", 0b01, 0b0, "$Qd, $Qm, #16">;
2455 defm MVE_VSHLL_lwu8  : MVE_VSHLL_lw<"vshll", "u8",  0b00, 0b1, "$Qd, $Qm, #8">;
2456 defm MVE_VSHLL_lwu16 : MVE_VSHLL_lw<"vshll", "u16", 0b01, 0b1, "$Qd, $Qm, #16">;
// Selects VSHLL intrinsics: shift counts below the lane width go to the
// immediate-operand instruction, while a count equal to the lane width
// (VTI.LaneBits) goes to the separate 'lw' encoding. Both unpredicated and
// predicated intrinsic forms are covered.
2458 multiclass MVE_VSHLL_patterns<MVEVectorVTInfo VTI, int top> {
// Look up the already-defined instructions by name, e.g. "MVE_VSHLL_imms8th".
2459 defvar suffix     = !strconcat(VTI.Suffix, !if(top, "th", "bh"));
2460 defvar inst_imm   = !cast<MVE_VSHLL_imm>("MVE_VSHLL_imm" # suffix);
2461 defvar inst_lw    = !cast<MVE_VSHLL_by_lane_width>("MVE_VSHLL_lw" # suffix);
2462 defvar unpred_int = int_arm_mve_vshll_imm;
2463 defvar pred_int   = int_arm_mve_vshll_imm_predicated;
2464 defvar imm        = inst_imm.immediateType;
2466 def : Pat<(VTI.DblVec (unpred_int (VTI.Vec MQPR:$src), imm:$imm,
2467 (i32 VTI.Unsigned), (i32 top))),
2468 (VTI.DblVec (inst_imm (VTI.Vec MQPR:$src), imm:$imm))>;
2469 def : Pat<(VTI.DblVec (unpred_int (VTI.Vec MQPR:$src), (i32 VTI.LaneBits),
2470 (i32 VTI.Unsigned), (i32 top))),
2471 (VTI.DblVec (inst_lw (VTI.Vec MQPR:$src)))>;
2473 def : Pat<(VTI.DblVec (pred_int   (VTI.Vec MQPR:$src), imm:$imm,
2474 (i32 VTI.Unsigned), (i32 top),
2475 (VTI.Pred VCCR:$mask),
2476 (VTI.DblVec MQPR:$inactive))),
2477 (VTI.DblVec (inst_imm (VTI.Vec MQPR:$src), imm:$imm,
2478 ARMVCCThen, (VTI.Pred VCCR:$mask),
2479 (VTI.DblVec MQPR:$inactive)))>;
2480 def : Pat<(VTI.DblVec (pred_int   (VTI.Vec MQPR:$src), (i32 VTI.LaneBits),
2481 (i32 VTI.Unsigned), (i32 top),
2482 (VTI.Pred VCCR:$mask),
2483 (VTI.DblVec MQPR:$inactive))),
2484 (VTI.DblVec (inst_lw (VTI.Vec MQPR:$src), ARMVCCThen,
2485 (VTI.Pred VCCR:$mask),
2486 (VTI.DblVec MQPR:$inactive)))>;
// Instantiate for every narrow source type and both halves (bottom/top).
2489 foreach VTI = [MVE_v16s8, MVE_v8s16, MVE_v16u8, MVE_v8u16] in
2490 foreach top = [0, 1] in
2491 defm : MVE_VSHLL_patterns<VTI, top>;
// Base for narrowing shifts that write only half of Qd: Qd is tied to QdSrc
// so the untouched half is preserved. `immediateType` is exported for the
// MVE_VSHRN_patterns multiclass below.
2493 class MVE_shift_imm_partial<Operand imm, string iname, string suffix>
2494 : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$QdSrc, MQPR:$Qm, imm:$imm),
2495 iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc"> {
2496 Operand immediateType = imm;
// VSHRN/VRSHRN: (rounding) shift right narrow; bit 12 selects top half,
// bit 28 selects rounding.
2499 class MVE_VxSHRN<string iname, string suffix, bit bit_12, bit bit_28,
2500 Operand imm, list<dag> pattern=[]>
2501 : MVE_shift_imm_partial<imm, iname, suffix> {
2504 let Inst{28} = bit_28;
2505 let Inst{25-23} = 0b101;
2507 let Inst{20-16} = imm{4-0};
2508 let Inst{12} = bit_12;
2509 let Inst{11-6} = 0b111111;
2514 def MVE_VRSHRNi16bh : MVE_VxSHRN<"vrshrnb", "i16", 0b0, 0b1, shr_imm8> {
2515 let Inst{20-19} = 0b01;
2517 def MVE_VRSHRNi16th : MVE_VxSHRN<"vrshrnt", "i16", 0b1, 0b1, shr_imm8> {
2518 let Inst{20-19} = 0b01;
2520 def MVE_VRSHRNi32bh : MVE_VxSHRN<"vrshrnb", "i32", 0b0, 0b1, shr_imm16> {
2523 def MVE_VRSHRNi32th : MVE_VxSHRN<"vrshrnt", "i32", 0b1, 0b1, shr_imm16> {
2527 def MVE_VSHRNi16bh : MVE_VxSHRN<"vshrnb", "i16", 0b0, 0b0, shr_imm8> {
2528 let Inst{20-19} = 0b01;
2530 def MVE_VSHRNi16th : MVE_VxSHRN<"vshrnt", "i16", 0b1, 0b0, shr_imm8> {
2531 let Inst{20-19} = 0b01;
2533 def MVE_VSHRNi32bh : MVE_VxSHRN<"vshrnb", "i32", 0b0, 0b0, shr_imm16> {
2536 def MVE_VSHRNi32th : MVE_VxSHRN<"vshrnt", "i32", 0b1, 0b0, shr_imm16> {
// VQSHRUN/VQRSHRUN: saturating (rounding) shift right narrow, signed input
// to unsigned result; bit 28 selects rounding, bit 12 the top half.
2540 class MVE_VxQRSHRUN<string iname, string suffix, bit bit_28, bit bit_12,
2541 Operand imm, list<dag> pattern=[]>
2542 : MVE_shift_imm_partial<imm, iname, suffix> {
2545 let Inst{28} = bit_28;
2546 let Inst{25-23} = 0b101;
2548 let Inst{20-16} = imm{4-0};
2549 let Inst{12} = bit_12;
2550 let Inst{11-6} = 0b111111;
2555 def MVE_VQRSHRUNs16bh : MVE_VxQRSHRUN<
2556 "vqrshrunb", "s16", 0b1, 0b0, shr_imm8> {
2557 let Inst{20-19} = 0b01;
2559 def MVE_VQRSHRUNs16th : MVE_VxQRSHRUN<
2560 "vqrshrunt", "s16", 0b1, 0b1, shr_imm8> {
2561 let Inst{20-19} = 0b01;
2563 def MVE_VQRSHRUNs32bh : MVE_VxQRSHRUN<
2564 "vqrshrunb", "s32", 0b1, 0b0, shr_imm16> {
2567 def MVE_VQRSHRUNs32th : MVE_VxQRSHRUN<
2568 "vqrshrunt", "s32", 0b1, 0b1, shr_imm16> {
2572 def MVE_VQSHRUNs16bh : MVE_VxQRSHRUN<
2573 "vqshrunb", "s16", 0b0, 0b0, shr_imm8> {
2574 let Inst{20-19} = 0b01;
2576 def MVE_VQSHRUNs16th : MVE_VxQRSHRUN<
2577 "vqshrunt", "s16", 0b0, 0b1, shr_imm8> {
2578 let Inst{20-19} = 0b01;
2580 def MVE_VQSHRUNs32bh : MVE_VxQRSHRUN<
2581 "vqshrunb", "s32", 0b0, 0b0, shr_imm16> {
2584 def MVE_VQSHRUNs32th : MVE_VxQRSHRUN<
2585 "vqshrunt", "s32", 0b0, 0b1, shr_imm16> {
// VQSHRN/VQRSHRN: saturating (rounding) shift right narrow keeping the input
// signedness; bit 0 selects rounding, bit 12 the top half.
2589 class MVE_VxQRSHRN<string iname, string suffix, bit bit_0, bit bit_12,
2590 Operand imm, list<dag> pattern=[]>
2591 : MVE_shift_imm_partial<imm, iname, suffix> {
2594 let Inst{25-23} = 0b101;
2596 let Inst{20-16} = imm{4-0};
2597 let Inst{12} = bit_12;
2598 let Inst{11-6} = 0b111101;
2600 let Inst{0} = bit_0;
// Emits the s16/u16/s32/u32 variants of one VQSHRN/VQRSHRN mnemonic.
2603 multiclass MVE_VxQRSHRN_types<string iname, bit bit_0, bit bit_12> {
2604 def s16 : MVE_VxQRSHRN<iname, "s16", bit_0, bit_12, shr_imm8> {
2606 let Inst{20-19} = 0b01;
2608 def u16 : MVE_VxQRSHRN<iname, "u16", bit_0, bit_12, shr_imm8> {
2610 let Inst{20-19} = 0b01;
2612 def s32 : MVE_VxQRSHRN<iname, "s32", bit_0, bit_12, shr_imm16> {
2616 def u32 : MVE_VxQRSHRN<iname, "u32", bit_0, bit_12, shr_imm16> {
2622 defm MVE_VQRSHRNbh : MVE_VxQRSHRN_types<"vqrshrnb", 0b1, 0b0>;
2623 defm MVE_VQRSHRNth : MVE_VxQRSHRN_types<"vqrshrnt", 0b1, 0b1>;
2624 defm MVE_VQSHRNbh  : MVE_VxQRSHRN_types<"vqshrnb", 0b0, 0b0>;
2625 defm MVE_VQSHRNth  : MVE_VxQRSHRN_types<"vqshrnt", 0b0, 0b1>;
// Maps the unified int_arm_mve_vshrn intrinsic (and its predicated variant)
// onto a specific narrowing-shift instruction. The q/r/top flags and the two
// signedness operands in the intrinsic call must match the instruction's
// behavior, so each instruction below is instantiated with the right flags.
2627 multiclass MVE_VSHRN_patterns<MVE_shift_imm_partial inst,
2628 MVEVectorVTInfo OutVTI, MVEVectorVTInfo InVTI,
2629 bit q, bit r, bit top> {
// Shared operand lists: the intrinsic's arguments and the instruction's.
2630 defvar inparams = (? (OutVTI.Vec MQPR:$QdSrc), (InVTI.Vec MQPR:$Qm),
2631 (inst.immediateType:$imm), (i32 q), (i32 r),
2632 (i32 OutVTI.Unsigned), (i32 InVTI.Unsigned), (i32 top));
2633 defvar outparams = (inst (OutVTI.Vec MQPR:$QdSrc), (InVTI.Vec MQPR:$Qm),
// Unpredicated: set the dag operator of the input list to the intrinsic.
2636 def : Pat<(OutVTI.Vec !setop(inparams, int_arm_mve_vshrn)),
2637 (OutVTI.Vec outparams)>;
// Predicated: same, with the predicate appended and ARMVCCThen on the output.
2638 def : Pat<(OutVTI.Vec !con(inparams, (int_arm_mve_vshrn_predicated
2639 (InVTI.Pred VCCR:$pred)))),
2640 (OutVTI.Vec !con(outparams, (? ARMVCCThen, VCCR:$pred)))>;
// One instantiation per instruction: flags are (q, r, top) = (saturating,
// rounding, top-half); the VQSHRUN/VQRSHRUN rows narrow signed to unsigned.
2643 defm : MVE_VSHRN_patterns<MVE_VSHRNi16bh,    MVE_v16s8, MVE_v8s16, 0,0,0>;
2644 defm : MVE_VSHRN_patterns<MVE_VSHRNi16th,    MVE_v16s8, MVE_v8s16, 0,0,1>;
2645 defm : MVE_VSHRN_patterns<MVE_VSHRNi32bh,    MVE_v8s16, MVE_v4s32, 0,0,0>;
2646 defm : MVE_VSHRN_patterns<MVE_VSHRNi32th,    MVE_v8s16, MVE_v4s32, 0,0,1>;
2647 defm : MVE_VSHRN_patterns<MVE_VSHRNi16bh,    MVE_v16u8, MVE_v8u16, 0,0,0>;
2648 defm : MVE_VSHRN_patterns<MVE_VSHRNi16th,    MVE_v16u8, MVE_v8u16, 0,0,1>;
2649 defm : MVE_VSHRN_patterns<MVE_VSHRNi32bh,    MVE_v8u16, MVE_v4u32, 0,0,0>;
2650 defm : MVE_VSHRN_patterns<MVE_VSHRNi32th,    MVE_v8u16, MVE_v4u32, 0,0,1>;
2651 defm : MVE_VSHRN_patterns<MVE_VRSHRNi16bh,   MVE_v16s8, MVE_v8s16, 0,1,0>;
2652 defm : MVE_VSHRN_patterns<MVE_VRSHRNi16th,   MVE_v16s8, MVE_v8s16, 0,1,1>;
2653 defm : MVE_VSHRN_patterns<MVE_VRSHRNi32bh,   MVE_v8s16, MVE_v4s32, 0,1,0>;
2654 defm : MVE_VSHRN_patterns<MVE_VRSHRNi32th,   MVE_v8s16, MVE_v4s32, 0,1,1>;
2655 defm : MVE_VSHRN_patterns<MVE_VRSHRNi16bh,   MVE_v16u8, MVE_v8u16, 0,1,0>;
2656 defm : MVE_VSHRN_patterns<MVE_VRSHRNi16th,   MVE_v16u8, MVE_v8u16, 0,1,1>;
2657 defm : MVE_VSHRN_patterns<MVE_VRSHRNi32bh,   MVE_v8u16, MVE_v4u32, 0,1,0>;
2658 defm : MVE_VSHRN_patterns<MVE_VRSHRNi32th,   MVE_v8u16, MVE_v4u32, 0,1,1>;
2659 defm : MVE_VSHRN_patterns<MVE_VQSHRNbhs16,   MVE_v16s8, MVE_v8s16, 1,0,0>;
2660 defm : MVE_VSHRN_patterns<MVE_VQSHRNths16,   MVE_v16s8, MVE_v8s16, 1,0,1>;
2661 defm : MVE_VSHRN_patterns<MVE_VQSHRNbhs32,   MVE_v8s16, MVE_v4s32, 1,0,0>;
2662 defm : MVE_VSHRN_patterns<MVE_VQSHRNths32,   MVE_v8s16, MVE_v4s32, 1,0,1>;
2663 defm : MVE_VSHRN_patterns<MVE_VQSHRNbhu16,   MVE_v16u8, MVE_v8u16, 1,0,0>;
2664 defm : MVE_VSHRN_patterns<MVE_VQSHRNthu16,   MVE_v16u8, MVE_v8u16, 1,0,1>;
2665 defm : MVE_VSHRN_patterns<MVE_VQSHRNbhu32,   MVE_v8u16, MVE_v4u32, 1,0,0>;
2666 defm : MVE_VSHRN_patterns<MVE_VQSHRNthu32,   MVE_v8u16, MVE_v4u32, 1,0,1>;
2667 defm : MVE_VSHRN_patterns<MVE_VQRSHRNbhs16,  MVE_v16s8, MVE_v8s16, 1,1,0>;
2668 defm : MVE_VSHRN_patterns<MVE_VQRSHRNths16,  MVE_v16s8, MVE_v8s16, 1,1,1>;
2669 defm : MVE_VSHRN_patterns<MVE_VQRSHRNbhs32,  MVE_v8s16, MVE_v4s32, 1,1,0>;
2670 defm : MVE_VSHRN_patterns<MVE_VQRSHRNths32,  MVE_v8s16, MVE_v4s32, 1,1,1>;
2671 defm : MVE_VSHRN_patterns<MVE_VQRSHRNbhu16,  MVE_v16u8, MVE_v8u16, 1,1,0>;
2672 defm : MVE_VSHRN_patterns<MVE_VQRSHRNthu16,  MVE_v16u8, MVE_v8u16, 1,1,1>;
2673 defm : MVE_VSHRN_patterns<MVE_VQRSHRNbhu32,  MVE_v8u16, MVE_v4u32, 1,1,0>;
2674 defm : MVE_VSHRN_patterns<MVE_VQRSHRNthu32,  MVE_v8u16, MVE_v4u32, 1,1,1>;
2675 defm : MVE_VSHRN_patterns<MVE_VQSHRUNs16bh,  MVE_v16u8, MVE_v8s16, 1,0,0>;
2676 defm : MVE_VSHRN_patterns<MVE_VQSHRUNs16th,  MVE_v16u8, MVE_v8s16, 1,0,1>;
2677 defm : MVE_VSHRN_patterns<MVE_VQSHRUNs32bh,  MVE_v8u16, MVE_v4s32, 1,0,0>;
2678 defm : MVE_VSHRN_patterns<MVE_VQSHRUNs32th,  MVE_v8u16, MVE_v4s32, 1,0,1>;
2679 defm : MVE_VSHRN_patterns<MVE_VQRSHRUNs16bh, MVE_v16u8, MVE_v8s16, 1,1,0>;
2680 defm : MVE_VSHRN_patterns<MVE_VQRSHRUNs16th, MVE_v16u8, MVE_v8s16, 1,1,1>;
2681 defm : MVE_VSHRN_patterns<MVE_VQRSHRUNs32bh, MVE_v8u16, MVE_v4s32, 1,1,0>;
2682 defm : MVE_VSHRN_patterns<MVE_VQRSHRUNs32th, MVE_v8u16, MVE_v4s32, 1,1,1>;
2684 // end of mve_imm_shift instructions
2686 // start of mve_shift instructions
// Base class for MVE shifts whose per-lane shift counts come from a second
// vector register ($Qn) rather than an immediate. U selects the unsigned
// variant; bit_4/bit_8 distinguish the family members instantiated via
// mve_shift_by_vec_multi below (vshl/vqshl/vqrshl/vrshl).
2688 class MVE_shift_by_vec<string iname, string suffix, bit U,
2689 bits<2> size, bit bit_4, bit bit_8>
2690 : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qm, MQPR:$Qn), NoItinerary,
2691 iname, suffix, "$Qd, $Qm, $Qn", vpred_r, "", []> {
2692 // Shift instructions which take a vector of shift counts
// Encoding: the 4-bit Q-register numbers are split, with the top bit and
// low three bits placed in separate fields of the instruction word.
2698 let Inst{25-24} = 0b11;
2700 let Inst{22} = Qd{3};
2701 let Inst{21-20} = size;
2702 let Inst{19-17} = Qn{2-0};
2704 let Inst{15-13} = Qd{2-0};
2705 let Inst{12-9} = 0b0010;
2706 let Inst{8} = bit_8;
2707 let Inst{7} = Qn{3};
2709 let Inst{5} = Qm{3};
2710 let Inst{4} = bit_4;
2711 let Inst{3-1} = Qm{2-0};
// Eligible for conversion to a tail-predicated loop form.
2713 let validForTailPredication = 1;
// Defines one shift-by-vector instruction plus its two ISel patterns: the
// unpredicated int_arm_mve_vshl_vector intrinsic, and the predicated form
// which adds a mask and an $inactive fallback vector. The q/r bits are
// matched both in the instruction encoding and as immediate arguments of
// the intrinsic, so each instantiation selects only its own intrinsic calls.
2716 multiclass MVE_shift_by_vec_p<string iname, MVEVectorVTInfo VTI, bit q, bit r> {
2717 def "" : MVE_shift_by_vec<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, q, r>;
2718 defvar Inst = !cast<Instruction>(NAME);
2720 def : Pat<(VTI.Vec (int_arm_mve_vshl_vector
2721 (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh),
2722 (i32 q), (i32 r), (i32 VTI.Unsigned))),
2723 (VTI.Vec (Inst (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh)))>;
2725 def : Pat<(VTI.Vec (int_arm_mve_vshl_vector_predicated
2726 (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh),
2727 (i32 q), (i32 r), (i32 VTI.Unsigned),
2728 (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
2729 (VTI.Vec (Inst (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh),
2730 ARMVCCThen, (VTI.Pred VCCR:$mask),
2731 (VTI.Vec MQPR:$inactive)))>;
// Expands one shift-by-vector mnemonic across all six element types
// (s8/s16/s32/u8/u16/u32), then instantiates the four family members.
// Presumably bit_4 marks the saturating ('q') and bit_8 the rounding ('r')
// variants, judging by the instantiations below -- confirm against the ARM
// architecture reference before relying on that.
2734 multiclass mve_shift_by_vec_multi<string iname, bit bit_4, bit bit_8> {
2735 defm s8 : MVE_shift_by_vec_p<iname, MVE_v16s8, bit_4, bit_8>;
2736 defm s16 : MVE_shift_by_vec_p<iname, MVE_v8s16, bit_4, bit_8>;
2737 defm s32 : MVE_shift_by_vec_p<iname, MVE_v4s32, bit_4, bit_8>;
2738 defm u8 : MVE_shift_by_vec_p<iname, MVE_v16u8, bit_4, bit_8>;
2739 defm u16 : MVE_shift_by_vec_p<iname, MVE_v8u16, bit_4, bit_8>;
2740 defm u32 : MVE_shift_by_vec_p<iname, MVE_v4u32, bit_4, bit_8>;
2743 defm MVE_VSHL_by_vec : mve_shift_by_vec_multi<"vshl", 0b0, 0b0>;
2744 defm MVE_VQSHL_by_vec : mve_shift_by_vec_multi<"vqshl", 0b1, 0b0>;
2745 defm MVE_VQRSHL_by_vec : mve_shift_by_vec_multi<"vqrshl", 0b1, 0b1>;
2746 defm MVE_VRSHL_by_vec : mve_shift_by_vec_multi<"vrshl", 0b0, 0b1>;
// Select the generic ARMvshlu/ARMvshls shift nodes directly onto the
// unsigned/signed by-vector VSHL instructions, one pattern per vector width.
2748 let Predicates = [HasMVEInt] in {
2749 def : Pat<(v4i32 (ARMvshlu (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn))),
2750 (v4i32 (MVE_VSHL_by_vecu32 (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)))>;
2751 def : Pat<(v8i16 (ARMvshlu (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn))),
2752 (v8i16 (MVE_VSHL_by_vecu16 (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)))>;
2753 def : Pat<(v16i8 (ARMvshlu (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn))),
2754 (v16i8 (MVE_VSHL_by_vecu8 (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)))>;
2756 def : Pat<(v4i32 (ARMvshls (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn))),
2757 (v4i32 (MVE_VSHL_by_vecs32 (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)))>;
2758 def : Pat<(v8i16 (ARMvshls (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn))),
2759 (v8i16 (MVE_VSHL_by_vecs16 (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)))>;
2760 def : Pat<(v16i8 (ARMvshls (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn))),
2761 (v16i8 (MVE_VSHL_by_vecs8 (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)))>;
// Common base class for MVE shift-by-immediate instructions. Besides the
// shared encoding bits, it declares extra record fields (VTI,
// immediateType, unpred_int, unsignedFlag) that the
// MVE_shift_imm_patterns multiclass later reads reflectively to build
// selection patterns for each concrete instruction.
2764 class MVE_shift_with_imm<string iname, string suffix, dag oops, dag iops,
2765 string ops, vpred_ops vpred, string cstr,
2766 list<dag> pattern=[]>
2767 : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, pattern> {
2772 let Inst{22} = Qd{3};
2773 let Inst{15-13} = Qd{2-0};
2774 let Inst{12-11} = 0b00;
2775 let Inst{7-6} = 0b01;
2776 let Inst{5} = Qm{3};
2778 let Inst{3-1} = Qm{2-0};
2780 let validForTailPredication = 1;
2782 // For the MVE_shift_imm_patterns multiclass to refer to
2783 MVEVectorVTInfo VTI;
2784 Operand immediateType;
2785 Intrinsic unpred_int;
// unsignedFlag defaults to an empty dag; subclasses for sign-sensitive
// shifts override it with (? (i32 VTI.Unsigned)) so the flag is appended
// to the matched intrinsic arguments.
2787 dag unsignedFlag = (?);
// VSRI/VSLI shift-and-insert: the destination is tied to $Qd_src (vpred_n,
// "$Qd = $Qd_src"), so unshifted bits of the original destination survive.
// bit_8 selects between the two mnemonics; the per-size defs below pin the
// upper imm bits to encode the element size.
2790 class MVE_VSxI_imm<string iname, string suffix, bit bit_8, Operand immType>
2791 : MVE_shift_with_imm<iname, suffix, (outs MQPR:$Qd),
2792 (ins MQPR:$Qd_src, MQPR:$Qm, immType:$imm),
2793 "$Qd, $Qm, $imm", vpred_n, "$Qd = $Qd_src"> {
2796 let Inst{25-24} = 0b11;
2797 let Inst{21-16} = imm;
2798 let Inst{10-9} = 0b10;
2799 let Inst{8} = bit_8;
2800 let validForTailPredication = 1;
// Recorded so MVE_VSxI_patterns can name the right immediate operand type.
2802 Operand immediateType = immType;
// Note the asymmetric immediate ranges: vsri shifts right (1..size, the
// shr_imm* operands) while vsli shifts left (0..size-1, the imm0_* operands).
2805 def MVE_VSRIimm8 : MVE_VSxI_imm<"vsri", "8", 0b0, shr_imm8> {
2806 let Inst{21-19} = 0b001;
2809 def MVE_VSRIimm16 : MVE_VSxI_imm<"vsri", "16", 0b0, shr_imm16> {
2810 let Inst{21-20} = 0b01;
2813 def MVE_VSRIimm32 : MVE_VSxI_imm<"vsri", "32", 0b0, shr_imm32> {
2817 def MVE_VSLIimm8 : MVE_VSxI_imm<"vsli", "8", 0b1, imm0_7> {
2818 let Inst{21-19} = 0b001;
2821 def MVE_VSLIimm16 : MVE_VSxI_imm<"vsli", "16", 0b1, imm0_15> {
2822 let Inst{21-20} = 0b01;
2825 def MVE_VSLIimm32 : MVE_VSxI_imm<"vsli", "32", 0b1,imm0_31> {
// Builds the unpredicated and predicated selection patterns for a VSRI/VSLI
// instruction. The operand list is built once as a dag (inparams) and then
// re-headed with the intrinsic (!setop) or extended with the predicate
// arguments (!con), so both patterns stay in sync with the operand order.
2829 multiclass MVE_VSxI_patterns<MVE_VSxI_imm inst, string name,
2830 MVEVectorVTInfo VTI> {
2831 defvar inparams = (? (VTI.Vec MQPR:$QdSrc), (VTI.Vec MQPR:$Qm),
2832 (inst.immediateType:$imm));
2833 defvar outparams = (inst (VTI.Vec MQPR:$QdSrc), (VTI.Vec MQPR:$Qm),
2834 (inst.immediateType:$imm));
2835 defvar unpred_int = !cast<Intrinsic>("int_arm_mve_" # name);
2836 defvar pred_int = !cast<Intrinsic>("int_arm_mve_" # name # "_predicated");
2838 def : Pat<(VTI.Vec !setop(inparams, unpred_int)),
2839 (VTI.Vec outparams)>;
2840 def : Pat<(VTI.Vec !con(inparams, (pred_int (VTI.Pred VCCR:$pred)))),
2841 (VTI.Vec !con(outparams, (? ARMVCCThen, VCCR:$pred)))>;
2844 defm : MVE_VSxI_patterns<MVE_VSLIimm8, "vsli", MVE_v16i8>;
2845 defm : MVE_VSxI_patterns<MVE_VSLIimm16, "vsli", MVE_v8i16>;
2846 defm : MVE_VSxI_patterns<MVE_VSLIimm32, "vsli", MVE_v4i32>;
2847 defm : MVE_VSxI_patterns<MVE_VSRIimm8, "vsri", MVE_v16i8>;
2848 defm : MVE_VSxI_patterns<MVE_VSRIimm16, "vsri", MVE_v8i16>;
2849 defm : MVE_VSxI_patterns<MVE_VSRIimm32, "vsri", MVE_v4i32>;
// VQSHL (immediate): signedness is encoded in Inst{28} and also exposed to
// the pattern machinery via unsignedFlag, so the matched intrinsic carries
// an explicit unsigned argument. Element size is encoded by the fixed
// upper imm bits in the per-size defs below.
2851 class MVE_VQSHL_imm<MVEVectorVTInfo VTI_, Operand immType>
2852 : MVE_shift_with_imm<"vqshl", VTI_.Suffix, (outs MQPR:$Qd),
2853 (ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm",
2857 let Inst{28} = VTI_.Unsigned;
2858 let Inst{25-24} = 0b11;
2859 let Inst{21-16} = imm;
2860 let Inst{10-8} = 0b111;
2863 let immediateType = immType;
2864 let unsignedFlag = (? (i32 VTI.Unsigned));
// Attach the vqshl intrinsics to every size/signedness variant at once.
2867 let unpred_int = int_arm_mve_vqshl_imm,
2868 pred_int = int_arm_mve_vqshl_imm_predicated in {
2869 def MVE_VQSHLimms8 : MVE_VQSHL_imm<MVE_v16s8, imm0_7> {
2870 let Inst{21-19} = 0b001;
2872 def MVE_VQSHLimmu8 : MVE_VQSHL_imm<MVE_v16u8, imm0_7> {
2873 let Inst{21-19} = 0b001;
2876 def MVE_VQSHLimms16 : MVE_VQSHL_imm<MVE_v8s16, imm0_15> {
2877 let Inst{21-20} = 0b01;
2879 def MVE_VQSHLimmu16 : MVE_VQSHL_imm<MVE_v8u16, imm0_15> {
2880 let Inst{21-20} = 0b01;
2883 def MVE_VQSHLimms32 : MVE_VQSHL_imm<MVE_v4s32, imm0_31> {
2886 def MVE_VQSHLimmu32 : MVE_VQSHL_imm<MVE_v4u32, imm0_31> {
// VQSHLU (immediate): signed-input variants only (all defs below use the
// MVE_v*s* type infos), so no Inst{28}/unsignedFlag override is needed
// here, unlike MVE_VQSHL_imm above.
2891 class MVE_VQSHLU_imm<MVEVectorVTInfo VTI_, Operand immType>
2892 : MVE_shift_with_imm<"vqshlu", VTI_.Suffix, (outs MQPR:$Qd),
2893 (ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm",
2898 let Inst{25-24} = 0b11;
2899 let Inst{21-16} = imm;
2900 let Inst{10-8} = 0b110;
2903 let immediateType = immType;
2906 let unpred_int = int_arm_mve_vqshlu_imm,
2907 pred_int = int_arm_mve_vqshlu_imm_predicated in {
2908 def MVE_VQSHLU_imms8 : MVE_VQSHLU_imm<MVE_v16s8, imm0_7> {
2909 let Inst{21-19} = 0b001;
2912 def MVE_VQSHLU_imms16 : MVE_VQSHLU_imm<MVE_v8s16, imm0_15> {
2913 let Inst{21-20} = 0b01;
2916 def MVE_VQSHLU_imms32 : MVE_VQSHLU_imm<MVE_v4s32, imm0_31> {
// VRSHR (immediate): like VQSHL above, exposes signedness both in the
// encoding (Inst{28}) and to the pattern generator (unsignedFlag). Uses
// the shr_imm* operands, whose range starts at 1 rather than 0.
2921 class MVE_VRSHR_imm<MVEVectorVTInfo VTI_, Operand immType>
2922 : MVE_shift_with_imm<"vrshr", VTI_.Suffix, (outs MQPR:$Qd),
2923 (ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm",
2927 let Inst{28} = VTI_.Unsigned;
2928 let Inst{25-24} = 0b11;
2929 let Inst{21-16} = imm;
2930 let Inst{10-8} = 0b010;
2933 let immediateType = immType;
2934 let unsignedFlag = (? (i32 VTI.Unsigned));
2937 let unpred_int = int_arm_mve_vrshr_imm,
2938 pred_int = int_arm_mve_vrshr_imm_predicated in {
2939 def MVE_VRSHR_imms8 : MVE_VRSHR_imm<MVE_v16s8, shr_imm8> {
2940 let Inst{21-19} = 0b001;
2943 def MVE_VRSHR_immu8 : MVE_VRSHR_imm<MVE_v16u8, shr_imm8> {
2944 let Inst{21-19} = 0b001;
2947 def MVE_VRSHR_imms16 : MVE_VRSHR_imm<MVE_v8s16, shr_imm16> {
2948 let Inst{21-20} = 0b01;
2951 def MVE_VRSHR_immu16 : MVE_VRSHR_imm<MVE_v8u16, shr_imm16> {
2952 let Inst{21-20} = 0b01;
2955 def MVE_VRSHR_imms32 : MVE_VRSHR_imm<MVE_v4s32, shr_imm32> {
2959 def MVE_VRSHR_immu32 : MVE_VRSHR_imm<MVE_v4u32, shr_imm32> {
// Generic pattern generator for the shift-by-immediate instructions above.
// It pulls everything it needs (vector type, immediate operand type, the
// intrinsics, and the optional unsigned-flag dag) out of the instruction
// record's extra fields, so one multiclass covers VQSHL/VQSHLU/VRSHR.
2964 multiclass MVE_shift_imm_patterns<MVE_shift_with_imm inst> {
// Unpredicated form: intrinsic args are (src, imm [, unsigned]).
2965 def : Pat<(inst.VTI.Vec !con((inst.unpred_int (inst.VTI.Vec MQPR:$src),
2966 inst.immediateType:$imm),
2967 inst.unsignedFlag)),
2968 (inst.VTI.Vec (inst (inst.VTI.Vec MQPR:$src),
2969 inst.immediateType:$imm))>;
// Predicated form: same prefix plus mask and $inactive fallback vector.
2971 def : Pat<(inst.VTI.Vec !con((inst.pred_int (inst.VTI.Vec MQPR:$src),
2972 inst.immediateType:$imm),
2974 (? (inst.VTI.Pred VCCR:$mask),
2975 (inst.VTI.Vec MQPR:$inactive)))),
2976 (inst.VTI.Vec (inst (inst.VTI.Vec MQPR:$src),
2977 inst.immediateType:$imm,
2978 ARMVCCThen, (inst.VTI.Pred VCCR:$mask),
2979 (inst.VTI.Vec MQPR:$inactive)))>;
2982 defm : MVE_shift_imm_patterns<MVE_VQSHLimms8>;
2983 defm : MVE_shift_imm_patterns<MVE_VQSHLimmu8>;
2984 defm : MVE_shift_imm_patterns<MVE_VQSHLimms16>;
2985 defm : MVE_shift_imm_patterns<MVE_VQSHLimmu16>;
2986 defm : MVE_shift_imm_patterns<MVE_VQSHLimms32>;
2987 defm : MVE_shift_imm_patterns<MVE_VQSHLimmu32>;
2988 defm : MVE_shift_imm_patterns<MVE_VQSHLU_imms8>;
2989 defm : MVE_shift_imm_patterns<MVE_VQSHLU_imms16>;
2990 defm : MVE_shift_imm_patterns<MVE_VQSHLU_imms32>;
2991 defm : MVE_shift_imm_patterns<MVE_VRSHR_imms8>;
2992 defm : MVE_shift_imm_patterns<MVE_VRSHR_immu8>;
2993 defm : MVE_shift_imm_patterns<MVE_VRSHR_imms16>;
2994 defm : MVE_shift_imm_patterns<MVE_VRSHR_immu16>;
2995 defm : MVE_shift_imm_patterns<MVE_VRSHR_imms32>;
2996 defm : MVE_shift_imm_patterns<MVE_VRSHR_immu32>;
// VSHR (immediate): plain shift right. The immediate is passed in as a dag
// so each def can supply its own size-appropriate shr_imm* operand.
2998 class MVE_VSHR_imm<string suffix, dag imm>
2999 : MVE_shift_with_imm<"vshr", suffix, (outs MQPR:$Qd),
3000 !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
3004 let Inst{25-24} = 0b11;
3005 let Inst{21-16} = imm;
3006 let Inst{10-8} = 0b000;
3009 def MVE_VSHR_imms8 : MVE_VSHR_imm<"s8", (ins shr_imm8:$imm)> {
3011 let Inst{21-19} = 0b001;
3014 def MVE_VSHR_immu8 : MVE_VSHR_imm<"u8", (ins shr_imm8:$imm)> {
3016 let Inst{21-19} = 0b001;
3019 def MVE_VSHR_imms16 : MVE_VSHR_imm<"s16", (ins shr_imm16:$imm)> {
3021 let Inst{21-20} = 0b01;
3024 def MVE_VSHR_immu16 : MVE_VSHR_imm<"u16", (ins shr_imm16:$imm)> {
3026 let Inst{21-20} = 0b01;
3029 def MVE_VSHR_imms32 : MVE_VSHR_imm<"s32", (ins shr_imm32:$imm)> {
3034 def MVE_VSHR_immu32 : MVE_VSHR_imm<"u32", (ins shr_imm32:$imm)> {
// VSHL (immediate): plain shift left. Uses the "i" (signedness-agnostic)
// suffixes since left shift behaves identically for signed and unsigned
// lanes; immediate range is 0..size-1 (imm0_* operands).
3039 class MVE_VSHL_imm<string suffix, dag imm>
3040 : MVE_shift_with_imm<"vshl", suffix, (outs MQPR:$Qd),
3041 !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
3046 let Inst{25-24} = 0b11;
3047 let Inst{21-16} = imm;
3048 let Inst{10-8} = 0b101;
3051 def MVE_VSHL_immi8 : MVE_VSHL_imm<"i8", (ins imm0_7:$imm)> {
3052 let Inst{21-19} = 0b001;
3055 def MVE_VSHL_immi16 : MVE_VSHL_imm<"i16", (ins imm0_15:$imm)> {
3056 let Inst{21-20} = 0b01;
3059 def MVE_VSHL_immi32 : MVE_VSHL_imm<"i32", (ins imm0_31:$imm)> {
// Patterns mapping the generic immediate-shift ISD nodes (ARMvshlImm,
// ARMvshruImm, ARMvshrsImm) and their predicated intrinsics onto
// MVE_VSHL_imm*/MVE_VSHR_imm*. unsignedFlag is an optional extra
// intrinsic argument, spliced in via !dag only when non-empty.
3063 multiclass MVE_immediate_shift_patterns_inner<
3064 MVEVectorVTInfo VTI, Operand imm_operand_type, SDNode unpred_op,
3065 Intrinsic pred_int, Instruction inst, list<int> unsignedFlag = []> {
3067 def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$src), imm_operand_type:$imm)),
3068 (VTI.Vec (inst (VTI.Vec MQPR:$src), imm_operand_type:$imm))>;
3070 def : Pat<(VTI.Vec !con((pred_int (VTI.Vec MQPR:$src), imm_operand_type:$imm),
3071 !dag(pred_int, unsignedFlag, ?),
3072 (pred_int (VTI.Pred VCCR:$mask),
3073 (VTI.Vec MQPR:$inactive)))),
3074 (VTI.Vec (inst (VTI.Vec MQPR:$src), imm_operand_type:$imm,
3075 ARMVCCThen, (VTI.Pred VCCR:$mask),
3076 (VTI.Vec MQPR:$inactive)))>;
// One instantiation each for shift-left, unsigned shift-right (flag [1])
// and signed shift-right (flag [0]); the instruction is looked up by name
// from the type's bit-width suffix.
3079 multiclass MVE_immediate_shift_patterns<MVEVectorVTInfo VTI,
3080 Operand imm_operand_type> {
3081 defm : MVE_immediate_shift_patterns_inner<VTI, imm_operand_type,
3082 ARMvshlImm, int_arm_mve_shl_imm_predicated,
3083 !cast<Instruction>("MVE_VSHL_immi" # VTI.BitsSuffix)>;
3084 defm : MVE_immediate_shift_patterns_inner<VTI, imm_operand_type,
3085 ARMvshruImm, int_arm_mve_shr_imm_predicated,
3086 !cast<Instruction>("MVE_VSHR_immu" # VTI.BitsSuffix), [1]>;
3087 defm : MVE_immediate_shift_patterns_inner<VTI, imm_operand_type,
3088 ARMvshrsImm, int_arm_mve_shr_imm_predicated,
3089 !cast<Instruction>("MVE_VSHR_imms" # VTI.BitsSuffix), [0]>;
3092 let Predicates = [HasMVEInt] in {
3093 defm : MVE_immediate_shift_patterns<MVE_v16i8, imm0_7>;
3094 defm : MVE_immediate_shift_patterns<MVE_v8i16, imm0_15>;
3095 defm : MVE_immediate_shift_patterns<MVE_v4i32, imm0_31>;
3098 // end of mve_shift instructions
3100 // start of MVE Floating Point instructions
// Shared base for the MVE floating-point instructions in this section:
// layers the common $Qm source-register encoding bits on top of MVE_f.
3102 class MVE_float<string iname, string suffix, dag oops, dag iops, string ops,
3103 vpred_ops vpred, string cstr, list<dag> pattern=[]>
3104 : MVE_f<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, pattern> {
3109 let Inst{5} = Qm{3};
3110 let Inst{3-1} = Qm{2-0};
// VRINT<rmode>: float-to-float rounding to integral value. 'op' encodes the
// rounding-mode suffix letter; MVE_VRINT_ops stamps out the six modes
// (n/x/a/z/m/p) for each element size.
3114 class MVE_VRINT<string rmode, bits<3> op, string suffix, bits<2> size,
3115 list<dag> pattern=[]>
3116 : MVE_float<!strconcat("vrint", rmode), suffix, (outs MQPR:$Qd),
3117 (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
3121 let Inst{25-23} = 0b111;
3122 let Inst{22} = Qd{3};
3123 let Inst{21-20} = 0b11;
3124 let Inst{19-18} = size;
3125 let Inst{17-16} = 0b10;
3126 let Inst{15-13} = Qd{2-0};
3127 let Inst{11-10} = 0b01;
3128 let Inst{9-7} = op{2-0};
3130 let validForTailPredication = 1;
// Note the op encodings skip 0b100 and 0b110: only the six modes below
// are defined.
3134 multiclass MVE_VRINT_ops<string suffix, bits<2> size, list<dag> pattern=[]> {
3135 def N : MVE_VRINT<"n", 0b000, suffix, size, pattern>;
3136 def X : MVE_VRINT<"x", 0b001, suffix, size, pattern>;
3137 def A : MVE_VRINT<"a", 0b010, suffix, size, pattern>;
3138 def Z : MVE_VRINT<"z", 0b011, suffix, size, pattern>;
3139 def M : MVE_VRINT<"m", 0b101, suffix, size, pattern>;
3140 def P : MVE_VRINT<"p", 0b111, suffix, size, pattern>;
3143 defm MVE_VRINTf16 : MVE_VRINT_ops<"f16", 0b01>;
3144 defm MVE_VRINTf32 : MVE_VRINT_ops<"f32", 0b10>;
// Map the generic rounding ISD nodes onto the matching VRINT variants:
// frint -> X, fround -> A, ftrunc -> Z, ffloor -> M, fceil -> P.
3146 let Predicates = [HasMVEFloat] in {
3147 def : Pat<(v4f32 (frint (v4f32 MQPR:$val1))),
3148 (v4f32 (MVE_VRINTf32X (v4f32 MQPR:$val1)))>;
3149 def : Pat<(v8f16 (frint (v8f16 MQPR:$val1))),
3150 (v8f16 (MVE_VRINTf16X (v8f16 MQPR:$val1)))>;
3151 def : Pat<(v4f32 (fround (v4f32 MQPR:$val1))),
3152 (v4f32 (MVE_VRINTf32A (v4f32 MQPR:$val1)))>;
3153 def : Pat<(v8f16 (fround (v8f16 MQPR:$val1))),
3154 (v8f16 (MVE_VRINTf16A (v8f16 MQPR:$val1)))>;
3155 def : Pat<(v4f32 (ftrunc (v4f32 MQPR:$val1))),
3156 (v4f32 (MVE_VRINTf32Z (v4f32 MQPR:$val1)))>;
3157 def : Pat<(v8f16 (ftrunc (v8f16 MQPR:$val1))),
3158 (v8f16 (MVE_VRINTf16Z (v8f16 MQPR:$val1)))>;
3159 def : Pat<(v4f32 (ffloor (v4f32 MQPR:$val1))),
3160 (v4f32 (MVE_VRINTf32M (v4f32 MQPR:$val1)))>;
3161 def : Pat<(v8f16 (ffloor (v8f16 MQPR:$val1))),
3162 (v8f16 (MVE_VRINTf16M (v8f16 MQPR:$val1)))>;
3163 def : Pat<(v4f32 (fceil (v4f32 MQPR:$val1))),
3164 (v4f32 (MVE_VRINTf32P (v4f32 MQPR:$val1)))>;
3165 def : Pat<(v8f16 (fceil (v8f16 MQPR:$val1))),
3166 (v8f16 (MVE_VRINTf16P (v8f16 MQPR:$val1)))>;
// Thin wrapper over MVE_float that places the single-bit element-size
// selector in Inst{20}, shared by the NEON-like FP arithmetic classes below.
3169 class MVEFloatArithNeon<string iname, string suffix, bit size,
3170 dag oops, dag iops, string ops,
3171 vpred_ops vpred, string cstr, list<dag> pattern=[]>
3172 : MVE_float<iname, suffix, oops, iops, ops, vpred, cstr, pattern> {
3173 let Inst{20} = size;
// VMUL (floating point): two-source vector multiply, $Qd, $Qn, $Qm.
3177 class MVE_VMUL_fp<string iname, string suffix, bit size, list<dag> pattern=[]>
3178 : MVEFloatArithNeon<iname, suffix, size, (outs MQPR:$Qd),
3179 (ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm", vpred_r, "",
3185 let Inst{25-23} = 0b110;
3186 let Inst{22} = Qd{3};
3188 let Inst{19-17} = Qn{2-0};
3189 let Inst{15-13} = Qd{2-0};
3190 let Inst{12-8} = 0b01101;
3191 let Inst{7} = Qn{3};
3193 let validForTailPredication = 1;
// Instruction-plus-patterns wrapper for FP multiply: selects both the
// generic node (fmul) and the predicated intrinsic onto the instruction.
// VTI.Size{0} supplies the single size bit consumed by MVEFloatArithNeon.
3196 multiclass MVE_VMULT_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
3197 SDNode unpred_op, Intrinsic pred_int> {
3198 def "" : MVE_VMUL_fp<iname, VTI.Suffix, VTI.Size{0}>;
3199 defvar Inst = !cast<Instruction>(NAME);
3201 let Predicates = [HasMVEFloat] in {
3202 def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
3203 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
3204 def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
3205 (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
3206 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
3207 ARMVCCThen, (VTI.Pred VCCR:$mask),
3208 (VTI.Vec MQPR:$inactive)))>;
3212 multiclass MVE_VMUL_fp_m<MVEVectorVTInfo VTI>
3213 : MVE_VMULT_fp_m<"vmul", 0, VTI, fmul, int_arm_mve_mul_predicated>;
3215 defm MVE_VMULf32 : MVE_VMUL_fp_m<MVE_v4f32>;
3216 defm MVE_VMULf16 : MVE_VMUL_fp_m<MVE_v8f16>;
// VCMLA: complex multiply-accumulate with a rotation operand ($rot,
// encoded in Inst{24-23}). The destination is tied to $Qd_src since the
// instruction accumulates into it.
3218 class MVE_VCMLA<string suffix, bit size>
3219 : MVEFloatArithNeon<"vcmla", suffix, size, (outs MQPR:$Qd),
3220 (ins MQPR:$Qd_src, MQPR:$Qn, MQPR:$Qm, complexrotateop:$rot),
3221 "$Qd, $Qn, $Qm, $rot", vpred_n, "$Qd = $Qd_src", []> {
3228 let Inst{24-23} = rot;
3229 let Inst{22} = Qd{3};
3231 let Inst{19-17} = Qn{2-0};
3232 let Inst{15-13} = Qd{2-0};
3233 let Inst{12-8} = 0b01000;
3234 let Inst{7} = Qn{3};
// Selection patterns: note the intrinsics take $rot first, while the
// instruction takes it after the vector operands.
3238 multiclass MVE_VCMLA_m<MVEVectorVTInfo VTI, bit size> {
3239 def "" : MVE_VCMLA<VTI.Suffix, size>;
3240 defvar Inst = !cast<Instruction>(NAME);
3242 let Predicates = [HasMVEFloat] in {
3243 def : Pat<(VTI.Vec (int_arm_mve_vcmlaq
3244 imm:$rot, (VTI.Vec MQPR:$Qd_src),
3245 (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
3246 (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src),
3247 (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
3250 def : Pat<(VTI.Vec (int_arm_mve_vcmlaq_predicated
3251 imm:$rot, (VTI.Vec MQPR:$Qd_src),
3252 (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
3253 (VTI.Pred VCCR:$mask))),
3254 (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src), (VTI.Vec MQPR:$Qn),
3255 (VTI.Vec MQPR:$Qm), imm:$rot,
3256 ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
3261 defm MVE_VCMLAf16 : MVE_VCMLA_m<MVE_v8f16, 0b0>;
3262 defm MVE_VCMLAf32 : MVE_VCMLA_m<MVE_v4f32, 0b1>;
// Shared encoding class for FP add, sub, and fused multiply-accumulate.
// The fma variants pass an extra tied $Qd_src input plus vpred_n; the
// discriminating bits (4, 8, 21) are supplied per-instruction below.
3264 class MVE_VADDSUBFMA_fp<string iname, string suffix, bit size, bit bit_4,
3265 bit bit_8, bit bit_21, dag iops=(ins),
3266 vpred_ops vpred=vpred_r, string cstr="",
3267 list<dag> pattern=[]>
3268 : MVEFloatArithNeon<iname, suffix, size, (outs MQPR:$Qd),
3269 !con(iops, (ins MQPR:$Qn, MQPR:$Qm)), "$Qd, $Qn, $Qm",
3270 vpred, cstr, pattern> {
3275 let Inst{25-23} = 0b110;
3276 let Inst{22} = Qd{3};
3277 let Inst{21} = bit_21;
3278 let Inst{19-17} = Qn{2-0};
3279 let Inst{15-13} = Qd{2-0};
3280 let Inst{11-9} = 0b110;
3281 let Inst{8} = bit_8;
3282 let Inst{7} = Qn{3};
3283 let Inst{4} = bit_4;
3286 def MVE_VFMAf32 : MVE_VADDSUBFMA_fp<"vfma", "f32", 0b0, 0b1, 0b0, 0b0,
3287 (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
3288 def MVE_VFMAf16 : MVE_VADDSUBFMA_fp<"vfma", "f16", 0b1, 0b1, 0b0, 0b0,
3289 (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
3291 def MVE_VFMSf32 : MVE_VADDSUBFMA_fp<"vfms", "f32", 0b0, 0b1, 0b0, 0b1,
3292 (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
3293 def MVE_VFMSf16 : MVE_VADDSUBFMA_fp<"vfms", "f16", 0b1, 0b1, 0b0, 0b1,
3294 (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
// fma operand order: generic node is (a, b, accumulator); the instruction
// takes the accumulator first, hence the $src3/$src1/$src2 permutation.
// VFMS is matched by folding an fneg on the first multiplicand.
3296 let Predicates = [HasMVEFloat] in {
3297 def : Pat<(v8f16 (fma (v8f16 MQPR:$src1), (v8f16 MQPR:$src2), (v8f16 MQPR:$src3))),
3298 (v8f16 (MVE_VFMAf16 $src3, $src1, $src2))>;
3299 def : Pat<(v4f32 (fma (v4f32 MQPR:$src1), (v4f32 MQPR:$src2), (v4f32 MQPR:$src3))),
3300 (v4f32 (MVE_VFMAf32 $src3, $src1, $src2))>;
3301 def : Pat<(v8f16 (fma (fneg (v8f16 MQPR:$src1)), (v8f16 MQPR:$src2), (v8f16 MQPR:$src3))),
3302 (v8f16 (MVE_VFMSf16 $src3, $src1, $src2))>;
3303 def : Pat<(v4f32 (fma (fneg (v4f32 MQPR:$src1)), (v4f32 MQPR:$src2), (v4f32 MQPR:$src3))),
3304 (v4f32 (MVE_VFMSf32 $src3, $src1, $src2))>;
// FP add/sub instruction-plus-patterns wrapper; bit_21 distinguishes vadd
// (0) from vsub (1). Unlike the fma defs above, these are marked valid for
// tail predication.
3307 multiclass MVE_VADDSUB_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
3308 SDNode unpred_op, Intrinsic pred_int> {
3309 def "" : MVE_VADDSUBFMA_fp<iname, VTI.Suffix, VTI.Size{0}, 0, 1, bit_21> {
3310 let validForTailPredication = 1;
3312 defvar Inst = !cast<Instruction>(NAME);
3314 let Predicates = [HasMVEFloat] in {
3315 def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
3316 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
3317 def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
3318 (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
3319 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
3320 ARMVCCThen, (VTI.Pred VCCR:$mask),
3321 (VTI.Vec MQPR:$inactive)))>;
3325 multiclass MVE_VADD_fp_m<MVEVectorVTInfo VTI>
3326 : MVE_VADDSUB_fp_m<"vadd", 0, VTI, fadd, int_arm_mve_add_predicated>;
3327 multiclass MVE_VSUB_fp_m<MVEVectorVTInfo VTI>
3328 : MVE_VADDSUB_fp_m<"vsub", 1, VTI, fsub, int_arm_mve_sub_predicated>;
3330 defm MVE_VADDf32 : MVE_VADD_fp_m<MVE_v4f32>;
3331 defm MVE_VADDf16 : MVE_VADD_fp_m<MVE_v8f16>;
3333 defm MVE_VSUBf32 : MVE_VSUB_fp_m<MVE_v4f32>;
3334 defm MVE_VSUBf16 : MVE_VSUB_fp_m<MVE_v8f16>;
// VCADD: complex add with rotation; $rot uses complexrotateopodd (90/270
// only). The f32 instantiation passes an @earlyclobber constraint on $Qd
// via cstr; the matched intrinsics carry a leading (i32 1) selector.
3336 class MVE_VCADD<string suffix, bit size, string cstr="">
3337 : MVEFloatArithNeon<"vcadd", suffix, size, (outs MQPR:$Qd),
3338 (ins MQPR:$Qn, MQPR:$Qm, complexrotateopodd:$rot),
3339 "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, []> {
3348 let Inst{22} = Qd{3};
3350 let Inst{19-17} = Qn{2-0};
3351 let Inst{15-13} = Qd{2-0};
3352 let Inst{12-8} = 0b01000;
3353 let Inst{7} = Qn{3};
3357 multiclass MVE_VCADD_m<MVEVectorVTInfo VTI, bit size, string cstr=""> {
3358 def "" : MVE_VCADD<VTI.Suffix, size, cstr>;
3359 defvar Inst = !cast<Instruction>(NAME);
3361 let Predicates = [HasMVEFloat] in {
3362 def : Pat<(VTI.Vec (int_arm_mve_vcaddq (i32 1),
3363 imm:$rot, (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
3364 (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
3367 def : Pat<(VTI.Vec (int_arm_mve_vcaddq_predicated (i32 1),
3368 imm:$rot, (VTI.Vec MQPR:$inactive),
3369 (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
3370 (VTI.Pred VCCR:$mask))),
3371 (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
3372 imm:$rot, ARMVCCThen, (VTI.Pred VCCR:$mask),
3373 (VTI.Vec MQPR:$inactive)))>;
3378 defm MVE_VCADDf16 : MVE_VCADD_m<MVE_v8f16, 0b0>;
3379 defm MVE_VCADDf32 : MVE_VCADD_m<MVE_v4f32, 0b1, "@earlyclobber $Qd">;
// VABD (floating point): absolute difference. The unpredicated intrinsic
// int_arm_mve_vabd takes extra trailing arguments (partially elided in
// this chunk) and the predicated form carries an explicit (i32 0) before
// the mask, matched literally below.
3381 class MVE_VABD_fp<string suffix, bit size>
3382 : MVE_float<"vabd", suffix, (outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm),
3383 "$Qd, $Qn, $Qm", vpred_r, ""> {
3388 let Inst{25-23} = 0b110;
3389 let Inst{22} = Qd{3};
3391 let Inst{20} = size;
3392 let Inst{19-17} = Qn{2-0};
3394 let Inst{15-13} = Qd{2-0};
3395 let Inst{11-8} = 0b1101;
3396 let Inst{7} = Qn{3};
3398 let validForTailPredication = 1;
3401 multiclass MVE_VABDT_fp_m<MVEVectorVTInfo VTI,
3402 Intrinsic unpred_int, Intrinsic pred_int> {
3403 def "" : MVE_VABD_fp<VTI.Suffix, VTI.Size{0}>;
3404 defvar Inst = !cast<Instruction>(NAME);
3406 let Predicates = [HasMVEFloat] in {
3407 def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
3409 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
3410 def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
3411 (i32 0), (VTI.Pred VCCR:$mask),
3412 (VTI.Vec MQPR:$inactive))),
3413 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
3414 ARMVCCThen, (VTI.Pred VCCR:$mask),
3415 (VTI.Vec MQPR:$inactive)))>;
3419 multiclass MVE_VABD_fp_m<MVEVectorVTInfo VTI>
3420 : MVE_VABDT_fp_m<VTI, int_arm_mve_vabd, int_arm_mve_abd_predicated>;
3422 defm MVE_VABDf32 : MVE_VABD_fp_m<MVE_v4f32>;
3423 defm MVE_VABDf16 : MVE_VABD_fp_m<MVE_v8f16>;
// VCVT between floating point and fixed point, with a fixed-point position
// immediate $imm6. The custom asm operand classes below enforce the
// architectural 1..Bits immediate range and emit a tailored diagnostic.
3425 class MVE_VCVT_fix<string suffix, bit fsi, bit U, bit op,
3426 Operand imm_operand_type, list<dag> pattern=[]>
3427 : MVE_float<"vcvt", suffix,
3428 (outs MQPR:$Qd), (ins MQPR:$Qm, imm_operand_type:$imm6),
3429 "$Qd, $Qm, $imm6", vpred_r, "", pattern> {
3434 let Inst{25-23} = 0b111;
3435 let Inst{22} = Qd{3};
3437 let Inst{19-16} = imm6{3-0};
3438 let Inst{15-13} = Qd{2-0};
3439 let Inst{11-10} = 0b11;
3445 let DecoderMethod = "DecodeMVEVCVTt1fp";
3446 let validForTailPredication = 1;
// Asm parser operand class: immediate must be in [1, Bits].
3449 class MVE_VCVT_imm_asmop<int Bits> : AsmOperandClass {
3450 let PredicateMethod = "isImmediate<1," # Bits # ">";
3451 let DiagnosticString =
3452 "MVE fixed-point immediate operand must be between 1 and " # Bits;
3453 let Name = "MVEVcvtImm" # Bits;
3454 let RenderMethod = "addImmOperands";
// Operand wrapper pairing the asm class with the NEON-style encoder and
// a dedicated decoder for the vcvt immediate.
3456 class MVE_VCVT_imm<int Bits>: Operand<i32> {
3457 let ParserMatchClass = MVE_VCVT_imm_asmop<Bits>;
3458 let EncoderMethod = "getNEONVcvtImm32OpValue";
3459 let DecoderMethod = "DecodeVCVTImmOperand";
// 32-bit fixed-point conversions need the fifth immediate bit (imm6{4})
// placed in Inst{20}; the 16-bit variant only uses imm6{3-0}.
3462 class MVE_VCVT_fix_f32<string suffix, bit U, bit op>
3463 : MVE_VCVT_fix<suffix, 0b1, U, op, MVE_VCVT_imm<32>> {
3464 let Inst{20} = imm6{4};
3466 class MVE_VCVT_fix_f16<string suffix, bit U, bit op>
3467 : MVE_VCVT_fix<suffix, 0b0, U, op, MVE_VCVT_imm<16>> {
// All eight direction/signedness combinations: U picks signed vs unsigned
// integer side, op picks int->float (0) vs float->int (1), per the suffixes.
3471 def MVE_VCVTf16s16_fix : MVE_VCVT_fix_f16<"f16.s16", 0b0, 0b0>;
3472 def MVE_VCVTs16f16_fix : MVE_VCVT_fix_f16<"s16.f16", 0b0, 0b1>;
3473 def MVE_VCVTf16u16_fix : MVE_VCVT_fix_f16<"f16.u16", 0b1, 0b0>;
3474 def MVE_VCVTu16f16_fix : MVE_VCVT_fix_f16<"u16.f16", 0b1, 0b1>;
3475 def MVE_VCVTf32s32_fix : MVE_VCVT_fix_f32<"f32.s32", 0b0, 0b0>;
3476 def MVE_VCVTs32f32_fix : MVE_VCVT_fix_f32<"s32.f32", 0b0, 0b1>;
3477 def MVE_VCVTf32u32_fix : MVE_VCVT_fix_f32<"f32.u32", 0b1, 0b0>;
3478 def MVE_VCVTu32f32_fix : MVE_VCVT_fix_f32<"u32.f32", 0b1, 0b1>;
// VCVT<anpm>: float-to-int conversion with an explicit rounding-mode
// letter in the mnemonic ("a"/"n"/"p"/"m", encoded in rm).
3480 class MVE_VCVT_fp_int_anpm<string suffix, bits<2> size, bit op, string anpm,
3481 bits<2> rm, list<dag> pattern=[]>
3482 : MVE_float<!strconcat("vcvt", anpm), suffix, (outs MQPR:$Qd),
3483 (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
3487 let Inst{25-23} = 0b111;
3488 let Inst{22} = Qd{3};
3489 let Inst{21-20} = 0b11;
3490 let Inst{19-18} = size;
3491 let Inst{17-16} = 0b11;
3492 let Inst{15-13} = Qd{2-0};
3493 let Inst{12-10} = 0b000;
3497 let validForTailPredication = 1;
3500 multiclass MVE_VCVT_fp_int_anpm_multi<string suffix, bits<2> size, bit op,
3501 list<dag> pattern=[]> {
3502 def a : MVE_VCVT_fp_int_anpm<suffix, size, op, "a", 0b00>;
3503 def n : MVE_VCVT_fp_int_anpm<suffix, size, op, "n", 0b01>;
3504 def p : MVE_VCVT_fp_int_anpm<suffix, size, op, "p", 0b10>;
3505 def m : MVE_VCVT_fp_int_anpm<suffix, size, op, "m", 0b11>;
3508 // This defines instructions such as MVE_VCVTu16f16a, with an explicit
3509 // rounding-mode suffix on the mnemonic. The class below will define
3510 // the bare MVE_VCVTu16f16 (with implied rounding toward zero).
3511 defm MVE_VCVTs16f16 : MVE_VCVT_fp_int_anpm_multi<"s16.f16", 0b01, 0b0>;
3512 defm MVE_VCVTu16f16 : MVE_VCVT_fp_int_anpm_multi<"u16.f16", 0b01, 0b1>;
3513 defm MVE_VCVTs32f32 : MVE_VCVT_fp_int_anpm_multi<"s32.f32", 0b10, 0b0>;
3514 defm MVE_VCVTu32f32 : MVE_VCVT_fp_int_anpm_multi<"u32.f32", 0b10, 0b1>;
// VCVT (no rounding-mode suffix) between floating point and integer; the
// 2-bit op selects direction and signedness, spelled out in the defs below.
3516 class MVE_VCVT_fp_int<string suffix, bits<2> size, bits<2> op,
3517 list<dag> pattern=[]>
3518 : MVE_float<"vcvt", suffix, (outs MQPR:$Qd),
3519 (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
3523 let Inst{25-23} = 0b111;
3524 let Inst{22} = Qd{3};
3525 let Inst{21-20} = 0b11;
3526 let Inst{19-18} = size;
3527 let Inst{17-16} = 0b11;
3528 let Inst{15-13} = Qd{2-0};
3529 let Inst{12-9} = 0b0011;
3532 let validForTailPredication = 1;
3535 // The unsuffixed VCVT for float->int implicitly rounds toward zero,
3536 // which I reflect here in the llvm instruction names
3537 def MVE_VCVTs16f16z : MVE_VCVT_fp_int<"s16.f16", 0b01, 0b10>;
3538 def MVE_VCVTu16f16z : MVE_VCVT_fp_int<"u16.f16", 0b01, 0b11>;
3539 def MVE_VCVTs32f32z : MVE_VCVT_fp_int<"s32.f32", 0b10, 0b10>;
3540 def MVE_VCVTu32f32z : MVE_VCVT_fp_int<"u32.f32", 0b10, 0b11>;
3541 // Whereas VCVT for int->float rounds to nearest
3542 def MVE_VCVTf16s16n : MVE_VCVT_fp_int<"f16.s16", 0b01, 0b00>;
3543 def MVE_VCVTf16u16n : MVE_VCVT_fp_int<"f16.u16", 0b01, 0b01>;
3544 def MVE_VCVTf32s32n : MVE_VCVT_fp_int<"f32.s32", 0b10, 0b00>;
3545 def MVE_VCVTf32u32n : MVE_VCVT_fp_int<"f32.u32", 0b10, 0b01>;
// Map the generic conversion nodes onto the matching VCVT forms:
// fp_to_sint/fp_to_uint use the round-toward-zero ("z") instructions,
// sint_to_fp/uint_to_fp the round-to-nearest ("n") ones.
3547 let Predicates = [HasMVEFloat] in {
3548 def : Pat<(v4i32 (fp_to_sint (v4f32 MQPR:$src))),
3549 (v4i32 (MVE_VCVTs32f32z (v4f32 MQPR:$src)))>;
3550 def : Pat<(v4i32 (fp_to_uint (v4f32 MQPR:$src))),
3551 (v4i32 (MVE_VCVTu32f32z (v4f32 MQPR:$src)))>;
3552 def : Pat<(v8i16 (fp_to_sint (v8f16 MQPR:$src))),
3553 (v8i16 (MVE_VCVTs16f16z (v8f16 MQPR:$src)))>;
3554 def : Pat<(v8i16 (fp_to_uint (v8f16 MQPR:$src))),
3555 (v8i16 (MVE_VCVTu16f16z (v8f16 MQPR:$src)))>;
3556 def : Pat<(v4f32 (sint_to_fp (v4i32 MQPR:$src))),
3557 (v4f32 (MVE_VCVTf32s32n (v4i32 MQPR:$src)))>;
3558 def : Pat<(v4f32 (uint_to_fp (v4i32 MQPR:$src))),
3559 (v4f32 (MVE_VCVTf32u32n (v4i32 MQPR:$src)))>;
3560 def : Pat<(v8f16 (sint_to_fp (v8i16 MQPR:$src))),
3561 (v8f16 (MVE_VCVTf16s16n (v8i16 MQPR:$src)))>;
3562 def : Pat<(v8f16 (uint_to_fp (v8i16 MQPR:$src))),
3563 (v8f16 (MVE_VCVTf16u16n (v8i16 MQPR:$src)))>;
// VABS/VNEG (floating point): unary abs/negate, distinguished only by the
// 'negate' bit placed in Inst{7}.
3566 class MVE_VABSNEG_fp<string iname, string suffix, bits<2> size, bit negate,
3567 list<dag> pattern=[]>
3568 : MVE_float<iname, suffix, (outs MQPR:$Qd),
3569 (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
3573 let Inst{25-23} = 0b111;
3574 let Inst{22} = Qd{3};
3575 let Inst{21-20} = 0b11;
3576 let Inst{19-18} = size;
3577 let Inst{17-16} = 0b01;
3578 let Inst{15-13} = Qd{2-0};
3579 let Inst{11-8} = 0b0111;
3580 let Inst{7} = negate;
3582 let validForTailPredication = 1;
3585 def MVE_VABSf16 : MVE_VABSNEG_fp<"vabs", "f16", 0b01, 0b0>;
3586 def MVE_VABSf32 : MVE_VABSNEG_fp<"vabs", "f32", 0b10, 0b0>;
// Select generic fabs/fneg nodes straight onto these instructions.
3588 let Predicates = [HasMVEFloat] in {
3589 def : Pat<(v8f16 (fabs MQPR:$src)),
3590 (MVE_VABSf16 MQPR:$src)>;
3591 def : Pat<(v4f32 (fabs MQPR:$src)),
3592 (MVE_VABSf32 MQPR:$src)>;
3595 def MVE_VNEGf16 : MVE_VABSNEG_fp<"vneg", "f16", 0b01, 0b1>;
3596 def MVE_VNEGf32 : MVE_VABSNEG_fp<"vneg", "f32", 0b10, 0b1>;
3598 let Predicates = [HasMVEFloat] in {
3599 def : Pat<(v8f16 (fneg MQPR:$src)),
3600 (MVE_VNEGf16 MQPR:$src)>;
3601 def : Pat<(v4f32 (fneg MQPR:$src)),
3602 (MVE_VNEGf32 MQPR:$src)>;
// VMAXNMA/VMINNMA: accumulating max/min; the destination doubles as the
// first source ($Qd = $Qd_src). bit_12 selects min vs max, the size bit
// lives in Inst{28}.
3605 class MVE_VMAXMINNMA<string iname, string suffix, bit size, bit bit_12,
3606 list<dag> pattern=[]>
3607 : MVE_f<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm),
3608 NoItinerary, iname, suffix, "$Qd, $Qm", vpred_n, "$Qd = $Qd_src",
3613 let Inst{28} = size;
3614 let Inst{25-23} = 0b100;
3615 let Inst{22} = Qd{3};
3616 let Inst{21-16} = 0b111111;
3617 let Inst{15-13} = Qd{2-0};
3618 let Inst{12} = bit_12;
3619 let Inst{11-6} = 0b111010;
3620 let Inst{5} = Qm{3};
3622 let Inst{3-1} = Qm{2-0};
3626 def MVE_VMAXNMAf32 : MVE_VMAXMINNMA<"vmaxnma", "f32", 0b0, 0b0>;
3627 def MVE_VMAXNMAf16 : MVE_VMAXMINNMA<"vmaxnma", "f16", 0b1, 0b0>;
3629 def MVE_VMINNMAf32 : MVE_VMAXMINNMA<"vminnma", "f32", 0b0, 0b1>;
3630 def MVE_VMINNMAf16 : MVE_VMAXMINNMA<"vminnma", "f16", 0b1, 0b1>;
3632 // end of MVE Floating Point instructions
3634 // start of MVE compares
// VCMP (vector, vector): writes the per-lane comparison result to VCCR:$P0.
// The 3-bit condition code $fc is scattered across Inst{12}, {7} and {0};
// predtype supplies the per-type set of legal condition codes.
3636 class MVE_VCMPqq<string suffix, bit bit_28, bits<2> bits_21_20,
3637 VCMPPredicateOperand predtype, list<dag> pattern=[]>
3638 : MVE_p<(outs VCCR:$P0), (ins MQPR:$Qn, MQPR:$Qm, predtype:$fc),
3639 NoItinerary, "vcmp", suffix, "$fc, $Qn, $Qm", vpred_n, "", pattern> {
3640 // Base class for comparing two vector registers
3645 let Inst{28} = bit_28;
3646 let Inst{25-22} = 0b1000;
3647 let Inst{21-20} = bits_21_20;
3648 let Inst{19-17} = Qn{2-0};
3649 let Inst{16-13} = 0b1000;
3650 let Inst{12} = fc{2};
3651 let Inst{11-8} = 0b1111;
3652 let Inst{7} = fc{0};
3654 let Inst{5} = Qm{3};
3656 let Inst{3-1} = Qm{2-0};
3657 let Inst{0} = fc{1};
// Clear the inherited tied-operand constraint: $P0 is an output only.
3659 let Constraints = "";
3661 // We need a custom decoder method for these instructions because of
3662 // the output VCCR operand, which isn't encoded in the instruction
3663 // bits anywhere (there is only one choice for it) but has to be
3664 // included in the MC operands so that codegen will be able to track
3665 // its data flow between instructions, spill/reload it when
3666 // necessary, etc. There seems to be no way to get the Tablegen
3667 // decoder to emit an operand that isn't affected by any instruction
3669 let DecoderMethod = "DecodeMVEVCMP<false," # predtype.DecoderMethod # ">";
3670 let validForTailPredication = 1;
// Per-type VCMP subclasses: f (float, bit 28 carries the size and the
// HasMVEFloat predicate applies), and i/u/s integer forms which fix bit 28
// to 1 and differ only in the legal condition-code set (predtype).
3673 class MVE_VCMPqqf<string suffix, bit size>
3674 : MVE_VCMPqq<suffix, size, 0b11, pred_basic_fp> {
3675 let Predicates = [HasMVEFloat];
3678 class MVE_VCMPqqi<string suffix, bits<2> size>
3679 : MVE_VCMPqq<suffix, 0b1, size, pred_basic_i> {
3684 class MVE_VCMPqqu<string suffix, bits<2> size>
3685 : MVE_VCMPqq<suffix, 0b1, size, pred_basic_u> {
3690 class MVE_VCMPqqs<string suffix, bits<2> size>
3691 : MVE_VCMPqq<suffix, 0b1, size, pred_basic_s> {
3695 def MVE_VCMPf32 : MVE_VCMPqqf<"f32", 0b0>;
3696 def MVE_VCMPf16 : MVE_VCMPqqf<"f16", 0b1>;
3698 def MVE_VCMPi8 : MVE_VCMPqqi<"i8", 0b00>;
3699 def MVE_VCMPi16 : MVE_VCMPqqi<"i16", 0b01>;
3700 def MVE_VCMPi32 : MVE_VCMPqqi<"i32", 0b10>;
3702 def MVE_VCMPu8 : MVE_VCMPqqu<"u8", 0b00>;
3703 def MVE_VCMPu16 : MVE_VCMPqqu<"u16", 0b01>;
3704 def MVE_VCMPu32 : MVE_VCMPqqu<"u32", 0b10>;
3706 def MVE_VCMPs8 : MVE_VCMPqqs<"s8", 0b00>;
3707 def MVE_VCMPs16 : MVE_VCMPqqs<"s16", 0b01>;
3708 def MVE_VCMPs32 : MVE_VCMPqqs<"s32", 0b10>;
// Base class for vector-scalar VCMP: compares $Qn against the GPR $Rm
// (which may be ZR) under condition $fc; result goes to VPR.P0 (VCCR).
3710 class MVE_VCMPqr<string suffix, bit bit_28, bits<2> bits_21_20,
3711 VCMPPredicateOperand predtype, list<dag> pattern=[]>
3712 : MVE_p<(outs VCCR:$P0), (ins MQPR:$Qn, GPRwithZR:$Rm, predtype:$fc),
3713 NoItinerary, "vcmp", suffix, "$fc, $Qn, $Rm", vpred_n, "", pattern> {
3714 // Base class for comparing a vector register with a scalar
3719 let Inst{28} = bit_28;
3720 let Inst{25-22} = 0b1000;
3721 let Inst{21-20} = bits_21_20;
3722 let Inst{19-17} = Qn{2-0};
3723 let Inst{16-13} = 0b1000;
// Condition code fc occupies bits 12, 7 and 5 here (bit 5 is free
// because the scalar operand only needs the 4-bit field 3-0).
3724 let Inst{12} = fc{2};
3725 let Inst{11-8} = 0b1111;
3726 let Inst{7} = fc{0};
3728 let Inst{5} = fc{1};
3730 let Inst{3-0} = Rm{3-0};
3732 let Constraints = "";
3733 // Custom decoder method, for the same reason as MVE_VCMPqq
3734 let DecoderMethod = "DecodeMVEVCMP<true," # predtype.DecoderMethod # ">";
3735 let validForTailPredication = 1;
// Type-specific vector-scalar subclasses, mirroring the vector-vector ones
// above; the instantiated names carry a trailing "r" (register scalar).
3738 class MVE_VCMPqrf<string suffix, bit size>
3739 : MVE_VCMPqr<suffix, size, 0b11, pred_basic_fp> {
3740 let Predicates = [HasMVEFloat];
3743 class MVE_VCMPqri<string suffix, bits<2> size>
3744 : MVE_VCMPqr<suffix, 0b1, size, pred_basic_i> {
3749 class MVE_VCMPqru<string suffix, bits<2> size>
3750 : MVE_VCMPqr<suffix, 0b1, size, pred_basic_u> {
3755 class MVE_VCMPqrs<string suffix, bits<2> size>
3756 : MVE_VCMPqr<suffix, 0b1, size, pred_basic_s> {
3760 def MVE_VCMPf32r : MVE_VCMPqrf<"f32", 0b0>;
3761 def MVE_VCMPf16r : MVE_VCMPqrf<"f16", 0b1>;
3763 def MVE_VCMPi8r : MVE_VCMPqri<"i8", 0b00>;
3764 def MVE_VCMPi16r : MVE_VCMPqri<"i16", 0b01>;
3765 def MVE_VCMPi32r : MVE_VCMPqri<"i32", 0b10>;
3767 def MVE_VCMPu8r : MVE_VCMPqru<"u8", 0b00>;
3768 def MVE_VCMPu16r : MVE_VCMPqru<"u16", 0b01>;
3769 def MVE_VCMPu32r : MVE_VCMPqru<"u32", 0b10>;
3771 def MVE_VCMPs8r : MVE_VCMPqrs<"s8", 0b00>;
3772 def MVE_VCMPs16r : MVE_VCMPqrs<"s16", 0b01>;
3773 def MVE_VCMPs32r : MVE_VCMPqrs<"s32", 0b10>;
// Selection patterns for integer compare-against-zero (ARMvcmpz), mapping
// to the vector-scalar VCMP with ZR as the scalar operand. The and-with-
// predicate forms fold a preceding mask into a predicated (ARMVCCThen) VCMP.
3775 multiclass unpred_vcmp_z<string suffix, PatLeaf fc> {
3776 def i8 : Pat<(v16i1 (ARMvcmpz (v16i8 MQPR:$v1), fc)),
3777 (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc))>;
3778 def i16 : Pat<(v8i1 (ARMvcmpz (v8i16 MQPR:$v1), fc)),
3779 (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc))>;
3780 def i32 : Pat<(v4i1 (ARMvcmpz (v4i32 MQPR:$v1), fc)),
3781 (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc))>;
3783 def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmpz (v16i8 MQPR:$v1), fc)))),
3784 (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
3785 def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8i16 MQPR:$v1), fc)))),
3786 (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
3787 def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4i32 MQPR:$v1), fc)))),
3788 (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
// Selection patterns for integer vector-vector compares (ARMvcmp), plus
// vector-vs-duplicated-scalar forms that use the "r" instructions directly,
// and and-with-predicate variants that fold the mask into a predicated VCMP.
3791 multiclass unpred_vcmp_r<string suffix, PatLeaf fc> {
3792 def i8 : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc)),
3793 (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc))>;
3794 def i16 : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc)),
3795 (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc))>;
3796 def i32 : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc)),
3797 (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc))>;
3799 def i8r : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), fc)),
3800 (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc))>;
3801 def i16r : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), fc)),
3802 (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc))>;
3803 def i32r : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), fc)),
3804 (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc))>;
3806 def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc)))),
3807 (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
3808 def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc)))),
3809 (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
3810 def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc)))),
3811 (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
3813 def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), fc)))),
3814 (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
3815 def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), fc)))),
3816 (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
3817 def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), fc)))),
3818 (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
// Selection patterns for float compare-against-zero (ARMvcmpz), using the
// vector-scalar VCMP instructions with ZR; the and-with-predicate forms fold
// an existing mask into a predicated (ARMVCCThen) compare.
3821 multiclass unpred_vcmpf_z<PatLeaf fc> {
3822 def f16 : Pat<(v8i1 (ARMvcmpz (v8f16 MQPR:$v1), fc)),
3823 (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc))>;
3824 def f32 : Pat<(v4i1 (ARMvcmpz (v4f32 MQPR:$v1), fc)),
3825 (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc))>;
// Bug fix: the predicated v8f16 pattern previously selected MVE_VCMPf32r,
// i.e. an f32 compare for an f16 vector; it must use MVE_VCMPf16r to match
// the unpredicated f16 pattern above.
3827 def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8f16 MQPR:$v1), fc)))),
3828 (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
3829 def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4f32 MQPR:$v1), fc)))),
3830 (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
// Selection patterns for float vector-vector compares, plus vector vs
// duplicated FP scalar (moved to a GPR via COPY_TO_REGCLASS) and the
// and-with-predicate folds into predicated compares.
// NOTE(review): the parameter is declared 'int fc' here but 'PatLeaf fc' in
// the sibling multiclasses above — confirm this asymmetry is intentional.
3833 multiclass unpred_vcmpf_r<int fc> {
3834 def f16 : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc)),
3835 (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc))>;
3836 def f32 : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc)),
3837 (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc))>;
3839 def f16r : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), fc)),
3840 (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc))>;
3841 def f32r : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), fc)),
3842 (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc))>;
3844 def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc)))),
3845 (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
3846 def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc)))),
3847 (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
3849 def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), fc)))),
3850 (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc, ARMVCCThen, VCCR:$p1))>;
3851 def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), fc)))),
3852 (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc, ARMVCCThen, VCCR:$p1))>;
// Instantiate the compare pattern multiclasses for every supported
// condition code: integer/unsigned under HasMVEInt, float under HasMVEFloat.
3855 let Predicates = [HasMVEInt] in {
3856 defm MVE_VCEQZ : unpred_vcmp_z<"i", ARMCCeq>;
3857 defm MVE_VCNEZ : unpred_vcmp_z<"i", ARMCCne>;
3858 defm MVE_VCGEZ : unpred_vcmp_z<"s", ARMCCge>;
3859 defm MVE_VCLTZ : unpred_vcmp_z<"s", ARMCClt>;
3860 defm MVE_VCGTZ : unpred_vcmp_z<"s", ARMCCgt>;
3861 defm MVE_VCLEZ : unpred_vcmp_z<"s", ARMCCle>;
3862 defm MVE_VCGTUZ : unpred_vcmp_z<"u", ARMCChi>;
3863 defm MVE_VCGEUZ : unpred_vcmp_z<"u", ARMCChs>;
3865 defm MVE_VCEQ : unpred_vcmp_r<"i", ARMCCeq>;
3866 defm MVE_VCNE : unpred_vcmp_r<"i", ARMCCne>;
3867 defm MVE_VCGE : unpred_vcmp_r<"s", ARMCCge>;
3868 defm MVE_VCLT : unpred_vcmp_r<"s", ARMCClt>;
3869 defm MVE_VCGT : unpred_vcmp_r<"s", ARMCCgt>;
3870 defm MVE_VCLE : unpred_vcmp_r<"s", ARMCCle>;
3871 defm MVE_VCGTU : unpred_vcmp_r<"u", ARMCChi>;
3872 defm MVE_VCGEU : unpred_vcmp_r<"u", ARMCChs>;
3875 let Predicates = [HasMVEFloat] in {
3876 defm MVE_VFCEQZ : unpred_vcmpf_z<ARMCCeq>;
3877 defm MVE_VFCNEZ : unpred_vcmpf_z<ARMCCne>;
3878 defm MVE_VFCGEZ : unpred_vcmpf_z<ARMCCge>;
3879 defm MVE_VFCLTZ : unpred_vcmpf_z<ARMCClt>;
3880 defm MVE_VFCGTZ : unpred_vcmpf_z<ARMCCgt>;
3881 defm MVE_VFCLEZ : unpred_vcmpf_z<ARMCCle>;
3883 defm MVE_VFCEQ : unpred_vcmpf_r<ARMCCeq>;
3884 defm MVE_VFCNE : unpred_vcmpf_r<ARMCCne>;
3885 defm MVE_VFCGE : unpred_vcmpf_r<ARMCCge>;
3886 defm MVE_VFCLT : unpred_vcmpf_r<ARMCClt>;
3887 defm MVE_VFCGT : unpred_vcmpf_r<ARMCCgt>;
3888 defm MVE_VFCLE : unpred_vcmpf_r<ARMCCle>;
3892 // Extra "worst case" and/or/xor patterns, going into and out of GPR
// Lower a logical op on predicate vectors by copying each VCCR operand to a
// GPR, performing the scalar t2ORR/t2AND/t2EOR, and copying the result back.
3893 multiclass two_predops<SDPatternOperator opnode, Instruction insn> {
3894 def v16i1 : Pat<(v16i1 (opnode (v16i1 VCCR:$p1), (v16i1 VCCR:$p2))),
3895 (v16i1 (COPY_TO_REGCLASS
3896 (insn (i32 (COPY_TO_REGCLASS (v16i1 VCCR:$p1), rGPR)),
3897 (i32 (COPY_TO_REGCLASS (v16i1 VCCR:$p2), rGPR))),
3899 def v8i1 : Pat<(v8i1 (opnode (v8i1 VCCR:$p1), (v8i1 VCCR:$p2))),
3900 (v8i1 (COPY_TO_REGCLASS
3901 (insn (i32 (COPY_TO_REGCLASS (v8i1 VCCR:$p1), rGPR)),
3902 (i32 (COPY_TO_REGCLASS (v8i1 VCCR:$p2), rGPR))),
3904 def v4i1 : Pat<(v4i1 (opnode (v4i1 VCCR:$p1), (v4i1 VCCR:$p2))),
3905 (v4i1 (COPY_TO_REGCLASS
3906 (insn (i32 (COPY_TO_REGCLASS (v4i1 VCCR:$p1), rGPR)),
3907 (i32 (COPY_TO_REGCLASS (v4i1 VCCR:$p2), rGPR))),
3911 let Predicates = [HasMVEInt] in {
3912 defm POR : two_predops<or, t2ORRrr>;
3913 defm PAND : two_predops<and, t2ANDrr>;
3914 defm PEOR : two_predops<xor, t2EORrr>;
3917 // Occasionally we need to cast between a i32 and a boolean vector, for
3918 // example when moving between rGPR and VPR.P0 as part of predicate vector
3919 // shuffles. We also sometimes need to cast between different predicate
3920 // vector types (v4i1<>v8i1, etc.) also as part of lowering vector shuffles.
3922 def predicate_cast : SDNode<"ARMISD::PREDICATE_CAST", SDTUnaryOp>;
// All casts between i32 and the three predicate vector types (and between
// the predicate types themselves) lower to a plain COPY_TO_REGCLASS to VCCR.
3924 let Predicates = [HasMVEInt] in {
3925 foreach VT = [ v4i1, v8i1, v16i1 ] in {
3926 def : Pat<(i32 (predicate_cast (VT VCCR:$src))),
3927 (i32 (COPY_TO_REGCLASS (VT VCCR:$src), VCCR))>;
3928 def : Pat<(VT (predicate_cast (i32 VCCR:$src))),
3929 (VT (COPY_TO_REGCLASS (i32 VCCR:$src), VCCR))>;
3931 foreach VT2 = [ v4i1, v8i1, v16i1 ] in
3932 def : Pat<(VT (predicate_cast (VT2 VCCR:$src))),
3933 (VT (COPY_TO_REGCLASS (VT2 VCCR:$src), VCCR))>;
3937 // end of MVE compares
3939 // start of MVE_qDest_qSrc
// Shared encoding skeleton for instructions with a Q destination and a Q
// source: places Qd in bits {22,15-13} and Qm in bits {5,3-1}.
3941 class MVE_qDest_qSrc<string iname, string suffix, dag oops, dag iops,
3942 string ops, vpred_ops vpred, string cstr,
3943 list<dag> pattern=[]>
3944 : MVE_p<oops, iops, NoItinerary, iname, suffix,
3945 ops, vpred, cstr, pattern> {
3949 let Inst{25-23} = 0b100;
3950 let Inst{22} = Qd{3};
3951 let Inst{15-13} = Qd{2-0};
3952 let Inst{11-9} = 0b111;
3954 let Inst{5} = Qm{3};
3956 let Inst{3-1} = Qm{2-0};
// VQ(R)DML{A,S}DH(X): accumulating doubling multiply; exch swaps operand
// halves, round selects the rounding form, subtract selects the VQDMLSDH
// family. $Qd is tied to $Qd_src ('$Qd = $Qd_src') because it accumulates.
3959 class MVE_VQxDMLxDH<string iname, bit exch, bit round, bit subtract,
3960 string suffix, bits<2> size, string cstr="", list<dag> pattern=[]>
3961 : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
3962 (ins MQPR:$Qd_src, MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
3963 vpred_n, "$Qd = $Qd_src"#cstr, pattern> {
3966 let Inst{28} = subtract;
3967 let Inst{21-20} = size;
3968 let Inst{19-17} = Qn{2-0};
3970 let Inst{12} = exch;
3972 let Inst{7} = Qn{3};
3973 let Inst{0} = round;
// One instruction per element size; the s32 form earlyclobbers $Qd.
3976 multiclass MVE_VQxDMLxDH_multi<string iname, bit exch,
3977 bit round, bit subtract> {
3978 def s8 : MVE_VQxDMLxDH<iname, exch, round, subtract, "s8", 0b00>;
3979 def s16 : MVE_VQxDMLxDH<iname, exch, round, subtract, "s16", 0b01>;
3980 def s32 : MVE_VQxDMLxDH<iname, exch, round, subtract, "s32", 0b10, ",@earlyclobber $Qd">;
// All eight combinations of {plain,rounding} x {exchanged,not} x {add,sub}.
3983 defm MVE_VQDMLADH : MVE_VQxDMLxDH_multi<"vqdmladh", 0b0, 0b0, 0b0>;
3984 defm MVE_VQDMLADHX : MVE_VQxDMLxDH_multi<"vqdmladhx", 0b1, 0b0, 0b0>;
3985 defm MVE_VQRDMLADH : MVE_VQxDMLxDH_multi<"vqrdmladh", 0b0, 0b1, 0b0>;
3986 defm MVE_VQRDMLADHX : MVE_VQxDMLxDH_multi<"vqrdmladhx", 0b1, 0b1, 0b0>;
3987 defm MVE_VQDMLSDH : MVE_VQxDMLxDH_multi<"vqdmlsdh", 0b0, 0b0, 0b1>;
3988 defm MVE_VQDMLSDHX : MVE_VQxDMLxDH_multi<"vqdmlsdhx", 0b1, 0b0, 0b1>;
3989 defm MVE_VQRDMLSDH : MVE_VQxDMLxDH_multi<"vqrdmlsdh", 0b0, 0b1, 0b1>;
3990 defm MVE_VQRDMLSDHX : MVE_VQxDMLxDH_multi<"vqrdmlsdhx", 0b1, 0b1, 0b1>;
// VCMUL: complex multiply with rotation operand $rot; the two rot bits land
// in Inst{12} and Inst{0}. Float-only (HasMVEFloat).
3992 class MVE_VCMUL<string iname, string suffix, bit size, string cstr="">
3993 : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
3994 (ins MQPR:$Qn, MQPR:$Qm, complexrotateop:$rot),
3995 "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, []> {
3999 let Inst{28} = size;
4000 let Inst{21-20} = 0b11;
4001 let Inst{19-17} = Qn{2-0};
4003 let Inst{12} = rot{1};
4005 let Inst{7} = Qn{3};
4006 let Inst{0} = rot{0};
4008 let Predicates = [HasMVEFloat];
// Pairs the instruction with unpredicated and predicated intrinsic patterns.
4011 multiclass MVE_VCMUL_m<string iname, MVEVectorVTInfo VTI,
4012 bit size, string cstr=""> {
4013 def "" : MVE_VCMUL<iname, VTI.Suffix, size, cstr>;
4014 defvar Inst = !cast<Instruction>(NAME);
4016 let Predicates = [HasMVEFloat] in {
4017 def : Pat<(VTI.Vec (int_arm_mve_vcmulq
4018 imm:$rot, (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
4019 (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
4022 def : Pat<(VTI.Vec (int_arm_mve_vcmulq_predicated
4023 imm:$rot, (VTI.Vec MQPR:$inactive),
4024 (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
4025 (VTI.Pred VCCR:$mask))),
4026 (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
4027 imm:$rot, ARMVCCThen, (VTI.Pred VCCR:$mask),
4028 (VTI.Vec MQPR:$inactive)))>;
4033 defm MVE_VCMULf16 : MVE_VCMUL_m<"vcmul", MVE_v8f16, 0b0>;
4034 defm MVE_VCMULf32 : MVE_VCMUL_m<"vcmul", MVE_v4f32, 0b1, "@earlyclobber $Qd">;
// VMULLB/VMULLT: widening multiply of the bottom (T=0) or top (T=1) halves.
4036 class MVE_VMULL<string iname, string suffix, bit bit_28, bits<2> bits_21_20,
4037 bit T, string cstr, list<dag> pattern=[]>
4038 : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
4039 (ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
4040 vpred_r, cstr, pattern> {
4045 let Inst{28} = bit_28;
4046 let Inst{21-20} = bits_21_20;
4047 let Inst{19-17} = Qn{2-0};
4051 let Inst{7} = Qn{3};
4053 let validForTailPredication = 1;
// Instruction plus intrinsic patterns. For polynomial types the intrinsic
// has no unsigned-flag argument, so uflag is an empty dag fragment; !con
// splices the optional flag and the Top selector into the pattern.
4056 multiclass MVE_VMULL_m<MVEVectorVTInfo VTI,
4057 SDNode unpred_op, Intrinsic pred_int,
4058 bit Top, string cstr=""> {
4059 def "" : MVE_VMULL<"vmull" # !if(Top, "t", "b"), VTI.Suffix, VTI.Unsigned,
4060 VTI.Size, Top, cstr>;
4061 defvar Inst = !cast<Instruction>(NAME);
4063 let Predicates = [HasMVEInt] in {
4064 defvar uflag = !if(!eq(VTI.SuffixLetter, "p"), (?), (? (i32 VTI.Unsigned)));
4066 // Unpredicated multiply
4067 def : Pat<(VTI.DblVec !con((unpred_op (VTI.Vec MQPR:$Qm),
4068 (VTI.Vec MQPR:$Qn)),
4069 uflag, (? (i32 Top)))),
4070 (VTI.DblVec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
4072 // Predicated multiply
4073 def : Pat<(VTI.DblVec !con((pred_int (VTI.Vec MQPR:$Qm),
4074 (VTI.Vec MQPR:$Qn)),
4075 uflag, (? (i32 Top), (VTI.Pred VCCR:$mask),
4076 (VTI.DblVec MQPR:$inactive)))),
4077 (VTI.DblVec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
4078 ARMVCCThen, (VTI.Pred VCCR:$mask),
4079 (VTI.DblVec MQPR:$inactive)))>;
4083 // For polynomial multiplies, the size bits take the unused value 0b11, and
4084 // the unsigned bit switches to encoding the size.
// Signed widening multiplies; 32-bit forms earlyclobber $Qd because the
// double-width result overlaps its sources.
4086 defm MVE_VMULLBs8 : MVE_VMULL_m<MVE_v16s8, int_arm_mve_vmull,
4087 int_arm_mve_mull_int_predicated, 0b0>;
4088 defm MVE_VMULLTs8 : MVE_VMULL_m<MVE_v16s8, int_arm_mve_vmull,
4089 int_arm_mve_mull_int_predicated, 0b1>;
4090 defm MVE_VMULLBs16 : MVE_VMULL_m<MVE_v8s16, int_arm_mve_vmull,
4091 int_arm_mve_mull_int_predicated, 0b0>;
4092 defm MVE_VMULLTs16 : MVE_VMULL_m<MVE_v8s16, int_arm_mve_vmull,
4093 int_arm_mve_mull_int_predicated, 0b1>;
4094 defm MVE_VMULLBs32 : MVE_VMULL_m<MVE_v4s32, int_arm_mve_vmull,
4095 int_arm_mve_mull_int_predicated, 0b0,
4096 "@earlyclobber $Qd">;
4097 defm MVE_VMULLTs32 : MVE_VMULL_m<MVE_v4s32, int_arm_mve_vmull,
4098 int_arm_mve_mull_int_predicated, 0b1,
4099 "@earlyclobber $Qd">;
// Unsigned widening multiplies.
4101 defm MVE_VMULLBu8 : MVE_VMULL_m<MVE_v16u8, int_arm_mve_vmull,
4102 int_arm_mve_mull_int_predicated, 0b0>;
4103 defm MVE_VMULLTu8 : MVE_VMULL_m<MVE_v16u8, int_arm_mve_vmull,
4104 int_arm_mve_mull_int_predicated, 0b1>;
4105 defm MVE_VMULLBu16 : MVE_VMULL_m<MVE_v8u16, int_arm_mve_vmull,
4106 int_arm_mve_mull_int_predicated, 0b0>;
4107 defm MVE_VMULLTu16 : MVE_VMULL_m<MVE_v8u16, int_arm_mve_vmull,
4108 int_arm_mve_mull_int_predicated, 0b1>;
4109 defm MVE_VMULLBu32 : MVE_VMULL_m<MVE_v4u32, int_arm_mve_vmull,
4110 int_arm_mve_mull_int_predicated, 0b0,
4111 "@earlyclobber $Qd">;
4112 defm MVE_VMULLTu32 : MVE_VMULL_m<MVE_v4u32, int_arm_mve_vmull,
4113 int_arm_mve_mull_int_predicated, 0b1,
4114 "@earlyclobber $Qd">;
// Polynomial widening multiplies use the dedicated poly intrinsics.
4116 defm MVE_VMULLBp8 : MVE_VMULL_m<MVE_v16p8, int_arm_mve_vmull_poly,
4117 int_arm_mve_mull_poly_predicated, 0b0>;
4118 defm MVE_VMULLTp8 : MVE_VMULL_m<MVE_v16p8, int_arm_mve_vmull_poly,
4119 int_arm_mve_mull_poly_predicated, 0b1>;
4120 defm MVE_VMULLBp16 : MVE_VMULL_m<MVE_v8p16, int_arm_mve_vmull_poly,
4121 int_arm_mve_mull_poly_predicated, 0b0>;
4122 defm MVE_VMULLTp16 : MVE_VMULL_m<MVE_v8p16, int_arm_mve_vmull_poly,
4123 int_arm_mve_mull_poly_predicated, 0b1>;
// V(R)MULH: multiply returning the high half of each product; round selects
// the rounding variant (bit 12).
4125 class MVE_VxMULH<string iname, string suffix, bit U, bits<2> size, bit round,
4126 list<dag> pattern=[]>
4127 : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
4128 (ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
4129 vpred_r, "", pattern> {
4133 let Inst{21-20} = size;
4134 let Inst{19-17} = Qn{2-0};
4136 let Inst{12} = round;
4138 let Inst{7} = Qn{3};
// Instruction plus unpredicated/predicated intrinsic selection patterns.
4142 multiclass MVE_VxMULH_m<string iname, MVEVectorVTInfo VTI, SDNode unpred_op,
4143 Intrinsic pred_int, bit round> {
4144 def "" : MVE_VxMULH<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, round>;
4145 defvar Inst = !cast<Instruction>(NAME);
4147 let Predicates = [HasMVEInt] in {
4148 // Unpredicated multiply returning high bits
4149 def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
4150 (i32 VTI.Unsigned))),
4151 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
4153 // Predicated multiply returning high bits
4154 def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
4155 (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
4156 (VTI.Vec MQPR:$inactive))),
4157 (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
4158 ARMVCCThen, (VTI.Pred VCCR:$mask),
4159 (VTI.Vec MQPR:$inactive)))>;
// Convenience wrapper selecting the rounding or non-rounding intrinsics.
4163 multiclass MVE_VMULT<string iname, MVEVectorVTInfo VTI, bit round>
4164 : MVE_VxMULH_m<iname, VTI, !if(round, int_arm_mve_vrmulh, int_arm_mve_vmulh),
4165 !if(round, int_arm_mve_rmulh_predicated,
4166 int_arm_mve_mulh_predicated),
4169 defm MVE_VMULHs8 : MVE_VMULT<"vmulh", MVE_v16s8, 0b0>;
4170 defm MVE_VMULHs16 : MVE_VMULT<"vmulh", MVE_v8s16, 0b0>;
4171 defm MVE_VMULHs32 : MVE_VMULT<"vmulh", MVE_v4s32, 0b0>;
4172 defm MVE_VMULHu8 : MVE_VMULT<"vmulh", MVE_v16u8, 0b0>;
4173 defm MVE_VMULHu16 : MVE_VMULT<"vmulh", MVE_v8u16, 0b0>;
4174 defm MVE_VMULHu32 : MVE_VMULT<"vmulh", MVE_v4u32, 0b0>;
4176 defm MVE_VRMULHs8 : MVE_VMULT<"vrmulh", MVE_v16s8, 0b1>;
4177 defm MVE_VRMULHs16 : MVE_VMULT<"vrmulh", MVE_v8s16, 0b1>;
4178 defm MVE_VRMULHs32 : MVE_VMULT<"vrmulh", MVE_v4s32, 0b1>;
4179 defm MVE_VRMULHu8 : MVE_VMULT<"vrmulh", MVE_v16u8, 0b1>;
4180 defm MVE_VRMULHu16 : MVE_VMULT<"vrmulh", MVE_v8u16, 0b1>;
4181 defm MVE_VRMULHu32 : MVE_VMULT<"vrmulh", MVE_v4u32, 0b1>;
// V(Q)MOVN/VQMOVUN: narrowing move into the bottom (T=0) or top (T=1) half
// of $Qd; the untouched half comes from $Qd_src, hence the tie constraint.
4183 class MVE_VxMOVxN<string iname, string suffix, bit bit_28, bit bit_17,
4184 bits<2> size, bit T, list<dag> pattern=[]>
4185 : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
4186 (ins MQPR:$Qd_src, MQPR:$Qm), "$Qd, $Qm",
4187 vpred_n, "$Qd = $Qd_src", pattern> {
4189 let Inst{28} = bit_28;
4190 let Inst{21-20} = 0b11;
4191 let Inst{19-18} = size;
4192 let Inst{17} = bit_17;
4196 let Inst{7} = !if(!eq(bit_17, 0), 1, 0);
// Defines both the bottom-half ("b") and top-half ("t") instructions.
4200 multiclass MVE_VxMOVxN_halves<string iname, string suffix,
4201 bit bit_28, bit bit_17, bits<2> size> {
4202 def bh : MVE_VxMOVxN<iname # "b", suffix, bit_28, bit_17, size, 0b0>;
4203 def th : MVE_VxMOVxN<iname # "t", suffix, bit_28, bit_17, size, 0b1>;
4206 defm MVE_VMOVNi16 : MVE_VxMOVxN_halves<"vmovn", "i16", 0b1, 0b0, 0b00>;
4207 defm MVE_VMOVNi32 : MVE_VxMOVxN_halves<"vmovn", "i32", 0b1, 0b0, 0b01>;
4208 defm MVE_VQMOVNs16 : MVE_VxMOVxN_halves<"vqmovn", "s16", 0b0, 0b1, 0b00>;
4209 defm MVE_VQMOVNs32 : MVE_VxMOVxN_halves<"vqmovn", "s32", 0b0, 0b1, 0b01>;
4210 defm MVE_VQMOVNu16 : MVE_VxMOVxN_halves<"vqmovn", "u16", 0b1, 0b1, 0b00>;
4211 defm MVE_VQMOVNu32 : MVE_VxMOVxN_halves<"vqmovn", "u32", 0b1, 0b1, 0b01>;
4212 defm MVE_VQMOVUNs16 : MVE_VxMOVxN_halves<"vqmovun", "s16", 0b0, 0b0, 0b00>;
4213 defm MVE_VQMOVUNs32 : MVE_VxMOVxN_halves<"vqmovun", "s32", 0b0, 0b0, 0b01>;
// ARMISD::VMOVN selection: the trailing i32 operand (0 or 1) picks the
// bottom-half (bh) or top-half (th) instruction.
4215 def MVEvmovn : SDNode<"ARMISD::VMOVN", SDTARMVEXT>;
4216 let Predicates = [HasMVEInt] in {
4217 def : Pat<(v8i16 (MVEvmovn (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 0))),
4218 (v8i16 (MVE_VMOVNi32bh (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
4219 def : Pat<(v8i16 (MVEvmovn (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 1))),
4220 (v8i16 (MVE_VMOVNi32th (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
4221 def : Pat<(v16i8 (MVEvmovn (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm), (i32 0))),
4222 (v16i8 (MVE_VMOVNi16bh (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm)))>;
4223 def : Pat<(v16i8 (MVEvmovn (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm), (i32 1))),
4224 (v16i8 (MVE_VMOVNi16th (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm)))>;
// VCVTB/VCVTT between f16 and f32: converts into one half of $Qd while the
// other half is preserved from $Qd_src.
4227 class MVE_VCVT_ff<string iname, string suffix, bit op, bit T,
4228 list<dag> pattern=[]>
4229 : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm),
4230 "$Qd, $Qm", vpred_n, "$Qd = $Qd_src", pattern> {
4232 let Inst{21-16} = 0b111111;
4234 let Inst{8-7} = 0b00;
4237 let Predicates = [HasMVEFloat];
// f32 -> f16 narrowing conversions, with unpredicated and predicated
// intrinsic patterns ('half' selects bottom vs top half).
4240 multiclass MVE_VCVT_f2h_m<string iname, int half> {
4241 def "": MVE_VCVT_ff<iname, "f16.f32", 0b0, half>;
4242 defvar Inst = !cast<Instruction>(NAME);
4244 let Predicates = [HasMVEFloat] in {
4245 def : Pat<(v8f16 (int_arm_mve_vcvt_narrow
4246 (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm), (i32 half))),
4247 (v8f16 (Inst (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm)))>;
4248 def : Pat<(v8f16 (int_arm_mve_vcvt_narrow_predicated
4249 (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm), (i32 half),
4250 (v4i1 VCCR:$mask))),
4251 (v8f16 (Inst (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm),
4252 ARMVCCThen, (v4i1 VCCR:$mask)))>;
// f16 -> f32 widening conversions (instruction definition only).
4256 multiclass MVE_VCVT_h2f_m<string iname, int half> {
4257 def "": MVE_VCVT_ff<iname, "f32.f16", 0b1, half>;
4260 defm MVE_VCVTf16f32bh : MVE_VCVT_f2h_m<"vcvtb", 0b0>;
4261 defm MVE_VCVTf16f32th : MVE_VCVT_f2h_m<"vcvtt", 0b1>;
4262 defm MVE_VCVTf32f16bh : MVE_VCVT_h2f_m<"vcvtb", 0b0>;
4263 defm MVE_VCVTf32f16th : MVE_VCVT_h2f_m<"vcvtt", 0b1>;
// V(H)CADD: complex add with rotation; halve=0 selects the halving VHCADD
// encoding, halve=1 the plain VCADD.
4265 class MVE_VxCADD<string iname, string suffix, bits<2> size, bit halve,
4267 : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
4268 (ins MQPR:$Qn, MQPR:$Qm, complexrotateopodd:$rot),
4269 "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, []> {
4273 let Inst{28} = halve;
4274 let Inst{21-20} = size;
4275 let Inst{19-17} = Qn{2-0};
4279 let Inst{7} = Qn{3};
// Instruction plus unpredicated/predicated vcaddq intrinsic patterns.
4283 multiclass MVE_VxCADD_m<string iname, MVEVectorVTInfo VTI,
4284 bit halve, string cstr=""> {
4285 def "" : MVE_VxCADD<iname, VTI.Suffix, VTI.Size, halve, cstr>;
4286 defvar Inst = !cast<Instruction>(NAME);
4288 let Predicates = [HasMVEInt] in {
4289 def : Pat<(VTI.Vec (int_arm_mve_vcaddq halve,
4290 imm:$rot, (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
4291 (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
4294 def : Pat<(VTI.Vec (int_arm_mve_vcaddq_predicated halve,
4295 imm:$rot, (VTI.Vec MQPR:$inactive),
4296 (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
4297 (VTI.Pred VCCR:$mask))),
4298 (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
4299 imm:$rot, ARMVCCThen, (VTI.Pred VCCR:$mask),
4300 (VTI.Vec MQPR:$inactive)))>;
4305 defm MVE_VCADDi8 : MVE_VxCADD_m<"vcadd", MVE_v16i8, 0b1>;
4306 defm MVE_VCADDi16 : MVE_VxCADD_m<"vcadd", MVE_v8i16, 0b1>;
4307 defm MVE_VCADDi32 : MVE_VxCADD_m<"vcadd", MVE_v4i32, 0b1, "@earlyclobber $Qd">;
4309 defm MVE_VHCADDs8 : MVE_VxCADD_m<"vhcadd", MVE_v16s8, 0b0>;
4310 defm MVE_VHCADDs16 : MVE_VxCADD_m<"vhcadd", MVE_v8s16, 0b0>;
4311 defm MVE_VHCADDs32 : MVE_VxCADD_m<"vhcadd", MVE_v4s32, 0b0, "@earlyclobber $Qd">;
// VADC/VSBC: 32-bit add/subtract with carry through FPSCR.C. The I ("i")
// variants take no carry-in operand; the others consume FPSCR as $carryin.
4313 class MVE_VADCSBC<string iname, bit I, bit subtract,
4314 dag carryin, list<dag> pattern=[]>
4315 : MVE_qDest_qSrc<iname, "i32", (outs MQPR:$Qd, cl_FPSCR_NZCV:$carryout),
4316 !con((ins MQPR:$Qn, MQPR:$Qm), carryin),
4317 "$Qd, $Qn, $Qm", vpred_r, "", pattern> {
4320 let Inst{28} = subtract;
4321 let Inst{21-20} = 0b11;
4322 let Inst{19-17} = Qn{2-0};
4326 let Inst{7} = Qn{3};
4329 // Custom decoder method in order to add the FPSCR operand(s), which
4330 // Tablegen won't do right
4331 let DecoderMethod = "DecodeMVEVADCInstruction";
4334 def MVE_VADC : MVE_VADCSBC<"vadc", 0b0, 0b0, (ins cl_FPSCR_NZCV:$carryin)>;
4335 def MVE_VADCI : MVE_VADCSBC<"vadci", 0b1, 0b0, (ins)>;
4337 def MVE_VSBC : MVE_VADCSBC<"vsbc", 0b0, 0b1, (ins cl_FPSCR_NZCV:$carryin)>;
4338 def MVE_VSBCI : MVE_VADCSBC<"vsbci", 0b1, 0b1, (ins)>;
// VQDMULLB/VQDMULLT: saturating doubling widening multiply of the bottom
// (T=0) or top (T=1) halves.
4340 class MVE_VQDMULL<string iname, string suffix, bit size, bit T,
4341 string cstr="", list<dag> pattern=[]>
4342 : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
4343 (ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
4344 vpred_r, cstr, pattern> {
4347 let Inst{28} = size;
4348 let Inst{21-20} = 0b11;
4349 let Inst{19-17} = Qn{2-0};
4353 let Inst{7} = Qn{3};
4355 let validForTailPredication = 1;
4358 multiclass MVE_VQDMULL_halves<string suffix, bit size, string cstr=""> {
4359 def bh : MVE_VQDMULL<"vqdmullb", suffix, size, 0b0, cstr>;
4360 def th : MVE_VQDMULL<"vqdmullt", suffix, size, 0b1, cstr>;
// s32 earlyclobbers $Qd because the double-width result overlaps sources.
4363 defm MVE_VQDMULLs16 : MVE_VQDMULL_halves<"s16", 0b0>;
4364 defm MVE_VQDMULLs32 : MVE_VQDMULL_halves<"s32", 0b1, "@earlyclobber $Qd">;
4366 // end of mve_qDest_qSrc
4368 // start of mve_qDest_rSrc
// Shared encoding skeleton for Q-destination, GPR-source instructions:
// Qd in bits {22,15-13}, Qn in {7,19-17}, Rm in bits 3-0.
4370 class MVE_qr_base<dag oops, dag iops, InstrItinClass itin, string iname,
4371 string suffix, string ops, vpred_ops vpred, string cstr,
4372 list<dag> pattern=[]>
4373 : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, pattern> {
4378 let Inst{25-23} = 0b100;
4379 let Inst{22} = Qd{3};
4380 let Inst{19-17} = Qn{2-0};
4381 let Inst{15-13} = Qd{2-0};
4382 let Inst{11-9} = 0b111;
4383 let Inst{7} = Qn{3};
4386 let Inst{3-0} = Rm{3-0};
// Plain Q-dest form: $Qd written fresh (vpred_r).
4389 class MVE_qDest_rSrc<string iname, string suffix, string cstr="", list<dag> pattern=[]>
4390 : MVE_qr_base<(outs MQPR:$Qd), (ins MQPR:$Qn, rGPR:$Rm),
4391 NoItinerary, iname, suffix, "$Qd, $Qn, $Rm", vpred_r, cstr,
// Accumulating form: $Qd tied to $Qd_src (vpred_n).
4394 class MVE_qDestSrc_rSrc<string iname, string suffix, list<dag> pattern=[]>
4395 : MVE_qr_base<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qn, rGPR:$Rm),
4396 NoItinerary, iname, suffix, "$Qd, $Qn, $Rm", vpred_n, "$Qd = $Qd_src",
// Single-vector-operand form ($Qd and a GPR only, no $Qn).
4399 class MVE_qDest_single_rSrc<string iname, string suffix, list<dag> pattern=[]>
4400 : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qd_src, rGPR:$Rm), NoItinerary, iname,
4401 suffix, "$Qd, $Rm", vpred_n, "$Qd = $Qd_src", pattern> {
4405 let Inst{22} = Qd{3};
4406 let Inst{15-13} = Qd{2-0};
4407 let Inst{3-0} = Rm{3-0};
// V(Q)ADD/V(Q)SUB with a GPR scalar operand; the individual opcode bits are
// threaded through as parameters and set below.
4410 class MVE_VADDSUB_qr<string iname, string suffix, bits<2> size,
4411 bit bit_5, bit bit_12, bit bit_16,
4412 bit bit_28, list<dag> pattern=[]>
4413 : MVE_qDest_rSrc<iname, suffix, "", pattern> {
4415 let Inst{28} = bit_28;
4416 let Inst{21-20} = size;
4417 let Inst{16} = bit_16;
4418 let Inst{12} = bit_12;
4420 let Inst{5} = bit_5;
4421 let validForTailPredication = 1;
// Stamps out the 8/16/32-bit element sizes for one opcode.
4424 multiclass MVE_VADDSUB_qr_sizes<string iname, string suffix,
4425 bit bit_5, bit bit_12, bit bit_16,
4426 bit bit_28, list<dag> pattern=[]> {
4427 def "8" : MVE_VADDSUB_qr<iname, suffix#"8", 0b00,
4428 bit_5, bit_12, bit_16, bit_28>;
4429 def "16" : MVE_VADDSUB_qr<iname, suffix#"16", 0b01,
4430 bit_5, bit_12, bit_16, bit_28>;
4431 def "32" : MVE_VADDSUB_qr<iname, suffix#"32", 0b10,
4432 bit_5, bit_12, bit_16, bit_28>;
4435 defm MVE_VADD_qr_i : MVE_VADDSUB_qr_sizes<"vadd", "i", 0b0, 0b0, 0b1, 0b0>;
4436 defm MVE_VQADD_qr_s : MVE_VADDSUB_qr_sizes<"vqadd", "s", 0b1, 0b0, 0b0, 0b0>;
4437 defm MVE_VQADD_qr_u : MVE_VADDSUB_qr_sizes<"vqadd", "u", 0b1, 0b0, 0b0, 0b1>;
4439 defm MVE_VSUB_qr_i : MVE_VADDSUB_qr_sizes<"vsub", "i", 0b0, 0b1, 0b1, 0b0>;
4440 defm MVE_VQSUB_qr_s : MVE_VADDSUB_qr_sizes<"vqsub", "s", 0b1, 0b1, 0b0, 0b0>;
4441 defm MVE_VQSUB_qr_u : MVE_VADDSUB_qr_sizes<"vqsub", "u", 0b1, 0b1, 0b0, 0b1>;
// Select vector +/- duplicated-scalar directly to the _qr instructions.
4443 let Predicates = [HasMVEInt] in {
4444 def : Pat<(v16i8 (add (v16i8 MQPR:$val1), (v16i8 (ARMvdup GPR:$val2)))),
4445 (v16i8 (MVE_VADD_qr_i8 (v16i8 MQPR:$val1), (i32 GPR:$val2)))>;
4446 def : Pat<(v8i16 (add (v8i16 MQPR:$val1), (v8i16 (ARMvdup GPR:$val2)))),
4447 (v8i16 (MVE_VADD_qr_i16 (v8i16 MQPR:$val1), (i32 GPR:$val2)))>;
4448 def : Pat<(v4i32 (add (v4i32 MQPR:$val1), (v4i32 (ARMvdup GPR:$val2)))),
4449 (v4i32 (MVE_VADD_qr_i32 (v4i32 MQPR:$val1), (i32 GPR:$val2)))>;
4452 let Predicates = [HasMVEInt] in {
4453 def : Pat<(v16i8 (sub (v16i8 MQPR:$val1), (v16i8 (ARMvdup GPR:$val2)))),
4454 (v16i8 (MVE_VSUB_qr_i8 (v16i8 MQPR:$val1), (i32 GPR:$val2)))>;
4455 def : Pat<(v8i16 (sub (v8i16 MQPR:$val1), (v8i16 (ARMvdup GPR:$val2)))),
4456 (v8i16 (MVE_VSUB_qr_i16 (v8i16 MQPR:$val1), (i32 GPR:$val2)))>;
4457 def : Pat<(v4i32 (sub (v4i32 MQPR:$val1), (v4i32 (ARMvdup GPR:$val2)))),
4458 (v4i32 (MVE_VSUB_qr_i32 (v4i32 MQPR:$val1), (i32 GPR:$val2)))>;
// Scalar-operand version of VQDMULLB/VQDMULLT.
4461 class MVE_VQDMULL_qr<string iname, string suffix, bit size,
4462 bit T, string cstr="", list<dag> pattern=[]>
4463 : MVE_qDest_rSrc<iname, suffix, cstr, pattern> {
4465 let Inst{28} = size;
4466 let Inst{21-20} = 0b11;
4471 let validForTailPredication = 1;
4474 multiclass MVE_VQDMULL_qr_halves<string suffix, bit size, string cstr=""> {
4475 def bh : MVE_VQDMULL_qr<"vqdmullb", suffix, size, 0b0, cstr>;
4476 def th : MVE_VQDMULL_qr<"vqdmullt", suffix, size, 0b1, cstr>;
// s32 earlyclobbers $Qd, matching the vector-operand VQDMULL above.
4479 defm MVE_VQDMULL_qr_s16 : MVE_VQDMULL_qr_halves<"s16", 0b0>;
4480 defm MVE_VQDMULL_qr_s32 : MVE_VQDMULL_qr_halves<"s32", 0b1, "@earlyclobber $Qd">;
// VHADD/VHSUB (and FP VADD/VSUB) with a GPR scalar: subtract selects the
// subtracting form via bit 12; bits 21-20 carry the size (0b11 for float).
4482 class MVE_VxADDSUB_qr<string iname, string suffix,
4483 bit bit_28, bits<2> bits_21_20, bit subtract,
4484 list<dag> pattern=[]>
4485 : MVE_qDest_rSrc<iname, suffix, "", pattern> {
4487 let Inst{28} = bit_28;
4488 let Inst{21-20} = bits_21_20;
4490 let Inst{12} = subtract;
4493 let validForTailPredication = 1;
4496 def MVE_VHADD_qr_s8 : MVE_VxADDSUB_qr<"vhadd", "s8", 0b0, 0b00, 0b0>;
4497 def MVE_VHADD_qr_s16 : MVE_VxADDSUB_qr<"vhadd", "s16", 0b0, 0b01, 0b0>;
4498 def MVE_VHADD_qr_s32 : MVE_VxADDSUB_qr<"vhadd", "s32", 0b0, 0b10, 0b0>;
4499 def MVE_VHADD_qr_u8 : MVE_VxADDSUB_qr<"vhadd", "u8", 0b1, 0b00, 0b0>;
4500 def MVE_VHADD_qr_u16 : MVE_VxADDSUB_qr<"vhadd", "u16", 0b1, 0b01, 0b0>;
4501 def MVE_VHADD_qr_u32 : MVE_VxADDSUB_qr<"vhadd", "u32", 0b1, 0b10, 0b0>;
4503 def MVE_VHSUB_qr_s8 : MVE_VxADDSUB_qr<"vhsub", "s8", 0b0, 0b00, 0b1>;
4504 def MVE_VHSUB_qr_s16 : MVE_VxADDSUB_qr<"vhsub", "s16", 0b0, 0b01, 0b1>;
4505 def MVE_VHSUB_qr_s32 : MVE_VxADDSUB_qr<"vhsub", "s32", 0b0, 0b10, 0b1>;
4506 def MVE_VHSUB_qr_u8 : MVE_VxADDSUB_qr<"vhsub", "u8", 0b1, 0b00, 0b1>;
4507 def MVE_VHSUB_qr_u16 : MVE_VxADDSUB_qr<"vhsub", "u16", 0b1, 0b01, 0b1>;
4508 def MVE_VHSUB_qr_u32 : MVE_VxADDSUB_qr<"vhsub", "u32", 0b1, 0b10, 0b1>;
4510 let Predicates = [HasMVEFloat] in {
4511 def MVE_VADD_qr_f32 : MVE_VxADDSUB_qr<"vadd", "f32", 0b0, 0b11, 0b0>;
4512 def MVE_VADD_qr_f16 : MVE_VxADDSUB_qr<"vadd", "f16", 0b1, 0b11, 0b0>;
4514 def MVE_VSUB_qr_f32 : MVE_VxADDSUB_qr<"vsub", "f32", 0b0, 0b11, 0b1>;
4515 def MVE_VSUB_qr_f16 : MVE_VxADDSUB_qr<"vsub", "f16", 0b1, 0b11, 0b1>;
// MVE_VxSHL_qr: whole-vector shift by a scalar register, covering the
// vshl/vrshl/vqshl/vqrshl family. bit_7 and bit_17 select among the four
// variants (see the MVE_VxSHL_qr_types instances at the end).
4518 class MVE_VxSHL_qr<string iname, string suffix, bit U, bits<2> size,
4519 bit bit_7, bit bit_17, list<dag> pattern=[]>
4520 : MVE_qDest_single_rSrc<iname, suffix, pattern> {
4523 let Inst{25-23} = 0b100;
4524 let Inst{21-20} = 0b11;
4525 let Inst{19-18} = size;
4526 let Inst{17} = bit_17;
4528 let Inst{12-8} = 0b11110;
4529 let Inst{7} = bit_7;
4530 let Inst{6-4} = 0b110;
4531 let validForTailPredication = 1;
// Per-type wrapper: defines the instruction plus ISel patterns for the
// unpredicated and predicated int_arm_mve_vshl_scalar intrinsics. The
// intrinsic's q/r/unsigned immediates must match this instance's flags.
4534 multiclass MVE_VxSHL_qr_p<string iname, MVEVectorVTInfo VTI, bit q, bit r> {
4535 def "" : MVE_VxSHL_qr<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, q, r>;
4536 defvar Inst = !cast<Instruction>(NAME);
4538 def : Pat<(VTI.Vec (int_arm_mve_vshl_scalar
4539 (VTI.Vec MQPR:$in), (i32 rGPR:$sh),
4540 (i32 q), (i32 r), (i32 VTI.Unsigned))),
4541 (VTI.Vec (Inst (VTI.Vec MQPR:$in), (i32 rGPR:$sh)))>;
4543 def : Pat<(VTI.Vec (int_arm_mve_vshl_scalar_predicated
4544 (VTI.Vec MQPR:$in), (i32 rGPR:$sh),
4545 (i32 q), (i32 r), (i32 VTI.Unsigned),
4546 (VTI.Pred VCCR:$mask))),
4547 (VTI.Vec (Inst (VTI.Vec MQPR:$in), (i32 rGPR:$sh),
4548 ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
// Expands one mnemonic across all six integer element types.
4551 multiclass MVE_VxSHL_qr_types<string iname, bit bit_7, bit bit_17> {
4552 defm s8 : MVE_VxSHL_qr_p<iname, MVE_v16s8, bit_7, bit_17>;
4553 defm s16 : MVE_VxSHL_qr_p<iname, MVE_v8s16, bit_7, bit_17>;
4554 defm s32 : MVE_VxSHL_qr_p<iname, MVE_v4s32, bit_7, bit_17>;
4555 defm u8 : MVE_VxSHL_qr_p<iname, MVE_v16u8, bit_7, bit_17>;
4556 defm u16 : MVE_VxSHL_qr_p<iname, MVE_v8u16, bit_7, bit_17>;
4557 defm u32 : MVE_VxSHL_qr_p<iname, MVE_v4u32, bit_7, bit_17>;
// The four variants: bit_7 = saturating (vq*), bit_17 = rounding (v*r*),
// as implied by the mnemonic/flag pairing below.
4560 defm MVE_VSHL_qr : MVE_VxSHL_qr_types<"vshl", 0b0, 0b0>;
4561 defm MVE_VRSHL_qr : MVE_VxSHL_qr_types<"vrshl", 0b0, 0b1>;
4562 defm MVE_VQSHL_qr : MVE_VxSHL_qr_types<"vqshl", 0b1, 0b0>;
4563 defm MVE_VQRSHL_qr : MVE_VxSHL_qr_types<"vqrshl", 0b1, 0b1>;
// Fold a generic vector shift whose shift amount is a duplicated scalar
// (ARMvshlu/ARMvshls of an ARMvdup) into the scalar-operand VSHL form.
4565 let Predicates = [HasMVEInt] in {
4566 def : Pat<(v4i32 (ARMvshlu (v4i32 MQPR:$Qm), (v4i32 (ARMvdup GPR:$Rm)))),
4567 (v4i32 (MVE_VSHL_qru32 (v4i32 MQPR:$Qm), GPR:$Rm))>;
4568 def : Pat<(v8i16 (ARMvshlu (v8i16 MQPR:$Qm), (v8i16 (ARMvdup GPR:$Rm)))),
4569 (v8i16 (MVE_VSHL_qru16 (v8i16 MQPR:$Qm), GPR:$Rm))>;
4570 def : Pat<(v16i8 (ARMvshlu (v16i8 MQPR:$Qm), (v16i8 (ARMvdup GPR:$Rm)))),
4571 (v16i8 (MVE_VSHL_qru8 (v16i8 MQPR:$Qm), GPR:$Rm))>;
// Signed counterparts select the s-typed instructions.
4573 def : Pat<(v4i32 (ARMvshls (v4i32 MQPR:$Qm), (v4i32 (ARMvdup GPR:$Rm)))),
4574 (v4i32 (MVE_VSHL_qrs32 (v4i32 MQPR:$Qm), GPR:$Rm))>;
4575 def : Pat<(v8i16 (ARMvshls (v8i16 MQPR:$Qm), (v8i16 (ARMvdup GPR:$Rm)))),
4576 (v8i16 (MVE_VSHL_qrs16 (v8i16 MQPR:$Qm), GPR:$Rm))>;
4577 def : Pat<(v16i8 (ARMvshls (v16i8 MQPR:$Qm), (v16i8 (ARMvdup GPR:$Rm)))),
4578 (v16i8 (MVE_VSHL_qrs8 (v16i8 MQPR:$Qm), GPR:$Rm))>;
// MVE_VBRSR: bit-reverse-and-shift-right by scalar, in 8/16/32-bit lanes.
4581 class MVE_VBRSR<string iname, string suffix, bits<2> size, list<dag> pattern=[]>
4582 : MVE_qDest_rSrc<iname, suffix, "", pattern> {
4585 let Inst{21-20} = size;
4590 let validForTailPredication = 1;
4593 def MVE_VBRSR8 : MVE_VBRSR<"vbrsr", "8", 0b00>;
4594 def MVE_VBRSR16 : MVE_VBRSR<"vbrsr", "16", 0b01>;
4595 def MVE_VBRSR32 : MVE_VBRSR<"vbrsr", "32", 0b10>;
// A full lane bitreverse is VBRSR with the scalar operand equal to the
// lane width (8/16/32), materialized here with t2MOVi.
4597 let Predicates = [HasMVEInt] in {
4598 def : Pat<(v16i8 ( bitreverse (v16i8 MQPR:$val1))),
4599 (v16i8 ( MVE_VBRSR8 (v16i8 MQPR:$val1), (t2MOVi (i32 8)) ))>;
4601 def : Pat<(v4i32 ( bitreverse (v4i32 MQPR:$val1))),
4602 (v4i32 ( MVE_VBRSR32 (v4i32 MQPR:$val1), (t2MOVi (i32 32)) ))>;
4604 def : Pat<(v8i16 ( bitreverse (v8i16 MQPR:$val1))),
4605 (v8i16 ( MVE_VBRSR16 (v8i16 MQPR:$val1), (t2MOVi (i32 16)) ))>;
// MVE_VMUL_qr_int: plain integer vector-by-scalar multiply.
4608 class MVE_VMUL_qr_int<string iname, string suffix,
4609 bits<2> size, list<dag> pattern=[]>
4610 : MVE_qDest_rSrc<iname, suffix, "", pattern> {
4613 let Inst{21-20} = size;
4618 let validForTailPredication = 1;
4621 def MVE_VMUL_qr_i8 : MVE_VMUL_qr_int<"vmul", "i8", 0b00>;
4622 def MVE_VMUL_qr_i16 : MVE_VMUL_qr_int<"vmul", "i16", 0b01>;
4623 def MVE_VMUL_qr_i32 : MVE_VMUL_qr_int<"vmul", "i32", 0b10>;
// Fold (mul vector, (dup scalar)) into the vector/scalar multiply.
4625 let Predicates = [HasMVEInt] in {
4626 def : Pat<(v16i8 (mul (v16i8 MQPR:$val1), (v16i8 (ARMvdup GPR:$val2)))),
4627 (v16i8 (MVE_VMUL_qr_i8 (v16i8 MQPR:$val1), (i32 GPR:$val2)))>;
4628 def : Pat<(v8i16 (mul (v8i16 MQPR:$val1), (v8i16 (ARMvdup GPR:$val2)))),
4629 (v8i16 (MVE_VMUL_qr_i16 (v8i16 MQPR:$val1), (i32 GPR:$val2)))>;
4630 def : Pat<(v4i32 (mul (v4i32 MQPR:$val1), (v4i32 (ARMvdup GPR:$val2)))),
4631 (v4i32 (MVE_VMUL_qr_i32 (v4i32 MQPR:$val1), (i32 GPR:$val2)))>;
// MVE_VxxMUL_qr: shared class for the saturating doubling multiplies
// (vqdmulh/vqrdmulh) and the FP vmul, all in vector-by-scalar form.
4634 class MVE_VxxMUL_qr<string iname, string suffix,
4635 bit bit_28, bits<2> bits_21_20, list<dag> pattern=[]>
4636 : MVE_qDest_rSrc<iname, suffix, "", pattern> {
4638 let Inst{28} = bit_28;
4639 let Inst{21-20} = bits_21_20;
// Integer forms: bit_28 distinguishes vqdmulh (0) from vqrdmulh (1);
// bits_21_20 carry the element size.
4646 def MVE_VQDMULH_qr_s8 : MVE_VxxMUL_qr<"vqdmulh", "s8", 0b0, 0b00>;
4647 def MVE_VQDMULH_qr_s16 : MVE_VxxMUL_qr<"vqdmulh", "s16", 0b0, 0b01>;
4648 def MVE_VQDMULH_qr_s32 : MVE_VxxMUL_qr<"vqdmulh", "s32", 0b0, 0b10>;
4650 def MVE_VQRDMULH_qr_s8 : MVE_VxxMUL_qr<"vqrdmulh", "s8", 0b1, 0b00>;
4651 def MVE_VQRDMULH_qr_s16 : MVE_VxxMUL_qr<"vqrdmulh", "s16", 0b1, 0b01>;
4652 def MVE_VQRDMULH_qr_s32 : MVE_VxxMUL_qr<"vqrdmulh", "s32", 0b1, 0b10>;
// FP vmul: bits_21_20 = 0b11; bit_28 distinguishes f16 (1) from f32 (0).
// Note only these FP instances are marked tail-predicable here.
4654 let Predicates = [HasMVEFloat], validForTailPredication = 1 in {
4655 def MVE_VMUL_qr_f16 : MVE_VxxMUL_qr<"vmul", "f16", 0b1, 0b11>;
4656 def MVE_VMUL_qr_f32 : MVE_VxxMUL_qr<"vmul", "f32", 0b0, 0b11>;
// MVE_VFMAMLA_qr: multiply-accumulate with a scalar operand, covering the
// integer vmla/vmlas and the FP vfma/vfmas forms. S distinguishes the
// '...a' (accumulate, S=0) from the '...as' (S=1) variant.
4659 class MVE_VFMAMLA_qr<string iname, string suffix,
4660 bit bit_28, bits<2> bits_21_20, bit S,
4661 list<dag> pattern=[]>
4662 : MVE_qDestSrc_rSrc<iname, suffix, pattern> {
4664 let Inst{28} = bit_28;
4665 let Inst{21-20} = bits_21_20;
4670 let validForTailPredication = 1;
// Integer vmla: bit_28 = unsigned, bits_21_20 = element size.
4673 def MVE_VMLA_qr_s8 : MVE_VFMAMLA_qr<"vmla", "s8", 0b0, 0b00, 0b0>;
4674 def MVE_VMLA_qr_s16 : MVE_VFMAMLA_qr<"vmla", "s16", 0b0, 0b01, 0b0>;
4675 def MVE_VMLA_qr_s32 : MVE_VFMAMLA_qr<"vmla", "s32", 0b0, 0b10, 0b0>;
4676 def MVE_VMLA_qr_u8 : MVE_VFMAMLA_qr<"vmla", "u8", 0b1, 0b00, 0b0>;
4677 def MVE_VMLA_qr_u16 : MVE_VFMAMLA_qr<"vmla", "u16", 0b1, 0b01, 0b0>;
4678 def MVE_VMLA_qr_u32 : MVE_VFMAMLA_qr<"vmla", "u32", 0b1, 0b10, 0b0>;
4680 def MVE_VMLAS_qr_s8 : MVE_VFMAMLA_qr<"vmlas", "s8", 0b0, 0b00, 0b1>;
4681 def MVE_VMLAS_qr_s16 : MVE_VFMAMLA_qr<"vmlas", "s16", 0b0, 0b01, 0b1>;
4682 def MVE_VMLAS_qr_s32 : MVE_VFMAMLA_qr<"vmlas", "s32", 0b0, 0b10, 0b1>;
4683 def MVE_VMLAS_qr_u8 : MVE_VFMAMLA_qr<"vmlas", "u8", 0b1, 0b00, 0b1>;
4684 def MVE_VMLAS_qr_u16 : MVE_VFMAMLA_qr<"vmlas", "u16", 0b1, 0b01, 0b1>;
4685 def MVE_VMLAS_qr_u32 : MVE_VFMAMLA_qr<"vmlas", "u32", 0b1, 0b10, 0b1>;
// Fold add(acc, mul(vec, dup(scalar))) into VMLA. The u-typed instruction
// is used for all cases; signedness does not affect a lane-wise mla result.
// NOTE(review): that signedness claim is inferred from the pattern choice
// here, not stated in this extract — confirm against upstream.
4687 let Predicates = [HasMVEInt] in {
4688 def : Pat<(v4i32 (add (v4i32 MQPR:$src1),
4689 (v4i32 (mul (v4i32 MQPR:$src2),
4690 (v4i32 (ARMvdup (i32 rGPR:$x))))))),
4691 (v4i32 (MVE_VMLA_qr_u32 $src1, $src2, $x))>;
4692 def : Pat<(v8i16 (add (v8i16 MQPR:$src1),
4693 (v8i16 (mul (v8i16 MQPR:$src2),
4694 (v8i16 (ARMvdup (i32 rGPR:$x))))))),
4695 (v8i16 (MVE_VMLA_qr_u16 $src1, $src2, $x))>;
4696 def : Pat<(v16i8 (add (v16i8 MQPR:$src1),
4697 (v16i8 (mul (v16i8 MQPR:$src2),
4698 (v16i8 (ARMvdup (i32 rGPR:$x))))))),
4699 (v16i8 (MVE_VMLA_qr_u8 $src1, $src2, $x))>;
// FP fused multiply-accumulate forms of the same class.
4702 let Predicates = [HasMVEFloat] in {
4703 def MVE_VFMA_qr_f16 : MVE_VFMAMLA_qr<"vfma", "f16", 0b1, 0b11, 0b0>;
4704 def MVE_VFMA_qr_f32 : MVE_VFMAMLA_qr<"vfma", "f32", 0b0, 0b11, 0b0>;
4705 def MVE_VFMA_qr_Sf16 : MVE_VFMAMLA_qr<"vfmas", "f16", 0b1, 0b11, 0b1>;
4706 def MVE_VFMA_qr_Sf32 : MVE_VFMAMLA_qr<"vfmas", "f32", 0b0, 0b11, 0b1>;
// MVE_VQDMLAH_qr: saturating doubling multiply-accumulate with scalar.
// bit_5 and bit_12 select among the four variants instantiated below
// (vqdmlah/vqrdmlah/vqdmlash/vqrdmlash); all are signed-only.
4709 class MVE_VQDMLAH_qr<string iname, string suffix, bit U, bits<2> size,
4710 bit bit_5, bit bit_12, list<dag> pattern=[]>
4711 : MVE_qDestSrc_rSrc<iname, suffix, pattern> {
4714 let Inst{21-20} = size;
4716 let Inst{12} = bit_12;
4718 let Inst{5} = bit_5;
4721 multiclass MVE_VQDMLAH_qr_types<string iname, bit bit_5, bit bit_12> {
4722 def s8 : MVE_VQDMLAH_qr<iname, "s8", 0b0, 0b00, bit_5, bit_12>;
4723 def s16 : MVE_VQDMLAH_qr<iname, "s16", 0b0, 0b01, bit_5, bit_12>;
4724 def s32 : MVE_VQDMLAH_qr<iname, "s32", 0b0, 0b10, bit_5, bit_12>;
// bit_5 = 0 marks the rounding (vqr...) mnemonics; bit_12 = 1 marks the
// '...ash' forms, per the pairings below.
4727 defm MVE_VQDMLAH_qr : MVE_VQDMLAH_qr_types<"vqdmlah", 0b1, 0b0>;
4728 defm MVE_VQRDMLAH_qr : MVE_VQDMLAH_qr_types<"vqrdmlah", 0b0, 0b0>;
4729 defm MVE_VQDMLASH_qr : MVE_VQDMLAH_qr_types<"vqdmlash", 0b1, 0b1>;
4730 defm MVE_VQRDMLASH_qr : MVE_VQDMLAH_qr_types<"vqrdmlash", 0b0, 0b1>;
// MVE_VxDUP: VIDUP/VDDUP — fill a vector with an incrementing (bit_12=0)
// or decrementing (bit_12=1) sequence starting from Rn, stepping by $imm,
// and write the updated scalar back to Rn ("$Rn = $Rn_src" tie).
4732 class MVE_VxDUP<string iname, string suffix, bits<2> size, bit bit_12,
4733 list<dag> pattern=[]>
4734 : MVE_p<(outs MQPR:$Qd, tGPREven:$Rn),
4735 (ins tGPREven:$Rn_src, MVE_VIDUP_imm:$imm), NoItinerary,
4736 iname, suffix, "$Qd, $Rn, $imm", vpred_r, "$Rn = $Rn_src",
4743 let Inst{25-23} = 0b100;
4744 let Inst{22} = Qd{3};
4745 let Inst{21-20} = size;
// Only Rn{3-1} is encoded: the register must be even (tGPREven above).
4746 let Inst{19-17} = Rn{3-1};
4748 let Inst{15-13} = Qd{2-0};
4749 let Inst{12} = bit_12;
4750 let Inst{11-8} = 0b1111;
// The 2-bit step immediate is split across Inst{7} and Inst{0}.
4751 let Inst{7} = imm{1};
4752 let Inst{6-1} = 0b110111;
4753 let Inst{0} = imm{0};
4754 let validForTailPredication = 1;
4757 def MVE_VIDUPu8 : MVE_VxDUP<"vidup", "u8", 0b00, 0b0>;
4758 def MVE_VIDUPu16 : MVE_VxDUP<"vidup", "u16", 0b01, 0b0>;
4759 def MVE_VIDUPu32 : MVE_VxDUP<"vidup", "u32", 0b10, 0b0>;
4761 def MVE_VDDUPu8 : MVE_VxDUP<"vddup", "u8", 0b00, 0b1>;
4762 def MVE_VDDUPu16 : MVE_VxDUP<"vddup", "u16", 0b01, 0b1>;
4763 def MVE_VDDUPu32 : MVE_VxDUP<"vddup", "u32", 0b10, 0b1>;
// MVE_VxWDUP: VIWDUP/VDWDUP — wrapping variant of VIDUP/VDDUP, taking an
// extra odd-numbered scalar Rm (tGPROdd) as the wrap limit.
4765 class MVE_VxWDUP<string iname, string suffix, bits<2> size, bit bit_12,
4766 list<dag> pattern=[]>
4767 : MVE_p<(outs MQPR:$Qd, tGPREven:$Rn),
4768 (ins tGPREven:$Rn_src, tGPROdd:$Rm, MVE_VIDUP_imm:$imm), NoItinerary,
4769 iname, suffix, "$Qd, $Rn, $Rm, $imm", vpred_r, "$Rn = $Rn_src",
4777 let Inst{25-23} = 0b100;
4778 let Inst{22} = Qd{3};
4779 let Inst{21-20} = size;
// Even Rn and odd Rm each drop their low bit in the encoding.
4780 let Inst{19-17} = Rn{3-1};
4782 let Inst{15-13} = Qd{2-0};
4783 let Inst{12} = bit_12;
4784 let Inst{11-8} = 0b1111;
4785 let Inst{7} = imm{1};
4786 let Inst{6-4} = 0b110;
4787 let Inst{3-1} = Rm{3-1};
4788 let Inst{0} = imm{0};
4789 let validForTailPredication = 1;
4792 def MVE_VIWDUPu8 : MVE_VxWDUP<"viwdup", "u8", 0b00, 0b0>;
4793 def MVE_VIWDUPu16 : MVE_VxWDUP<"viwdup", "u16", 0b01, 0b0>;
4794 def MVE_VIWDUPu32 : MVE_VxWDUP<"viwdup", "u32", 0b10, 0b0>;
4796 def MVE_VDWDUPu8 : MVE_VxWDUP<"vdwdup", "u8", 0b00, 0b1>;
4797 def MVE_VDWDUPu16 : MVE_VxWDUP<"vdwdup", "u16", 0b01, 0b1>;
4798 def MVE_VDWDUPu32 : MVE_VxWDUP<"vdwdup", "u32", 0b10, 0b1>;
// VCTP: create a tail predicate in P0 (VCCR output) from an element count
// in Rn. Marked hasSideEffects so it is not speculated/reordered freely.
4800 let hasSideEffects = 1 in
4801 class MVE_VCTPInst<string suffix, bits<2> size, list<dag> pattern=[]>
4802 : MVE_p<(outs VCCR:$P0), (ins rGPR:$Rn), NoItinerary, "vctp", suffix,
4803 "$Rn", vpred_n, "", pattern> {
4806 let Inst{28-27} = 0b10;
4807 let Inst{26-22} = 0b00000;
4808 let Inst{21-20} = size;
4809 let Inst{19-16} = Rn{3-0};
4810 let Inst{15-11} = 0b11101;
// Low bits are fixed but explicitly marked Unpredictable rather than
// required-zero for the decoder.
4811 let Inst{10-0} = 0b00000000001;
4812 let Unpredictable{10-0} = 0b11111111111;
4814 let Constraints = "";
4815 let DecoderMethod = "DecodeMveVCTP";
4816 let validForTailPredication = 1;
// Wraps the instruction with patterns for the plain intrinsic and for an
// intrinsic ANDed with an existing mask (folded into VPT predication).
4819 multiclass MVE_VCTP<MVEVectorVTInfo VTI, Intrinsic intr> {
4820 def "": MVE_VCTPInst<VTI.BitsSuffix, VTI.Size>;
4821 defvar Inst = !cast<Instruction>(NAME);
4823 let Predicates = [HasMVEInt] in {
4824 def : Pat<(intr rGPR:$Rn), (VTI.Pred (Inst rGPR:$Rn))>;
4825 def : Pat<(and (intr rGPR:$Rn), (VTI.Pred VCCR:$mask)),
4826 (VTI.Pred (Inst rGPR:$Rn, ARMVCCThen, VCCR:$mask))>;
4830 defm MVE_VCTP8 : MVE_VCTP<MVE_v16i8, int_arm_mve_vctp8>;
4831 defm MVE_VCTP16 : MVE_VCTP<MVE_v8i16, int_arm_mve_vctp16>;
4832 defm MVE_VCTP32 : MVE_VCTP<MVE_v4i32, int_arm_mve_vctp32>;
4833 defm MVE_VCTP64 : MVE_VCTP<MVE_v2i64, int_arm_mve_vctp64>;
4835 // end of mve_qDest_rSrc
4837 // start of coproc mov
// MVE_VMOV_64bit: move two GPRs to/from two lanes of a Q register pair
// index. to_qreg (Inst{20}) selects the direction; the two lane indices
// are appended to the caller-supplied input operands.
4839 class MVE_VMOV_64bit<dag oops, dag iops, bit to_qreg, string ops, string cstr>
4840 : MVE_VMOV_lane_base<oops, !con(iops, (ins MVEPairVectorIndex2:$idx,
4841 MVEPairVectorIndex0:$idx2)),
4842 NoItinerary, "vmov", "", ops, cstr, []> {
4849 let Inst{31-23} = 0b111011000;
4850 let Inst{22} = Qd{3};
4852 let Inst{20} = to_qreg;
4853 let Inst{19-16} = Rt2{3-0};
4854 let Inst{15-13} = Qd{2-0};
4855 let Inst{12-5} = 0b01111000;
4857 let Inst{3-0} = Rt{3-0};
4860 // The assembly syntax for these instructions mentions the vector
4861 // register name twice, e.g.
4863 // vmov q2[2], q2[0], r0, r1
4864 // vmov r0, r1, q2[2], q2[0]
4866 // which needs a bit of juggling with MC operand handling.
4868 // For the move _into_ a vector register, the MC operand list also has
4869 // to mention the register name twice: once as the output, and once as
4870 // an extra input to represent where the unchanged half of the output
4871 // register comes from (when this instruction is used in code
4872 // generation). So we arrange that the first mention of the vector reg
4873 // in the instruction is considered by the AsmMatcher to be the output
4874 // ($Qd), and the second one is the input ($QdSrc). Binding them
4875 // together with the existing 'tie' constraint is enough to enforce at
4876 // register allocation time that they have to be the same register.
4878 // For the move _from_ a vector register, there's no way to get round
4879 // the fact that both instances of that register name have to be
4880 // inputs. They have to be the same register again, but this time, we
4881 // can't use a tie constraint, because that has to be between an
4882 // output and an input operand. So this time, we have to arrange that
4883 // the q-reg appears just once in the MC operand list, in spite of
4884 // being mentioned twice in the asm syntax - which needs a custom
4885 // AsmMatchConverter.
// GPR-pair -> Q register move: $QdSrc supplies the unchanged half of the
// output and is tied to $Qd (see the long comment above these defs).
4887 def MVE_VMOV_q_rr : MVE_VMOV_64bit<(outs MQPR:$Qd),
4888 (ins MQPR:$QdSrc, rGPR:$Rt, rGPR:$Rt2),
4889 0b1, "$Qd$idx, $QdSrc$idx2, $Rt, $Rt2",
4891 let DecoderMethod = "DecodeMVEVMOVDRegtoQ";
// Q register -> GPR-pair move: the q-reg appears once in the MC operand
// list despite being written twice in asm, hence the custom converter.
4894 def MVE_VMOV_rr_q : MVE_VMOV_64bit<(outs rGPR:$Rt, rGPR:$Rt2), (ins MQPR:$Qd),
4895 0b0, "$Rt, $Rt2, $Qd$idx, $Qd$idx2", ""> {
4896 let DecoderMethod = "DecodeMVEVMOVQtoDReg";
4897 let AsmMatchConverter = "cvtMVEVMOVQtoDReg";
4900 // end of coproc mov
4902 // start of MVE interleaving load/store
4904 // Base class for the family of interleaving/deinterleaving
4905 // load/stores with names like VLD20.8 and VST43.32.
4906 class MVE_vldst24_base<bit writeback, bit fourregs, bits<2> stage, bits<2> size,
4907 bit load, dag Oops, dag loadIops, dag wbIops,
4908 string iname, string ops,
4909 string cstr, list<dag> pattern=[]>
4910 : MVE_MI<Oops, !con(loadIops, wbIops), NoItinerary, iname, ops, cstr, pattern> {
4914 let Inst{31-22} = 0b1111110010;
4915 let Inst{21} = writeback;
4916 let Inst{20} = load;
4917 let Inst{19-16} = Rn;
4918 let Inst{15-13} = VQd{2-0};
4919 let Inst{12-9} = 0b1111;
4920 let Inst{8-7} = size;
4921 let Inst{6-5} = stage;
4922 let Inst{4-1} = 0b0000;
// Inst{0} distinguishes the 2-register from the 4-register family.
4923 let Inst{0} = fourregs;
4926 let mayStore = !eq(load,0);
4929 // A parameter class used to encapsulate all the ways the writeback
4930 // variants of VLD20 and friends differ from the non-writeback ones.
4931 class MVE_vldst24_writeback<bit b, dag Oo, dag Io,
4932 string sy="", string c="", string n=""> {
// id_suffix is glued onto the generated def name (e.g. "_wb").
4938 string id_suffix = n;
4941 // Another parameter class that encapsulates the differences between VLD2x
4943 class MVE_vldst24_nvecs<int n, list<int> s, bit b, RegisterOperand vl> {
4945 list<int> stages = s;
4947 RegisterOperand VecList = vl;
4950 // A third parameter class that distinguishes VLDnn.8 from .16 from .32.
4951 class MVE_vldst24_lanesize<int i, bits<2> b> {
4953 bits<2> sizebits = b;
4956 // A base class for each direction of transfer: one for load, one for
4957 // store. I can't make these a fourth independent parametric tuple
4958 // class, because they have to take the nvecs tuple class as a
4959 // parameter, in order to find the right VecList operand type.
// Load direction: output is the vector list; $VQdSrc is an extra input
// tied to $VQd so a partial (one-stage) load preserves the other lanes.
4961 class MVE_vld24_base<MVE_vldst24_nvecs n, bits<2> pat, bits<2> size,
4962 MVE_vldst24_writeback wb, string iname,
4963 list<dag> pattern=[]>
4964 : MVE_vldst24_base<wb.writeback, n.bit0, pat, size, 1,
4965 !con((outs n.VecList:$VQd), wb.Oops),
4966 (ins n.VecList:$VQdSrc), wb.Iops,
4967 iname, "$VQd, $Rn" # wb.syntax,
4968 wb.cstr # ",$VQdSrc = $VQd", pattern>;
// Store direction: the vector list is a plain input; no tie needed.
4970 class MVE_vst24_base<MVE_vldst24_nvecs n, bits<2> pat, bits<2> size,
4971 MVE_vldst24_writeback wb, string iname,
4972 list<dag> pattern=[]>
4973 : MVE_vldst24_base<wb.writeback, n.bit0, pat, size, 0,
4974 wb.Oops, (ins n.VecList:$VQd), wb.Iops,
4975 iname, "$VQd, $Rn" # wb.syntax,
4978 // Actually define all the interleaving loads and stores, by a series
4979 // of nested foreaches over number of vectors (VLD2/VLD4); stage
4980 // within one of those series (VLDx0/VLDx1/VLDx2/VLDx3); size of
4981 // vector lane; writeback or no writeback.
4982 foreach n = [MVE_vldst24_nvecs<2, [0,1], 0, VecList2Q>,
4983 MVE_vldst24_nvecs<4, [0,1,2,3], 1, VecList4Q>] in
4984 foreach stage = n.stages in
4985 foreach s = [MVE_vldst24_lanesize< 8, 0b00>,
4986 MVE_vldst24_lanesize<16, 0b01>,
4987 MVE_vldst24_lanesize<32, 0b10>] in
// Writeback variant forbids SP as the base (t2_nosp_...); plain variant
// allows it.
4988 foreach wb = [MVE_vldst24_writeback<
4989 1, (outs rGPR:$wb), (ins t2_nosp_addr_offset_none:$Rn),
4990 "!", "$Rn.base = $wb", "_wb">,
4991 MVE_vldst24_writeback<0, (outs), (ins t2_addr_offset_none:$Rn)>] in {
4993 // For each case within all of those foreaches, define the actual
4994 // instructions. The def names are made by gluing together pieces
4995 // from all the parameter classes, and will end up being things like
4996 // MVE_VLD20_8 and MVE_VST43_16_wb.
4998 def "MVE_VLD" # n.nvecs # stage # "_" # s.lanesize # wb.id_suffix
4999 : MVE_vld24_base<n, stage, s.sizebits, wb,
5000 "vld" # n.nvecs # stage # "." # s.lanesize>;
5002 def "MVE_VST" # n.nvecs # stage # "_" # s.lanesize # wb.id_suffix
5003 : MVE_vst24_base<n, stage, s.sizebits, wb,
5004 "vst" # n.nvecs # stage # "." # s.lanesize>;
// ISel patterns mapping the vst2q/vst4q intrinsics (one def per stage)
// onto the generated MVE_VST2x/MVE_VST4x instructions, bundling the
// source vectors into a QQPR / QQQQPR register sequence.
5007 multiclass MVE_vst24_patterns<int lanesize, ValueType VT> {
5008 foreach stage = [0,1] in
5009 def : Pat<(int_arm_mve_vst2q i32:$addr,
5010 (VT MQPR:$v0), (VT MQPR:$v1), (i32 stage)),
5011 (!cast<Instruction>("MVE_VST2"#stage#"_"#lanesize)
5012 (REG_SEQUENCE QQPR, VT:$v0, qsub_0, VT:$v1, qsub_1),
5013 t2_addr_offset_none:$addr)>;
5015 foreach stage = [0,1,2,3] in
5016 def : Pat<(int_arm_mve_vst4q i32:$addr,
5017 (VT MQPR:$v0), (VT MQPR:$v1),
5018 (VT MQPR:$v2), (VT MQPR:$v3), (i32 stage)),
5019 (!cast<Instruction>("MVE_VST4"#stage#"_"#lanesize)
5020 (REG_SEQUENCE QQQQPR, VT:$v0, qsub_0, VT:$v1, qsub_1,
5021 VT:$v2, qsub_2, VT:$v3, qsub_3),
5022 t2_addr_offset_none:$addr)>;
// Instantiated per lane size; f16/f32 reuse the same-width instructions.
5024 defm : MVE_vst24_patterns<8, v16i8>;
5025 defm : MVE_vst24_patterns<16, v8i16>;
5026 defm : MVE_vst24_patterns<32, v4i32>;
5027 defm : MVE_vst24_patterns<16, v8f16>;
5028 defm : MVE_vst24_patterns<32, v4f32>;
5030 // end of MVE interleaving load/store
5032 // start of MVE predicable load/store
5034 // A parameter class for the direction of transfer.
5035 class MVE_ldst_direction<bit b, dag Oo, dag Io, string c=""> {
// Loads take an @earlyclobber on $Qd; stores take $Qd as an input.
5041 def MVE_ld: MVE_ldst_direction<1, (outs MQPR:$Qd), (ins), ",@earlyclobber $Qd">;
5042 def MVE_st: MVE_ldst_direction<0, (outs), (ins MQPR:$Qd)>;
5044 // A parameter class for the size of memory access in a load.
5045 class MVE_memsz<bits<2> e, int s, AddrMode m, string mn, list<string> types> {
5046 bits<2> encoding = e; // opcode bit(s) for encoding
5047 int shift = s; // shift applied to immediate load offset
5050 // For instruction aliases: define the complete list of type
5051 // suffixes at this size, and the canonical ones for loads and
5053 string MnemonicLetter = mn;
// TypeBits = 8 << shift, i.e. 8/16/32/64.
5054 int TypeBits = !shl(8, s);
5055 string CanonLoadSuffix = ".u" # TypeBits;
5056 string CanonStoreSuffix = "." # TypeBits;
5057 list<string> suffixes = !foreach(letter, types, "." # letter # TypeBits);
5060 // Instances of MVE_memsz.
5062 // (memD doesn't need an AddrMode, because those are only for
5063 // contiguous loads, and memD is only used by gather/scatters.)
5064 def MVE_memB: MVE_memsz<0b00, 0, AddrModeT2_i7, "b", ["", "u", "s"]>;
5065 def MVE_memH: MVE_memsz<0b01, 1, AddrModeT2_i7s2, "h", ["", "u", "s", "f"]>;
5066 def MVE_memW: MVE_memsz<0b10, 2, AddrModeT2_i7s4, "w", ["", "u", "s", "f"]>;
5067 def MVE_memD: MVE_memsz<0b11, 3, ?, "d", ["", "u", "s", "f"]>;
5069 // This is the base class for all the MVE loads and stores other than
5070 // the interleaving ones. All the non-interleaving loads/stores share
5071 // the characteristic that they operate on just one vector register,
5072 // so they are VPT-predicable.
5074 // The predication operand is vpred_n, for both loads and stores. For
5075 // store instructions, the reason is obvious: if there is no output
5076 // register, there can't be a need for an input parameter giving the
5077 // output register's previous value. Load instructions also don't need
5078 // that input parameter, because unlike MVE data processing
5079 // instructions, predicated loads are defined to set the inactive
5080 // lanes of the output register to zero, instead of preserving their
// Common base for all non-interleaving MVE loads/stores (VPT-predicable,
// vpred_n — see the comment block above). mayLoad/mayStore derive from
// the direction parameter.
5082 class MVE_VLDRSTR_base<MVE_ldst_direction dir, bit U, bit P, bit W, bit opc,
5083 dag oops, dag iops, string asm, string suffix,
5084 string ops, string cstr, list<dag> pattern=[]>
5085 : MVE_p<oops, iops, NoItinerary, asm, suffix, ops, vpred_n, cstr, pattern> {
5093 let Inst{20} = dir.load;
5094 let Inst{15-13} = Qd{2-0};
5096 let Inst{11-9} = 0b111;
5098 let mayLoad = dir.load;
5099 let mayStore = !eq(dir.load,0);
5100 let validForTailPredication = 1;
5103 // Contiguous load and store instructions. These come in two main
5104 // categories: same-size loads/stores in which 128 bits of vector
5105 // register is transferred to or from 128 bits of memory in the most
5106 // obvious way, and widening loads / narrowing stores, in which the
5107 // size of memory accessed is less than the size of a vector register,
5108 // so the load instructions sign- or zero-extend each memory value
5109 // into a wider vector lane, and the store instructions truncate
5112 // The instruction mnemonics for these two classes look reasonably
5113 // similar, but the actual encodings are different enough to need two
5114 // separate base classes.
5116 // Contiguous, same size
5117 class MVE_VLDRSTR_cs<MVE_ldst_direction dir, MVE_memsz memsz, bit P, bit W,
5118 dag oops, dag iops, string asm, string suffix,
5119 IndexMode im, string ops, string cstr>
5120 : MVE_VLDRSTR_base<dir, 0, P, W, 1, oops, iops, asm, suffix, ops, cstr> {
// Same-size form: full 4-bit base register field at Inst{19-16}.
5122 let Inst{23} = addr{7};
5123 let Inst{19-16} = addr{11-8};
5124 let Inst{8-7} = memsz.encoding;
5125 let Inst{6-0} = addr{6-0};
5128 // Contiguous, widening/narrowing
5129 class MVE_VLDRSTR_cw<MVE_ldst_direction dir, MVE_memsz memsz, bit U,
5130 bit P, bit W, bits<2> size, dag oops, dag iops,
5131 string asm, string suffix, IndexMode im,
5132 string ops, string cstr>
5133 : MVE_VLDRSTR_base<dir, U, P, W, 0, oops, iops, asm, suffix, ops, cstr> {
// Widening/narrowing form: only a 3-bit base register field; Inst{19}
// instead carries part of the memory-size encoding.
5135 let Inst{23} = addr{7};
5136 let Inst{19} = memsz.encoding{0}; // enough to tell 16- from 32-bit
5137 let Inst{18-16} = addr{10-8};
5138 let Inst{8-7} = size;
5139 let Inst{6-0} = addr{6-0};
5144 // Multiclass wrapper on each of the _cw and _cs base classes, to
5145 // generate three writeback modes (none, preindex, postindex).
5147 multiclass MVE_VLDRSTR_cw_m<MVE_ldst_direction dir, MVE_memsz memsz,
5148 string asm, string suffix, bit U, bits<2> size> {
5149 let AM = memsz.AM in {
// No-writeback form: immediate-offset address, P=1 W=0.
5150 def "" : MVE_VLDRSTR_cw<
5151 dir, memsz, U, 1, 0, size,
5152 dir.Oops, !con(dir.Iops, (ins taddrmode_imm7<memsz.shift>:$addr)),
5153 asm, suffix, IndexModeNone, "$Qd, $addr", "">;
// Pre-indexed: extra tGPR output for the written-back base.
5155 def _pre : MVE_VLDRSTR_cw<
5156 dir, memsz, U, 1, 1, size,
5157 !con((outs tGPR:$wb), dir.Oops),
5158 !con(dir.Iops, (ins taddrmode_imm7<memsz.shift>:$addr)),
5159 asm, suffix, IndexModePre, "$Qd, $addr!", "$addr.base = $wb"> {
5160 let DecoderMethod = "DecodeMVE_MEM_1_pre<"#memsz.shift#">";
// Post-indexed: base register and offset are separate operands; the
// base register field is re-set because $Rn replaces $addr's base.
5163 def _post : MVE_VLDRSTR_cw<
5164 dir, memsz, U, 0, 1, size,
5165 !con((outs tGPR:$wb), dir.Oops),
5166 !con(dir.Iops, (ins t_addr_offset_none:$Rn,
5167 t2am_imm7_offset<memsz.shift>:$addr)),
5168 asm, suffix, IndexModePost, "$Qd, $Rn$addr", "$Rn.base = $wb"> {
5170 let Inst{18-16} = Rn{2-0};
// Same three writeback modes for the same-size (_cs) encodings.
5175 multiclass MVE_VLDRSTR_cs_m<MVE_ldst_direction dir, MVE_memsz memsz,
5176 string asm, string suffix> {
5177 let AM = memsz.AM in {
5178 def "" : MVE_VLDRSTR_cs<
5180 dir.Oops, !con(dir.Iops, (ins t2addrmode_imm7<memsz.shift>:$addr)),
5181 asm, suffix, IndexModeNone, "$Qd, $addr", "">;
5183 def _pre : MVE_VLDRSTR_cs<
5185 !con((outs rGPR:$wb), dir.Oops),
5186 !con(dir.Iops, (ins t2addrmode_imm7_pre<memsz.shift>:$addr)),
5187 asm, suffix, IndexModePre, "$Qd, $addr!", "$addr.base = $wb"> {
5188 let DecoderMethod = "DecodeMVE_MEM_2_pre<"#memsz.shift#">";
5191 def _post : MVE_VLDRSTR_cs<
5193 !con((outs rGPR:$wb), dir.Oops),
5194 // We need an !if here to select the base register class,
5195 // because it's legal to write back to SP in a load of this
5196 // type, but not in a store.
5197 !con(dir.Iops, (ins !if(dir.load, t2_addr_offset_none,
5198 t2_nosp_addr_offset_none):$Rn,
5199 t2am_imm7_offset<memsz.shift>:$addr)),
5200 asm, suffix, IndexModePost, "$Qd, $Rn$addr", "$Rn.base = $wb"> {
5202 let Inst{19-16} = Rn{3-0};
5207 // Now actually declare all the contiguous load/stores, via those
5208 // multiclasses. The instruction ids coming out of this are the bare
5209 // names shown in the defm, with _pre or _post appended for writeback,
5210 // e.g. MVE_VLDRBS16, MVE_VSTRB16_pre, MVE_VSTRHU16_post.
// Widening loads: memory size narrower than the lane, sign/zero extended
// (U flag = last-but-one argument).
5212 defm MVE_VLDRBS16: MVE_VLDRSTR_cw_m<MVE_ld, MVE_memB, "vldrb", "s16", 0, 0b01>;
5213 defm MVE_VLDRBS32: MVE_VLDRSTR_cw_m<MVE_ld, MVE_memB, "vldrb", "s32", 0, 0b10>;
5214 defm MVE_VLDRBU16: MVE_VLDRSTR_cw_m<MVE_ld, MVE_memB, "vldrb", "u16", 1, 0b01>;
5215 defm MVE_VLDRBU32: MVE_VLDRSTR_cw_m<MVE_ld, MVE_memB, "vldrb", "u32", 1, 0b10>;
5216 defm MVE_VLDRHS32: MVE_VLDRSTR_cw_m<MVE_ld, MVE_memH, "vldrh", "s32", 0, 0b10>;
5217 defm MVE_VLDRHU32: MVE_VLDRSTR_cw_m<MVE_ld, MVE_memH, "vldrh", "u32", 1, 0b10>;
// Same-size loads.
5219 defm MVE_VLDRBU8: MVE_VLDRSTR_cs_m<MVE_ld, MVE_memB, "vldrb", "u8">;
5220 defm MVE_VLDRHU16: MVE_VLDRSTR_cs_m<MVE_ld, MVE_memH, "vldrh", "u16">;
5221 defm MVE_VLDRWU32: MVE_VLDRSTR_cs_m<MVE_ld, MVE_memW, "vldrw", "u32">;
// Narrowing stores (no signedness distinction needed when truncating).
5223 defm MVE_VSTRB16: MVE_VLDRSTR_cw_m<MVE_st, MVE_memB, "vstrb", "16", 0, 0b01>;
5224 defm MVE_VSTRB32: MVE_VLDRSTR_cw_m<MVE_st, MVE_memB, "vstrb", "32", 0, 0b10>;
5225 defm MVE_VSTRH32: MVE_VLDRSTR_cw_m<MVE_st, MVE_memH, "vstrh", "32", 0, 0b10>;
// Same-size stores.
5227 defm MVE_VSTRBU8 : MVE_VLDRSTR_cs_m<MVE_st, MVE_memB, "vstrb", "8">;
5228 defm MVE_VSTRHU16: MVE_VLDRSTR_cs_m<MVE_st, MVE_memH, "vstrh", "16">;
5229 defm MVE_VSTRWU32: MVE_VLDRSTR_cs_m<MVE_st, MVE_memW, "vstrw", "32">;
5231 // Gather loads / scatter stores whose address operand is of the form
5232 // [Rn,Qm], i.e. a single GPR as the common base address, plus a
5233 // vector of offset from it. ('Load/store this sequence of elements of
5234 // the same array.')
5236 // Like the contiguous family, these loads and stores can widen the
5237 // loaded values / truncate the stored ones, or they can just
5238 // load/store the same size of memory and vector lane. But unlike the
5239 // contiguous family, there's no particular difference in encoding
5240 // between those two cases.
5242 // This family also comes with the option to scale the offset values
5243 // in Qm by the size of the loaded memory (i.e. to treat them as array
5244 // indices), or not to scale them (to treat them as plain byte offsets
5245 // in memory, so that perhaps the loaded values are unaligned). The
5246 // scaled instructions' address operand in assembly looks like
5247 // [Rn,Qm,UXTW #2] or similar.
// Gather/scatter with [Rn, Qm] addressing (see the comment block above).
// 'os' is the offset-scaled bit; 'shift' parameterizes the asm operand.
5250 class MVE_VLDRSTR_rq<MVE_ldst_direction dir, MVE_memsz memsz, bit U,
5251 bits<2> size, bit os, string asm, string suffix, int shift>
5252 : MVE_VLDRSTR_base<dir, U, 0b0, 0b0, 0, dir.Oops,
5253 !con(dir.Iops, (ins mve_addr_rq_shift<shift>:$addr)),
5254 asm, suffix, "$Qd, $addr", dir.cstr> {
5257 let Inst{19-16} = addr{6-3};
5258 let Inst{8-7} = size;
// Memory-size encoding is split across Inst{6} and Inst{4}.
5259 let Inst{6} = memsz.encoding{1};
5261 let Inst{4} = memsz.encoding{0};
5262 let Inst{3-1} = addr{2-0};
5266 // Multiclass that defines the scaled and unscaled versions of an
5267 // instruction, when the memory size is wider than a byte. The scaled
5268 // version gets the default name like MVE_VLDRBU16_rq; the unscaled /
5269 // potentially unaligned version gets a "_u" suffix, e.g.
5270 // MVE_VLDRBU16_rq_u.
5271 multiclass MVE_VLDRSTR_rq_w<MVE_ldst_direction dir, MVE_memsz memsz,
5272 string asm, string suffix, bit U, bits<2> size> {
5273 def _u : MVE_VLDRSTR_rq<dir, memsz, U, size, 0, asm, suffix, 0>;
5274 def "" : MVE_VLDRSTR_rq<dir, memsz, U, size, 1, asm, suffix, memsz.shift>;
5277 // Subclass of MVE_VLDRSTR_rq with the same API as that multiclass,
5278 // for use when the memory size is one byte, so there's no 'scaled'
5279 // version of the instruction at all. (This is encoded as if it were
5280 // unscaled, but named in the default way with no _u suffix.)
5281 class MVE_VLDRSTR_rq_b<MVE_ldst_direction dir, MVE_memsz memsz,
5282 string asm, string suffix, bit U, bits<2> size>
5283 : MVE_VLDRSTR_rq<dir, memsz, U, size, 0, asm, suffix, 0>;
5285 // Multiclasses wrapping that to add ISel patterns for intrinsics.
5286 multiclass MVE_VLDR_rq_w<MVE_memsz memsz, list<MVEVectorVTInfo> VTIs> {
5287 defm "": MVE_VLDRSTR_rq_w<MVE_ld, memsz, "vldr" # memsz.MnemonicLetter,
5288 VTIs[0].Suffix, VTIs[0].Unsigned, VTIs[0].Size>;
5289 defvar Inst = !cast<Instruction>(NAME);
5290 defvar InstU = !cast<Instruction>(NAME # "_u");
5292 foreach VTI = VTIs in
5293 foreach UnsignedFlag = !if(!eq(VTI.Size, memsz.encoding),
5294 [0,1], [VTI.Unsigned]) in {
5295 def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, 0, UnsignedFlag)),
5296 (VTI.Vec (InstU GPR:$base, MQPR:$offsets))>;
5297 def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, memsz.shift, UnsignedFlag)),
5298 (VTI.Vec (Inst GPR:$base, MQPR:$offsets))>;
5299 def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, 0, UnsignedFlag, (VTI.Pred VCCR:$pred))),
5300 (VTI.Vec (InstU GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred))>;
5301 def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, memsz.shift, UnsignedFlag, (VTI.Pred VCCR:$pred))),
5302 (VTI.Vec (Inst GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred))>;
5305 multiclass MVE_VLDR_rq_b<list<MVEVectorVTInfo> VTIs> {
5306 def "": MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb",
5307 VTIs[0].Suffix, VTIs[0].Unsigned, VTIs[0].Size>;
5308 defvar Inst = !cast<Instruction>(NAME);
5310 foreach VTI = VTIs in {
5311 def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), 8, 0, VTI.Unsigned)),
5312 (VTI.Vec (Inst GPR:$base, MQPR:$offsets))>;
5313 def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), 8, 0, VTI.Unsigned, (VTI.Pred VCCR:$pred))),
5314 (VTI.Vec (Inst GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred))>;
// Halfword-or-wider scatter store with vector-of-offsets addressing.
// Two instructions come back from MVE_VLDRSTR_rq_w: the shifted form
// (NAME, offsets scaled by memsz.shift) and the unshifted "_u" form.
// Patterns select between them based on the shift amount carried by the
// intrinsic (0 -> InstU, memsz.shift -> Inst).
5317 multiclass MVE_VSTR_rq_w<MVE_memsz memsz, list<MVEVectorVTInfo> VTIs> {
5318 defm "": MVE_VLDRSTR_rq_w<MVE_st, memsz, "vstr" # memsz.MnemonicLetter,
5319 VTIs[0].BitsSuffix, 0, VTIs[0].Size>;
5320 defvar Inst = !cast<Instruction>(NAME);
5321 defvar InstU = !cast<Instruction>(NAME # "_u");
5323 foreach VTI = VTIs in {
5324 def : Pat<(int_arm_mve_vstr_scatter_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, 0),
5325 (InstU MQPR:$data, GPR:$base, MQPR:$offsets)>;
5326 def : Pat<(int_arm_mve_vstr_scatter_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, memsz.shift),
5327 (Inst MQPR:$data, GPR:$base, MQPR:$offsets)>;
5328 def : Pat<(int_arm_mve_vstr_scatter_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, 0, (VTI.Pred VCCR:$pred)),
5329 (InstU MQPR:$data, GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred)>;
5330 def : Pat<(int_arm_mve_vstr_scatter_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, memsz.shift, (VTI.Pred VCCR:$pred)),
5331 (Inst MQPR:$data, GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred)>;
// Byte-size scatter store (vstrb): as with the byte gather load, there is
// no shifted-offset form, so a single instruction covers all patterns
// (TypeBits=8, shift=0).
5334 multiclass MVE_VSTR_rq_b<list<MVEVectorVTInfo> VTIs> {
5335 def "": MVE_VLDRSTR_rq_b<MVE_st, MVE_memB, "vstrb",
5336 VTIs[0].BitsSuffix, 0, VTIs[0].Size>;
5337 defvar Inst = !cast<Instruction>(NAME);
5339 foreach VTI = VTIs in {
5340 def : Pat<(int_arm_mve_vstr_scatter_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), 8, 0),
5341 (Inst MQPR:$data, GPR:$base, MQPR:$offsets)>;
5342 def : Pat<(int_arm_mve_vstr_scatter_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), 8, 0, (VTI.Pred VCCR:$pred)),
5343 (Inst MQPR:$data, GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred)>;
5347 // Actually define all the loads and stores in this family.
// Gather loads: one defm per (memory size, destination lane size) pair.
// Each list names the vector types that select the same instruction.
5349 defm MVE_VLDRBU8_rq : MVE_VLDR_rq_b<[MVE_v16u8,MVE_v16s8]>;
5350 defm MVE_VLDRBU16_rq: MVE_VLDR_rq_b<[MVE_v8u16]>;
5351 defm MVE_VLDRBS16_rq: MVE_VLDR_rq_b<[MVE_v8s16]>;
5352 defm MVE_VLDRBU32_rq: MVE_VLDR_rq_b<[MVE_v4u32]>;
5353 defm MVE_VLDRBS32_rq: MVE_VLDR_rq_b<[MVE_v4s32]>;
5355 defm MVE_VLDRHU16_rq: MVE_VLDR_rq_w<MVE_memH, [MVE_v8u16,MVE_v8s16,MVE_v8f16]>;
5356 defm MVE_VLDRHU32_rq: MVE_VLDR_rq_w<MVE_memH, [MVE_v4u32]>;
5357 defm MVE_VLDRHS32_rq: MVE_VLDR_rq_w<MVE_memH, [MVE_v4s32]>;
5358 defm MVE_VLDRWU32_rq: MVE_VLDR_rq_w<MVE_memW, [MVE_v4u32,MVE_v4s32,MVE_v4f32]>;
5359 defm MVE_VLDRDU64_rq: MVE_VLDR_rq_w<MVE_memD, [MVE_v2u64,MVE_v2s64]>;
// Scatter stores: stores don't distinguish signedness, so the generic
// integer vector types are used.
5361 defm MVE_VSTRB8_rq : MVE_VSTR_rq_b<[MVE_v16i8]>;
5362 defm MVE_VSTRB16_rq : MVE_VSTR_rq_b<[MVE_v8i16]>;
5363 defm MVE_VSTRB32_rq : MVE_VSTR_rq_b<[MVE_v4i32]>;
5365 defm MVE_VSTRH16_rq : MVE_VSTR_rq_w<MVE_memH, [MVE_v8i16,MVE_v8f16]>;
5366 defm MVE_VSTRH32_rq : MVE_VSTR_rq_w<MVE_memH, [MVE_v4i32]>;
5367 defm MVE_VSTRW32_rq : MVE_VSTR_rq_w<MVE_memW, [MVE_v4i32,MVE_v4f32]>;
5368 defm MVE_VSTRD64_rq : MVE_VSTR_rq_w<MVE_memD, [MVE_v2i64]>;
5370 // Gather loads / scatter stores whose address operand is of the form
5371 // [Qm,#imm], i.e. a vector containing a full base address for each
5372 // loaded item, plus an immediate offset applied consistently to all
5373 // of them. ('Load/store the same field from this vector of pointers
5374 // to a structure type.')
5376 // This family requires the vector lane size to be at least 32 bits
5377 // (so there's room for an address in each lane at all). It has no
5378 // widening/narrowing variants. But it does support preindex
5379 // writeback, in which the address vector is updated to hold the
5380 // addresses actually loaded from.
// Base class for the [Qm,#imm] gather/scatter forms: the address operand
// is a vector of full pointers plus one shared immediate offset.
// W selects the writeback variant; wbops/wbAsm/cstr carry the extra
// output operand, the "!" suffix and the tied-register constraint.
5383 class MVE_VLDRSTR_qi<MVE_ldst_direction dir, MVE_memsz memsz, bit W, dag wbops,
5384 string asm, string wbAsm, string suffix, string cstr = "">
5385 : MVE_VLDRSTR_base<dir, 1, 1, W, 1, !con(wbops, dir.Oops),
5386 !con(dir.Iops, (ins mve_addr_q_shift<memsz.shift>:$addr)),
5387 asm, suffix, "$Qd, $addr" # wbAsm, cstr # dir.cstr> {
// Scatter the addr operand's bits into the encoding fields.
5389 let Inst{23} = addr{7};
5390 let Inst{19-17} = addr{10-8};
5392 let Inst{8} = memsz.encoding{0}; // enough to distinguish 32- from 64-bit
5394 let Inst{6-0} = addr{6-0};
5397 // Multiclass that generates the non-writeback and writeback variants.
// NAME is the plain instruction; NAME_pre is the pre-indexed writeback
// form, which returns the updated address vector in $wb (tied to
// $addr.base) and uses a shift-aware decoder.
5398 multiclass MVE_VLDRSTR_qi_m<MVE_ldst_direction dir, MVE_memsz memsz,
5399 string asm, string suffix> {
5400 def "" : MVE_VLDRSTR_qi<dir, memsz, 0, (outs), asm, "", suffix>;
5401 def _pre : MVE_VLDRSTR_qi<dir, memsz, 1, (outs MQPR:$wb), asm, "!", suffix,
5402 "$addr.base = $wb"> {
5403 let DecoderMethod="DecodeMVE_MEM_3_pre<"#memsz.shift#">";
5407 // Multiclasses wrapping that one, adding selection patterns for the
5408 // non-writeback loads and all the stores. (The writeback loads must
5409 // deliver multiple output values, so they have to be selected by C++
// Wrapper that instantiates the qi load and adds ISel patterns for the
// non-writeback form only (writeback loads produce two results and are
// selected in C++). AVTI is the address-vector type; DVTIs lists the
// data-vector types that share the instruction.
5411 multiclass MVE_VLDR_qi<MVE_memsz memsz, MVEVectorVTInfo AVTI,
5412 list<MVEVectorVTInfo> DVTIs> {
5413 defm "" : MVE_VLDRSTR_qi_m<MVE_ld, memsz, "vldr" # memsz.MnemonicLetter,
5414 "u" # memsz.TypeBits>;
5415 defvar Inst = !cast<Instruction>(NAME);
5417 foreach DVTI = DVTIs in {
5418 def : Pat<(DVTI.Vec (int_arm_mve_vldr_gather_base
5419 (AVTI.Vec MQPR:$addr), (i32 imm:$offset))),
5420 (DVTI.Vec (Inst (AVTI.Vec MQPR:$addr), (i32 imm:$offset)))>;
5421 def : Pat<(DVTI.Vec (int_arm_mve_vldr_gather_base_predicated
5422 (AVTI.Vec MQPR:$addr), (i32 imm:$offset), (AVTI.Pred VCCR:$pred))),
5423 (DVTI.Vec (Inst (AVTI.Vec MQPR:$addr), (i32 imm:$offset),
5424 ARMVCCThen, VCCR:$pred))>;
// Scatter-store counterpart of MVE_VLDR_qi. Stores can be pattern-matched
// even with writeback (single result), so this adds patterns for both the
// plain instruction (Inst) and the pre-indexed one (InstPre), each in
// unpredicated and predicated flavours.
5427 multiclass MVE_VSTR_qi<MVE_memsz memsz, MVEVectorVTInfo AVTI,
5428 list<MVEVectorVTInfo> DVTIs> {
5429 defm "" : MVE_VLDRSTR_qi_m<MVE_st, memsz, "vstr" # memsz.MnemonicLetter,
5430 !cast<string>(memsz.TypeBits)>;
5431 defvar Inst = !cast<Instruction>(NAME);
5432 defvar InstPre = !cast<Instruction>(NAME # "_pre");
5434 foreach DVTI = DVTIs in {
5435 def : Pat<(int_arm_mve_vstr_scatter_base
5436 (AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data)),
5437 (Inst (DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr),
5438 (i32 imm:$offset))>;
5439 def : Pat<(int_arm_mve_vstr_scatter_base_predicated
5440 (AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data), (AVTI.Pred VCCR:$pred)),
5441 (Inst (DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr),
5442 (i32 imm:$offset), ARMVCCThen, VCCR:$pred)>;
// The _wb intrinsics return the updated address vector, matched to the
// writeback instruction's vector result.
5443 def : Pat<(AVTI.Vec (int_arm_mve_vstr_scatter_base_wb
5444 (AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data))),
5445 (AVTI.Vec (InstPre (DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr),
5446 (i32 imm:$offset)))>;
5447 def : Pat<(AVTI.Vec (int_arm_mve_vstr_scatter_base_wb_predicated
5448 (AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data), (AVTI.Pred VCCR:$pred))),
5449 (AVTI.Vec (InstPre (DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr),
5450 (i32 imm:$offset), ARMVCCThen, VCCR:$pred))>;
5454 // Actual instruction definitions.
// Only 32- and 64-bit lane sizes exist in this family (each lane must be
// wide enough to hold an address).
5455 defm MVE_VLDRWU32_qi: MVE_VLDR_qi<MVE_memW, MVE_v4i32, [MVE_v4i32,MVE_v4f32]>;
5456 defm MVE_VLDRDU64_qi: MVE_VLDR_qi<MVE_memD, MVE_v2i64, [MVE_v2i64,MVE_v2f64]>;
5457 defm MVE_VSTRW32_qi: MVE_VSTR_qi<MVE_memW, MVE_v4i32, [MVE_v4i32,MVE_v4f32]>;
5458 defm MVE_VSTRD64_qi: MVE_VSTR_qi<MVE_memD, MVE_v2i64, [MVE_v2i64,MVE_v2f64]>;
5460 // Define aliases for all the instructions where memory size and
5461 // vector lane size are the same. These are mnemonic aliases, so they
5462 // apply consistently across all of the above families - contiguous
5463 // loads, and both the rq and qi types of gather/scatter.
5465 // Rationale: As long as you're loading (for example) 16-bit memory
5466 // values into 16-bit vector lanes, you can think of them as signed or
5467 // unsigned integers, fp16 or just raw 16-bit blobs and it makes no
5468 // difference. So we permit all of vldrh.16, vldrh.u16, vldrh.s16,
5469 // vldrh.f16 and treat them all as equivalent to the canonical
5470 // spelling (which happens to be .u16 for loads, and just .16 for
// Emit one MnemonicAlias per (VPT condition suffix, memory size,
// equivalent type suffix), mapping each alternative spelling onto the
// canonical one used by the real Instruction record.
5473 foreach vpt_cond = ["", "t", "e"] in
5474 foreach memsz = [MVE_memB, MVE_memH, MVE_memW, MVE_memD] in
5475 foreach suffix = memsz.suffixes in {
5476 // Define an alias with every suffix in the list, except for the one
5477 // used by the real Instruction record (i.e. the one that all the
5478 // rest are aliases *for*).
5480 if !ne(suffix, memsz.CanonLoadSuffix) then {
5481 def : MnemonicAlias<
5482 "vldr" # memsz.MnemonicLetter # vpt_cond # suffix,
5483 "vldr" # memsz.MnemonicLetter # vpt_cond # memsz.CanonLoadSuffix>;
5486 if !ne(suffix, memsz.CanonStoreSuffix) then {
5487 def : MnemonicAlias<
5488 "vstr" # memsz.MnemonicLetter # vpt_cond # suffix,
5489 "vstr" # memsz.MnemonicLetter # vpt_cond # memsz.CanonStoreSuffix>;
5493 // end of MVE predicable load/store
// Base class for the integer VPT (vector predicate then) instruction.
// ${Mk} prints the t/e mask suffix; fc is the comparison condition,
// split across non-contiguous encoding bits below.
5495 class MVE_VPT<string suffix, bits<2> size, dag iops, string asm, list<dag> pattern=[]>
5496 : MVE_MI<(outs ), iops, NoItinerary, !strconcat("vpt", "${Mk}", ".", suffix), asm, "", pattern> {
5501 let Inst{31-23} = 0b111111100;
5502 let Inst{22} = Mk{3};
5503 let Inst{21-20} = size;
5504 let Inst{19-17} = Qn{2-0};
5506 let Inst{15-13} = Mk{2-0};
5507 let Inst{12} = fc{2};
5508 let Inst{11-8} = 0b1111;
5509 let Inst{7} = fc{0};
// T1 encoding: vector-vs-vector compare ($Qn, $Qm). The i/u/s subclasses
// differ only in which subset of conditions the assembler accepts
// (pred_basic_i / _u / _s operand classes).
5515 class MVE_VPTt1<string suffix, bits<2> size, dag iops>
5516 : MVE_VPT<suffix, size, iops, "$fc, $Qn, $Qm"> {
5521 let Inst{5} = Qm{3};
5522 let Inst{3-1} = Qm{2-0};
5523 let Inst{0} = fc{1};
// Integer (eq/ne) conditions.
5526 class MVE_VPTt1i<string suffix, bits<2> size>
5527 : MVE_VPTt1<suffix, size,
5528 (ins vpt_mask:$Mk, MQPR:$Qn, MQPR:$Qm, pred_basic_i:$fc)> {
5533 def MVE_VPTv4i32 : MVE_VPTt1i<"i32", 0b10>;
5534 def MVE_VPTv8i16 : MVE_VPTt1i<"i16", 0b01>;
5535 def MVE_VPTv16i8 : MVE_VPTt1i<"i8", 0b00>;
// Unsigned conditions.
5537 class MVE_VPTt1u<string suffix, bits<2> size>
5538 : MVE_VPTt1<suffix, size,
5539 (ins vpt_mask:$Mk, MQPR:$Qn, MQPR:$Qm, pred_basic_u:$fc)> {
5544 def MVE_VPTv4u32 : MVE_VPTt1u<"u32", 0b10>;
5545 def MVE_VPTv8u16 : MVE_VPTt1u<"u16", 0b01>;
5546 def MVE_VPTv16u8 : MVE_VPTt1u<"u8", 0b00>;
// Signed conditions.
5548 class MVE_VPTt1s<string suffix, bits<2> size>
5549 : MVE_VPTt1<suffix, size,
5550 (ins vpt_mask:$Mk, MQPR:$Qn, MQPR:$Qm, pred_basic_s:$fc)> {
5554 def MVE_VPTv4s32 : MVE_VPTt1s<"s32", 0b10>;
5555 def MVE_VPTv8s16 : MVE_VPTt1s<"s16", 0b01>;
5556 def MVE_VPTv16s8 : MVE_VPTt1s<"s8", 0b00>;
// T2 encoding: vector-vs-scalar compare (GPR operand $Rm, which may be
// ZR). Same i/u/s condition-subset split as the T1 family.
5558 class MVE_VPTt2<string suffix, bits<2> size, dag iops>
5559 : MVE_VPT<suffix, size, iops,
5566 let Inst{5} = fc{1};
5567 let Inst{3-0} = Rm{3-0};
5570 class MVE_VPTt2i<string suffix, bits<2> size>
5571 : MVE_VPTt2<suffix, size,
5572 (ins vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_i:$fc)> {
5577 def MVE_VPTv4i32r : MVE_VPTt2i<"i32", 0b10>;
5578 def MVE_VPTv8i16r : MVE_VPTt2i<"i16", 0b01>;
5579 def MVE_VPTv16i8r : MVE_VPTt2i<"i8", 0b00>;
5581 class MVE_VPTt2u<string suffix, bits<2> size>
5582 : MVE_VPTt2<suffix, size,
5583 (ins vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_u:$fc)> {
5588 def MVE_VPTv4u32r : MVE_VPTt2u<"u32", 0b10>;
5589 def MVE_VPTv8u16r : MVE_VPTt2u<"u16", 0b01>;
5590 def MVE_VPTv16u8r : MVE_VPTt2u<"u8", 0b00>;
5592 class MVE_VPTt2s<string suffix, bits<2> size>
5593 : MVE_VPTt2<suffix, size,
5594 (ins vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_s:$fc)> {
5598 def MVE_VPTv4s32r : MVE_VPTt2s<"s32", 0b10>;
5599 def MVE_VPTv8s16r : MVE_VPTt2s<"s16", 0b01>;
5600 def MVE_VPTv16s8r : MVE_VPTt2s<"s8", 0b00>;
// Floating-point VPT base class. Unlike the integer form, the lane size
// is a single bit (Inst{28}: f32 vs f16) and the instructions require
// the HasMVEFloat predicate.
5603 class MVE_VPTf<string suffix, bit size, dag iops, string asm, list<dag> pattern=[]>
5604 : MVE_MI<(outs ), iops, NoItinerary, !strconcat("vpt", "${Mk}", ".", suffix), asm,
5610 let Inst{31-29} = 0b111;
5611 let Inst{28} = size;
5612 let Inst{27-23} = 0b11100;
5613 let Inst{22} = Mk{3};
5614 let Inst{21-20} = 0b11;
5615 let Inst{19-17} = Qn{2-0};
5617 let Inst{15-13} = Mk{2-0};
5618 let Inst{12} = fc{2};
5619 let Inst{11-8} = 0b1111;
5620 let Inst{7} = fc{0};
5624 let Predicates = [HasMVEFloat];
// FP T1 encoding: vector-vs-vector compare.
5627 class MVE_VPTft1<string suffix, bit size>
5628 : MVE_VPTf<suffix, size, (ins vpt_mask:$Mk, MQPR:$Qn, MQPR:$Qm, pred_basic_fp:$fc),
5634 let Inst{5} = Qm{3};
5635 let Inst{3-1} = Qm{2-0};
5636 let Inst{0} = fc{1};
// Note: size bit is 0 for f32 and 1 for f16.
5639 def MVE_VPTv4f32 : MVE_VPTft1<"f32", 0b0>;
5640 def MVE_VPTv8f16 : MVE_VPTft1<"f16", 0b1>;
// FP T2 encoding: vector-vs-scalar compare (GPR or ZR).
5642 class MVE_VPTft2<string suffix, bit size>
5643 : MVE_VPTf<suffix, size, (ins vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_fp:$fc),
5649 let Inst{5} = fc{1};
5650 let Inst{3-0} = Rm{3-0};
5653 def MVE_VPTv4f32r : MVE_VPTft2<"f32", 0b0>;
5654 def MVE_VPTv8f16r : MVE_VPTft2<"f16", 0b1>;
// VPST: start a VPT block predicated on the existing VPR.P0 value,
// performing no compare itself. Only the mask field varies; the rest of
// the word is fixed, with a few bits explicitly marked Unpredictable.
5656 def MVE_VPST : MVE_MI<(outs ), (ins vpt_mask:$Mk), NoItinerary,
5657 !strconcat("vpst", "${Mk}"), "", "", []> {
5660 let Inst{31-23} = 0b111111100;
5661 let Inst{22} = Mk{3};
5662 let Inst{21-16} = 0b110001;
5663 let Inst{15-13} = Mk{2-0};
5664 let Inst{12-0} = 0b0111101001101;
5665 let Unpredictable{12} = 0b1;
5666 let Unpredictable{7} = 0b1;
5667 let Unpredictable{5} = 0b1;
5670 let validForTailPredication = 1;
// VPSEL: per-lane select between $Qn and $Qm under VPR.P0. Defined with
// an empty suffix; the foreach below accepts any lane-size suffix as an
// alias, since the operation is bitwise and size-agnostic.
5673 def MVE_VPSEL : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary,
5674 "vpsel", "", "$Qd, $Qn, $Qm", vpred_n, "", []> {
5680 let Inst{25-23} = 0b100;
5681 let Inst{22} = Qd{3};
5682 let Inst{21-20} = 0b11;
5683 let Inst{19-17} = Qn{2-0};
5685 let Inst{15-13} = Qd{2-0};
5686 let Inst{12-9} = 0b0111;
5688 let Inst{7} = Qn{3};
5690 let Inst{5} = Qm{3};
5692 let Inst{3-1} = Qm{2-0};
// Accept every typed spelling of vpsel as the one untyped instruction.
5696 foreach suffix = ["s8", "s16", "s32", "u8", "u16", "u32",
5697 "i8", "i16", "i32", "f16", "f32"] in
5698 def : MVEInstAlias<"vpsel${vp}." # suffix # "\t$Qd, $Qn, $Qm",
5699 (MVE_VPSEL MQPR:$Qd, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
5701 let Predicates = [HasMVEInt] in {
// vselect with a predicate-register condition maps directly to VPSEL.
5702 def : Pat<(v16i8 (vselect (v16i1 VCCR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))),
5703 (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>;
5704 def : Pat<(v8i16 (vselect (v8i1 VCCR:$pred), (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))),
5705 (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>;
5706 def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))),
5707 (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>;
5709 def : Pat<(v8f16 (vselect (v8i1 VCCR:$pred), (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))),
5710 (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>;
5711 def : Pat<(v4f32 (vselect (v4i1 VCCR:$pred), (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))),
5712 (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>;
// vselect with a full-width vector condition: materialize the predicate
// first by comparing the condition vector against zero (!= 0).
5714 def : Pat<(v16i8 (vselect (v16i8 MQPR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))),
5715 (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone,
5716 (MVE_VCMPi8 (v16i8 MQPR:$pred), (MVE_VMOVimmi8 0), ARMCCne)))>;
5717 def : Pat<(v8i16 (vselect (v8i16 MQPR:$pred), (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))),
5718 (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone,
5719 (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), ARMCCne)))>;
5720 def : Pat<(v4i32 (vselect (v4i32 MQPR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))),
5721 (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone,
5722 (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), ARMCCne)))>;
5724 def : Pat<(v8f16 (vselect (v8i16 MQPR:$pred), (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))),
5725 (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone,
5726 (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), ARMCCne)))>;
5727 def : Pat<(v4f32 (vselect (v4i32 MQPR:$pred), (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))),
5728 (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone,
5729 (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), ARMCCne)))>;
// Predicate extensions: zext/anyext select 1/0 per lane, sext selects
// all-ones/0. For sext, (MVE_VMOVimmi8 255) splats 0xFF into every byte,
// which is all-bits-set at any lane width, so the i8 immediate form
// works for the 16- and 32-bit lane patterns too.
5732 def : Pat<(v16i8 (zext (v16i1 VCCR:$pred))),
5733 (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), ARMVCCNone, VCCR:$pred))>;
5734 def : Pat<(v8i16 (zext (v8i1 VCCR:$pred))),
5735 (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred))>;
5736 def : Pat<(v4i32 (zext (v4i1 VCCR:$pred))),
5737 (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred))>;
5739 def : Pat<(v16i8 (sext (v16i1 VCCR:$pred))),
5740 (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi8 0), ARMVCCNone, VCCR:$pred))>;
5741 def : Pat<(v8i16 (sext (v8i1 VCCR:$pred))),
5742 (v8i16 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred))>;
5743 def : Pat<(v4i32 (sext (v4i1 VCCR:$pred))),
5744 (v4i32 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred))>;
5746 def : Pat<(v16i8 (anyext (v16i1 VCCR:$pred))),
5747 (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), ARMVCCNone, VCCR:$pred))>;
5748 def : Pat<(v8i16 (anyext (v8i1 VCCR:$pred))),
5749 (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred))>;
5750 def : Pat<(v4i32 (anyext (v4i1 VCCR:$pred))),
5751 (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred))>;
// Truncation to a predicate: compare the vector against zero.
// NOTE(review): the 32-bit compare form is used for all three lane
// widths here, and trunc-to-i1 nominally takes the low bit rather than
// "!= 0" — this relies on the producers' boolean contents; confirm
// against the lowering that feeds these patterns.
5753 def : Pat<(v16i1 (trunc (v16i8 MQPR:$v1))),
5754 (v16i1 (MVE_VCMPi32r (v16i8 MQPR:$v1), ZR, ARMCCne))>;
5755 def : Pat<(v8i1 (trunc (v8i16 MQPR:$v1))),
5756 (v8i1 (MVE_VCMPi32r (v8i16 MQPR:$v1), ZR, ARMCCne))>;
5757 def : Pat<(v4i1 (trunc (v4i32 MQPR:$v1))),
5758 (v4i1 (MVE_VCMPi32r (v4i32 MQPR:$v1), ZR, ARMCCne))>;
5761 let Predicates = [HasMVEFloat] in {
// int<->fp conversions of predicates: select an FP 1.0/-1.0 constant
// (given as its VMOV-immediate encoding) against 0.0 under the
// predicate, and compare against zero for the reverse direction.
5763 // 112 is 1.0 in float
5764 def : Pat<(v4f32 (uint_to_fp (v4i1 VCCR:$pred))),
5765 (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 112)), (v4f32 (MVE_VMOVimmi32 0)), ARMVCCNone, VCCR:$pred))>;
5766 // 2620 is 1.0 in half
5767 def : Pat<(v8f16 (uint_to_fp (v8i1 VCCR:$pred))),
5768 (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2620)), (v8f16 (MVE_VMOVimmi16 0)), ARMVCCNone, VCCR:$pred))>;
5769 // 240 is -1.0 in float
5770 def : Pat<(v4f32 (sint_to_fp (v4i1 VCCR:$pred))),
5771 (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 240)), (v4f32 (MVE_VMOVimmi32 0)), ARMVCCNone, VCCR:$pred))>;
5772 // 2748 is -1.0 in half
5773 def : Pat<(v8f16 (sint_to_fp (v8i1 VCCR:$pred))),
5774 (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2748)), (v8f16 (MVE_VMOVimmi16 0)), ARMVCCNone, VCCR:$pred))>;
5776 def : Pat<(v4i1 (fp_to_uint (v4f32 MQPR:$v1))),
5777 (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, ARMCCne))>;
5778 def : Pat<(v8i1 (fp_to_uint (v8f16 MQPR:$v1))),
5779 (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, ARMCCne))>;
5780 def : Pat<(v4i1 (fp_to_sint (v4f32 MQPR:$v1))),
5781 (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, ARMCCne))>;
5782 def : Pat<(v8i1 (fp_to_sint (v8f16 MQPR:$v1))),
5783 (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, ARMCCne))>;
// VPNOT: invert VPR.P0. The entire 32-bit encoding is fixed; the input
// and output predicate are modeled as operands so dataflow is visible.
5786 def MVE_VPNOT : MVE_p<(outs VCCR:$P0), (ins VCCR:$P0_in), NoItinerary,
5787 "vpnot", "", "", vpred_n, "", []> {
5788 let Inst{31-0} = 0b11111110001100010000111101001101;
5789 let Unpredictable{19-17} = 0b111;
5790 let Unpredictable{12} = 0b1;
5791 let Unpredictable{7} = 0b1;
5792 let Unpredictable{5} = 0b1;
5794 let Constraints = "";
5795 let DecoderMethod = "DecodeMVEVPNOT";
// XOR with an all-ones predicate (predicate_cast of 0xFFFF: all 16 VPR
// bits set) is a predicate inversion, at every predicate width.
5798 let Predicates = [HasMVEInt] in {
5799 def : Pat<(v4i1 (xor (v4i1 VCCR:$pred), (v4i1 (predicate_cast (i32 65535))))),
5800 (v4i1 (MVE_VPNOT (v4i1 VCCR:$pred)))>;
5801 def : Pat<(v8i1 (xor (v8i1 VCCR:$pred), (v8i1 (predicate_cast (i32 65535))))),
5802 (v8i1 (MVE_VPNOT (v8i1 VCCR:$pred)))>;
5803 def : Pat<(v16i1 (xor (v16i1 VCCR:$pred), (v16i1 (predicate_cast (i32 65535))))),
5804 (v16i1 (MVE_VPNOT (v16i1 VCCR:$pred)))>;
// Tail-predicated low-overhead loop starts. DLSTP sets up the loop
// count in LR; WLSTP additionally branches past the loop (via $label)
// if no iterations run. size encodes the element width being processed.
5808 class MVE_loltp_start<dag iops, string asm, string ops, bits<2> size>
5809 : t2LOL<(outs GPRlr:$LR), iops, asm, ops> {
5811 let Predicates = [HasMVEInt];
5813 let Inst{21-20} = size;
5814 let Inst{19-16} = Rn{3-0};
5818 class MVE_DLSTP<string asm, bits<2> size>
5819 : MVE_loltp_start<(ins rGPR:$Rn), asm, "$LR, $Rn", size> {
5821 let Inst{11-1} = 0b00000000000;
5822 let Unpredictable{10-1} = 0b1111111111;
5825 class MVE_WLSTP<string asm, bits<2> size>
5826 : MVE_loltp_start<(ins rGPR:$Rn, wlslabel_u11:$label),
5827 asm, "$LR, $Rn, $label", size> {
5830 let Inst{11} = label{0};
5831 let Inst{10-1} = label{10-1};
// WLSTP can transfer control (skip the loop), so it ends a basic block.
5833 let isTerminator = 1;
5836 def MVE_DLSTP_8 : MVE_DLSTP<"dlstp.8", 0b00>;
5837 def MVE_DLSTP_16 : MVE_DLSTP<"dlstp.16", 0b01>;
5838 def MVE_DLSTP_32 : MVE_DLSTP<"dlstp.32", 0b10>;
5839 def MVE_DLSTP_64 : MVE_DLSTP<"dlstp.64", 0b11>;
5841 def MVE_WLSTP_8 : MVE_WLSTP<"wlstp.8", 0b00>;
5842 def MVE_WLSTP_16 : MVE_WLSTP<"wlstp.16", 0b01>;
5843 def MVE_WLSTP_32 : MVE_WLSTP<"wlstp.32", 0b10>;
5844 def MVE_WLSTP_64 : MVE_WLSTP<"wlstp.64", 0b11>;
// Tail-predicated loop endings: LETP branches back and decrements LR;
// LCTP leaves tail-predicated mode without looping.
5846 class MVE_loltp_end<dag oops, dag iops, string asm, string ops>
5847 : t2LOL<oops, iops, asm, ops> {
5848 let Predicates = [HasMVEInt];
5849 let Inst{22-21} = 0b00;
5850 let Inst{19-16} = 0b1111;
5854 def MVE_LETP : MVE_loltp_end<(outs GPRlr:$LRout),
5855 (ins GPRlr:$LRin, lelabel_u11:$label),
5856 "letp", "$LRin, $label"> {
5860 let Inst{11} = label{0};
5861 let Inst{10-1} = label{10-1};
// LETP is a backwards branch, so it terminates its block.
5863 let isTerminator = 1;
5866 def MVE_LCTP : MVE_loltp_end<(outs), (ins pred:$p), "lctp${p}", ""> {
5869 let Inst{11-1} = 0b00000000000;
5870 let Unpredictable{21-20} = 0b11;
5871 let Unpredictable{11-1} = 0b11111111111;
5875 //===----------------------------------------------------------------------===//
5877 //===----------------------------------------------------------------------===//
5879 // PatFrags for loads and stores. Often trying to keep semi-consistent names.
// Pre/post-indexed store fragments gated on minimum alignment
// (>= 4 bytes for the "32" variants, >= 2 for the "16" variants).
5881 def aligned32_pre_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
5882 (pre_store node:$val, node:$ptr, node:$offset), [{
5883 return cast<StoreSDNode>(N)->getAlignment() >= 4;
5885 def aligned32_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
5886 (post_store node:$val, node:$ptr, node:$offset), [{
5887 return cast<StoreSDNode>(N)->getAlignment() >= 4;
5889 def aligned16_pre_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
5890 (pre_store node:$val, node:$ptr, node:$offset), [{
5891 return cast<StoreSDNode>(N)->getAlignment() >= 2;
5893 def aligned16_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
5894 (post_store node:$val, node:$ptr, node:$offset), [{
5895 return cast<StoreSDNode>(N)->getAlignment() >= 2;
// Masked-load fragments, filtered by memory scalar type and (for the
// 16/32-bit variants) alignment; byte loads need no alignment check.
// The sext/zext/ext sub-fragments further filter on extension type.
5899 def aligned_maskedloadvi8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
5900 (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{
5901 auto *Ld = cast<MaskedLoadSDNode>(N);
5902 return Ld->getMemoryVT().getScalarType() == MVT::i8;
5904 def aligned_sextmaskedloadvi8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
5905 (aligned_maskedloadvi8 node:$ptr, node:$pred, node:$passthru), [{
5906 return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
5908 def aligned_zextmaskedloadvi8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
5909 (aligned_maskedloadvi8 node:$ptr, node:$pred, node:$passthru), [{
5910 return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
5912 def aligned_extmaskedloadvi8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
5913 (aligned_maskedloadvi8 node:$ptr, node:$pred, node:$passthru), [{
5914 auto *Ld = cast<MaskedLoadSDNode>(N);
5915 EVT ScalarVT = Ld->getMemoryVT().getScalarType();
5916 return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD;
5918 def aligned_maskedloadvi16: PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
5919 (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{
5920 auto *Ld = cast<MaskedLoadSDNode>(N);
5921 EVT ScalarVT = Ld->getMemoryVT().getScalarType();
5922 return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && Ld->getAlignment() >= 2;
5924 def aligned_sextmaskedloadvi16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
5925 (aligned_maskedloadvi16 node:$ptr, node:$pred, node:$passthru), [{
5926 return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
5928 def aligned_zextmaskedloadvi16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
5929 (aligned_maskedloadvi16 node:$ptr, node:$pred, node:$passthru), [{
5930 return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
5932 def aligned_extmaskedloadvi16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
5933 (aligned_maskedloadvi16 node:$ptr, node:$pred, node:$passthru), [{
5934 auto *Ld = cast<MaskedLoadSDNode>(N);
5935 EVT ScalarVT = Ld->getMemoryVT().getScalarType();
5936 return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD;
5938 def aligned_maskedloadvi32: PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
5939 (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{
5940 auto *Ld = cast<MaskedLoadSDNode>(N);
5941 EVT ScalarVT = Ld->getMemoryVT().getScalarType();
5942 return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && Ld->getAlignment() >= 4;
// Masked-store fragments, same type/alignment filtering scheme as the
// masked-load fragments above.
5945 def aligned_maskedstvi8 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
5946 (masked_st node:$val, node:$ptr, undef, node:$pred), [{
5947 return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
5949 def aligned_maskedstvi16 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
5950 (masked_st node:$val, node:$ptr, undef, node:$pred), [{
5951 auto *St = cast<MaskedStoreSDNode>(N);
5952 EVT ScalarVT = St->getMemoryVT().getScalarType();
5953 return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2;
5955 def aligned_maskedstvi32 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
5956 (masked_st node:$val, node:$ptr, undef, node:$pred), [{
5957 auto *St = cast<MaskedStoreSDNode>(N);
5958 EVT ScalarVT = St->getMemoryVT().getScalarType();
5959 return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4;
// Pre/post-indexed masked stores: first split masked_st by addressing
// mode, then refine by scalar type/alignment as above.
5962 def pre_maskedstore : PatFrag<(ops node:$val, node:$base, node:$offset, node:$mask),
5963 (masked_st node:$val, node:$base, node:$offset, node:$mask), [{
5964 ISD::MemIndexedMode AM = cast<MaskedStoreSDNode>(N)->getAddressingMode();
5965 return AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
5967 def post_maskedstore : PatFrag<(ops node:$val, node:$base, node:$offset, node:$mask),
5968 (masked_st node:$val, node:$base, node:$offset, node:$mask), [{
5969 ISD::MemIndexedMode AM = cast<MaskedStoreSDNode>(N)->getAddressingMode();
5970 return AM == ISD::POST_INC || AM == ISD::POST_DEC;
5972 def aligned_pre_maskedstorevi8 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
5973 (pre_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
5974 return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
5976 def aligned_post_maskedstorevi8 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
5977 (post_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
5978 return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
5980 def aligned_pre_maskedstorevi16 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
5981 (pre_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
5982 auto *St = cast<MaskedStoreSDNode>(N);
5983 EVT ScalarVT = St->getMemoryVT().getScalarType();
5984 return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2;
5986 def aligned_post_maskedstorevi16 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
5987 (post_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
5988 auto *St = cast<MaskedStoreSDNode>(N);
5989 EVT ScalarVT = St->getMemoryVT().getScalarType();
5990 return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2;
5992 def aligned_pre_maskedstorevi32 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
5993 (pre_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
5994 auto *St = cast<MaskedStoreSDNode>(N);
5995 EVT ScalarVT = St->getMemoryVT().getScalarType();
5996 return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4;
5998 def aligned_post_maskedstorevi32 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
5999 (post_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
6000 auto *St = cast<MaskedStoreSDNode>(N);
6001 EVT ScalarVT = St->getMemoryVT().getScalarType();
6002 return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4;
6006 // PatFrags for "Aligned" extending / truncating
// "Aligned" wrappers around the generic extending-load / truncating-store
// fragments. The i8 forms need no alignment constraint; the i16 forms
// inherit MinAlignment = 2 from the enclosing let.
6008 def aligned_extloadvi8 : PatFrag<(ops node:$ptr), (extloadvi8 node:$ptr)>;
6009 def aligned_sextloadvi8 : PatFrag<(ops node:$ptr), (sextloadvi8 node:$ptr)>;
6010 def aligned_zextloadvi8 : PatFrag<(ops node:$ptr), (zextloadvi8 node:$ptr)>;
6012 def aligned_truncstvi8 : PatFrag<(ops node:$val, node:$ptr),
6013 (truncstorevi8 node:$val, node:$ptr)>;
6014 def aligned_post_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset),
6015 (post_truncstvi8 node:$val, node:$base, node:$offset)>;
6016 def aligned_pre_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset),
6017 (pre_truncstvi8 node:$val, node:$base, node:$offset)>;
6019 let MinAlignment = 2 in {
6020 def aligned_extloadvi16 : PatFrag<(ops node:$ptr), (extloadvi16 node:$ptr)>;
6021 def aligned_sextloadvi16 : PatFrag<(ops node:$ptr), (sextloadvi16 node:$ptr)>;
6022 def aligned_zextloadvi16 : PatFrag<(ops node:$ptr), (zextloadvi16 node:$ptr)>;
6024 def aligned_truncstvi16 : PatFrag<(ops node:$val, node:$ptr),
6025 (truncstorevi16 node:$val, node:$ptr)>;
6026 def aligned_post_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset),
6027 (post_truncstvi16 node:$val, node:$base, node:$offset)>;
6028 def aligned_pre_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset),
6029 (pre_truncstvi16 node:$val, node:$base, node:$offset)>;
// Truncating masked-store fragments: plain, pre-indexed and
// post-indexed, each refined by memory scalar type (and >= 2 alignment
// for the 16-bit variants).
6032 def truncmaskedst : PatFrag<(ops node:$val, node:$base, node:$pred),
6033 (masked_st node:$val, node:$base, undef, node:$pred), [{
6034 return cast<MaskedStoreSDNode>(N)->isTruncatingStore();
6036 def aligned_truncmaskedstvi8 : PatFrag<(ops node:$val, node:$base, node:$pred),
6037 (truncmaskedst node:$val, node:$base, node:$pred), [{
6038 return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
6040 def aligned_truncmaskedstvi16 : PatFrag<(ops node:$val, node:$base, node:$pred),
6041 (truncmaskedst node:$val, node:$base, node:$pred), [{
6042 auto *St = cast<MaskedStoreSDNode>(N);
6043 EVT ScalarVT = St->getMemoryVT().getScalarType();
6044 return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2;
6046 def pre_truncmaskedst : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred),
6047 (masked_st node:$val, node:$base, node:$offset, node:$pred), [{
6048 ISD::MemIndexedMode AM = cast<MaskedStoreSDNode>(N)->getAddressingMode();
6049 return cast<MaskedStoreSDNode>(N)->isTruncatingStore() && (AM == ISD::PRE_INC || AM == ISD::PRE_DEC);
6051 def aligned_pre_truncmaskedstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred),
6052 (pre_truncmaskedst node:$val, node:$base, node:$offset, node:$pred), [{
6053 return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
6055 def aligned_pre_truncmaskedstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred),
6056 (pre_truncmaskedst node:$val, node:$base, node:$offset, node:$pred), [{
6057 auto *St = cast<MaskedStoreSDNode>(N);
6058 EVT ScalarVT = St->getMemoryVT().getScalarType();
6059 return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2;
6061 def post_truncmaskedst : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd),
6062 (masked_st node:$val, node:$base, node:$offset, node:$postd), [{
6063 ISD::MemIndexedMode AM = cast<MaskedStoreSDNode>(N)->getAddressingMode();
6064 return cast<MaskedStoreSDNode>(N)->isTruncatingStore() && (AM == ISD::POST_INC || AM == ISD::POST_DEC);
6066 def aligned_post_truncmaskedstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd),
6067 (post_truncmaskedst node:$val, node:$base, node:$offset, node:$postd), [{
6068 return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
6070 def aligned_post_truncmaskedstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd),
6071 (post_truncmaskedst node:$val, node:$base, node:$offset, node:$postd), [{
6072 auto *St = cast<MaskedStoreSDNode>(N);
6073 EVT ScalarVT = St->getMemoryVT().getScalarType();
6074 return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2;
6077 // Load/store patterns
// Pattern helpers mapping (possibly masked) vector stores with a
// 7-bit-shifted immediate address onto one register-immediate
// instruction; the multiclass instantiates them for every 128-bit type.
6079 class MVE_vector_store_typed<ValueType Ty, Instruction RegImmInst,
6080 PatFrag StoreKind, int shift>
6081 : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr),
6082 (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr)>;
6084 class MVE_vector_maskedstore_typed<ValueType Ty, Instruction RegImmInst,
6085 PatFrag StoreKind, int shift>
6086 : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr, VCCR:$pred),
6087 (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr, ARMVCCThen, VCCR:$pred)>;
6089 multiclass MVE_vector_store<Instruction RegImmInst, PatFrag StoreKind,
6091 def : MVE_vector_store_typed<v16i8, RegImmInst, StoreKind, shift>;
6092 def : MVE_vector_store_typed<v8i16, RegImmInst, StoreKind, shift>;
6093 def : MVE_vector_store_typed<v8f16, RegImmInst, StoreKind, shift>;
6094 def : MVE_vector_store_typed<v4i32, RegImmInst, StoreKind, shift>;
6095 def : MVE_vector_store_typed<v4f32, RegImmInst, StoreKind, shift>;
6096 def : MVE_vector_store_typed<v2i64, RegImmInst, StoreKind, shift>;
6097 def : MVE_vector_store_typed<v2f64, RegImmInst, StoreKind, shift>;
// Selects an unpredicated vector load of value type Ty into the given
// register+immediate load instruction ('shift' scales the 7-bit
// immediate offset of the addressing mode).
6100 class MVE_vector_load_typed<ValueType Ty, Instruction RegImmInst,
6101 PatFrag LoadKind, int shift>
6102 : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr)),
6103 (Ty (RegImmInst t2addrmode_imm7<shift>:$addr))>;
// Predicated (masked) vector load. Only matches when the passthru value
// for the inactive lanes is the all-zeros vector (NEONimmAllZerosV),
// which is the semantics the predicated VLDR provides.
6105 class MVE_vector_maskedload_typed<ValueType Ty, Instruction RegImmInst,
6106 PatFrag LoadKind, int shift>
6107 : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr, VCCR:$pred, (Ty NEONimmAllZerosV))),
6108 (Ty (RegImmInst t2addrmode_imm7<shift>:$addr, ARMVCCThen, VCCR:$pred))>;
// Instantiates MVE_vector_load_typed for every 128-bit MVE vector type
// (mirrors MVE_vector_store above).
6110 multiclass MVE_vector_load<Instruction RegImmInst, PatFrag LoadKind,
6112 def : MVE_vector_load_typed<v16i8, RegImmInst, LoadKind, shift>;
6113 def : MVE_vector_load_typed<v8i16, RegImmInst, LoadKind, shift>;
6114 def : MVE_vector_load_typed<v8f16, RegImmInst, LoadKind, shift>;
6115 def : MVE_vector_load_typed<v4i32, RegImmInst, LoadKind, shift>;
6116 def : MVE_vector_load_typed<v4f32, RegImmInst, LoadKind, shift>;
6117 def : MVE_vector_load_typed<v2i64, RegImmInst, LoadKind, shift>;
6118 def : MVE_vector_load_typed<v2f64, RegImmInst, LoadKind, shift>;
// Pre/post-indexed vector store: the base register (tGPR:$Rn) and the
// immediate offset are separate operands, so the instruction can
// write back the updated address.
6121 class MVE_vector_offset_store_typed<ValueType Ty, Instruction Opcode,
6122 PatFrag StoreKind, int shift>
6123 : Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<shift>:$addr),
6124 (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<shift>:$addr)>;
// Predicated variant of MVE_vector_offset_store_typed; the VCCR
// predicate is appended after ARMVCCThen, as for the other masked forms.
6126 class MVE_vector_offset_maskedstore_typed<ValueType Ty, Instruction Opcode,
6127 PatFrag StoreKind, int shift>
6128 : Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<shift>:$addr, VCCR:$pred),
6129 (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<shift>:$addr, ARMVCCThen, VCCR:$pred)>;
// Instantiates MVE_vector_offset_store_typed for every 128-bit MVE
// vector type.
6131 multiclass MVE_vector_offset_store<Instruction RegImmInst, PatFrag StoreKind,
6133 def : MVE_vector_offset_store_typed<v16i8, RegImmInst, StoreKind, shift>;
6134 def : MVE_vector_offset_store_typed<v8i16, RegImmInst, StoreKind, shift>;
6135 def : MVE_vector_offset_store_typed<v8f16, RegImmInst, StoreKind, shift>;
6136 def : MVE_vector_offset_store_typed<v4i32, RegImmInst, StoreKind, shift>;
6137 def : MVE_vector_offset_store_typed<v4f32, RegImmInst, StoreKind, shift>;
6138 def : MVE_vector_offset_store_typed<v2i64, RegImmInst, StoreKind, shift>;
6139 def : MVE_vector_offset_store_typed<v2f64, RegImmInst, StoreKind, shift>;
// Little-endian only: any access width can serve any vector type, so the
// multiclasses above are instantiated once per supported alignment and
// the instruction is picked by alignment alone.
6143 let Predicates = [HasMVEInt, IsLE] in {
// Stores (byte-, halfword- and word-aligned).
6145   defm : MVE_vector_store<MVE_VSTRBU8, byte_alignedstore, 0>;
6146   defm : MVE_vector_store<MVE_VSTRHU16, hword_alignedstore, 1>;
6147   defm : MVE_vector_store<MVE_VSTRWU32, alignedstore32, 2>;
// Loads (byte-, halfword- and word-aligned).
6150   defm : MVE_vector_load<MVE_VLDRBU8, byte_alignedload, 0>;
6151   defm : MVE_vector_load<MVE_VLDRHU16, hword_alignedload, 1>;
6152   defm : MVE_vector_load<MVE_VLDRWU32, alignedload32, 2>;
6154   // Pre/post inc stores
6155   defm : MVE_vector_offset_store<MVE_VSTRBU8_pre, pre_store, 0>;
6156   defm : MVE_vector_offset_store<MVE_VSTRBU8_post, post_store, 0>;
6157   defm : MVE_vector_offset_store<MVE_VSTRHU16_pre, aligned16_pre_store, 1>;
6158   defm : MVE_vector_offset_store<MVE_VSTRHU16_post, aligned16_post_store, 1>;
6159   defm : MVE_vector_offset_store<MVE_VSTRWU32_pre, aligned32_pre_store, 2>;
6160   defm : MVE_vector_offset_store<MVE_VSTRWU32_post, aligned32_post_store, 2>;
// Big-endian only: the access width must match the element width, so
// each vector type is paired with its width-matched VLDR/VSTR. Accesses
// that cannot be width-matched are lowered via a byte access plus a
// VREV to restore the lane order.
6163 let Predicates = [HasMVEInt, IsBE] in {
// Width-matched aligned stores.
6165   def : MVE_vector_store_typed<v16i8, MVE_VSTRBU8, store, 0>;
6166   def : MVE_vector_store_typed<v8i16, MVE_VSTRHU16, alignedstore16, 1>;
6167   def : MVE_vector_store_typed<v8f16, MVE_VSTRHU16, alignedstore16, 1>;
6168   def : MVE_vector_store_typed<v4i32, MVE_VSTRWU32, alignedstore32, 2>;
6169   def : MVE_vector_store_typed<v4f32, MVE_VSTRWU32, alignedstore32, 2>;
// Width-matched aligned loads.
6172   def : MVE_vector_load_typed<v16i8, MVE_VLDRBU8, load, 0>;
6173   def : MVE_vector_load_typed<v8i16, MVE_VLDRHU16, alignedload16, 1>;
6174   def : MVE_vector_load_typed<v8f16, MVE_VLDRHU16, alignedload16, 1>;
6175   def : MVE_vector_load_typed<v4i32, MVE_VLDRWU32, alignedload32, 2>;
6176   def : MVE_vector_load_typed<v4f32, MVE_VLDRWU32, alignedload32, 2>;
6178   // Other unaligned loads/stores need to go though a VREV
6179   def : Pat<(v2f64 (load t2addrmode_imm7<0>:$addr)),
6180             (v2f64 (MVE_VREV64_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
6181   def : Pat<(v2i64 (load t2addrmode_imm7<0>:$addr)),
6182             (v2i64 (MVE_VREV64_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
6183   def : Pat<(v4i32 (load t2addrmode_imm7<0>:$addr)),
6184             (v4i32 (MVE_VREV32_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
6185   def : Pat<(v4f32 (load t2addrmode_imm7<0>:$addr)),
6186             (v4f32 (MVE_VREV32_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
6187   def : Pat<(v8i16 (load t2addrmode_imm7<0>:$addr)),
6188             (v8i16 (MVE_VREV16_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
6189   def : Pat<(v8f16 (load t2addrmode_imm7<0>:$addr)),
6190             (v8f16 (MVE_VREV16_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
6191   def : Pat<(store (v2f64 MQPR:$val), t2addrmode_imm7<0>:$addr),
6192             (MVE_VSTRBU8 (MVE_VREV64_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
6193   def : Pat<(store (v2i64 MQPR:$val), t2addrmode_imm7<0>:$addr),
6194             (MVE_VSTRBU8 (MVE_VREV64_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
6195   def : Pat<(store (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr),
6196             (MVE_VSTRBU8 (MVE_VREV32_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
6197   def : Pat<(store (v4f32 MQPR:$val), t2addrmode_imm7<0>:$addr),
6198             (MVE_VSTRBU8 (MVE_VREV32_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
6199   def : Pat<(store (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr),
6200             (MVE_VSTRBU8 (MVE_VREV16_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
6201   def : Pat<(store (v8f16 MQPR:$val), t2addrmode_imm7<0>:$addr),
6202             (MVE_VSTRBU8 (MVE_VREV16_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
6204   // Pre/Post inc stores
6205   def : MVE_vector_offset_store_typed<v16i8, MVE_VSTRBU8_pre, pre_store, 0>;
6206   def : MVE_vector_offset_store_typed<v16i8, MVE_VSTRBU8_post, post_store, 0>;
6207   def : MVE_vector_offset_store_typed<v8i16, MVE_VSTRHU16_pre, aligned16_pre_store, 1>;
6208   def : MVE_vector_offset_store_typed<v8i16, MVE_VSTRHU16_post, aligned16_post_store, 1>;
6209   def : MVE_vector_offset_store_typed<v8f16, MVE_VSTRHU16_pre, aligned16_pre_store, 1>;
6210   def : MVE_vector_offset_store_typed<v8f16, MVE_VSTRHU16_post, aligned16_post_store, 1>;
6211   def : MVE_vector_offset_store_typed<v4i32, MVE_VSTRWU32_pre, aligned32_pre_store, 2>;
6212   def : MVE_vector_offset_store_typed<v4i32, MVE_VSTRWU32_post, aligned32_post_store, 2>;
6213   def : MVE_vector_offset_store_typed<v4f32, MVE_VSTRWU32_pre, aligned32_pre_store, 2>;
6214   def : MVE_vector_offset_store_typed<v4f32, MVE_VSTRWU32_post, aligned32_post_store, 2>;
// Masked (predicated) loads/stores are always width-matched to the
// element type, so the same patterns serve both endiannesses.
6217 let Predicates = [HasMVEInt] in {
6218   // Aligned masked store, shared between LE and BE
6219   def : MVE_vector_maskedstore_typed<v16i8, MVE_VSTRBU8, aligned_maskedstvi8, 0>;
6220   def : MVE_vector_maskedstore_typed<v8i16, MVE_VSTRHU16, aligned_maskedstvi16, 1>;
6221   def : MVE_vector_maskedstore_typed<v8f16, MVE_VSTRHU16, aligned_maskedstvi16, 1>;
6222   def : MVE_vector_maskedstore_typed<v4i32, MVE_VSTRWU32, aligned_maskedstvi32, 2>;
6223   def : MVE_vector_maskedstore_typed<v4f32, MVE_VSTRWU32, aligned_maskedstvi32, 2>;
6225   // Pre/Post inc masked stores
6226   def : MVE_vector_offset_maskedstore_typed<v16i8, MVE_VSTRBU8_pre, aligned_pre_maskedstorevi8, 0>;
6227   def : MVE_vector_offset_maskedstore_typed<v16i8, MVE_VSTRBU8_post, aligned_post_maskedstorevi8, 0>;
6228   def : MVE_vector_offset_maskedstore_typed<v8i16, MVE_VSTRHU16_pre, aligned_pre_maskedstorevi16, 1>;
6229   def : MVE_vector_offset_maskedstore_typed<v8i16, MVE_VSTRHU16_post, aligned_post_maskedstorevi16, 1>;
6230   def : MVE_vector_offset_maskedstore_typed<v8f16, MVE_VSTRHU16_pre, aligned_pre_maskedstorevi16, 1>;
6231   def : MVE_vector_offset_maskedstore_typed<v8f16, MVE_VSTRHU16_post, aligned_post_maskedstorevi16, 1>;
6232   def : MVE_vector_offset_maskedstore_typed<v4i32, MVE_VSTRWU32_pre, aligned_pre_maskedstorevi32, 2>;
6233   def : MVE_vector_offset_maskedstore_typed<v4i32, MVE_VSTRWU32_post, aligned_post_maskedstorevi32, 2>;
6234   def : MVE_vector_offset_maskedstore_typed<v4f32, MVE_VSTRWU32_pre, aligned_pre_maskedstorevi32, 2>;
6235   def : MVE_vector_offset_maskedstore_typed<v4f32, MVE_VSTRWU32_post, aligned_post_maskedstorevi32, 2>;
6237   // Aligned masked loads
6238   def : MVE_vector_maskedload_typed<v16i8, MVE_VLDRBU8, aligned_maskedloadvi8, 0>;
6239   def : MVE_vector_maskedload_typed<v8i16, MVE_VLDRHU16, aligned_maskedloadvi16, 1>;
6240   def : MVE_vector_maskedload_typed<v8f16, MVE_VLDRHU16, aligned_maskedloadvi16, 1>;
6241   def : MVE_vector_maskedload_typed<v4i32, MVE_VLDRWU32, aligned_maskedloadvi32, 2>;
6242   def : MVE_vector_maskedload_typed<v4f32, MVE_VLDRWU32, aligned_maskedloadvi32, 2>;
6245 // Widening/Narrowing Loads/Stores
// Patterns for widening loads and narrowing (truncating) stores.
// 'Amble' is a suffix ("vi8"/"vi16") spliced via !cast to select the
// matching PatFrag family; StoreInst is a string so the "_pre"/"_post"
// instruction variants can be derived by name concatenation.
6247 multiclass MVEExtLoadStore<Instruction LoadSInst, Instruction LoadUInst, string StoreInst,
6248 string Amble, ValueType VT, int Shift> {
// Unpredicated truncating stores (offset, post- and pre-indexed forms).
6250   def : Pat<(!cast<PatFrag>("aligned_truncst"#Amble) (VT MQPR:$val), taddrmode_imm7<Shift>:$addr),
6251             (!cast<Instruction>(StoreInst) MQPR:$val, taddrmode_imm7<Shift>:$addr)>;
6252   def : Pat<(!cast<PatFrag>("aligned_post_truncst"#Amble) (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<Shift>:$addr),
6253             (!cast<Instruction>(StoreInst#"_post") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<Shift>:$addr)>;
6254   def : Pat<(!cast<PatFrag>("aligned_pre_truncst"#Amble) (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<Shift>:$addr),
6255             (!cast<Instruction>(StoreInst#"_pre") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<Shift>:$addr)>;
6257   // Masked trunc stores
6258   def : Pat<(!cast<PatFrag>("aligned_truncmaskedst"#Amble) (VT MQPR:$val), taddrmode_imm7<Shift>:$addr, VCCR:$pred),
6259             (!cast<Instruction>(StoreInst) MQPR:$val, taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred)>;
6260   def : Pat<(!cast<PatFrag>("aligned_post_truncmaskedst"#Amble) (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, VCCR:$pred),
6261             (!cast<Instruction>(StoreInst#"_post") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, ARMVCCThen, VCCR:$pred)>;
6262   def : Pat<(!cast<PatFrag>("aligned_pre_truncmaskedst"#Amble) (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, VCCR:$pred),
6263             (!cast<Instruction>(StoreInst#"_pre") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, ARMVCCThen, VCCR:$pred)>;
// Extending loads: anyext and zext use the unsigned load, sext the
// signed one.
6266   def : Pat<(VT (!cast<PatFrag>("aligned_extload"#Amble) taddrmode_imm7<Shift>:$addr)),
6267             (VT (LoadUInst taddrmode_imm7<Shift>:$addr))>;
6268   def : Pat<(VT (!cast<PatFrag>("aligned_sextload"#Amble) taddrmode_imm7<Shift>:$addr)),
6269             (VT (LoadSInst taddrmode_imm7<Shift>:$addr))>;
6270   def : Pat<(VT (!cast<PatFrag>("aligned_zextload"#Amble) taddrmode_imm7<Shift>:$addr)),
6271             (VT (LoadUInst taddrmode_imm7<Shift>:$addr))>;
// Masked extending loads (zero passthru for inactive lanes).
6274   def : Pat<(VT (!cast<PatFrag>("aligned_extmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT NEONimmAllZerosV))),
6275             (VT (LoadUInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>;
6276   def : Pat<(VT (!cast<PatFrag>("aligned_sextmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT NEONimmAllZerosV))),
6277             (VT (LoadSInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>;
6278   def : Pat<(VT (!cast<PatFrag>("aligned_zextmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT NEONimmAllZerosV))),
6279             (VT (LoadUInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>;
// Instantiate the widening/narrowing patterns for the three legal
// combinations: i8<->i16, i8<->i32 and i16<->i32.
6282 let Predicates = [HasMVEInt] in {
6283   defm : MVEExtLoadStore<MVE_VLDRBS16, MVE_VLDRBU16, "MVE_VSTRB16", "vi8", v8i16, 0>;
6284   defm : MVEExtLoadStore<MVE_VLDRBS32, MVE_VLDRBU32, "MVE_VSTRB32", "vi8", v4i32, 0>;
6285   defm : MVEExtLoadStore<MVE_VLDRHS32, MVE_VLDRHU32, "MVE_VSTRH32", "vi16", v4i32, 1>;
6289 // Bit convert patterns
// Bitcasts between types with the same element width are no-ops in the
// register, regardless of endianness.
6291 let Predicates = [HasMVEInt] in {
6292   def : Pat<(v2f64 (bitconvert (v2i64 MQPR:$src))), (v2f64 MQPR:$src)>;
6293   def : Pat<(v2i64 (bitconvert (v2f64 MQPR:$src))), (v2i64 MQPR:$src)>;
6295   def : Pat<(v4i32 (bitconvert (v4f32 MQPR:$src))), (v4i32 MQPR:$src)>;
6296   def : Pat<(v4f32 (bitconvert (v4i32 MQPR:$src))), (v4f32 MQPR:$src)>;
6298   def : Pat<(v8i16 (bitconvert (v8f16 MQPR:$src))), (v8i16 MQPR:$src)>;
6299   def : Pat<(v8f16 (bitconvert (v8i16 MQPR:$src))), (v8f16 MQPR:$src)>;
// Little-endian: every cross-width bitcast is also a register no-op,
// since lane order in the register matches byte order in memory.
6302 let Predicates = [IsLE,HasMVEInt] in {
6303   def : Pat<(v2f64 (bitconvert (v4f32 MQPR:$src))), (v2f64 MQPR:$src)>;
6304   def : Pat<(v2f64 (bitconvert (v4i32 MQPR:$src))), (v2f64 MQPR:$src)>;
6305   def : Pat<(v2f64 (bitconvert (v8f16 MQPR:$src))), (v2f64 MQPR:$src)>;
6306   def : Pat<(v2f64 (bitconvert (v8i16 MQPR:$src))), (v2f64 MQPR:$src)>;
6307   def : Pat<(v2f64 (bitconvert (v16i8 MQPR:$src))), (v2f64 MQPR:$src)>;
6309   def : Pat<(v2i64 (bitconvert (v4f32 MQPR:$src))), (v2i64 MQPR:$src)>;
6310   def : Pat<(v2i64 (bitconvert (v4i32 MQPR:$src))), (v2i64 MQPR:$src)>;
6311   def : Pat<(v2i64 (bitconvert (v8f16 MQPR:$src))), (v2i64 MQPR:$src)>;
6312   def : Pat<(v2i64 (bitconvert (v8i16 MQPR:$src))), (v2i64 MQPR:$src)>;
6313   def : Pat<(v2i64 (bitconvert (v16i8 MQPR:$src))), (v2i64 MQPR:$src)>;
6315   def : Pat<(v4f32 (bitconvert (v2f64 MQPR:$src))), (v4f32 MQPR:$src)>;
6316   def : Pat<(v4f32 (bitconvert (v2i64 MQPR:$src))), (v4f32 MQPR:$src)>;
6317   def : Pat<(v4f32 (bitconvert (v8f16 MQPR:$src))), (v4f32 MQPR:$src)>;
6318   def : Pat<(v4f32 (bitconvert (v8i16 MQPR:$src))), (v4f32 MQPR:$src)>;
6319   def : Pat<(v4f32 (bitconvert (v16i8 MQPR:$src))), (v4f32 MQPR:$src)>;
6321   def : Pat<(v4i32 (bitconvert (v2f64 MQPR:$src))), (v4i32 MQPR:$src)>;
6322   def : Pat<(v4i32 (bitconvert (v2i64 MQPR:$src))), (v4i32 MQPR:$src)>;
6323   def : Pat<(v4i32 (bitconvert (v8f16 MQPR:$src))), (v4i32 MQPR:$src)>;
6324   def : Pat<(v4i32 (bitconvert (v8i16 MQPR:$src))), (v4i32 MQPR:$src)>;
6325   def : Pat<(v4i32 (bitconvert (v16i8 MQPR:$src))), (v4i32 MQPR:$src)>;
6327   def : Pat<(v8f16 (bitconvert (v2f64 MQPR:$src))), (v8f16 MQPR:$src)>;
6328   def : Pat<(v8f16 (bitconvert (v2i64 MQPR:$src))), (v8f16 MQPR:$src)>;
6329   def : Pat<(v8f16 (bitconvert (v4f32 MQPR:$src))), (v8f16 MQPR:$src)>;
6330   def : Pat<(v8f16 (bitconvert (v4i32 MQPR:$src))), (v8f16 MQPR:$src)>;
6331   def : Pat<(v8f16 (bitconvert (v16i8 MQPR:$src))), (v8f16 MQPR:$src)>;
6333   def : Pat<(v8i16 (bitconvert (v2f64 MQPR:$src))), (v8i16 MQPR:$src)>;
6334   def : Pat<(v8i16 (bitconvert (v2i64 MQPR:$src))), (v8i16 MQPR:$src)>;
6335   def : Pat<(v8i16 (bitconvert (v4f32 MQPR:$src))), (v8i16 MQPR:$src)>;
6336   def : Pat<(v8i16 (bitconvert (v4i32 MQPR:$src))), (v8i16 MQPR:$src)>;
6337   def : Pat<(v8i16 (bitconvert (v16i8 MQPR:$src))), (v8i16 MQPR:$src)>;
6339   def : Pat<(v16i8 (bitconvert (v2f64 MQPR:$src))), (v16i8 MQPR:$src)>;
6340   def : Pat<(v16i8 (bitconvert (v2i64 MQPR:$src))), (v16i8 MQPR:$src)>;
6341   def : Pat<(v16i8 (bitconvert (v4f32 MQPR:$src))), (v16i8 MQPR:$src)>;
6342   def : Pat<(v16i8 (bitconvert (v4i32 MQPR:$src))), (v16i8 MQPR:$src)>;
6343   def : Pat<(v16i8 (bitconvert (v8f16 MQPR:$src))), (v16i8 MQPR:$src)>;
6344   def : Pat<(v16i8 (bitconvert (v8i16 MQPR:$src))), (v16i8 MQPR:$src)>;
// Big-endian: a cross-width bitcast must permute bytes, implemented as a
// VREV whose group size is the larger of the two element widths and
// whose element size is the smaller (e.g. v4i32 <-> v16i8 uses
// MVE_VREV32_8).
6347 let Predicates = [IsBE,HasMVEInt] in {
6348   def : Pat<(v2f64 (bitconvert (v4f32 MQPR:$src))), (v2f64 (MVE_VREV64_32 MQPR:$src))>;
6349   def : Pat<(v2f64 (bitconvert (v4i32 MQPR:$src))), (v2f64 (MVE_VREV64_32 MQPR:$src))>;
6350   def : Pat<(v2f64 (bitconvert (v8f16 MQPR:$src))), (v2f64 (MVE_VREV64_16 MQPR:$src))>;
6351   def : Pat<(v2f64 (bitconvert (v8i16 MQPR:$src))), (v2f64 (MVE_VREV64_16 MQPR:$src))>;
6352   def : Pat<(v2f64 (bitconvert (v16i8 MQPR:$src))), (v2f64 (MVE_VREV64_8 MQPR:$src))>;
6354   def : Pat<(v2i64 (bitconvert (v4f32 MQPR:$src))), (v2i64 (MVE_VREV64_32 MQPR:$src))>;
6355   def : Pat<(v2i64 (bitconvert (v4i32 MQPR:$src))), (v2i64 (MVE_VREV64_32 MQPR:$src))>;
6356   def : Pat<(v2i64 (bitconvert (v8f16 MQPR:$src))), (v2i64 (MVE_VREV64_16 MQPR:$src))>;
6357   def : Pat<(v2i64 (bitconvert (v8i16 MQPR:$src))), (v2i64 (MVE_VREV64_16 MQPR:$src))>;
6358   def : Pat<(v2i64 (bitconvert (v16i8 MQPR:$src))), (v2i64 (MVE_VREV64_8 MQPR:$src))>;
6360   def : Pat<(v4f32 (bitconvert (v2f64 MQPR:$src))), (v4f32 (MVE_VREV64_32 MQPR:$src))>;
6361   def : Pat<(v4f32 (bitconvert (v2i64 MQPR:$src))), (v4f32 (MVE_VREV64_32 MQPR:$src))>;
6362   def : Pat<(v4f32 (bitconvert (v8f16 MQPR:$src))), (v4f32 (MVE_VREV32_16 MQPR:$src))>;
6363   def : Pat<(v4f32 (bitconvert (v8i16 MQPR:$src))), (v4f32 (MVE_VREV32_16 MQPR:$src))>;
6364   def : Pat<(v4f32 (bitconvert (v16i8 MQPR:$src))), (v4f32 (MVE_VREV32_8 MQPR:$src))>;
6366   def : Pat<(v4i32 (bitconvert (v2f64 MQPR:$src))), (v4i32 (MVE_VREV64_32 MQPR:$src))>;
6367   def : Pat<(v4i32 (bitconvert (v2i64 MQPR:$src))), (v4i32 (MVE_VREV64_32 MQPR:$src))>;
6368   def : Pat<(v4i32 (bitconvert (v8f16 MQPR:$src))), (v4i32 (MVE_VREV32_16 MQPR:$src))>;
6369   def : Pat<(v4i32 (bitconvert (v8i16 MQPR:$src))), (v4i32 (MVE_VREV32_16 MQPR:$src))>;
6370   def : Pat<(v4i32 (bitconvert (v16i8 MQPR:$src))), (v4i32 (MVE_VREV32_8 MQPR:$src))>;
6372   def : Pat<(v8f16 (bitconvert (v2f64 MQPR:$src))), (v8f16 (MVE_VREV64_16 MQPR:$src))>;
6373   def : Pat<(v8f16 (bitconvert (v2i64 MQPR:$src))), (v8f16 (MVE_VREV64_16 MQPR:$src))>;
6374   def : Pat<(v8f16 (bitconvert (v4f32 MQPR:$src))), (v8f16 (MVE_VREV32_16 MQPR:$src))>;
6375   def : Pat<(v8f16 (bitconvert (v4i32 MQPR:$src))), (v8f16 (MVE_VREV32_16 MQPR:$src))>;
6376   def : Pat<(v8f16 (bitconvert (v16i8 MQPR:$src))), (v8f16 (MVE_VREV16_8 MQPR:$src))>;
6378   def : Pat<(v8i16 (bitconvert (v2f64 MQPR:$src))), (v8i16 (MVE_VREV64_16 MQPR:$src))>;
6379   def : Pat<(v8i16 (bitconvert (v2i64 MQPR:$src))), (v8i16 (MVE_VREV64_16 MQPR:$src))>;
6380   def : Pat<(v8i16 (bitconvert (v4f32 MQPR:$src))), (v8i16 (MVE_VREV32_16 MQPR:$src))>;
6381   def : Pat<(v8i16 (bitconvert (v4i32 MQPR:$src))), (v8i16 (MVE_VREV32_16 MQPR:$src))>;
6382   def : Pat<(v8i16 (bitconvert (v16i8 MQPR:$src))), (v8i16 (MVE_VREV16_8 MQPR:$src))>;
6384   def : Pat<(v16i8 (bitconvert (v2f64 MQPR:$src))), (v16i8 (MVE_VREV64_8 MQPR:$src))>;
6385   def : Pat<(v16i8 (bitconvert (v2i64 MQPR:$src))), (v16i8 (MVE_VREV64_8 MQPR:$src))>;
6386   def : Pat<(v16i8 (bitconvert (v4f32 MQPR:$src))), (v16i8 (MVE_VREV32_8 MQPR:$src))>;
6387   def : Pat<(v16i8 (bitconvert (v4i32 MQPR:$src))), (v16i8 (MVE_VREV32_8 MQPR:$src))>;
6388   def : Pat<(v16i8 (bitconvert (v8f16 MQPR:$src))), (v16i8 (MVE_VREV16_8 MQPR:$src))>;
6389   def : Pat<(v16i8 (bitconvert (v8i16 MQPR:$src))), (v16i8 (MVE_VREV16_8 MQPR:$src))>;