//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// NEON-specific Operands.
//===----------------------------------------------------------------------===//
// Generic NEON modified-immediate operand (no parser match class; used where
// the immediate has already been validated during instruction selection).
def nModImm : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
}

// Splat-style modified immediates, one per element size.
def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
def nImmSplatI8 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI8AsmOperand;
}
def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
def nImmSplatI16 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI16AsmOperand;
}
def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
def nImmSplatI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI32AsmOperand;
}
// Bitwise-inverted splat immediates (no custom print method).
def nImmSplatNotI16AsmOperand : AsmOperandClass { let Name = "NEONi16splatNot"; }
def nImmSplatNotI16 : Operand<i32> {
  let ParserMatchClass = nImmSplatNotI16AsmOperand;
}
def nImmSplatNotI32AsmOperand : AsmOperandClass { let Name = "NEONi32splatNot"; }
def nImmSplatNotI32 : Operand<i32> {
  let ParserMatchClass = nImmSplatNotI32AsmOperand;
}
def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
def nImmVMOVI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32AsmOperand;
}
// Asm operand classes for VMOV/VMVN immediates where an immediate of element
// type From is replicated to fill a vector of element type To.
class nImmVMOVIAsmOperandReplicate<ValueType From, ValueType To>
  : AsmOperandClass {
  let Name = "NEONi" # To.Size # "vmovi" # From.Size # "Replicate";
  let PredicateMethod = "isNEONmovReplicate<" # From.Size # ", " # To.Size # ">";
  let RenderMethod = "addNEONvmovi" # From.Size # "ReplicateOperands";
}

class nImmVINVIAsmOperandReplicate<ValueType From, ValueType To>
  : AsmOperandClass {
  let Name = "NEONi" # To.Size # "invi" # From.Size # "Replicate";
  let PredicateMethod = "isNEONinvReplicate<" # From.Size # ", " # To.Size # ">";
  let RenderMethod = "addNEONinvi" # From.Size # "ReplicateOperands";
}

class nImmVMOVIReplicate<ValueType From, ValueType To> : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVIAsmOperandReplicate<From, To>;
}

class nImmVINVIReplicate<ValueType From, ValueType To> : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVINVIAsmOperandReplicate<From, To>;
}
def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
def nImmVMOVI32Neg : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32NegAsmOperand;
}
// Floating-point immediate for VMOV.f32.
def nImmVMOVF32 : Operand<i32> {
  let PrintMethod = "printFPImmOperand";
  let ParserMatchClass = FPImmOperand;
}
def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
def nImmSplatI64 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI64AsmOperand;
}
// Vector lane-index operands; the ImmLeaf predicate bounds the index by the
// number of lanes of the given element size in a 64-bit D register.
def VectorIndex8Operand  : AsmOperandClass { let Name = "VectorIndex8"; }
def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
def VectorIndex64Operand : AsmOperandClass { let Name = "VectorIndex64"; }
def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 8;
}]> {
  let ParserMatchClass = VectorIndex8Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 4;
}]> {
  let ParserMatchClass = VectorIndex16Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 2;
}]> {
  let ParserMatchClass = VectorIndex32Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex64 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 1;
}]> {
  let ParserMatchClass = VectorIndex64Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
// Register list of one D register.
def VecListOneDAsmOperand : AsmOperandClass {
  let Name = "VecListOneD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
  let ParserMatchClass = VecListOneDAsmOperand;
}
// Register list of two sequential D registers.
def VecListDPairAsmOperand : AsmOperandClass {
  let Name = "VecListDPair";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
  let ParserMatchClass = VecListDPairAsmOperand;
}
// Register list of three sequential D registers.
def VecListThreeDAsmOperand : AsmOperandClass {
  let Name = "VecListThreeD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
  let ParserMatchClass = VecListThreeDAsmOperand;
}
// Register list of four sequential D registers.
def VecListFourDAsmOperand : AsmOperandClass {
  let Name = "VecListFourD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
  let ParserMatchClass = VecListFourDAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpaced";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
  let ParserMatchClass = VecListDPairSpacedAsmOperand;
}
// Register list of three D registers spaced by 2 (three Q registers).
def VecListThreeQAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
  let ParserMatchClass = VecListThreeQAsmOperand;
}
// Register list of four D registers spaced by 2 (four Q registers).
def VecListFourQAsmOperand : AsmOperandClass {
  let Name = "VecListFourQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
  let ParserMatchClass = VecListFourQAsmOperand;
}
// Register list of one D register, with "all lanes" subscripting.
def VecListOneDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListOneDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
  let ParserMatchClass = VecListOneDAllLanesAsmOperand;
}
// Register list of two D registers, with "all lanes" subscripting.
def VecListDPairAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairAllLanes : RegisterOperand<DPair,
                                           "printVectorListTwoAllLanes"> {
  let ParserMatchClass = VecListDPairAllLanesAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpacedAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpacedAllLanes : RegisterOperand<DPairSpc,
                                                 "printVectorListTwoSpacedAllLanes"> {
  let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
}
// Register list of three D registers, with "all lanes" subscripting.
def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeDAllLanes : RegisterOperand<DPR,
                                            "printVectorListThreeAllLanes"> {
  let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
}
// Register list of three D registers spaced by 2 (three sequential Q regs).
def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQAllLanes : RegisterOperand<DPR,
                                            "printVectorListThreeSpacedAllLanes"> {
  let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
}
// Register list of four D registers, with "all lanes" subscripting.
def VecListFourDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
  let ParserMatchClass = VecListFourDAllLanesAsmOperand;
}
// Register list of four D registers spaced by 2 (four sequential Q regs).
def VecListFourQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQAllLanes : RegisterOperand<DPR,
                                           "printVectorListFourSpacedAllLanes"> {
  let ParserMatchClass = VecListFourQAllLanesAsmOperand;
}
// Register list of one D register, with byte lane subscripting.
def VecListOneDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListOneDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of two D registers with byte lane subscripting.
def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of two Q registers with half-word lane subscripting.
def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of three D registers with byte lane subscripting.
def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of three Q registers with half-word lane subscripting.
def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of four D registers with byte lane subscripting.
def VecListFourDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of four Q registers with half-word lane subscripting.
def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Alignment-predicated load/store fragments, used to choose between VLDM/VSTM
// and NEON load/store forms based on the access alignment.
def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() >= 8;
}]>;
def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() >= 8;
}]>;
def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 4;
}]>;
def word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 4;
}]>;
def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 2;
}]>;
def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 2;
}]>;
def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 1;
}]>;
def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 1;
}]>;
def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() < 4;
}]>;
def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                    (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() < 4;
}]>;
//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//
// Vector compare nodes; the "Z" variants compare against zero.
def SDTARMVCMP    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
def SDTARMVCMPZ   : SDTypeProfile<1, 1, []>;

def NEONvceq      : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
def NEONvceqz     : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
def NEONvcge      : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
def NEONvcgez     : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
def NEONvclez     : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
def NEONvcgeu     : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt      : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
def NEONvcgtz     : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
def NEONvcltz     : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
def NEONvcgtu     : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst      : SDNode<"ARMISD::VTST", SDTARMVCMP>;

// Types for vector shift by immediates. The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types. The "SHINS" version is for shift and insert operations.
def SDTARMVSH     : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;
def SDTARMVSHX    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def SDTARMVSHINS  : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

def NEONvshl      : SDNode<"ARMISD::VSHL", SDTARMVSH>;
def NEONvshrs     : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
def NEONvshru     : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
def NEONvshrn     : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;

def NEONvrshrs    : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
def NEONvrshru    : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
def NEONvrshrn    : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;

def NEONvqshls    : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
def NEONvqshlu    : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
def NEONvqshlsu   : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
def NEONvqshrns   : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
def NEONvqshrnu   : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
def NEONvqshrnsu  : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;

def NEONvqrshrns  : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
def NEONvqrshrnu  : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;

def NEONvsli      : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
def NEONvsri      : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;

def SDTARMVGETLN  : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;

def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def NEONvmovImm   : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
def NEONvmvnImm   : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;

def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;
def NEONvorrImm   : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
def NEONvbicImm   : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;

def NEONvbsl      : SDNode<"ARMISD::VBSL",
                           SDTypeProfile<1, 3, [SDTCisVec<0>,
                                                SDTCisSameAs<0, 1>,
                                                SDTCisSameAs<0, 2>,
                                                SDTCisSameAs<0, 3>]>>;

def NEONvdup      : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;

// VDUPLANE can produce a quad-register result from a double-register source,
// so the result is not constrained to match the source.
def NEONvduplane  : SDNode<"ARMISD::VDUPLANE",
                           SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                                SDTCisVT<2, i32>]>>;

def SDTARMVEXT    : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext      : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

def SDTARMVSHUF   : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def NEONvrev64    : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
def NEONvrev32    : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
def NEONvrev16    : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;

def SDTARMVSHUF2  : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>,
                                         SDTCisSameAs<0, 3>]>;
def NEONzip       : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp       : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn       : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

def SDTARMVMULL   : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                         SDTCisSameAs<1, 2>]>;
def NEONvmulls    : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
def NEONvmullu    : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;

def SDTARMVTBL1   : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
                                         SDTCisVT<2, v8i8>]>;
def SDTARMVTBL2   : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
                                         SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>;
def NEONvtbl1     : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>;
def NEONvtbl2     : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>;
// Match a VMOVIMM whose decoded modified-immediate is all-zeros.
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 32 && EltVal == 0);
}]>;

// Match a VMOVIMM whose decoded modified-immediate is all-ones.
def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 8 && EltVal == 0xff);
}]>;
//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//
// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
// NOTE(review): the def names and itinerary lines were lost in the corrupted
// dump; restored as VLDMQIA/IIC_fpLoad_m per upstream — confirm against
// ARMBaseInstrInfo users of this pseudo.
def VLDMQIA
  : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
                   IIC_fpLoad_m, "",
                   [(set DPair:$dst, (v2f64 (word_alignedload GPR:$Rn)))]>;

// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQIA
  : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
                   IIC_fpStore_m, "",
                   [(word_alignedstore (v2f64 DPair:$src), GPR:$Rn)]>;
// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VLDQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;

class VLDQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;

class VLDQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src), itin,
                "$src = $dst">;
class VLDQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb, $src = $dst">;
// NOTE: this 'let ... in' scope is closed later in the file, past this chunk.
let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {

// VLD1     : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1,
          "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD1]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
class VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x2,
          "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
def VLD1d8  : VLD1D<{0,0,0,?}, "8",  addrmode6align64>;
def VLD1d16 : VLD1D<{0,1,0,?}, "16", addrmode6align64>;
def VLD1d32 : VLD1D<{1,0,0,?}, "32", addrmode6align64>;
def VLD1d64 : VLD1D<{1,1,0,?}, "64", addrmode6align64>;

def VLD1q8  : VLD1Q<{0,0,?,?}, "8",  addrmode6align64or128>;
def VLD1q16 : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>;
def VLD1q32 : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>;
def VLD1q64 : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>;
// ...with address register writeback:
multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}
multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}
defm VLD1d8wb  : VLD1DWB<{0,0,0,?}, "8",  addrmode6align64>;
defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>;
defm VLD1q8wb  : VLD1QWB<{0,0,?,?}, "8",  addrmode6align64or128>;
defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
// ...with 3 registers
class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt,
          "$Vd, $Rn", "", []>, Sched<[WriteVLD3]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}
def VLD1d8T  : VLD1D3<{0,0,0,?}, "8",  addrmode6align64>;
def VLD1d16T : VLD1D3<{0,1,0,?}, "16", addrmode6align64>;
def VLD1d32T : VLD1D3<{1,0,0,?}, "32", addrmode6align64>;
def VLD1d64T : VLD1D3<{1,1,0,?}, "64", addrmode6align64>;

defm VLD1d8Twb  : VLD1D3WB<{0,0,0,?}, "8",  addrmode6align64>;
defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>;

def VLD1d8TPseudo             : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d16TPseudo            : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d32TPseudo            : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudo            : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;

def VLD1q8HighTPseudo     : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q8LowTPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q16HighTPseudo    : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q16LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q32HighTPseudo    : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q32LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q64HighTPseudo    : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q64LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
// NOTE(review): closing '}' lines for class/multiclass bodies appear to have
// been lost in this extraction of the file; restore them before building.
789 // ...with 4 registers
// VLD1 with a list of four D registers. Inst{5-4} carries the alignment
// field taken from the address-mode operand (Rn{5-4}).
790 class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode>
791   : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
792           (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt,
793           "$Vd, $Rn", "", []>, Sched<[WriteVLD4]> {
795   let Inst{5-4} = Rn{5-4};
796   let DecoderMethod = "DecodeVLDST1Instruction";
// Writeback variants: _fixed is post-increment by access size (Rm=0b1101),
// _register adds a general-purpose register offset.
// NOTE(review): the itinerary here is IIC_VLD1x2u rather than IIC_VLD1x4u —
// looks like it was carried over from the 2-register multiclass; confirm
// against the itinerary definitions before changing.
798 multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
799   def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
800                     (ins AddrMode:$Rn), IIC_VLD1x2u,
801                     "vld1", Dt, "$Vd, $Rn!",
802                     "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
803     let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
804     let Inst{5-4} = Rn{5-4};
805     let DecoderMethod = "DecodeVLDST1Instruction";
807   def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
808                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
809                        "vld1", Dt, "$Vd, $Rn, $Rm",
810                        "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
811     let Inst{5-4} = Rn{5-4};
812     let DecoderMethod = "DecodeVLDST1Instruction";
// VLD1D4 instantiations per element size; four D regs allow 64/128/256-bit
// alignment hints, hence the two '?' alignment bits in op7_4.
816 def VLD1d8Q      : VLD1D4<{0,0,?,?}, "8",  addrmode6align64or128or256>;
817 def VLD1d16Q     : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
818 def VLD1d32Q     : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
819 def VLD1d64Q     : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;
821 defm VLD1d8Qwb   : VLD1D4WB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
822 defm VLD1d16Qwb  : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
823 defm VLD1d32Qwb  : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
824 defm VLD1d64Qwb  : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;
// Post-RA-expanded pseudos for the 4-register forms.
826 def VLD1d8QPseudo             : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
827 def VLD1d16QPseudo            : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
828 def VLD1d32QPseudo            : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
829 def VLD1d64QPseudo            : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
830 def VLD1d64QPseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
831 def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
833 def VLD1q8LowQPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
834 def VLD1q8HighQPseudo     : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
835 def VLD1q16LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
836 def VLD1q16HighQPseudo    : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
837 def VLD1q32LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
838 def VLD1q32HighQPseudo    : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
839 def VLD1q64LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
840 def VLD1q64HighQPseudo    : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
842 // VLD2 : Vector Load (multiple 2-element structures)
// Base class: op11_8 selects 2-reg (0b1000/0b1001) vs 4-reg (0b0011) forms;
// VdTy is the register-list operand class; Inst{5-4} carries alignment.
843 class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
844            InstrItinClass itin, Operand AddrMode>
845   : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
846           (ins AddrMode:$Rn), itin,
847           "vld2", Dt, "$Vd, $Rn", "", []> {
849   let Inst{5-4} = Rn{5-4};
850   let DecoderMethod = "DecodeVLDST2Instruction";
// D-register-pair forms (adjacent pair).
853 def VLD2d8   : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2,
854                     addrmode6align64or128>, Sched<[WriteVLD2]>;
855 def VLD2d16  : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2,
856                     addrmode6align64or128>, Sched<[WriteVLD2]>;
857 def VLD2d32  : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2,
858                     addrmode6align64or128>, Sched<[WriteVLD2]>;
// Q-register forms: four D registers loaded as two structures.
860 def VLD2q8   : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2,
861                     addrmode6align64or128or256>, Sched<[WriteVLD4]>;
862 def VLD2q16  : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2,
863                     addrmode6align64or128or256>, Sched<[WriteVLD4]>;
864 def VLD2q32  : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2,
865                     addrmode6align64or128or256>, Sched<[WriteVLD4]>;
867 def VLD2q8Pseudo  : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
868 def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
869 def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
871 // ...with address register writeback:
// _fixed: post-increment by the access size (Rm hard-coded to 0b1101);
// _register: post-increment by a GPR offset.
872 multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
873                   RegisterOperand VdTy, InstrItinClass itin, Operand AddrMode> {
874   def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
875                      (ins AddrMode:$Rn), itin,
876                      "vld2", Dt, "$Vd, $Rn!",
877                      "$Rn.addr = $wb", []> {
878     let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
879     let Inst{5-4} = Rn{5-4};
880     let DecoderMethod = "DecodeVLDST2Instruction";
882   def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
883                         (ins AddrMode:$Rn, rGPR:$Rm), itin,
884                         "vld2", Dt, "$Vd, $Rn, $Rm",
885                         "$Rn.addr = $wb", []> {
886     let Inst{5-4} = Rn{5-4};
887     let DecoderMethod = "DecodeVLDST2Instruction";
// Writeback instantiations, mirroring the non-writeback defs above.
891 defm VLD2d8wb  : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u,
892                         addrmode6align64or128>, Sched<[WriteVLD2]>;
893 defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u,
894                         addrmode6align64or128>, Sched<[WriteVLD2]>;
895 defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u,
896                         addrmode6align64or128>, Sched<[WriteVLD2]>;
898 defm VLD2q8wb  : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u,
899                         addrmode6align64or128or256>, Sched<[WriteVLD4]>;
900 defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u,
901                         addrmode6align64or128or256>, Sched<[WriteVLD4]>;
902 defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u,
903                         addrmode6align64or128or256>, Sched<[WriteVLD4]>;
905 def VLD2q8PseudoWB_fixed     : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
906 def VLD2q16PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
907 def VLD2q32PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
908 def VLD2q8PseudoWB_register  : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
909 def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
910 def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
912 // ...with double-spaced registers
// op11_8 = 0b1001 selects the every-other-D-register layout
// (VecListDPairSpaced, e.g. {d0, d2}).
913 def VLD2b8   : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2,
914                     addrmode6align64or128>, Sched<[WriteVLD2]>;
915 def VLD2b16  : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2,
916                     addrmode6align64or128>, Sched<[WriteVLD2]>;
917 def VLD2b32  : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2,
918                     addrmode6align64or128>, Sched<[WriteVLD2]>;
919 defm VLD2b8wb  : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u,
920                         addrmode6align64or128>, Sched<[WriteVLD2]>;
921 defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u,
922                         addrmode6align64or128>, Sched<[WriteVLD2]>;
923 defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u,
924                         addrmode6align64or128>, Sched<[WriteVLD2]>;
926 // VLD3 : Vector Load (multiple 3-element structures)
// Three explicit DPR outs; the asm list is printed as "{d0, d1, d2}".
927 class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
928   : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
929           (ins addrmode6:$Rn), IIC_VLD3,
930           "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []>, Sched<[WriteVLD3]> {
933   let DecoderMethod = "DecodeVLDST3Instruction";
// op11_8 = 0b0100: consecutive D registers.
936 def VLD3d8   : VLD3D<0b0100, {0,0,0,?}, "8">;
937 def VLD3d16  : VLD3D<0b0100, {0,1,0,?}, "16">;
938 def VLD3d32  : VLD3D<0b0100, {1,0,0,?}, "32">;
940 def VLD3d8Pseudo  : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
941 def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
942 def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
944 // ...with address register writeback:
// Writeback form takes an am6offset $Rm (immediate-step or register) and
// produces the updated base in $wb.
945 class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
946   : NLdSt<0, 0b10, op11_8, op7_4,
947           (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
948           (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
949           "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
950           "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
952   let DecoderMethod = "DecodeVLDST3Instruction";
955 def VLD3d8_UPD  : VLD3DWB<0b0100, {0,0,0,?}, "8">;
956 def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
957 def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;
959 def VLD3d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
960 def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
961 def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
963 // ...with double-spaced registers:
// op11_8 = 0b0101: every-other-D-register ("q") layout.
964 def VLD3q8   : VLD3D<0b0101, {0,0,0,?}, "8">;
965 def VLD3q16  : VLD3D<0b0101, {0,1,0,?}, "16">;
966 def VLD3q32  : VLD3D<0b0101, {1,0,0,?}, "32">;
967 def VLD3q8_UPD  : VLD3DWB<0b0101, {0,0,0,?}, "8">;
968 def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
969 def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;
971 def VLD3q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
972 def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
973 def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
975 // ...alternate versions to be allocated odd register numbers:
976 def VLD3q8oddPseudo   : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
977 def VLD3q16oddPseudo  : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
978 def VLD3q32oddPseudo  : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
980 def VLD3q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
981 def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
982 def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
984 // VLD4 : Vector Load (multiple 4-element structures)
// Same shape as VLD3D but with a fourth destination and alignment bits
// Inst{5-4} taken from the address-mode operand.
985 class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
986   : NLdSt<0, 0b10, op11_8, op7_4,
987           (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
988           (ins addrmode6:$Rn), IIC_VLD4,
989           "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []>,
992   let Inst{5-4} = Rn{5-4};
993   let DecoderMethod = "DecodeVLDST4Instruction";
// op11_8 = 0b0000: consecutive D registers.
996 def VLD4d8   : VLD4D<0b0000, {0,0,?,?}, "8">;
997 def VLD4d16  : VLD4D<0b0000, {0,1,?,?}, "16">;
998 def VLD4d32  : VLD4D<0b0000, {1,0,?,?}, "32">;
1000 def VLD4d8Pseudo  : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
1001 def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
1002 def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
1004 // ...with address register writeback:
1005 class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1006   : NLdSt<0, 0b10, op11_8, op7_4,
1007           (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
1008           (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
1009           "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
1010           "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
1011   let Inst{5-4} = Rn{5-4};
1012   let DecoderMethod = "DecodeVLDST4Instruction";
1015 def VLD4d8_UPD  : VLD4DWB<0b0000, {0,0,?,?}, "8">;
1016 def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
1017 def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;
1019 def VLD4d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1020 def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1021 def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1023 // ...with double-spaced registers:
// op11_8 = 0b0001: every-other-D-register ("q") layout.
1024 def VLD4q8   : VLD4D<0b0001, {0,0,?,?}, "8">;
1025 def VLD4q16  : VLD4D<0b0001, {0,1,?,?}, "16">;
1026 def VLD4q32  : VLD4D<0b0001, {1,0,?,?}, "32">;
1027 def VLD4q8_UPD  : VLD4DWB<0b0001, {0,0,?,?}, "8">;
1028 def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
1029 def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;
1031 def VLD4q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1032 def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1033 def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1035 // ...alternate versions to be allocated odd register numbers:
1036 def VLD4q8oddPseudo   : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
1037 def VLD4q16oddPseudo  : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
1038 def VLD4q32oddPseudo  : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
1040 def VLD4q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1041 def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1042 def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1044 } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
1046 // Classes for VLD*LN pseudo-instructions with multi-register operands.
1047 // These are expanded to real instructions after register allocation.
// Each pseudo ties $src to $dst (read-modify-write of the full vector while
// one lane is replaced); WB variants additionally tie the updated base.
// Q = 128-bit, QQ = 256-bit, QQQQ = 512-bit register-group operands.
1048 class VLDQLNPseudo<InstrItinClass itin>
1049   : PseudoNLdSt<(outs QPR:$dst),
1050                 (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
1051                 itin, "$src = $dst">;
1052 class VLDQLNWBPseudo<InstrItinClass itin>
1053   : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
1054                 (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
1055                  nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
1056 class VLDQQLNPseudo<InstrItinClass itin>
1057   : PseudoNLdSt<(outs QQPR:$dst),
1058                 (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
1059                 itin, "$src = $dst">;
1060 class VLDQQLNWBPseudo<InstrItinClass itin>
1061   : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
1062                 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
1063                  nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
1064 class VLDQQQQLNPseudo<InstrItinClass itin>
1065   : PseudoNLdSt<(outs QQQQPR:$dst),
1066                 (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
1067                 itin, "$src = $dst">;
1068 class VLDQQQQLNWBPseudo<InstrItinClass itin>
1069   : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
1070                 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
1071                  nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
1073 // VLD1LN : Vector Load (single element to one lane)
// Pattern: load a scalar and insert it into lane $lane of $src, producing
// $Vd (tied to $src elsewhere in the class hierarchy).
1074 class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
1076   : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
1077             (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
1078             IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
1080             [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
1081                                           (i32 (LoadOp addrmode6:$Rn)),
1084   let DecoderMethod = "DecodeVLD1LN";
// 32-bit variant uses addrmode6oneL32 (single 32-bit element address mode).
1086 class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
1088   : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
1089             (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
1090             IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
1092             [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
1093                                           (i32 (LoadOp addrmode6oneL32:$Rn)),
1094                                           imm:$lane))]>, Sched<[WriteVLD1]> {
1096   let DecoderMethod = "DecodeVLD1LN";
// Q-register lane loads go through a pseudo carrying the same insert pattern.
1098 class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln>,
1099                                                     Sched<[WriteVLD1]> {
1100   let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
1101                                                (i32 (LoadOp addrmode6:$addr)),
// Lane index bits: 8-bit lanes use Inst{7-5}, 16-bit Inst{7-6}, 32-bit
// Inst{7}; remaining low bits carry alignment from Rn.
1105 def VLD1LNd8  : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
1106   let Inst{7-5} = lane{2-0};
1108 def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
1109   let Inst{7-6} = lane{1-0};
1110   let Inst{5-4} = Rn{5-4};
1112 def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
1113   let Inst{7} = lane{0};
1114   let Inst{5-4} = Rn{5-4};
1117 def VLD1LNq8Pseudo  : VLD1QLNPseudo<v16i8, extloadi8>;
1118 def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
1119 def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;
// f32 lane inserts reuse the i32 lane-load instructions (same bits moved).
1121 def : Pat<(vector_insert (v2f32 DPR:$src),
1122                          (f32 (load addrmode6:$addr)), imm:$lane),
1123           (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
1124 def : Pat<(vector_insert (v4f32 QPR:$src),
1125                          (f32 (load addrmode6:$addr)), imm:$lane),
1126           (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
1128 // A 64-bit subvector insert to the first 128-bit vector position
1129 // is a subregister copy that needs no instruction.
1130 def : Pat<(insert_subvector undef, (v1i64 DPR:$src), (i32 0)),
1131           (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
1132 def : Pat<(insert_subvector undef, (v2i32 DPR:$src), (i32 0)),
1133           (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
1134 def : Pat<(insert_subvector undef, (v2f32 DPR:$src), (i32 0)),
1135           (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
1136 def : Pat<(insert_subvector undef, (v4i16 DPR:$src), (i32 0)),
1137           (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
1138 def : Pat<(insert_subvector undef, (v4f16 DPR:$src), (i32 0)),
1139           (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
1140 def : Pat<(insert_subvector (v16i8 undef), (v8i8 DPR:$src), (i32 0)),
1141           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
1144 let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
1146 // ...with address register writeback:
// Lane load + post-index update; $src tied to $Vd, base update in $wb.
1147 class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1148   : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb),
1149             (ins addrmode6:$Rn, am6offset:$Rm,
1150              DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
1151             "\\{$Vd[$lane]\\}, $Rn$Rm",
1152             "$src = $Vd, $Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
1153   let DecoderMethod = "DecodeVLD1LN";
1156 def VLD1LNd8_UPD  : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
1157   let Inst{7-5} = lane{2-0};
1159 def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
1160   let Inst{7-6} = lane{1-0};
1161   let Inst{4}   = Rn{4};
1163 def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
1164   let Inst{7} = lane{0};
1165   let Inst{5} = Rn{4};
1166   let Inst{4} = Rn{4};
1169 def VLD1LNq8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
1170 def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
1171 def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
1173 // VLD2LN : Vector Load (single 2-element structure to one lane)
// Two destination D registers, each with one lane replaced; both sources
// are tied so untouched lanes pass through.
1174 class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
1175   : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
1176             (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
1177             IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
1178             "$src1 = $Vd, $src2 = $dst2", []>, Sched<[WriteVLD1]> {
1180   let Inst{4}   = Rn{4};
1181   let DecoderMethod = "DecodeVLD2LN";
1184 def VLD2LNd8  : VLD2LN<0b0001, {?,?,?,?}, "8"> {
1185   let Inst{7-5} = lane{2-0};
1187 def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
1188   let Inst{7-6} = lane{1-0};
1190 def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
1191   let Inst{7} = lane{0};
1194 def VLD2LNd8Pseudo  : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
1195 def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
1196 def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
1198 // ...with double-spaced registers:
// Spacing selected by a '1' in the op7_4 bit that was '0' above.
1199 def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
1200   let Inst{7-6} = lane{1-0};
1202 def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
1203   let Inst{7} = lane{0};
1206 def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
1207 def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
1209 // ...with address register writeback:
1210 class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1211   : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
1212             (ins addrmode6:$Rn, am6offset:$Rm,
1213              DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
1214             "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
1215             "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
1216   let Inst{4}   = Rn{4};
1217   let DecoderMethod = "DecodeVLD2LN";
1220 def VLD2LNd8_UPD  : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
1221   let Inst{7-5} = lane{2-0};
1223 def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
1224   let Inst{7-6} = lane{1-0};
1226 def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
1227   let Inst{7} = lane{0};
1230 def VLD2LNd8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
1231 def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
1232 def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
1234 def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
1235   let Inst{7-6} = lane{1-0};
1237 def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
1238   let Inst{7} = lane{0};
1241 def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
1242 def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
1244 // VLD3LN : Vector Load (single 3-element structure to one lane)
// Three tied destination/source D registers; one lane of each replaced.
1245 class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
1246   : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
1247             (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
1248              nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
1249             "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
1250             "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []>, Sched<[WriteVLD2]> {
1252   let DecoderMethod = "DecodeVLD3LN";
1255 def VLD3LNd8  : VLD3LN<0b0010, {?,?,?,0}, "8"> {
1256   let Inst{7-5} = lane{2-0};
1258 def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
1259   let Inst{7-6} = lane{1-0};
1261 def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
1262   let Inst{7} = lane{0};
1265 def VLD3LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
1266 def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
1267 def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
1269 // ...with double-spaced registers:
1270 def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
1271   let Inst{7-6} = lane{1-0};
1273 def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
1274   let Inst{7} = lane{0};
1277 def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
1278 def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
1280 // ...with address register writeback:
1281 class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1282   : NLdStLn<1, 0b10, op11_8, op7_4,
1283             (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
1284             (ins addrmode6:$Rn, am6offset:$Rm,
1285              DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
1286             IIC_VLD3lnu, "vld3", Dt,
1287             "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
1288             "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
1289             []>, Sched<[WriteVLD2]> {
1290   let DecoderMethod = "DecodeVLD3LN";
1293 def VLD3LNd8_UPD  : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
1294   let Inst{7-5} = lane{2-0};
1296 def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
1297   let Inst{7-6} = lane{1-0};
1299 def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
1300   let Inst{7} = lane{0};
1303 def VLD3LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1304 def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1305 def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1307 def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
1308   let Inst{7-6} = lane{1-0};
1310 def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
1311   let Inst{7} = lane{0};
1314 def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1315 def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1317 // VLD4LN : Vector Load (single 4-element structure to one lane)
// Four tied destination/source D registers; Inst{4} (and Inst{5} for the
// 32-bit forms) carries alignment from the address-mode operand.
1318 class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
1319   : NLdStLn<1, 0b10, op11_8, op7_4,
1320             (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
1321             (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
1322              nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
1323             "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
1324             "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>,
1325     Sched<[WriteVLD2]> {
1327   let Inst{4}   = Rn{4};
1328   let DecoderMethod = "DecodeVLD4LN";
1331 def VLD4LNd8  : VLD4LN<0b0011, {?,?,?,?}, "8"> {
1332   let Inst{7-5} = lane{2-0};
1334 def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
1335   let Inst{7-6} = lane{1-0};
1337 def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
1338   let Inst{7} = lane{0};
1339   let Inst{5} = Rn{5};
1342 def VLD4LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1343 def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1344 def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1346 // ...with double-spaced registers:
1347 def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
1348   let Inst{7-6} = lane{1-0};
1350 def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
1351   let Inst{7} = lane{0};
1352   let Inst{5} = Rn{5};
1355 def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1356 def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1358 // ...with address register writeback:
1359 class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1360   : NLdStLn<1, 0b10, op11_8, op7_4,
1361             (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
1362             (ins addrmode6:$Rn, am6offset:$Rm,
1363              DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
1364             IIC_VLD4lnu, "vld4", Dt,
1365             "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
1366             "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
1368   let Inst{4}   = Rn{4};
1369   let DecoderMethod = "DecodeVLD4LN"  ;
1372 def VLD4LNd8_UPD  : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
1373   let Inst{7-5} = lane{2-0};
1375 def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
1376   let Inst{7-6} = lane{1-0};
1378 def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
1379   let Inst{7} = lane{0};
1380   let Inst{5} = Rn{5};
1383 def VLD4LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1384 def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1385 def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1387 def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
1388   let Inst{7-6} = lane{1-0};
1390 def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
1391   let Inst{7} = lane{0};
1392   let Inst{5} = Rn{5};
1395 def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1396 def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1398 } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
1400 // VLD1DUP : Vector Load (single element to all lanes)
// Loads one scalar and duplicates it into every lane of a single D register
// (NEONvdup pattern). Inst{4} is the alignment bit from the address mode.
1401 class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
1403   : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
1405           IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
1406           [(set VecListOneDAllLanes:$Vd,
1407                 (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]>,
1408    Sched<[WriteVLD2]> {
1410   let Inst{4} = Rn{4};
1411   let DecoderMethod = "DecodeVLD1DupInstruction";
1413 def VLD1DUPd8  : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8,
1414                          addrmode6dupalignNone>;
1415 def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16,
1416                          addrmode6dupalign16>;
1417 def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load,
1418                          addrmode6dupalign32>;
// f32 dup reuses the i32 instruction (same bits broadcast).
1420 def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
1421           (VLD1DUPd32 addrmode6:$addr)>;
// Q-register variant: duplicate into both D registers of a pair.
1423 class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
1425   : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
1426           (ins AddrMode:$Rn), IIC_VLD1dup,
1427           "vld1", Dt, "$Vd, $Rn", "",
1428           [(set VecListDPairAllLanes:$Vd,
1429                 (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
1431   let Inst{4} = Rn{4};
1432   let DecoderMethod = "DecodeVLD1DupInstruction";
1435 def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8,
1436                           addrmode6dupalignNone>;
1437 def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16,
1438                           addrmode6dupalign16>;
1439 def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load,
1440                           addrmode6dupalign32>;
1442 def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
1443           (VLD1DUPq32 addrmode6:$addr)>;
1445 let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
1446 // ...with address register writeback:
// D-register dup with base-register update (_fixed: step by access size;
// _register: step by GPR).
1447 multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1448   def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
1449                      (outs VecListOneDAllLanes:$Vd, GPR:$wb),
1450                      (ins AddrMode:$Rn), IIC_VLD1dupu,
1451                      "vld1", Dt, "$Vd, $Rn!",
1452                      "$Rn.addr = $wb", []> {
1453     let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1454     let Inst{4} = Rn{4};
1455     let DecoderMethod = "DecodeVLD1DupInstruction";
1457   def _register : NLdSt<1, 0b10, 0b1100, op7_4,
1458                         (outs VecListOneDAllLanes:$Vd, GPR:$wb),
1459                         (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
1460                         "vld1", Dt, "$Vd, $Rn, $Rm",
1461                         "$Rn.addr = $wb", []> {
1462     let Inst{4} = Rn{4};
1463     let DecoderMethod = "DecodeVLD1DupInstruction";
// D-register-pair (Q) dup with writeback; same two sub-defs.
1466 multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1467   def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
1468                      (outs VecListDPairAllLanes:$Vd, GPR:$wb),
1469                      (ins AddrMode:$Rn), IIC_VLD1dupu,
1470                      "vld1", Dt, "$Vd, $Rn!",
1471                      "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
1472     let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1473     let Inst{4} = Rn{4};
1474     let DecoderMethod = "DecodeVLD1DupInstruction";
1476   def _register : NLdSt<1, 0b10, 0b1100, op7_4,
1477                         (outs VecListDPairAllLanes:$Vd, GPR:$wb),
1478                         (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
1479                         "vld1", Dt, "$Vd, $Rn, $Rm",
1480                         "$Rn.addr = $wb", []> {
1481     let Inst{4} = Rn{4};
1482     let DecoderMethod = "DecodeVLD1DupInstruction";
1486 defm VLD1DUPd8wb  : VLD1DUPWB<{0,0,0,0}, "8", addrmode6dupalignNone>;
1487 defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>;
1488 defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>;
1490 defm VLD1DUPq8wb  : VLD1QDUPWB<{0,0,1,0}, "8", addrmode6dupalignNone>;
1491 defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>;
1492 defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>;
1494 // VLD2DUP : Vector Load (single 2-element structure to all lanes)
// VdTy chooses adjacent-pair vs double-spaced-pair all-lanes operands.
1495 class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy, Operand AddrMode>
1496   : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
1497           (ins AddrMode:$Rn), IIC_VLD2dup,
1498           "vld2", Dt, "$Vd, $Rn", "", []> {
1500   let Inst{4} = Rn{4};
1501   let DecoderMethod = "DecodeVLD2DupInstruction";
1504 def VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8",  VecListDPairAllLanes,
1505                          addrmode6dupalign16>;
1506 def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes,
1507                          addrmode6dupalign32>;
1508 def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes,
1509                          addrmode6dupalign64>;
1511 // HACK this one, VLD2DUPd8x2 must be changed at the same time with VLD2b8 or
1512 // "vld2.8 {d0[], d2[]}, [r4:32]" will become "vld2.8 {d0, d2}, [r4:32]".
1513 // ...with double-spaced registers
1514 def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8",  VecListDPairSpacedAllLanes,
1515                            addrmode6dupalign16>;
1516 def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
1517                            addrmode6dupalign32>;
1518 def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
1519                            addrmode6dupalign64>;
// Even/odd-register pseudos, expanded after register allocation.
1521 def VLD2DUPq8EvenPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1522 def VLD2DUPq8OddPseudo   : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1523 def VLD2DUPq16EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1524 def VLD2DUPq16OddPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1525 def VLD2DUPq32EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1526 def VLD2DUPq32OddPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1528 // ...with address register writeback:
// _fixed: post-increment by access size (Rm=0b1101); _register: GPR offset.
1529 multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
1531   def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
1532                      (outs VdTy:$Vd, GPR:$wb),
1533                      (ins AddrMode:$Rn), IIC_VLD2dupu,
1534                      "vld2", Dt, "$Vd, $Rn!",
1535                      "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
1536     let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1537     let Inst{4} = Rn{4};
1538     let DecoderMethod = "DecodeVLD2DupInstruction";
1540   def _register : NLdSt<1, 0b10, 0b1101, op7_4,
1541                         (outs VdTy:$Vd, GPR:$wb),
1542                         (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu,
1543                         "vld2", Dt, "$Vd, $Rn, $Rm",
1544                         "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
1545     let Inst{4} = Rn{4};
1546     let DecoderMethod = "DecodeVLD2DupInstruction";
// Adjacent-pair writeback instantiations...
1550 defm VLD2DUPd8wb    : VLD2DUPWB<{0,0,0,0}, "8",  VecListDPairAllLanes,
1551                                 addrmode6dupalign16>;
1552 defm VLD2DUPd16wb   : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes,
1553                                 addrmode6dupalign32>;
1554 defm VLD2DUPd32wb   : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes,
1555                                 addrmode6dupalign64>;
// ...and double-spaced-pair writeback instantiations.
1557 defm VLD2DUPd8x2wb  : VLD2DUPWB<{0,0,1,0}, "8",  VecListDPairSpacedAllLanes,
1558                                 addrmode6dupalign16>;
1559 defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
1560                                 addrmode6dupalign32>;
1561 defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
1562                                 addrmode6dupalign64>;
1564 // VLD3DUP : Vector Load (single 3-element structure to all lanes)
// Loads one 3-element structure and replicates each element to every lane of
// three D registers ($Vd, $dst2, $dst3). No writeback; no alignment choice
// beyond what addrmode6dup provides.
1565 class VLD3DUP<bits<4> op7_4, string Dt>
1566 : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
1567 (ins addrmode6dup:$Rn), IIC_VLD3dup,
1568 "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []>,
1569 Sched<[WriteVLD2]> {
1572 let DecoderMethod = "DecodeVLD3DupInstruction";
// Consecutive-register forms (op7_4 bit 1 clear) ...
1575 def VLD3DUPd8 : VLD3DUP<{0,0,0,?}, "8">;
1576 def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
1577 def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;
// ... and their pseudos used until register allocation.
1579 def VLD3DUPd8Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1580 def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1581 def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1583 // ...with double-spaced registers (not used for codegen):
1584 def VLD3DUPq8 : VLD3DUP<{0,0,1,?}, "8">;
1585 def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
1586 def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;
// Even/odd-half pseudos over an 8-D-register (QQQQ) tuple.
1588 def VLD3DUPq8EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1589 def VLD3DUPq8OddPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1590 def VLD3DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1591 def VLD3DUPq16OddPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1592 def VLD3DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1593 def VLD3DUPq32OddPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1595 // ...with address register writeback:
// Single class (not a multiclass): am6offset:$Rm models both the fixed "!"
// and the register post-increment; the updated base comes back in $wb.
1596 class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode>
1597 : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
1598 (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu,
1599 "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
1600 "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
1602 let DecoderMethod = "DecodeVLD3DupInstruction";
// Writeback (_UPD) variants: d* = consecutive registers, q* = double-spaced.
1605 def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8", addrmode6dupalign64>;
1606 def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>;
1607 def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>;
1609 def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8", addrmode6dupalign64>;
1610 def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>;
1611 def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>;
1613 def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
1614 def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
1615 def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
1617 // VLD4DUP : Vector Load (single 4-element structure to all lanes)
// Loads one 4-element structure and replicates each element across the lanes
// of four D registers. Inst{4} is the alignment bit from the address operand.
1618 class VLD4DUP<bits<4> op7_4, string Dt>
1619 : NLdSt<1, 0b10, 0b1111, op7_4,
1620 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
1621 (ins addrmode6dup:$Rn), IIC_VLD4dup,
1622 "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
1624 let Inst{4} = Rn{4};
1625 let DecoderMethod = "DecodeVLD4DupInstruction";
1628 def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">;
1629 def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
// The 32-bit form additionally routes the extra address bit into Inst{6}
// (op7_4 bit 2 is left as '?' for that purpose).
1630 def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
1632 def VLD4DUPd8Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1633 def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1634 def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1636 // ...with double-spaced registers (not used for codegen):
1637 def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">;
1638 def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
1639 def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
// Even/odd-half pseudos over an 8-D-register (QQQQ) tuple.
1641 def VLD4DUPq8EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1642 def VLD4DUPq8OddPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1643 def VLD4DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1644 def VLD4DUPq16OddPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1645 def VLD4DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1646 def VLD4DUPq32OddPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1648 // ...with address register writeback:
// As VLD4DUP, plus am6offset:$Rm post-increment and the updated base in $wb.
1649 class VLD4DUPWB<bits<4> op7_4, string Dt>
1650 : NLdSt<1, 0b10, 0b1111, op7_4,
1651 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
1652 (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
1653 "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
1654 "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
1655 let Inst{4} = Rn{4};
1656 let DecoderMethod = "DecodeVLD4DupInstruction";
// Writeback (_UPD) forms; the 32-bit ones again steal Inst{6} for Rn{5}.
1659 def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">;
1660 def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
1661 def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
1663 def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}, "8">;
1664 def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
1665 def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
1667 def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
1668 def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
1669 def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
1671 } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
1673 let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
1675 // Classes for VST* pseudo-instructions with multi-register operands.
1676 // These are expanded to real instructions after register allocation.
// Naming: Q = one Q register, QQ = two, QQQQ = four; WB adds a $wb output
// tied to $addr.addr; "fixed" omits the offset operand (post-increment by
// size), "register" takes an rGPR offset.
// NOTE(review): QPR/QQPR/QQQQPR are register-tuple classes defined elsewhere
// in the ARM backend — confirm widths against ARMRegisterInfo.td.
1677 class VSTQPseudo<InstrItinClass itin>
1678 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
1679 class VSTQWBPseudo<InstrItinClass itin>
1680 : PseudoNLdSt<(outs GPR:$wb),
1681 (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
1682 "$addr.addr = $wb">;
1683 class VSTQWBfixedPseudo<InstrItinClass itin>
1684 : PseudoNLdSt<(outs GPR:$wb),
1685 (ins addrmode6:$addr, QPR:$src), itin,
1686 "$addr.addr = $wb">;
1687 class VSTQWBregisterPseudo<InstrItinClass itin>
1688 : PseudoNLdSt<(outs GPR:$wb),
1689 (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin,
1690 "$addr.addr = $wb">;
1691 class VSTQQPseudo<InstrItinClass itin>
1692 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
1693 class VSTQQWBPseudo<InstrItinClass itin>
1694 : PseudoNLdSt<(outs GPR:$wb),
1695 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
1696 "$addr.addr = $wb">;
1697 class VSTQQWBfixedPseudo<InstrItinClass itin>
1698 : PseudoNLdSt<(outs GPR:$wb),
1699 (ins addrmode6:$addr, QQPR:$src), itin,
1700 "$addr.addr = $wb">;
1701 class VSTQQWBregisterPseudo<InstrItinClass itin>
1702 : PseudoNLdSt<(outs GPR:$wb),
1703 (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin,
1704 "$addr.addr = $wb">;
1706 class VSTQQQQPseudo<InstrItinClass itin>
1707 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
1708 class VSTQQQQWBPseudo<InstrItinClass itin>
1709 : PseudoNLdSt<(outs GPR:$wb),
1710 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
1711 "$addr.addr = $wb">;
1713 // VST1 : Vector Store (multiple single elements)
// One-register store: opcode 0b0111; Inst{4} takes the alignment bit.
1714 class VST1D<bits<4> op7_4, string Dt, Operand AddrMode>
1715 : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd),
1716 IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST1]> {
1718 let Inst{4} = Rn{4};
1719 let DecoderMethod = "DecodeVLDST1Instruction";
// Two-register (D-pair) store: opcode 0b1010; alignment uses Inst{5-4}.
1721 class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode>
1722 : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd),
1723 IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST2]> {
1725 let Inst{5-4} = Rn{5-4};
1726 let DecoderMethod = "DecodeVLDST1Instruction";
// VST1 one- and two-register defs, one per element size (8/16/32/64).
1729 def VST1d8 : VST1D<{0,0,0,?}, "8", addrmode6align64>;
1730 def VST1d16 : VST1D<{0,1,0,?}, "16", addrmode6align64>;
1731 def VST1d32 : VST1D<{1,0,0,?}, "32", addrmode6align64>;
1732 def VST1d64 : VST1D<{1,1,0,?}, "64", addrmode6align64>;
1734 def VST1q8 : VST1Q<{0,0,?,?}, "8", addrmode6align64or128>;
1735 def VST1q16 : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>;
1736 def VST1q32 : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>;
1737 def VST1q64 : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>;
1739 // ...with address register writeback:
// _fixed = post-increment by size (Rm forced to 0b1101, "$Rn!" syntax);
// _register = post-increment by rGPR:$Rm. $wb returns the updated base.
// NOTE(review): the itineraries here are IIC_VLD1u / IIC_VLD1x2u (load
// itineraries) on store instructions — looks like a copy-paste from the VLD1
// writeback multiclasses; confirm whether IIC_VST1u/IIC_VST1x2u were intended.
1740 multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1741 def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
1742 (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u,
1743 "vst1", Dt, "$Vd, $Rn!",
1744 "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
1745 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1746 let Inst{4} = Rn{4};
1747 let DecoderMethod = "DecodeVLDST1Instruction";
1749 def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
1750 (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd),
1752 "vst1", Dt, "$Vd, $Rn, $Rm",
1753 "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
1754 let Inst{4} = Rn{4};
1755 let DecoderMethod = "DecodeVLDST1Instruction";
// Same pair of writeback forms for the two-register (D-pair) store.
1758 multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1759 def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
1760 (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
1761 "vst1", Dt, "$Vd, $Rn!",
1762 "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
1763 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1764 let Inst{5-4} = Rn{5-4};
1765 let DecoderMethod = "DecodeVLDST1Instruction";
1767 def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
1768 (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd),
1770 "vst1", Dt, "$Vd, $Rn, $Rm",
1771 "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
1772 let Inst{5-4} = Rn{5-4};
1773 let DecoderMethod = "DecodeVLDST1Instruction";
// Writeback defms; each expands to _fixed and _register forms.
1777 defm VST1d8wb : VST1DWB<{0,0,0,?}, "8", addrmode6align64>;
1778 defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>;
1779 defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>;
1780 defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>;
1782 defm VST1q8wb : VST1QWB<{0,0,?,?}, "8", addrmode6align64or128>;
1783 defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
1784 defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
1785 defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
1787 // ...with 3 registers
// Three-register VST1 (opcode 0b0110) over a VecListThreeD operand.
1788 class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode>
1789 : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
1790 (ins AddrMode:$Rn, VecListThreeD:$Vd),
1791 IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST3]> {
1793 let Inst{4} = Rn{4};
1794 let DecoderMethod = "DecodeVLDST1Instruction";
// Writeback forms (see VST1DWB for the _fixed/_register convention).
// NOTE(review): IIC_VLD1x3u is a load itinerary on a store — confirm.
1796 multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
1797 def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
1798 (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
1799 "vst1", Dt, "$Vd, $Rn!",
1800 "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
1801 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1802 let Inst{5-4} = Rn{5-4};
1803 let DecoderMethod = "DecodeVLDST1Instruction";
1805 def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
1806 (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
1808 "vst1", Dt, "$Vd, $Rn, $Rm",
1809 "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
1810 let Inst{5-4} = Rn{5-4};
1811 let DecoderMethod = "DecodeVLDST1Instruction";
// Three-register ("T" = triple) defs, writeback defms, and pseudos.
1815 def VST1d8T : VST1D3<{0,0,0,?}, "8", addrmode6align64>;
1816 def VST1d16T : VST1D3<{0,1,0,?}, "16", addrmode6align64>;
1817 def VST1d32T : VST1D3<{1,0,0,?}, "32", addrmode6align64>;
1818 def VST1d64T : VST1D3<{1,1,0,?}, "64", addrmode6align64>;
1820 defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8", addrmode6align64>;
1821 defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>;
1822 defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>;
1823 defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>;
1825 def VST1d8TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1826 def VST1d16TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1827 def VST1d32TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1828 def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1829 def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
1830 def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
// High/Low-half pseudos over an 8-D-register (QQQQ) tuple.
1832 def VST1q8HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1833 def VST1q8LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1834 def VST1q16HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1835 def VST1q16LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1836 def VST1q32HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1837 def VST1q32LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1838 def VST1q64HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1839 def VST1q64LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1841 // ...with 4 registers
// Four-register VST1 (opcode 0b0010) over a VecListFourD operand.
1842 class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode>
1843 : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
1844 (ins AddrMode:$Rn, VecListFourD:$Vd),
1845 IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
1846 []>, Sched<[WriteVST4]> {
1848 let Inst{5-4} = Rn{5-4};
1849 let DecoderMethod = "DecodeVLDST1Instruction";
// Writeback forms (see VST1DWB for the _fixed/_register convention).
// NOTE(review): IIC_VLD1x4u is a load itinerary on a store — confirm.
1851 multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
1852 def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
1853 (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
1854 "vst1", Dt, "$Vd, $Rn!",
1855 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
1856 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1857 let Inst{5-4} = Rn{5-4};
1858 let DecoderMethod = "DecodeVLDST1Instruction";
1860 def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
1861 (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
1863 "vst1", Dt, "$Vd, $Rn, $Rm",
1864 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
1865 let Inst{5-4} = Rn{5-4};
1866 let DecoderMethod = "DecodeVLDST1Instruction";
// Four-register ("Q" = quad D-list) defs, writeback defms, and pseudos.
1870 def VST1d8Q : VST1D4<{0,0,?,?}, "8", addrmode6align64or128or256>;
1871 def VST1d16Q : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
1872 def VST1d32Q : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
1873 def VST1d64Q : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;
1875 defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>;
1876 defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
1877 defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
1878 defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;
1880 def VST1d8QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1881 def VST1d16QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1882 def VST1d32QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1883 def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1884 def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
1885 def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
// High/Low-half pseudos over an 8-D-register (QQQQ) tuple.
1887 def VST1q8HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1888 def VST1q8LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1889 def VST1q16HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1890 def VST1q16LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1891 def VST1q32HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1892 def VST1q32LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1893 def VST1q64HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1894 def VST1q64LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1896 // VST2 : Vector Store (multiple 2-element structures)
// op11_8 selects the register-list layout (0b1000 pair, 0b0011 four-D,
// 0b1001 double-spaced pair, per the defs below); Inst{5-4} is alignment.
1897 class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
1898 InstrItinClass itin, Operand AddrMode>
1899 : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins AddrMode:$Rn, VdTy:$Vd),
1900 itin, "vst2", Dt, "$Vd, $Rn", "", []> {
1902 let Inst{5-4} = Rn{5-4};
1903 let DecoderMethod = "DecodeVLDST2Instruction";
// d-forms store a D-pair; q-forms store four D registers (two structures).
1906 def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2,
1907 addrmode6align64or128>, Sched<[WriteVST2]>;
1908 def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2,
1909 addrmode6align64or128>, Sched<[WriteVST2]>;
1910 def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2,
1911 addrmode6align64or128>, Sched<[WriteVST2]>;
1913 def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2,
1914 addrmode6align64or128or256>, Sched<[WriteVST4]>;
1915 def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2,
1916 addrmode6align64or128or256>, Sched<[WriteVST4]>;
1917 def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2,
1918 addrmode6align64or128or256>, Sched<[WriteVST4]>;
1920 def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
1921 def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
1922 def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
1924 // ...with address register writeback:
// _fixed/_register follow the usual writeback convention (Rm=0b1101 vs rGPR).
// NOTE(review): all four records below use the IIC_VLD1u load itinerary on
// store instructions — likely copy-paste from the VLD2 multiclass; confirm.
1925 multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
1926 RegisterOperand VdTy, Operand AddrMode> {
1927 def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
1928 (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u,
1929 "vst2", Dt, "$Vd, $Rn!",
1930 "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
1931 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1932 let Inst{5-4} = Rn{5-4};
1933 let DecoderMethod = "DecodeVLDST2Instruction";
1935 def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
1936 (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
1937 "vst2", Dt, "$Vd, $Rn, $Rm",
1938 "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
1939 let Inst{5-4} = Rn{5-4};
1940 let DecoderMethod = "DecodeVLDST2Instruction";
// Four-D-register (q) writeback variants: opcode fixed at 0b0011.
1943 multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1944 def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
1945 (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u,
1946 "vst2", Dt, "$Vd, $Rn!",
1947 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
1948 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1949 let Inst{5-4} = Rn{5-4};
1950 let DecoderMethod = "DecodeVLDST2Instruction";
1952 def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
1953 (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
1955 "vst2", Dt, "$Vd, $Rn, $Rm",
1956 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
1957 let Inst{5-4} = Rn{5-4};
1958 let DecoderMethod = "DecodeVLDST2Instruction";
// Writeback defms for pair (d), four-D (q), and double-spaced (b) lists.
1962 defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair,
1963 addrmode6align64or128>;
1964 defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair,
1965 addrmode6align64or128>;
1966 defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair,
1967 addrmode6align64or128>;
1969 defm VST2q8wb : VST2QWB<{0,0,?,?}, "8", addrmode6align64or128or256>;
1970 defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>;
1971 defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>;
1973 def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1974 def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1975 def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1976 def VST2q8PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1977 def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1978 def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1980 // ...with double-spaced registers
// See the HACK note on VLD2DUPd8x2: VST2b8 and VLD2b8 encodings are coupled.
1981 def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2,
1982 addrmode6align64or128>;
1983 def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2,
1984 addrmode6align64or128>;
1985 def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2,
1986 addrmode6align64or128>;
1987 defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced,
1988 addrmode6align64or128>;
1989 defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced,
1990 addrmode6align64or128>;
1991 defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced,
1992 addrmode6align64or128>;
1994 // VST3 : Vector Store (multiple 3-element structures)
// Stores one 3-element structure from three D registers; op11_8 selects
// consecutive (0b0100) vs double-spaced (0b0101) register lists (see defs).
1995 class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
1996 : NLdSt<0, 0b00, op11_8, op7_4, (outs),
1997 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
1998 "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []>, Sched<[WriteVST3]> {
2000 let Inst{4} = Rn{4};
2001 let DecoderMethod = "DecodeVLDST3Instruction";
2004 def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">;
2005 def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
2006 def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;
// Pseudos carrying the three D registers as one QQ tuple until expansion.
2008 def VST3d8Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
2009 def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
2010 def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
2012 // ...with address register writeback:
// am6offset:$Rm covers both "!" and register post-increment; updated base
// is returned in $wb (tied to $Rn.addr).
2013 class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2014 : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
2015 (ins addrmode6:$Rn, am6offset:$Rm,
2016 DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
2017 "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
2018 "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
2019 let Inst{4} = Rn{4};
2020 let DecoderMethod = "DecodeVLDST3Instruction";
2023 def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">;
2024 def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
2025 def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;
2027 def VST3d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2028 def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2029 def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2031 // ...with double-spaced registers:
2032 def VST3q8 : VST3D<0b0101, {0,0,0,?}, "8">;
2033 def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">;
2034 def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">;
2035 def VST3q8_UPD : VST3DWB<0b0101, {0,0,0,?}, "8">;
2036 def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
2037 def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;
// q-form pseudos use QQQQ tuples (registers spread across four Q regs).
2039 def VST3q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2040 def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2041 def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2043 // ...alternate versions to be allocated odd register numbers:
2044 def VST3q8oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
2045 def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
2046 def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
2048 def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2049 def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2050 def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2052 // VST4 : Vector Store (multiple 4-element structures)
// Stores one 4-element structure from four D registers; op11_8 selects
// consecutive (0b0000) vs double-spaced (0b0001) lists (see defs below).
2053 class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
2054 : NLdSt<0, 0b00, op11_8, op7_4, (outs),
2055 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
2056 IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
2057 "", []>, Sched<[WriteVST4]> {
2059 let Inst{5-4} = Rn{5-4};
2060 let DecoderMethod = "DecodeVLDST4Instruction";
2063 def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">;
2064 def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
2065 def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;
// Pseudos carrying the four D registers as one QQ tuple until expansion.
2067 def VST4d8Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2068 def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2069 def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2071 // ...with address register writeback:
// am6offset:$Rm covers both "!" and register post-increment; updated base
// is returned in $wb (tied to $Rn.addr).
2072 class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2073 : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
2074 (ins addrmode6:$Rn, am6offset:$Rm,
2075 DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
2076 "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
2077 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
2078 let Inst{5-4} = Rn{5-4};
2079 let DecoderMethod = "DecodeVLDST4Instruction";
2082 def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">;
2083 def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
2084 def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;
2086 def VST4d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2087 def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2088 def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2090 // ...with double-spaced registers:
2091 def VST4q8 : VST4D<0b0001, {0,0,?,?}, "8">;
2092 def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">;
2093 def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">;
2094 def VST4q8_UPD : VST4DWB<0b0001, {0,0,?,?}, "8">;
2095 def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
2096 def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;
// q-form pseudos use QQQQ tuples (registers spread across four Q regs).
2098 def VST4q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2099 def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2100 def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2102 // ...alternate versions to be allocated odd register numbers:
2103 def VST4q8oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2104 def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2105 def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2107 def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2108 def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2109 def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2111 } // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
2113 // Classes for VST*LN pseudo-instructions with multi-register operands.
2114 // These are expanded to real instructions after register allocation.
// Same Q/QQ/QQQQ + WB naming as the VST* pseudo classes above; every LN
// variant additionally carries a nohash_imm lane index.
2115 class VSTQLNPseudo<InstrItinClass itin>
2116 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
2118 class VSTQLNWBPseudo<InstrItinClass itin>
2119 : PseudoNLdSt<(outs GPR:$wb),
2120 (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
2121 nohash_imm:$lane), itin, "$addr.addr = $wb">;
2122 class VSTQQLNPseudo<InstrItinClass itin>
2123 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
2125 class VSTQQLNWBPseudo<InstrItinClass itin>
2126 : PseudoNLdSt<(outs GPR:$wb),
2127 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
2128 nohash_imm:$lane), itin, "$addr.addr = $wb">;
2129 class VSTQQQQLNPseudo<InstrItinClass itin>
2130 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
2132 class VSTQQQQLNWBPseudo<InstrItinClass itin>
2133 : PseudoNLdSt<(outs GPR:$wb),
2134 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
2135 nohash_imm:$lane), itin, "$addr.addr = $wb">;
2137 // VST1LN : Vector Store (single element from one lane)
// Stores lane $lane of a D register. The pattern extracts the element with
// ExtractOp and stores it via StoreOp, so plain scalar stores can select
// these instructions.
2138 class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
2139 PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
2140 : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
2141 (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
2142 IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
2143 [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]>,
2144 Sched<[WriteVST1]> {
2146 let DecoderMethod = "DecodeVST1LN";
// Q-register version exists only as a pseudo; it carries the same extract-
// and-store pattern over a QPR source.
2148 class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
2149 : VSTQLNPseudo<IIC_VST1ln>, Sched<[WriteVST1]> {
2150 let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
// Lane index occupies the high op7_4 bits: 3 bits for i8, 2 for i16, 1 for
// i32; remaining low bits carry the alignment from Rn where applicable.
2154 def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
2155 NEONvgetlaneu, addrmode6> {
2156 let Inst{7-5} = lane{2-0};
2158 def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
2159 NEONvgetlaneu, addrmode6> {
2160 let Inst{7-6} = lane{1-0};
2161 let Inst{4} = Rn{4};
2164 def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
2166 let Inst{7} = lane{0};
2167 let Inst{5-4} = Rn{5-4};
2170 def VST1LNq8Pseudo : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
2171 def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
2172 def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;
// Float lane stores reuse the i32 instructions (same bit pattern).
2174 def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
2175 (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
2176 def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
2177 (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
2179 // ...with address register writeback:
// Post-indexed lane store: pattern uses the post_* store fragments and
// returns the updated base address in $wb.
2180 class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
2181 PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
2182 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2183 (ins AdrMode:$Rn, am6offset:$Rm,
2184 DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
2185 "\\{$Vd[$lane]\\}, $Rn$Rm",
2187 [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
2188 AdrMode:$Rn, am6offset:$Rm))]>,
2189 Sched<[WriteVST1]> {
2190 let DecoderMethod = "DecodeVST1LN";
// Q-register writeback version, again pseudo-only.
2192 class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
2193 : VSTQLNWBPseudo<IIC_VST1lnu>, Sched<[WriteVST1]> {
2194 let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
2195 addrmode6:$addr, am6offset:$offset))];
// Writeback defs mirror the non-writeback lane-bit assignments above.
2198 def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
2199 NEONvgetlaneu, addrmode6> {
2200 let Inst{7-5} = lane{2-0};
2202 def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
2203 NEONvgetlaneu, addrmode6> {
2204 let Inst{7-6} = lane{1-0};
2205 let Inst{4} = Rn{4};
2207 def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
2208 extractelt, addrmode6oneL32> {
2209 let Inst{7} = lane{0};
2210 let Inst{5-4} = Rn{5-4};
2213 def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
2214 def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>;
2215 def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
2217 let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
2219 // VST2LN : Vector Store (single 2-element structure from one lane)
// Stores lane $lane of two D registers as one 2-element structure. No
// selection pattern; only reached via intrinsics/pseudo expansion.
2220 class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
2221 : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
2222 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
2223 IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
2224 "", []>, Sched<[WriteVST1]> {
2226 let Inst{4} = Rn{4};
2227 let DecoderMethod = "DecodeVST2LN";
// Adjacent-register forms; lane bits land in Inst{7-5}/{7-6}/{7} by size.
2230 def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> {
2231 let Inst{7-5} = lane{2-0};
2233 def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
2234 let Inst{7-6} = lane{1-0};
2236 def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
2237 let Inst{7} = lane{0};
2240 def VST2LNd8Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2241 def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2242 def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2244 // ...with double-spaced registers:
// Only 16- and 32-bit elements support spacing (op7_4 spacing bit set).
2245 def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
2246 let Inst{7-6} = lane{1-0};
2247 let Inst{4} = Rn{4};
2249 def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
2250 let Inst{7} = lane{0};
2251 let Inst{4} = Rn{4};
2254 def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2255 def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2257 // ...with address register writeback:
// Post-indexed form: am6offset:$Rm, updated base returned in $wb.
2258 class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2259 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2260 (ins addrmode6:$Rn, am6offset:$Rm,
2261 DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
2262 "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
2263 "$Rn.addr = $wb", []> {
2264 let Inst{4} = Rn{4};
2265 let DecoderMethod = "DecodeVST2LN";
// Writeback defs mirror the non-writeback lane-bit assignments above.
2268 def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
2269 let Inst{7-5} = lane{2-0};
2271 def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
2272 let Inst{7-6} = lane{1-0};
2274 def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
2275 let Inst{7} = lane{0};
2278 def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2279 def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2280 def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2282 def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
2283 let Inst{7-6} = lane{1-0};
2285 def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
2286 let Inst{7} = lane{0};
2289 def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2290 def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2292 // VST3LN : Vector Store (single 3-element structure from one lane)
// Base class for vst3 single-lane stores: three D-register sources, one
// lane each. Mirrors VST2LN above but uses IIC_VST3ln/WriteVST2 and its own
// decoder hook.
2293 class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
2294 : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
2295 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
2296 nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
2297 "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []>,
2298 Sched<[WriteVST2]> {
2300 let DecoderMethod = "DecodeVST3LN";
// Element-size instantiations; lane bits placed per element width.
2303 def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> {
2304 let Inst{7-5} = lane{2-0};
2306 def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
2307 let Inst{7-6} = lane{1-0};
2309 def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
2310 let Inst{7} = lane{0};
2313 def VST3LNd8Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
2314 def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
2315 def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
2317 // ...with double-spaced registers:
2318 def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
2319 let Inst{7-6} = lane{1-0};
2321 def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
2322 let Inst{7} = lane{0};
// NOTE(review): unlike the other pseudos in this section, these two carry
// no Sched<[...]> annotation here — possibly intentional, worth confirming.
2325 def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
2326 def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
2328 // ...with address register writeback:
2329 class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2330 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2331 (ins addrmode6:$Rn, am6offset:$Rm,
2332 DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
2333 IIC_VST3lnu, "vst3", Dt,
2334 "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
2335 "$Rn.addr = $wb", []> {
2336 let DecoderMethod = "DecodeVST3LN";
2339 def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
2340 let Inst{7-5} = lane{2-0};
2342 def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
2343 let Inst{7-6} = lane{1-0};
2345 def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
2346 let Inst{7} = lane{0};
2349 def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2350 def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2351 def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2353 def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
2354 let Inst{7-6} = lane{1-0};
2356 def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
2357 let Inst{7} = lane{0};
2360 def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2361 def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2363 // VST4LN : Vector Store (single 4-element structure from one lane)
// Base class for vst4 single-lane stores: four D-register sources, one
// lane each. Note the 32-bit variants additionally encode Rn{5} into
// Inst{5} (alignment/index bit per the explicit `let` lines below).
2364 class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
2365 : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
2366 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
2367 nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
2368 "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
2369 "", []>, Sched<[WriteVST2]> {
2371 let Inst{4} = Rn{4};
2372 let DecoderMethod = "DecodeVST4LN";
2375 def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> {
2376 let Inst{7-5} = lane{2-0};
2378 def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
2379 let Inst{7-6} = lane{1-0};
2381 def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
2382 let Inst{7} = lane{0};
2383 let Inst{5} = Rn{5};
2386 def VST4LNd8Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2387 def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2388 def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2390 // ...with double-spaced registers:
2391 def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
2392 let Inst{7-6} = lane{1-0};
2394 def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
2395 let Inst{7} = lane{0};
2396 let Inst{5} = Rn{5};
2399 def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2400 def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2402 // ...with address register writeback:
2403 class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2404 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2405 (ins addrmode6:$Rn, am6offset:$Rm,
2406 DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
2407 IIC_VST4lnu, "vst4", Dt,
2408 "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
2409 "$Rn.addr = $wb", []> {
2410 let Inst{4} = Rn{4};
2411 let DecoderMethod = "DecodeVST4LN";
2414 def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
2415 let Inst{7-5} = lane{2-0};
2417 def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
2418 let Inst{7-6} = lane{1-0};
2420 def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
2421 let Inst{7} = lane{0};
2422 let Inst{5} = Rn{5};
2425 def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2426 def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2427 def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2429 def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
2430 let Inst{7-6} = lane{1-0};
2432 def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
2433 let Inst{7} = lane{0};
2434 let Inst{5} = Rn{5};
2437 def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2438 def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2440 } // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
2442 // Use vld1/vst1 for unaligned f64 load / store
// Selection patterns mapping under-aligned f64 loads/stores onto the
// narrower-element vld1/vst1 forms. The half/byte-aligned lowerings are
// little-endian only (Requires<[IsLE]>); big-endian keeps the d64 form.
2443 def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
2444 (VLD1d16 addrmode6:$addr)>, Requires<[IsLE]>;
2445 def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
2446 (VST1d16 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
2447 def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
2448 (VLD1d8 addrmode6:$addr)>, Requires<[IsLE]>;
2449 def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
2450 (VST1d8 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
2451 def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
2452 (VLD1d64 addrmode6:$addr)>, Requires<[IsBE]>;
2453 def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
2454 (VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>;
2456 // Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64
2457 // load / store if it's legal.
// Same idea for 128-bit v2f64: dword-aligned uses the q64 form on either
// endianness; looser alignments again select narrower-element q forms and
// are restricted to little-endian.
2458 def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
2459 (VLD1q64 addrmode6:$addr)>;
2460 def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
2461 (VST1q64 addrmode6:$addr, QPR:$value)>;
2462 def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
2463 (VLD1q32 addrmode6:$addr)>, Requires<[IsLE]>;
2464 def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
2465 (VST1q32 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
2466 def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
2467 (VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>;
2468 def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
2469 (VST1q16 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
2470 def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
2471 (VLD1q8 addrmode6:$addr)>, Requires<[IsLE]>;
2472 def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
2473 (VST1q8 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
2475 //===----------------------------------------------------------------------===//
2476 // NEON pattern fragments
2477 //===----------------------------------------------------------------------===//
2479 // Extract D sub-registers of Q registers.
// SDNodeXForm helpers: map an element index to the D sub-register index
// that contains it (divide by elements-per-D-register: 8/4/2/1).
// NOTE(review): the embedded C++ fragments here are visibly truncated in
// this extract — the closing `MVT::i32); }]>;` lines of the first five
// transforms are not present. Do not edit these records from this view.
2480 def DSubReg_i8_reg : SDNodeXForm<imm, [{
2481 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
2482 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, SDLoc(N),
2485 def DSubReg_i16_reg : SDNodeXForm<imm, [{
2486 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
2487 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, SDLoc(N),
2490 def DSubReg_i32_reg : SDNodeXForm<imm, [{
2491 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
2492 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, SDLoc(N),
2495 def DSubReg_f64_reg : SDNodeXForm<imm, [{
2496 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
2497 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), SDLoc(N),
2501 // Extract S sub-registers of Q/D registers.
2502 def SSubReg_f32_reg : SDNodeXForm<imm, [{
2503 assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
2504 return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), SDLoc(N),
2508 // Translate lane numbers from Q registers to D subregs.
// Reduce a Q-register lane number to the lane within one D sub-register
// by masking with (elements-per-D - 1): 7, 3, or 1.
2509 def SubReg_i8_lane : SDNodeXForm<imm, [{
2510 return CurDAG->getTargetConstant(N->getZExtValue() & 7, SDLoc(N), MVT::i32);
2512 def SubReg_i16_lane : SDNodeXForm<imm, [{
2513 return CurDAG->getTargetConstant(N->getZExtValue() & 3, SDLoc(N), MVT::i32);
2515 def SubReg_i32_lane : SDNodeXForm<imm, [{
2516 return CurDAG->getTargetConstant(N->getZExtValue() & 1, SDLoc(N), MVT::i32);
2519 //===----------------------------------------------------------------------===//
2520 // Instruction Classes
2521 //===----------------------------------------------------------------------===//
2523 // Basic 2-register operations: double- and quad-register.
// N2VD/N2VQ: one source vector, one destination; pattern applies OpNode
// to the source. The only difference between the D and Q variants is the
// register class, itinerary (IIC_VUNAD vs IIC_VUNAQ) and the Q bit (0/1).
2524 class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2525 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
2526 string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
2527 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
2528 (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "",
2529 [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
2530 class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2531 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
2532 string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
2533 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
2534 (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "",
2535 [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;
2537 // Basic 2-register intrinsics, both double- and quad-register.
// Same shape as N2VD/N2VQ but matching an intrinsic (SDPatternOperator)
// and taking the itinerary as a parameter.
2538 class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2539 bits<2> op17_16, bits<5> op11_7, bit op4,
2540 InstrItinClass itin, string OpcodeStr, string Dt,
2541 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2542 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
2543 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2544 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
2545 class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2546 bits<2> op17_16, bits<5> op11_7, bit op4,
2547 InstrItinClass itin, string OpcodeStr, string Dt,
2548 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2549 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
2550 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2551 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
2553 // Same as above, but not predicated.
2554 class N2VDIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
2555 InstrItinClass itin, string OpcodeStr, string Dt,
2556 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2557 : N2Vnp<op19_18, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm),
2558 itin, OpcodeStr, Dt,
2559 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
2561 class N2VQIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
2562 InstrItinClass itin, string OpcodeStr, string Dt,
2563 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2564 : N2Vnp<op19_18, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm),
2565 itin, OpcodeStr, Dt,
2566 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
2568 // Similar to NV2VQIntnp with some more encoding bits exposed (crypto).
2569 class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
2570 bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
2571 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2572 : N2Vnp<op19_18, op17_16, op10_8, op7, op6, (outs QPR:$Vd), (ins QPR:$Vm),
2573 itin, OpcodeStr, Dt,
2574 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
2576 // Same as N2VQIntXnp but with Vd as a src register.
// $src is tied to $Vd via Constraints, making this a read-modify-write op.
2577 class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
2578 bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
2579 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2580 : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
2581 (outs QPR:$Vd), (ins QPR:$src, QPR:$Vm),
2582 itin, OpcodeStr, Dt,
2583 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> {
2584 let Constraints = "$src = $Vd";
2587 // Narrow 2-register operations.
// Q source -> D result (element narrowing).
2588 class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2589 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
2590 InstrItinClass itin, string OpcodeStr, string Dt,
2591 ValueType TyD, ValueType TyQ, SDNode OpNode>
2592 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
2593 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2594 [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;
2596 // Narrow 2-register intrinsics.
2597 class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2598 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
2599 InstrItinClass itin, string OpcodeStr, string Dt,
2600 ValueType TyD, ValueType TyQ, SDPatternOperator IntOp>
2601 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
2602 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2603 [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;
2605 // Long 2-register operations (currently only used for VMOVL).
// D source -> Q result (element widening).
2606 class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2607 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
2608 InstrItinClass itin, string OpcodeStr, string Dt,
2609 ValueType TyQ, ValueType TyD, SDNode OpNode>
2610 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
2611 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2612 [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;
2614 // Long 2-register intrinsics.
2615 class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2616 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
2617 InstrItinClass itin, string OpcodeStr, string Dt,
2618 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
2619 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
2620 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2621 [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;
2623 // 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// Two outputs, each tied to its corresponding input ($src1=$Vd, $src2=$Vm):
// both registers are updated in place. No selection pattern (empty list).
2624 class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
2625 : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
2626 (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
2627 OpcodeStr, Dt, "$Vd, $Vm",
2628 "$src1 = $Vd, $src2 = $Vm", []>;
2629 class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
2630 InstrItinClass itin, string OpcodeStr, string Dt>
2631 : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
2632 (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
2633 "$src1 = $Vd, $src2 = $Vm", []>;
2635 // Basic 3-register operations: double- and quad-register.
// N3VD/N3VQ: Vd = OpNode(Vn, Vm). All carry the two-operand InstAlias
// constraint ("$Vn = $Vd") and a caller-supplied commutativity flag.
2636 class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2637 InstrItinClass itin, string OpcodeStr, string Dt,
2638 ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
2639 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2640 (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2641 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2642 [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
2643 // All of these have a two-operand InstAlias.
2644 let TwoOperandAliasConstraint = "$Vn = $Vd";
2645 let isCommutable = Commutable;
2647 // Same as N3VD but no data type.
2648 class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2649 InstrItinClass itin, string OpcodeStr,
2650 ValueType ResTy, ValueType OpTy,
2651 SDNode OpNode, bit Commutable>
2652 : N3VX<op24, op23, op21_20, op11_8, 0, op4,
2653 (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2654 OpcodeStr, "$Vd, $Vn, $Vm", "",
2655 [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{
2656 // All of these have a two-operand InstAlias.
2657 let TwoOperandAliasConstraint = "$Vn = $Vd";
2658 let isCommutable = Commutable;
// Scalar ("by lane") variants: second operand is one lane of a D register,
// duplicated via NEONvduplane. Restricted register classes (DPR_VFP2 for
// 32-bit lanes, DPR_8 for 16-bit lanes) match the encodable register range.
// Never commutable, since the operands have different kinds.
2661 class N3VDSL<bits<2> op21_20, bits<4> op11_8,
2662 InstrItinClass itin, string OpcodeStr, string Dt,
2663 ValueType Ty, SDNode ShOp>
2664 : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
2665 (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2666 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2668 (Ty (ShOp (Ty DPR:$Vn),
2669 (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
2670 // All of these have a two-operand InstAlias.
2671 let TwoOperandAliasConstraint = "$Vn = $Vd";
2672 let isCommutable = 0;
2674 class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
2675 string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
2676 : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
2677 (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2678 NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
2680 (Ty (ShOp (Ty DPR:$Vn),
2681 (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
2682 // All of these have a two-operand InstAlias.
2683 let TwoOperandAliasConstraint = "$Vn = $Vd";
2684 let isCommutable = 0;
2687 class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2688 InstrItinClass itin, string OpcodeStr, string Dt,
2689 ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
2690 : N3V<op24, op23, op21_20, op11_8, 1, op4,
2691 (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2692 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2693 [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
2694 // All of these have a two-operand InstAlias.
2695 let TwoOperandAliasConstraint = "$Vn = $Vd";
2696 let isCommutable = Commutable;
2698 class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2699 InstrItinClass itin, string OpcodeStr,
2700 ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
2701 : N3VX<op24, op23, op21_20, op11_8, 1, op4,
2702 (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2703 OpcodeStr, "$Vd, $Vn, $Vm", "",
2704 [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
2705 // All of these have a two-operand InstAlias.
2706 let TwoOperandAliasConstraint = "$Vn = $Vd";
2707 let isCommutable = Commutable;
2709 class N3VQSL<bits<2> op21_20, bits<4> op11_8,
2710 InstrItinClass itin, string OpcodeStr, string Dt,
2711 ValueType ResTy, ValueType OpTy, SDNode ShOp>
2712 : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
2713 (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2714 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2715 [(set (ResTy QPR:$Vd),
2716 (ResTy (ShOp (ResTy QPR:$Vn),
2717 (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
2719 // All of these have a two-operand InstAlias.
2720 let TwoOperandAliasConstraint = "$Vn = $Vd";
2721 let isCommutable = 0;
2723 class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
2724 ValueType ResTy, ValueType OpTy, SDNode ShOp>
2725 : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
2726 (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2727 NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
2728 [(set (ResTy QPR:$Vd),
2729 (ResTy (ShOp (ResTy QPR:$Vn),
2730 (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
2732 // All of these have a two-operand InstAlias.
2733 let TwoOperandAliasConstraint = "$Vn = $Vd";
2734 let isCommutable = 0;
2737 // Basic 3-register intrinsics, both double- and quad-register.
// Same structure as the N3VD/N3VQ family above, but matching an intrinsic
// (SDPatternOperator) and parameterized over the instruction Format f.
2738 class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2739 Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2740 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
2741 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2742 (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
2743 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2744 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
2745 // All of these have a two-operand InstAlias.
2746 let TwoOperandAliasConstraint = "$Vn = $Vd";
2747 let isCommutable = Commutable;
// Non-predicated variant built on N3Vnp.
2750 class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
2751 bit op4, Format f, InstrItinClass itin, string OpcodeStr,
2752 string Dt, ValueType ResTy, ValueType OpTy,
2753 SDPatternOperator IntOp, bit Commutable>
2754 : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
2755 (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
2756 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
// Scalar ("by lane") intrinsic variants, analogous to N3VDSL/N3VDSL16.
2758 class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2759 string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
2760 : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
2761 (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2762 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2764 (Ty (IntOp (Ty DPR:$Vn),
2765 (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
2767 let isCommutable = 0;
2770 class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2771 string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
2772 : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
2773 (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2774 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2776 (Ty (IntOp (Ty DPR:$Vn),
2777 (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
2778 let isCommutable = 0;
// "Sh" (shift-style) variant: note the reversed operand order in both the
// ins list and the asm string ($Vm before $Vn), and the alias constraint
// tying $Vm (not $Vn) to $Vd.
2780 class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2781 Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2782 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2783 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2784 (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
2785 OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
2786 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
2787 let TwoOperandAliasConstraint = "$Vm = $Vd";
2788 let isCommutable = 0;
2791 class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2792 Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2793 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
2794 : N3V<op24, op23, op21_20, op11_8, 1, op4,
2795 (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
2796 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2797 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
2798 // All of these have a two-operand InstAlias.
2799 let TwoOperandAliasConstraint = "$Vn = $Vd";
2800 let isCommutable = Commutable;
2803 class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
2804 bit op4, Format f, InstrItinClass itin, string OpcodeStr,
2805 string Dt, ValueType ResTy, ValueType OpTy,
2806 SDPatternOperator IntOp, bit Commutable>
2807 : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
2808 (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt,
2809 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
2811 // Same as N3VQIntnp but with Vd as a src register.
// Read-modify-write: $src tied to $Vd through Constraints.
2812 class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
2813 bit op4, Format f, InstrItinClass itin, string OpcodeStr,
2814 string Dt, ValueType ResTy, ValueType OpTy,
2815 SDPatternOperator IntOp, bit Commutable>
2816 : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
2817 (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm),
2818 f, itin, OpcodeStr, Dt,
2819 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
2820 (OpTy QPR:$Vm))))]> {
2821 let Constraints = "$src = $Vd";
2824 class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2825 string OpcodeStr, string Dt,
2826 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2827 : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
2828 (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2829 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2830 [(set (ResTy QPR:$Vd),
2831 (ResTy (IntOp (ResTy QPR:$Vn),
2832 (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
2834 let isCommutable = 0;
2836 class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2837 string OpcodeStr, string Dt,
2838 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2839 : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
2840 (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2841 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2842 [(set (ResTy QPR:$Vd),
2843 (ResTy (IntOp (ResTy QPR:$Vn),
2844 (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
2846 let isCommutable = 0;
2848 class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2849 Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2850 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2851 : N3V<op24, op23, op21_20, op11_8, 1, op4,
2852 (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
2853 OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
2854 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
2855 let TwoOperandAliasConstraint = "$Vm = $Vd";
2856 let isCommutable = 0;
2859 // Multiply-Add/Sub operations: double- and quad-register.
// Vd = OpNode(src1, MulOp(Vn, Vm)) with the accumulator $src1 tied to $Vd.
2860 class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2861 InstrItinClass itin, string OpcodeStr, string Dt,
2862 ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
2863 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2864 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2865 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2866 [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
2867 (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;
// Scalar ("by lane") multiply-accumulate variants.
2869 class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2870 string OpcodeStr, string Dt,
2871 ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
2872 : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
2874 (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2876 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2878 (Ty (ShOp (Ty DPR:$src1),
2880 (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
2882 class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2883 string OpcodeStr, string Dt,
2884 ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
2885 : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
2887 (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2889 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2891 (Ty (ShOp (Ty DPR:$src1),
2893 (Ty (NEONvduplane (Ty DPR_8:$Vm),
2896 class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2897 InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
2898 SDPatternOperator MulOp, SDPatternOperator OpNode>
2899 : N3V<op24, op23, op21_20, op11_8, 1, op4,
2900 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2901 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2902 [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
2903 (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
2904 class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2905 string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
2906 SDPatternOperator MulOp, SDPatternOperator ShOp>
2907 : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
2909 (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2911 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2912 [(set (ResTy QPR:$Vd),
2913 (ResTy (ShOp (ResTy QPR:$src1),
2914 (ResTy (MulOp QPR:$Vn,
2915 (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
2917 class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2918 string OpcodeStr, string Dt,
2919 ValueType ResTy, ValueType OpTy,
2920 SDPatternOperator MulOp, SDPatternOperator ShOp>
2921 : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
2923 (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2925 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2926 [(set (ResTy QPR:$Vd),
2927 (ResTy (ShOp (ResTy QPR:$src1),
2928 (ResTy (MulOp QPR:$Vn,
2929 (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
2932 // Neon Intrinsic-Op instructions (VABA): double- and quad-register.
// Vd = OpNode(src1, IntOp(Vn, Vm)) — intrinsic result combined with the
// tied accumulator by a regular SDNode.
2933 class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2934 InstrItinClass itin, string OpcodeStr, string Dt,
2935 ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
2936 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2937 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2938 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2939 [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
2940 (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
2941 class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2942 InstrItinClass itin, string OpcodeStr, string Dt,
2943 ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
2944 : N3V<op24, op23, op21_20, op11_8, 1, op4,
2945 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2946 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2947 [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
2948 (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;
2950 // Neon 3-argument intrinsics, both double- and quad-register.
2951 // The destination register is also used as the first source operand register.
2952 class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2953 InstrItinClass itin, string OpcodeStr, string Dt,
2954 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2955 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2956 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2957 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2958 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
2959 (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
2960 class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2961 InstrItinClass itin, string OpcodeStr, string Dt,
2962 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2963 : N3V<op24, op23, op21_20, op11_8, 1, op4,
2964 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2965 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2966 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
2967 (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
2969 // Long Multiply-Add/Sub operations.
2970 class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2971 InstrItinClass itin, string OpcodeStr, string Dt,
2972 ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
2973 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2974 (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2975 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2976 [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
2977 (TyQ (MulOp (TyD DPR:$Vn),
2978 (TyD DPR:$Vm)))))]>;
2979 class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
2980 InstrItinClass itin, string OpcodeStr, string Dt,
2981 ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
2982 : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
2983 (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2985 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2987 (OpNode (TyQ QPR:$src1),
2988 (TyQ (MulOp (TyD DPR:$Vn),
2989 (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
// 16-bit-lane variant of the by-scalar long multiply-accumulate:
// Vm comes from DPR_8 with a VectorIndex16 lane index.
2991 class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
2992 InstrItinClass itin, string OpcodeStr, string Dt,
2993 ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
2994 : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
2995 (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2997 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2999 (OpNode (TyQ QPR:$src1),
3000 (TyQ (MulOp (TyD DPR:$Vn),
3001 (TyD (NEONvduplane (TyD DPR_8:$Vm),
3004 // Long Intrinsic-Op vector operations with explicit extend (VABAL).
// Long intrinsic-op with explicit extend (e.g. VABAL): the D-sized intrinsic
// result IntOp(Vn, Vm) is widened by ExtOp before combining with $src1.
3005 class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3006 InstrItinClass itin, string OpcodeStr, string Dt,
3007 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
3009 : N3V<op24, op23, op21_20, op11_8, 0, op4,
3010 (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
3011 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
3012 [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
3013 (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
3014 (TyD DPR:$Vm)))))))]>;
3016 // Neon Long 3-argument intrinsic. The destination register is
3017 // a quad-register and is also used as the first source operand register.
// Long 3-argument intrinsic: Q-reg destination doubles as first source;
// the other two operands are D registers.
3018 class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3019 InstrItinClass itin, string OpcodeStr, string Dt,
3020 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
3021 : N3V<op24, op23, op21_20, op11_8, 0, op4,
3022 (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
3023 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
3025 (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
// By-scalar (32-bit lane) form of the long 3-argument intrinsic;
// Vm is a DPR_VFP2 lane duplicated with NEONvduplane.
3026 class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
3027 string OpcodeStr, string Dt,
3028 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3029 : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
3031 (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
3033 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
3034 [(set (ResTy QPR:$Vd),
3035 (ResTy (IntOp (ResTy QPR:$src1),
3037 (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
// 16-bit-lane variant of the long 3-argument by-scalar intrinsic;
// Vm comes from DPR_8 with a VectorIndex16 lane index.
3039 class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
3040 InstrItinClass itin, string OpcodeStr, string Dt,
3041 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3042 : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
3044 (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
3046 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
3047 [(set (ResTy QPR:$Vd),
3048 (ResTy (IntOp (ResTy QPR:$src1),
3050 (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
3053 // Narrowing 3-register intrinsics.
// Narrowing 3-register intrinsic: two Q-reg sources, D-reg result (TyD).
3054 class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3055 string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
3056 SDPatternOperator IntOp, bit Commutable>
3057 : N3V<op24, op23, op21_20, op11_8, 0, op4,
3058 (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
3059 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3060 [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
3061 let isCommutable = Commutable;
3064 // Long 3-register operations.
// Long 3-register operation: two D-reg sources widen into a Q-reg result.
3065 class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3066 InstrItinClass itin, string OpcodeStr, string Dt,
3067 ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
3068 : N3V<op24, op23, op21_20, op11_8, 0, op4,
3069 (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
3070 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3071 [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
3072 let isCommutable = Commutable;
// By-scalar (32-bit lane) long operation: Vm is a DPR_VFP2 lane
// duplicated across the vector before the widening OpNode.
3075 class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
3076 InstrItinClass itin, string OpcodeStr, string Dt,
3077 ValueType TyQ, ValueType TyD, SDNode OpNode>
3078 : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
3079 (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
3080 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
3082 (TyQ (OpNode (TyD DPR:$Vn),
3083 (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
// 16-bit-lane variant of the by-scalar long operation (Vm in DPR_8).
3084 class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
3085 InstrItinClass itin, string OpcodeStr, string Dt,
3086 ValueType TyQ, ValueType TyD, SDNode OpNode>
3087 : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
3088 (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
3089 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
3091 (TyQ (OpNode (TyD DPR:$Vn),
3092 (TyD (NEONvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;
3094 // Long 3-register operations with explicitly extended operands.
// Long operation with explicitly extended operands: both D-reg sources
// are widened by ExtOp before OpNode combines them (e.g. VADDL-style).
3095 class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3096 InstrItinClass itin, string OpcodeStr, string Dt,
3097 ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
3099 : N3V<op24, op23, op21_20, op11_8, 0, op4,
3100 (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
3101 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3102 [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
3103 (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
3104 let isCommutable = Commutable;
3107 // Long 3-register intrinsics with explicit extend (VABDL).
// Long intrinsic with explicit extend (VABDL): the D-sized intrinsic
// result is widened by ExtOp into the Q-reg destination.
3108 class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3109 InstrItinClass itin, string OpcodeStr, string Dt,
3110 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
3112 : N3V<op24, op23, op21_20, op11_8, 0, op4,
3113 (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
3114 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3115 [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
3116 (TyD DPR:$Vm))))))]> {
3117 let isCommutable = Commutable;
3120 // Long 3-register intrinsics.
// Long 3-register intrinsic: D-reg sources, Q-reg (widened) result.
3121 class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3122 InstrItinClass itin, string OpcodeStr, string Dt,
3123 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, bit Commutable>
3124 : N3V<op24, op23, op21_20, op11_8, 0, op4,
3125 (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
3126 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3127 [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
3128 let isCommutable = Commutable;
3131 // Same as above, but not predicated.
// Same as N3VLInt but non-predicated (based on N3Vnp encoding).
3132 class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
3133 bit op4, InstrItinClass itin, string OpcodeStr,
3134 string Dt, ValueType ResTy, ValueType OpTy,
3135 SDPatternOperator IntOp, bit Commutable>
3136 : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
3137 (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
3138 [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
// By-scalar (32-bit lane) long intrinsic; Vm is a DPR_VFP2 lane.
3140 class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
3141 string OpcodeStr, string Dt,
3142 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3143 : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
3144 (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
3145 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
3146 [(set (ResTy QPR:$Vd),
3147 (ResTy (IntOp (OpTy DPR:$Vn),
3148 (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
// 16-bit-lane variant of the by-scalar long intrinsic (Vm in DPR_8).
3150 class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
3151 InstrItinClass itin, string OpcodeStr, string Dt,
3152 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3153 : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
3154 (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
3155 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
3156 [(set (ResTy QPR:$Vd),
3157 (ResTy (IntOp (OpTy DPR:$Vn),
3158 (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
3161 // Wide 3-register operations.
// Wide 3-register operation: Q-reg Vn combined with the ExtOp-widened
// D-reg Vm (e.g. VADDW-style). Carries a two-operand InstAlias.
3162 class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3163 string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
3164 SDNode OpNode, SDNode ExtOp, bit Commutable>
3165 : N3V<op24, op23, op21_20, op11_8, 0, op4,
3166 (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
3167 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3168 [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
3169 (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
3170 // All of these have a two-operand InstAlias.
3171 let TwoOperandAliasConstraint = "$Vn = $Vd";
3172 let isCommutable = Commutable;
3175 // Pairwise long 2-register intrinsics, both double- and quad-register.
// Pairwise long 2-register intrinsic, double-register form (D -> D).
3176 class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
3177 bits<2> op17_16, bits<5> op11_7, bit op4,
3178 string OpcodeStr, string Dt,
3179 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3180 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
3181 (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
3182 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
// Pairwise long 2-register intrinsic, quad-register form (Q -> Q).
3183 class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
3184 bits<2> op17_16, bits<5> op11_7, bit op4,
3185 string OpcodeStr, string Dt,
3186 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3187 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
3188 (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
3189 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
3191 // Pairwise long 2-register accumulate intrinsics,
3192 // both double- and quad-register.
3193 // The destination register is also used as the first source operand register.
// Pairwise long 2-register accumulate intrinsic, double-register form:
// $Vd is tied to the accumulator input $src1.
3194 class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
3195 bits<2> op17_16, bits<5> op11_7, bit op4,
3196 string OpcodeStr, string Dt,
3197 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3198 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
3199 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
3200 OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
3201 [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
// Pairwise long 2-register accumulate intrinsic, quad-register form:
// $Vd is tied to the accumulator input $src1.
3202 class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
3203 bits<2> op17_16, bits<5> op11_7, bit op4,
3204 string OpcodeStr, string Dt,
3205 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3206 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
3207 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
3208 OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
3209 [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;
3211 // Shift by immediate,
3212 // both double- and quad-register.
// Shift by immediate, D- and Q-register forms. The let wrapper gives both
// the "$Vm = $Vd" two-operand assembly alias.
3213 let TwoOperandAliasConstraint = "$Vm = $Vd" in {
// Double-register shift by immediate $SIMM.
3214 class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3215 Format f, InstrItinClass itin, Operand ImmTy,
3216 string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
3217 : N2VImm<op24, op23, op11_8, op7, 0, op4,
3218 (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
3219 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3220 [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
// Quad-register shift by immediate $SIMM.
3221 class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3222 Format f, InstrItinClass itin, Operand ImmTy,
3223 string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
3224 : N2VImm<op24, op23, op11_8, op7, 1, op4,
3225 (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
3226 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3227 [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
3230 // Long shift by immediate.
// Long shift by immediate: D-reg source, Q-reg (widened) result.
3231 class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
3232 string OpcodeStr, string Dt,
3233 ValueType ResTy, ValueType OpTy, Operand ImmTy,
3234 SDPatternOperator OpNode>
3235 : N2VImm<op24, op23, op11_8, op7, op6, op4,
3236 (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
3237 IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3238 [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm), ImmTy:$SIMM)))]>;
3240 // Narrow shift by immediate.
// Narrow shift by immediate: Q-reg source, D-reg (narrowed) result.
3241 class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
3242 InstrItinClass itin, string OpcodeStr, string Dt,
3243 ValueType ResTy, ValueType OpTy, Operand ImmTy,
3244 SDPatternOperator OpNode>
3245 : N2VImm<op24, op23, op11_8, op7, op6, op4,
3246 (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
3247 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3248 [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
3249 (i32 ImmTy:$SIMM))))]>;
3251 // Shift right by immediate and accumulate,
3252 // both double- and quad-register.
// Shift right by immediate and accumulate (add into tied $src1),
// D- and Q-register forms, with "$Vm = $Vd" two-operand alias.
3253 let TwoOperandAliasConstraint = "$Vm = $Vd" in {
// Double-register: Vd = src1 + ShOp(Vm, SIMM).
3254 class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3255 Operand ImmTy, string OpcodeStr, string Dt,
3256 ValueType Ty, SDNode ShOp>
3257 : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
3258 (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
3259 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
3260 [(set DPR:$Vd, (Ty (add DPR:$src1,
3261 (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
// Quad-register: Vd = src1 + ShOp(Vm, SIMM).
3262 class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3263 Operand ImmTy, string OpcodeStr, string Dt,
3264 ValueType Ty, SDNode ShOp>
3265 : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
3266 (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
3267 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
3268 [(set QPR:$Vd, (Ty (add QPR:$src1,
3269 (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
3272 // Shift by immediate and insert,
3273 // both double- and quad-register.
// Shift by immediate and insert (VSLI/VSRI style: ShOp takes the tied
// destination, the source and the shift), D- and Q-register forms.
3274 let TwoOperandAliasConstraint = "$Vm = $Vd" in {
// Double-register: Vd = ShOp(src1, Vm, SIMM).
3275 class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3276 Operand ImmTy, Format f, string OpcodeStr, string Dt,
3277 ValueType Ty,SDNode ShOp>
3278 : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
3279 (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
3280 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
3281 [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
// Quad-register: Vd = ShOp(src1, Vm, SIMM).
3282 class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3283 Operand ImmTy, Format f, string OpcodeStr, string Dt,
3284 ValueType Ty,SDNode ShOp>
3285 : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
3286 (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
3287 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
3288 [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
3291 // Convert, with fractional bits immediate,
3292 // both double- and quad-register.
// Convert with fractional-bits immediate, double-register form.
3293 class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3294 string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
3295 SDPatternOperator IntOp>
3296 : N2VImm<op24, op23, op11_8, op7, 0, op4,
3297 (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
3298 IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3299 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
// Convert with fractional-bits immediate, quad-register form.
3300 class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3301 string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
3302 SDPatternOperator IntOp>
3303 : N2VImm<op24, op23, op11_8, op7, 1, op4,
3304 (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
3305 IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3306 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;
3308 //===----------------------------------------------------------------------===//
3310 //===----------------------------------------------------------------------===//
3312 // Abbreviations used in multiclass suffixes:
3313 // Q = quarter int (8 bit) elements
3314 // H = half int (16 bit) elements
3315 // S = single int (32 bit) elements
3316 // D = double int (64 bit) elements
3318 // Neon 2-register vector operations and intrinsics.
3320 // Neon 2-register comparisons.
3321 // source operand element sizes of 8, 16 and 32 bits:
// 2-register comparisons over all D and Q element types. Integer defs use
// Dt with the element width appended; the f32/f16 defs hard-code the type
// string and set Inst{10} = 1 to select the floating-point encoding.
3322 multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3323 bits<5> op11_7, bit op4, string opc, string Dt,
3324 string asm, SDNode OpNode> {
3325 // 64-bit vector types.
3326 def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
3327 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3328 opc, !strconcat(Dt, "8"), asm, "",
3329 [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>;
3330 def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
3331 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3332 opc, !strconcat(Dt, "16"), asm, "",
3333 [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>;
3334 def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
3335 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3336 opc, !strconcat(Dt, "32"), asm, "",
3337 [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>;
// f32 comparison produces an integer (mask) vector of the same lane count.
3338 def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
3339 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3340 opc, "f32", asm, "",
3341 [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
3342 let Inst{10} = 1; // overwrite F = 1
// f16 forms additionally require the FullFP16 extension.
3344 def v4f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
3345 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3346 opc, "f16", asm, "",
3347 [(set DPR:$Vd, (v4i16 (OpNode (v4f16 DPR:$Vm))))]>,
3348 Requires<[HasNEON,HasFullFP16]> {
3349 let Inst{10} = 1; // overwrite F = 1
3352 // 128-bit vector types.
3353 def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
3354 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3355 opc, !strconcat(Dt, "8"), asm, "",
3356 [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>;
3357 def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
3358 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3359 opc, !strconcat(Dt, "16"), asm, "",
3360 [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>;
3361 def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
3362 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3363 opc, !strconcat(Dt, "32"), asm, "",
3364 [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>;
3365 def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
3366 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3367 opc, "f32", asm, "",
3368 [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
3369 let Inst{10} = 1; // overwrite F = 1
3371 def v8f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
3372 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3373 opc, "f16", asm, "",
3374 [(set QPR:$Vd, (v8i16 (OpNode (v8f16 QPR:$Vm))))]>,
3375 Requires<[HasNEON,HasFullFP16]> {
3376 let Inst{10} = 1; // overwrite F = 1
3381 // Neon 2-register vector intrinsics,
3382 // element sizes of 8, 16 and 32 bits:
// 2-register intrinsics over i8/i16/i32 element sizes, D and Q forms.
3383 multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3384 bits<5> op11_7, bit op4,
3385 InstrItinClass itinD, InstrItinClass itinQ,
3386 string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3387 // 64-bit vector types.
3388 def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3389 itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
3390 def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3391 itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
3392 def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3393 itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;
3395 // 128-bit vector types.
3396 def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3397 itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
3398 def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3399 itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
3400 def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3401 itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
3405 // Neon Narrowing 2-register vector operations,
3406 // source operand element sizes of 16, 32 and 64 bits:
// Narrowing 2-register operations: Q-reg source elements of 16/32/64 bits
// narrow to D-reg results of 8/16/32 bits.
3407 multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3408 bits<5> op11_7, bit op6, bit op4,
3409 InstrItinClass itin, string OpcodeStr, string Dt,
3411 def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
3412 itin, OpcodeStr, !strconcat(Dt, "16"),
3413 v8i8, v8i16, OpNode>;
3414 def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
3415 itin, OpcodeStr, !strconcat(Dt, "32"),
3416 v4i16, v4i32, OpNode>;
3417 def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
3418 itin, OpcodeStr, !strconcat(Dt, "64"),
3419 v2i32, v2i64, OpNode>;
3422 // Neon Narrowing 2-register vector intrinsics,
3423 // source operand element sizes of 16, 32 and 64 bits:
// Narrowing 2-register intrinsics, same type pattern as N2VN_HSD.
3424 multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3425 bits<5> op11_7, bit op6, bit op4,
3426 InstrItinClass itin, string OpcodeStr, string Dt,
3427 SDPatternOperator IntOp> {
3428 def v8i8 : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
3429 itin, OpcodeStr, !strconcat(Dt, "16"),
3430 v8i8, v8i16, IntOp>;
3431 def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
3432 itin, OpcodeStr, !strconcat(Dt, "32"),
3433 v4i16, v4i32, IntOp>;
3434 def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
3435 itin, OpcodeStr, !strconcat(Dt, "64"),
3436 v2i32, v2i64, IntOp>;
3440 // Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
3441 // source operand element sizes of 8, 16 and 32 bits:
// Lengthening 2-register operation (VMOVL): D-reg sources of 8/16/32-bit
// elements widen to Q-reg results of 16/32/64-bit elements.
3442 multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
3443 string OpcodeStr, string Dt, SDNode OpNode> {
3444 def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
3445 OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
3446 def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
3447 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
3448 def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
3449 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
3453 // Neon 3-register vector operations.
3455 // First with only element sizes of 8, 16 and 32 bits:
// 3-register operations over i8/i16/i32 elements, D and Q forms.
// Separate itineraries for 16-bit-wide and 32-bit-wide element defs.
3456 multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3457 InstrItinClass itinD16, InstrItinClass itinD32,
3458 InstrItinClass itinQ16, InstrItinClass itinQ32,
3459 string OpcodeStr, string Dt,
3460 SDNode OpNode, bit Commutable = 0> {
3461 // 64-bit vector types.
3462 def v8i8 : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
3463 OpcodeStr, !strconcat(Dt, "8"),
3464 v8i8, v8i8, OpNode, Commutable>;
3465 def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
3466 OpcodeStr, !strconcat(Dt, "16"),
3467 v4i16, v4i16, OpNode, Commutable>;
3468 def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
3469 OpcodeStr, !strconcat(Dt, "32"),
3470 v2i32, v2i32, OpNode, Commutable>;
3472 // 128-bit vector types.
3473 def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
3474 OpcodeStr, !strconcat(Dt, "8"),
3475 v16i8, v16i8, OpNode, Commutable>;
3476 def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
3477 OpcodeStr, !strconcat(Dt, "16"),
3478 v8i16, v8i16, OpNode, Commutable>;
3479 def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
3480 OpcodeStr, !strconcat(Dt, "32"),
3481 v4i32, v4i32, OpNode, Commutable>;
// By-scalar (lane) shifts/multiplies over i16 and i32 elements, D and Q.
3484 multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
3485 def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
3486 def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
3487 def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
3488 def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
3489 v4i32, v2i32, ShOp>;
3492 // ....then also with element size 64 bits:
// N3V_QHS extended with the 64-bit element forms (v1i64 / v2i64).
3493 multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
3494 InstrItinClass itinD, InstrItinClass itinQ,
3495 string OpcodeStr, string Dt,
3496 SDNode OpNode, bit Commutable = 0>
3497 : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
3498 OpcodeStr, Dt, OpNode, Commutable> {
3499 def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
3500 OpcodeStr, !strconcat(Dt, "64"),
3501 v1i64, v1i64, OpNode, Commutable>;
3502 def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
3503 OpcodeStr, !strconcat(Dt, "64"),
3504 v2i64, v2i64, OpNode, Commutable>;
3508 // Neon 3-register vector intrinsics.
3510 // First with only element sizes of 16 and 32 bits:
// 3-register intrinsics over i16 and i32 elements only, D and Q forms.
3511 multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3512 InstrItinClass itinD16, InstrItinClass itinD32,
3513 InstrItinClass itinQ16, InstrItinClass itinQ32,
3514 string OpcodeStr, string Dt,
3515 SDPatternOperator IntOp, bit Commutable = 0> {
3516 // 64-bit vector types.
3517 def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
3518 OpcodeStr, !strconcat(Dt, "16"),
3519 v4i16, v4i16, IntOp, Commutable>;
3520 def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
3521 OpcodeStr, !strconcat(Dt, "32"),
3522 v2i32, v2i32, IntOp, Commutable>;
3524 // 128-bit vector types.
3525 def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
3526 OpcodeStr, !strconcat(Dt, "16"),
3527 v8i16, v8i16, IntOp, Commutable>;
3528 def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
3529 OpcodeStr, !strconcat(Dt, "32"),
3530 v4i32, v4i32, IntOp, Commutable>;
// Shift-operand ("Sh") variant of N3VInt_HS, built on N3VDIntSh/N3VQIntSh.
3532 multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3533 InstrItinClass itinD16, InstrItinClass itinD32,
3534 InstrItinClass itinQ16, InstrItinClass itinQ32,
3535 string OpcodeStr, string Dt,
3536 SDPatternOperator IntOp> {
3537 // 64-bit vector types.
3538 def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
3539 OpcodeStr, !strconcat(Dt, "16"),
3540 v4i16, v4i16, IntOp>;
3541 def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
3542 OpcodeStr, !strconcat(Dt, "32"),
3543 v2i32, v2i32, IntOp>;
3545 // 128-bit vector types.
3546 def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
3547 OpcodeStr, !strconcat(Dt, "16"),
3548 v8i16, v8i16, IntOp>;
3549 def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
3550 OpcodeStr, !strconcat(Dt, "32"),
3551 v4i32, v4i32, IntOp>;
// By-scalar (lane) 3-register intrinsics over i16/i32, D and Q forms.
3554 multiclass N3VIntSL_HS<bits<4> op11_8,
3555 InstrItinClass itinD16, InstrItinClass itinD32,
3556 InstrItinClass itinQ16, InstrItinClass itinQ32,
3557 string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3558 def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
3559 OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
3560 def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
3561 OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
3562 def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
3563 OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
3564 def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
3565 OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
3568 // ....then also with element size of 8 bits:
// N3VInt_HS extended with the 8-bit element forms (v8i8 / v16i8).
3569 multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3570 InstrItinClass itinD16, InstrItinClass itinD32,
3571 InstrItinClass itinQ16, InstrItinClass itinQ32,
3572 string OpcodeStr, string Dt,
3573 SDPatternOperator IntOp, bit Commutable = 0>
3574 : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
3575 OpcodeStr, Dt, IntOp, Commutable> {
3576 def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
3577 OpcodeStr, !strconcat(Dt, "8"),
3578 v8i8, v8i8, IntOp, Commutable>;
3579 def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
3580 OpcodeStr, !strconcat(Dt, "8"),
3581 v16i8, v16i8, IntOp, Commutable>;
// Shift-operand variant of N3VInt_QHS: N3VInt_HSSh plus 8-bit forms.
3583 multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3584 InstrItinClass itinD16, InstrItinClass itinD32,
3585 InstrItinClass itinQ16, InstrItinClass itinQ32,
3586 string OpcodeStr, string Dt,
3587 SDPatternOperator IntOp>
3588 : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
3589 OpcodeStr, Dt, IntOp> {
3590 def v8i8 : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
3591 OpcodeStr, !strconcat(Dt, "8"),
3593 def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
3594 OpcodeStr, !strconcat(Dt, "8"),
3595 v16i8, v16i8, IntOp>;
3599 // ....then also with element size of 64 bits:
// N3VInt_QHS extended with the 64-bit element forms (v1i64 / v2i64).
3600 multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3601 InstrItinClass itinD16, InstrItinClass itinD32,
3602 InstrItinClass itinQ16, InstrItinClass itinQ32,
3603 string OpcodeStr, string Dt,
3604 SDPatternOperator IntOp, bit Commutable = 0>
3605 : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
3606 OpcodeStr, Dt, IntOp, Commutable> {
3607 def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
3608 OpcodeStr, !strconcat(Dt, "64"),
3609 v1i64, v1i64, IntOp, Commutable>;
3610 def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
3611 OpcodeStr, !strconcat(Dt, "64"),
3612 v2i64, v2i64, IntOp, Commutable>;
// Shift-operand variant of N3VInt_QHSD: N3VInt_QHSSh plus 64-bit forms.
3614 multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3615 InstrItinClass itinD16, InstrItinClass itinD32,
3616 InstrItinClass itinQ16, InstrItinClass itinQ32,
3617 string OpcodeStr, string Dt,
3618 SDPatternOperator IntOp>
3619 : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
3620 OpcodeStr, Dt, IntOp> {
3621 def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
3622 OpcodeStr, !strconcat(Dt, "64"),
3623 v1i64, v1i64, IntOp>;
3624 def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
3625 OpcodeStr, !strconcat(Dt, "64"),
3626 v2i64, v2i64, IntOp>;
3629 // Neon Narrowing 3-register vector intrinsics,
3630 // source operand element sizes of 16, 32 and 64 bits:
// Narrowing 3-register intrinsics: Q-reg sources (16/32/64-bit elements)
// narrow to D-reg results (8/16/32-bit elements).
3631 multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
3632 string OpcodeStr, string Dt,
3633 SDPatternOperator IntOp, bit Commutable = 0> {
3634 def v8i8 : N3VNInt<op24, op23, 0b00, op11_8, op4,
3635 OpcodeStr, !strconcat(Dt, "16"),
3636 v8i8, v8i16, IntOp, Commutable>;
3637 def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
3638 OpcodeStr, !strconcat(Dt, "32"),
3639 v4i16, v4i32, IntOp, Commutable>;
3640 def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
3641 OpcodeStr, !strconcat(Dt, "64"),
3642 v2i32, v2i64, IntOp, Commutable>;
3646 // Neon Long 3-register vector operations.
// Long 3-register operations: 8/16/32-bit D-reg sources widen to
// 16/32/64-bit Q-reg results.
3648 multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3649 InstrItinClass itin16, InstrItinClass itin32,
3650 string OpcodeStr, string Dt,
3651 SDNode OpNode, bit Commutable = 0> {
3652 def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
3653 OpcodeStr, !strconcat(Dt, "8"),
3654 v8i16, v8i8, OpNode, Commutable>;
3655 def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
3656 OpcodeStr, !strconcat(Dt, "16"),
3657 v4i32, v4i16, OpNode, Commutable>;
3658 def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
3659 OpcodeStr, !strconcat(Dt, "32"),
3660 v2i64, v2i32, OpNode, Commutable>;
// By-scalar (lane) long operations over i16 and i32 source elements.
3663 multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
3664 InstrItinClass itin, string OpcodeStr, string Dt,
3666 def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
3667 !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
3668 def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
3669 !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
// Long operations with explicitly extended operands (see N3VLExt),
// over 8/16/32-bit source elements.
3672 multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3673 InstrItinClass itin16, InstrItinClass itin32,
3674 string OpcodeStr, string Dt,
3675 SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
3676 def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
3677 OpcodeStr, !strconcat(Dt, "8"),
3678 v8i16, v8i8, OpNode, ExtOp, Commutable>;
3679 def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
3680 OpcodeStr, !strconcat(Dt, "16"),
3681 v4i32, v4i16, OpNode, ExtOp, Commutable>;
3682 def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
3683 OpcodeStr, !strconcat(Dt, "32"),
3684 v2i64, v2i32, OpNode, ExtOp, Commutable>;
3687 // Neon Long 3-register vector intrinsics.
3689 // First with only element sizes of 16 and 32 bits:
// Long 3-register intrinsics over i16 and i32 source elements only.
3690 multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
3691 InstrItinClass itin16, InstrItinClass itin32,
3692 string OpcodeStr, string Dt,
3693 SDPatternOperator IntOp, bit Commutable = 0> {
3694 def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
3695 OpcodeStr, !strconcat(Dt, "16"),
3696 v4i32, v4i16, IntOp, Commutable>;
3697 def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
3698 OpcodeStr, !strconcat(Dt, "32"),
3699 v2i64, v2i32, IntOp, Commutable>;
// By-scalar (lane) long intrinsics over i16 and i32 source elements.
3702 multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
3703 InstrItinClass itin, string OpcodeStr, string Dt,
3704 SDPatternOperator IntOp> {
3705 def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
3706 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
3707 def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
3708 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
3711 // ....then also with element size of 8 bits:
// N3VLInt_HS extended with the 8-bit source element form (v8i16 <- v8i8).
3712 multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3713 InstrItinClass itin16, InstrItinClass itin32,
3714 string OpcodeStr, string Dt,
3715 SDPatternOperator IntOp, bit Commutable = 0>
3716 : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
3717 IntOp, Commutable> {
3718 def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
3719 OpcodeStr, !strconcat(Dt, "8"),
3720 v8i16, v8i8, IntOp, Commutable>;
// ....with explicit extend (VABDL).
multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                          InstrItinClass itin, string OpcodeStr, string Dt,
                          SDPatternOperator IntOp, SDNode ExtOp,
                          bit Commutable = 0> {
  def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "8"),
                         v8i16, v8i8, IntOp, ExtOp, Commutable>;
  def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "16"),
                         v4i32, v4i16, IntOp, ExtOp, Commutable>;
  def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "32"),
                         v2i64, v2i32, IntOp, ExtOp, Commutable>;
}
// Neon Wide 3-register vector intrinsics,
// source operand element sizes of 8, 16 and 32 bits:
multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, ExtOp, Commutable>;
}
// Neon Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits:
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
}
// Scalar (by-lane) multiply-op variants, element sizes of 16 and 32 bits.
// NOTE(review): the trailing "mul, ShOp>;" continuation of the Q-register
// defs was reconstructed from the D-register defs above them — confirm
// against upstream ARMInstrNEON.td.
multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt, SDPatternOperator ShOp> {
  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
                            OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
                          OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
                            OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
                            mul, ShOp>;
  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
                          OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
                          mul, ShOp>;
}
// Neon Intrinsic-Op vector operations,
// element sizes of 8, 16 and 32 bits:
multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD, InstrItinClass itinQ,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp,
                        SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
  def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
  def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
  def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
  def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
}
// Neon 3-argument intrinsics,
// element sizes of 16 and 32 bits:
multiclass N3VInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD16,
                       OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD32,
                       OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ16,
                       OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ32,
                       OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}
// element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp>
  : N3VInt3_HS<op24, op23, op11_8, op4, itinD16, itinD32,
               itinQ16, itinQ32, OpcodeStr, Dt, IntOp> {
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD16,
                       OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ16,
                       OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
}
// Neon Long Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits:
multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itin16, InstrItinClass itin32,
                         string OpcodeStr, string Dt, SDNode MulOp,
                         SDNode OpNode> {
  def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
  def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
                        !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}
// Scalar (by-lane) long multiply-op variants, element sizes 16 and 32 bits.
multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
                          string Dt, SDNode MulOp, SDNode OpNode> {
  def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
                            !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
                          !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}
// Neon Long 3-argument intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
                       OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}
// Scalar (by-lane) long 3-argument intrinsic variants.
multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
                           OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}
// ....then also with element size of 8 bits:
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itin16, InstrItinClass itin32,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp>
  : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}
// ....with explicit extend (VABAL).
multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                            InstrItinClass itin, string OpcodeStr, string Dt,
                            SDPatternOperator IntOp, SDNode ExtOp,
                            SDNode OpNode> {
  def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
                           IntOp, ExtOp, OpNode>;
  def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
                           IntOp, ExtOp, OpNode>;
  def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
                           IntOp, ExtOp, OpNode>;
}
// Neon Pairwise long 2-register intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}
// Neon Pairwise long 2-register accumulate intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}
// Neon 2-register vector shift by immediate,
// with f of either N2RegVShLFrm or N2RegVShRFrm
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01; // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1; // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01; // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1; // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}
// Right-shift-by-immediate variant: uses the range-checked shr_imm*
// immediate operands instead of a plain i32imm.
multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       string baseOpc, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01; // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1; // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01; // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1; // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}
// Neon Shift-Accumulate vector operations,
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
    let Inst{21-20} = 0b01; // imm6 = 01xxxx
  }
  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
    let Inst{21} = 0b1; // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
    let Inst{21-20} = 0b01; // imm6 = 01xxxx
  }
  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
    let Inst{21} = 0b1; // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
                             // imm6 = xxxxxx
}
// Neon Shift-Insert vector operations,
// with f of either N2RegVShLFrm or N2RegVShRFrm
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> {
    let Inst{21-20} = 0b01; // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> {
    let Inst{21} = 0b1; // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> {
    let Inst{21-20} = 0b01; // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> {
    let Inst{21} = 0b1; // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>;
                             // imm6 = xxxxxx
}
// Right-shift-insert (VSRI) variant of the shift-insert operations above.
multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> {
    let Inst{21-20} = 0b01; // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> {
    let Inst{21} = 0b1; // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> {
    let Inst{21-20} = 0b01; // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> {
    let Inst{21} = 0b1; // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>;
                             // imm6 = xxxxxx
}
// Neon Shift Long operations,
// element sizes of 8, 16, 32 bits:
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, string OpcodeStr, string Dt,
                      SDPatternOperator OpNode> {
  def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7,
                     OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15,
                     OpNode> {
    let Inst{21-20} = 0b01; // imm6 = 01xxxx
  }
  def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31,
                     OpNode> {
    let Inst{21} = 0b1; // imm6 = 1xxxxx
  }
}
// Neon Shift Narrow operations,
// element sizes of 16, 32, 64 bits:
multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
                      SDPatternOperator OpNode> {
  def v8i8  : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "16"),
                     v8i8, v8i16, shr_imm8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "32"),
                     v4i16, v4i32, shr_imm16, OpNode> {
    let Inst{21-20} = 0b01; // imm6 = 01xxxx
  }
  def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "64"),
                     v2i32, v2i64, shr_imm32, OpNode> {
    let Inst{21} = 0b1; // imm6 = 1xxxxx
  }
}
//===----------------------------------------------------------------------===//
// Instruction Definitions.
//===----------------------------------------------------------------------===//
// Vector Add Operations.

// VADD : Vector Add (integer and floating-point)
defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
                     add, 1>;
def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
                  v2f32, v2f32, fadd, 1>;
def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
                  v4f32, v4f32, fadd, 1>;
def VADDhd : N3VD<0, 0, 0b01, 0b1101, 0, IIC_VBIND, "vadd", "f16",
                  v4f16, v4f16, fadd, 1>,
             Requires<[HasNEON,HasFullFP16]>;
def VADDhq : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16",
                  v8f16, v8f16, fadd, 1>,
             Requires<[HasNEON,HasFullFP16]>;
// VADDL : Vector Add Long (Q = D + D)
defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                          "vaddl", "s", add, sext, 1>;
defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                          "vaddl", "u", add, zext, 1>;
// VADDW : Vector Add Wide (Q = Q + D)
defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
// VHADD : Vector Halving Add
defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
                         IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                         "vhadd", "s", int_arm_neon_vhadds, 1>;
defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
                         IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                         "vhadd", "u", int_arm_neon_vhaddu, 1>;
// VRHADD : Vector Rounding Halving Add
defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vrhadd", "s", int_arm_neon_vrhadds, 1>;
defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
// VQADD : Vector Saturating Add
defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vqadd", "s", int_arm_neon_vqadds, 1>;
defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vqadd", "u", int_arm_neon_vqaddu, 1>;
// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>;
// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
                           int_arm_neon_vraddhn, 1>;

// Select VADDHN for the generic add + unsigned-shift-right + truncate form.
def : Pat<(v8i8  (trunc (NEONvshru (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
          (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i16 (trunc (NEONvshru (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
          (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i32 (trunc (NEONvshru (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
          (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
// Vector Multiply Operations.

// VMUL : Vector Multiply (integer, polynomial and floating-point)
defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
                    IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
                     "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
                     "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
                  v2f32, v2f32, fmul, 1>;
def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
                  v4f32, v4f32, fmul, 1>;
def VMULhd : N3VD<1, 0, 0b01, 0b1101, 1, IIC_VFMULD, "vmul", "f16",
                  v4f16, v4f16, fmul, 1>,
             Requires<[HasNEON,HasFullFP16]>;
def VMULhq : N3VQ<1, 0, 0b01, 0b1101, 1, IIC_VFMULQ, "vmul", "f16",
                  v8f16, v8f16, fmul, 1>,
             Requires<[HasNEON,HasFullFP16]>;
defm VMULsl  : N3VSL_HS<0b1000, "vmul", mul>;
def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
                      v2f32, fmul>;
def VMULslhd : N3VDSL16<0b01, 0b1001, "vmul", "f16", v4f16, fmul>,
               Requires<[HasNEON,HasFullFP16]>;
def VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16", v8f16,
                        v4f16, fmul>,
               Requires<[HasNEON,HasFullFP16]>;

// Q-register by-lane multiplies: extract the D subregister holding the lane,
// then use the by-lane (scalar) multiply instruction.
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
                      (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
          (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
                              (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
                      (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
          (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
                              (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
                       (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
          (v4f32 (VMULslfq (v4f32 QPR:$src1),
                           (v2f32 (EXTRACT_SUBREG QPR:$src2,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;

// Multiply by a scalar duplicated across all lanes: insert the scalar into
// lane 0 of an undefined D register and multiply by that lane.
def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
          (VMULslfd DPR:$Rn,
                    (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
                    (i32 0))>;
def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
          (VMULslfq QPR:$Rn,
                    (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
                    (i32 0))>;
// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
                          IIC_VMULi16Q, IIC_VMULi32Q,
                          "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqdmulh", "s", int_arm_neon_vqdmulh>;
// Q-register by-lane forms, via the D subregister that holds the lane.
def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
                                       (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                                            imm:$lane)))),
          (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
                                 (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i16_reg imm:$lane))),
                                 (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
                                       (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                                            imm:$lane)))),
          (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
                                 (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i32_reg imm:$lane))),
                                 (SubReg_i32_lane imm:$lane)))>;

// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
                            IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
                            "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
                              IIC_VMULi16Q, IIC_VMULi32Q,
                              "vqrdmulh", "s", int_arm_neon_vqrdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
                                        (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                                             imm:$lane)))),
          (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i16_reg imm:$lane))),
                                  (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
                                        (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                                             imm:$lane)))),
          (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i32_reg imm:$lane))),
                                  (SubReg_i32_lane imm:$lane)))>;
// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
    DecoderNamespace = "NEONData" in {
  defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                         "vmull", "s", NEONvmulls, 1>;
  defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                         "vmull", "u", NEONvmullu, 1>;
  def  VMULLp8  : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
                          v8i16, v8i8, int_arm_neon_vmullp, 1>;
  def  VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
                            "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp,
                            1>,
                  Requires<[HasV8, HasCrypto]>;
}
defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;

// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL  : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
                           "vqdmull", "s", int_arm_neon_vqdmull, 1>;
defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
                             "vqdmull", "s", int_arm_neon_vqdmull>;
// Vector Multiply-Accumulate and Multiply-Subtract Operations.

// VMLA : Vector Multiply Accumulate (integer and floating-point)
defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                         IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
                       v2f32, fmul_su, fadd_mlx>,
             Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
                       v4f32, fmul_su, fadd_mlx>,
             Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
def VMLAhd : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16",
                       v4f16, fmul_su, fadd_mlx>,
             Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
def VMLAhq : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16",
                       v8f16, fmul_su, fadd_mlx>,
             Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
defm VMLAsl  : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
                           v2f32, fmul_su, fadd_mlx>,
               Requires<[HasNEON, UseFPVMLx]>;
def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
                           v4f32, v2f32, fmul_su, fadd_mlx>,
               Requires<[HasNEON, UseFPVMLx]>;
def VMLAslhd : N3VDMulOpSL16<0b01, 0b0001, IIC_VMACD, "vmla", "f16",
                             v4f16, fmul, fadd>,
               Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
def VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, IIC_VMACQ, "vmla", "f16",
                             v8f16, v4f16, fmul, fadd>,
               Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
// Q-register by-lane multiply-accumulate: extract the D subregister that
// contains the lane and use the by-lane VMLA instruction.
def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (add (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
                  (fmul_su (v4f32 QPR:$src2),
                           (v4f32 (NEONvduplane (v4f32 QPR:$src3),
                                                imm:$lane))))),
          (v4f32 (VMLAslfq (v4f32 QPR:$src1),
                           (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
      Requires<[HasNEON, UseFPVMLx]>;
// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                            "vmlal", "s", NEONvmulls, add>;
defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                            "vmlal", "u", NEONvmullu, add>;

defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;
// v8.1a VQRDMLAH: instruction defs plus patterns that fuse
// vqadds(x, vqrdmulh(a, b)) into a single accumulate. The closing brace of
// this `let Predicates` scope is past the VQRDMLSH patterns below.
4453 let Predicates = [HasNEON, HasV8_1a] in {
4454 // v8.1a Neon Rounding Double Multiply-Op vector operations,
4455 // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate Long
4457 defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D,
4458 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
4460 def : Pat<(v4i16 (int_arm_neon_vqadds
4462 (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
4463 (v4i16 DPR:$Vm))))),
4464 (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
4465 def : Pat<(v2i32 (int_arm_neon_vqadds
4467 (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
4468 (v2i32 DPR:$Vm))))),
4469 (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
4470 def : Pat<(v8i16 (int_arm_neon_vqadds
4472 (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
4473 (v8i16 QPR:$Vm))))),
4474 (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
4475 def : Pat<(v4i32 (int_arm_neon_vqadds
4477 (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
4478 (v4i32 QPR:$Vm))))),
4479 (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
// By-scalar (lane) variants and their fusing patterns; Q-register forms
// extract the D subregister holding the lane, as for VMLA above.
4481 defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D,
4482 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
4484 def : Pat<(v4i16 (int_arm_neon_vqadds
4486 (v4i16 (int_arm_neon_vqrdmulh
4488 (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
4490 (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm,
4492 def : Pat<(v2i32 (int_arm_neon_vqadds
4494 (v2i32 (int_arm_neon_vqrdmulh
4496 (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
4498 (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
4500 def : Pat<(v8i16 (int_arm_neon_vqadds
4502 (v8i16 (int_arm_neon_vqrdmulh
4504 (v8i16 (NEONvduplane (v8i16 QPR:$src3),
4506 (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1),
4508 (v4i16 (EXTRACT_SUBREG
4510 (DSubReg_i16_reg imm:$lane))),
4511 (SubReg_i16_lane imm:$lane)))>;
4512 def : Pat<(v4i32 (int_arm_neon_vqadds
4514 (v4i32 (int_arm_neon_vqrdmulh
4516 (v4i32 (NEONvduplane (v4i32 QPR:$src3),
4518 (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1),
4520 (v2i32 (EXTRACT_SUBREG
4522 (DSubReg_i32_reg imm:$lane))),
4523 (SubReg_i32_lane imm:$lane)))>;
4525 // VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract Long
// Mirror of VQRDMLAH above, fusing vqsubs(x, vqrdmulh(a, b)) instead of
// vqadds. Still inside the [HasNEON, HasV8_1a] predicate scope.
4527 defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D,
4528 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
4530 def : Pat<(v4i16 (int_arm_neon_vqsubs
4532 (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
4533 (v4i16 DPR:$Vm))))),
4534 (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
4535 def : Pat<(v2i32 (int_arm_neon_vqsubs
4537 (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
4538 (v2i32 DPR:$Vm))))),
4539 (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
4540 def : Pat<(v8i16 (int_arm_neon_vqsubs
4542 (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
4543 (v8i16 QPR:$Vm))))),
4544 (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
4545 def : Pat<(v4i32 (int_arm_neon_vqsubs
4547 (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
4548 (v4i32 QPR:$Vm))))),
4549 (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
// By-scalar (lane) variants.
4551 defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D,
4552 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
4554 def : Pat<(v4i16 (int_arm_neon_vqsubs
4556 (v4i16 (int_arm_neon_vqrdmulh
4558 (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
4560 (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>;
4561 def : Pat<(v2i32 (int_arm_neon_vqsubs
4563 (v2i32 (int_arm_neon_vqrdmulh
4565 (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
4567 (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
4569 def : Pat<(v8i16 (int_arm_neon_vqsubs
4571 (v8i16 (int_arm_neon_vqrdmulh
4573 (v8i16 (NEONvduplane (v8i16 QPR:$src3),
4575 (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1),
4577 (v4i16 (EXTRACT_SUBREG
4579 (DSubReg_i16_reg imm:$lane))),
4580 (SubReg_i16_lane imm:$lane)))>;
4581 def : Pat<(v4i32 (int_arm_neon_vqsubs
4583 (v4i32 (int_arm_neon_vqrdmulh
4585 (v4i32 (NEONvduplane (v4i32 QPR:$src3),
4587 (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1),
4589 (v2i32 (EXTRACT_SUBREG
4591 (DSubReg_i32_reg imm:$lane))),
4592 (SubReg_i32_lane imm:$lane)))>;
4594 // VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
// Instructions use null_frag; selection is done by the explicit patterns
// below that fuse vqadds(acc, vqdmull(a, b)).
4595 defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
4596 "vqdmlal", "s", null_frag>;
4597 defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>;
4599 def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
4600 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4601 (v4i16 DPR:$Vm))))),
4602 (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
4603 def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
4604 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4605 (v2i32 DPR:$Vm))))),
4606 (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
// Lane variants: the scalar multiplicand is a duplicated lane of a D reg.
4607 def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
4608 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4609 (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
4611 (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
4612 def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
4613 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4614 (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
4616 (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
4618 // VMLS : Vector Multiply Subtract (integer and floating-point)
// FP three-register forms require DontUseFusedMAC (VFMS is used instead when
// fused MAC is enabled); by-scalar FP forms require only UseFPVMLx.
4619 defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
4620 IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
4621 def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
4622 v2f32, fmul_su, fsub_mlx>,
4623 Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
4624 def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
4625 v4f32, fmul_su, fsub_mlx>,
4626 Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
4627 def VMLShd : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16",
4629 Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
4630 def VMLShq : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16",
4632 Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
4633 defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
4634 IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
4635 def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
4636 v2f32, fmul_su, fsub_mlx>,
4637 Requires<[HasNEON, UseFPVMLx]>;
4638 def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
4639 v4f32, v2f32, fmul_su, fsub_mlx>,
4640 Requires<[HasNEON, UseFPVMLx]>;
4641 def VMLSslhd : N3VDMulOpSL16<0b01, 0b0101, IIC_VMACD, "vmls", "f16",
4643 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4644 def VMLSslhq : N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16",
4645 v8f16, v4f16, fmul, fsub>,
4646 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
// Fold sub(Q, mul(Q, duplane(Q, lane))) into by-scalar VMLS, same subreg
// extraction scheme as the VMLA patterns.
4648 def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
4649 (mul (v8i16 QPR:$src2),
4650 (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
4651 (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
4652 (v4i16 (EXTRACT_SUBREG QPR:$src3,
4653 (DSubReg_i16_reg imm:$lane))),
4654 (SubReg_i16_lane imm:$lane)))>;
4656 def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
4657 (mul (v4i32 QPR:$src2),
4658 (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
4659 (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
4660 (v2i32 (EXTRACT_SUBREG QPR:$src3,
4661 (DSubReg_i32_reg imm:$lane))),
4662 (SubReg_i32_lane imm:$lane)))>;
4664 def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
4665 (fmul_su (v4f32 QPR:$src2),
4666 (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
4667 (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
4668 (v2f32 (EXTRACT_SUBREG QPR:$src3,
4669 (DSubReg_i32_reg imm:$lane))),
4670 (SubReg_i32_lane imm:$lane)))>,
4671 Requires<[HasNEON, UseFPVMLx]>;
4673 // VMLSL : Vector Multiply Subtract Long (Q -= D * D)
// Signed/unsigned forms, plus by-scalar (lane) variants for H and S sizes.
4674 defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
4675 "vmlsl", "s", NEONvmulls, sub>;
4676 defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
4677 "vmlsl", "u", NEONvmullu, sub>;
4679 defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
4680 defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;
4682 // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
// Mirror of VQDMLAL: null_frag instructions selected via explicit patterns
// fusing vqsubs(acc, vqdmull(a, b)).
4683 defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
4684 "vqdmlsl", "s", null_frag>;
4685 defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>;
4687 def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
4688 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4689 (v4i16 DPR:$Vm))))),
4690 (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
4691 def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
4692 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4693 (v2i32 DPR:$Vm))))),
4694 (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
4695 def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
4696 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4697 (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
4699 (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
4700 def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
4701 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4702 (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
4704 (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
4706 // Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
// Selected only under UseFusedMAC; f32 needs VFP4, f16 needs FullFP16.
4707 def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
4708 v2f32, fmul_su, fadd_mlx>,
4709 Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
4711 def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
4712 v4f32, fmul_su, fadd_mlx>,
4713 Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
4714 def VFMAhd : N3VDMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACD, "vfma", "f16",
4716 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
4718 def VFMAhq : N3VQMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACQ, "vfma", "f16",
4720 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
4722 // Fused Vector Multiply Subtract (floating-point)
4723 def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
4724 v2f32, fmul_su, fsub_mlx>,
4725 Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
4726 def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
4727 v4f32, fmul_su, fsub_mlx>,
4728 Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
4729 def VFMShd : N3VDMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACD, "vfms", "f16",
4731 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
4732 def VFMShq : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16",
4734 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
4736 // Match @llvm.fma.* intrinsics
// Note the operand order swap: the ISD fma is (Vn, Vm, acc) while the
// instruction takes the accumulator first. fma with a negated first
// multiplicand maps to VFMS.
4737 def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
4738 (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
4739 Requires<[HasVFP4]>;
4740 def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
4741 (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
4742 Requires<[HasVFP4]>;
4743 def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
4744 (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
4745 Requires<[HasVFP4]>;
4746 def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
4747 (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
4748 Requires<[HasVFP4]>;
4750 // ARMv8.2a dot product instructions.
4751 // We put them in the VFPV8 decoder namespace because the ARM and Thumb
4752 // encodings are the same and thus no further bit twiddling is necessary
4753 // in the disassembler.
// VDOT: three-register dot product, accumulating into $Vd (tied to $dst).
4754 class VDOT<bit op6, bit op4, RegisterClass RegTy, string Asm, string AsmTy,
4755 ValueType AccumTy, ValueType InputTy,
4756 SDPatternOperator OpNode> :
4757 N3Vnp<0b11000, 0b10, 0b1101, op6, op4, (outs RegTy:$dst),
4758 (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm), N3RegFrm, IIC_VDOTPROD,
4760 [(set (AccumTy RegTy:$dst),
4761 (OpNode (AccumTy RegTy:$Vd),
4762 (InputTy RegTy:$Vn),
4763 (InputTy RegTy:$Vm)))]> {
4764 let Predicates = [HasDotProd];
4765 let DecoderNamespace = "VFPV8";
4766 let Constraints = "$dst = $Vd";
4769 def VUDOTD : VDOT<0, 1, DPR, "vudot", "u8", v2i32, v8i8, int_arm_neon_udot>;
4770 def VSDOTD : VDOT<0, 0, DPR, "vsdot", "s8", v2i32, v8i8, int_arm_neon_sdot>;
4771 def VUDOTQ : VDOT<1, 1, QPR, "vudot", "u8", v4i32, v16i8, int_arm_neon_udot>;
4772 def VSDOTQ : VDOT<1, 0, QPR, "vsdot", "s8", v4i32, v16i8, int_arm_neon_sdot>;
4774 // Indexed dot product instructions:
// DOTI: indexed (by-lane) dot product; the scalar operand is always a D reg
// (DPR_VFP2) indexed by a 32-bit lane. The Pat below rewrites a duplicated
// lane into the indexed form, with RHS supplied per instantiation.
4775 multiclass DOTI<string opc, string dt, bit Q, bit U, RegisterClass Ty,
4776 ValueType AccumType, ValueType InputType, SDPatternOperator OpNode,
4778 def "" : N3Vnp<0b11100, 0b10, 0b1101, Q, U, (outs Ty:$dst),
4779 (ins Ty:$Vd, Ty:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
4780 N3RegFrm, IIC_VDOTPROD, opc, dt, []> {
4783 let AsmString = !strconcat(opc, ".", dt, "\t$Vd, $Vn, $Vm$lane");
4784 let Constraints = "$dst = $Vd";
4785 let Predicates = [HasDotProd];
4786 let DecoderNamespace = "VFPV8";
4790 (AccumType (OpNode (AccumType Ty:$Vd),
4792 (InputType (bitconvert (AccumType
4793 (NEONvduplane (AccumType Ty:$Vm),
4794 VectorIndex32:$lane)))))),
4795 (!cast<Instruction>(NAME) Ty:$Vd, Ty:$Vn, RHS, VectorIndex32:$lane)>;
4798 defm VUDOTDI : DOTI<"vudot", "u8", 0b0, 0b1, DPR, v2i32, v8i8,
4799 int_arm_neon_udot, (v2i32 DPR_VFP2:$Vm)>;
4800 defm VSDOTDI : DOTI<"vsdot", "s8", 0b0, 0b0, DPR, v2i32, v8i8,
4801 int_arm_neon_sdot, (v2i32 DPR_VFP2:$Vm)>;
4802 defm VUDOTQI : DOTI<"vudot", "u8", 0b1, 0b1, QPR, v4i32, v16i8,
4803 int_arm_neon_udot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
4804 defm VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR, v4i32, v16i8,
4805 int_arm_neon_sdot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
4808 // ARMv8.3 complex operations
// Base classes for VCMLA/VCADD. "Tied" variants constrain $src1 = $Vd
// (accumulating); "Odd" variants are untied and take only the odd rotation
// operand. Lane variants come in 32-bit-pair and 64-bit-pair indexed forms.
4809 class BaseN3VCP8ComplexTied<bit op21, bit op4, bit s, bit q,
4810 InstrItinClass itin, dag oops, dag iops,
4811 string opc, string dt, list<dag> pattern>
4812 : N3VCP8<{?,?}, {op21,s}, q, op4, oops,
4813 iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "$src1 = $Vd", pattern>{
4815 let Inst{24-23} = rot;
4818 class BaseN3VCP8ComplexOdd<bit op23, bit op21, bit op4, bit s, bit q,
4819 InstrItinClass itin, dag oops, dag iops, string opc,
4820 string dt, list<dag> pattern>
4821 : N3VCP8<{?,op23}, {op21,s}, q, op4, oops,
4822 iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "", pattern> {
4827 class BaseN3VCP8ComplexTiedLane32<bit op4, bit s, bit q, InstrItinClass itin,
4828 dag oops, dag iops, string opc, string dt,
4830 : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt,
4831 "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
4835 let Inst{21-20} = rot;
4839 class BaseN3VCP8ComplexTiedLane64<bit op4, bit s, bit q, InstrItinClass itin,
4840 dag oops, dag iops, string opc, string dt,
4842 : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt,
4843 "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
4847 let Inst{21-20} = rot;
4848 let Inst{5} = Vm{4};
4849 // This is needed because the lane operand does not have any bits in the
4850 // encoding (it only has one possible value), so we need to manually set it
4851 // to it's default value.
4852 let DecoderMethod = "DecodeNEONComplexLane64Instruction";
// Multiclasses instantiating D/Q forms for f16 (needs FullFP16) and f32.
4855 multiclass N3VCP8ComplexTied<bit op21, bit op4,
4856 string OpcodeStr, SDPatternOperator Op> {
4857 let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
4858 def v4f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 0, IIC_VMACD, (outs DPR:$Vd),
4859 (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
4860 OpcodeStr, "f16", []>;
4861 def v8f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 1, IIC_VMACQ, (outs QPR:$Vd),
4862 (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
4863 OpcodeStr, "f16", []>;
4865 let Predicates = [HasNEON,HasV8_3a] in {
4866 def v2f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 0, IIC_VMACD, (outs DPR:$Vd),
4867 (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
4868 OpcodeStr, "f32", []>;
4869 def v4f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 1, IIC_VMACQ, (outs QPR:$Vd),
4870 (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
4871 OpcodeStr, "f32", []>;
4875 multiclass N3VCP8ComplexOdd<bit op23, bit op21, bit op4,
4876 string OpcodeStr, SDPatternOperator Op> {
4877 let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
4878 def v4f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 0, IIC_VMACD,
4880 (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
4881 OpcodeStr, "f16", []>;
4882 def v8f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 1, IIC_VMACQ,
4884 (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
4885 OpcodeStr, "f16", []>;
4887 let Predicates = [HasNEON,HasV8_3a] in {
4888 def v2f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 0, IIC_VMACD,
4890 (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
4891 OpcodeStr, "f32", []>;
4892 def v4f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 1, IIC_VMACQ,
4894 (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
4895 OpcodeStr, "f32", []>;
4899 // These instructions index by pairs of lanes, so the VectorIndexes are twice
4900 // as wide as the data types.
4901 multiclass N3VCP8ComplexTiedLane<bit op4, string OpcodeStr,
4902 SDPatternOperator Op> {
4903 let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
4904 def v4f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 0, IIC_VMACD,
4906 (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
4907 VectorIndex32:$lane, complexrotateop:$rot),
4908 OpcodeStr, "f16", []>;
4909 def v8f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 1, IIC_VMACQ,
4911 (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm,
4912 VectorIndex32:$lane, complexrotateop:$rot),
4913 OpcodeStr, "f16", []>;
4915 let Predicates = [HasNEON,HasV8_3a] in {
4916 def v2f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 0, IIC_VMACD,
4918 (ins DPR:$src1, DPR:$Vn, DPR:$Vm, VectorIndex64:$lane,
4919 complexrotateop:$rot),
4920 OpcodeStr, "f32", []>;
4921 def v4f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 1, IIC_VMACQ,
4923 (ins QPR:$src1, QPR:$Vn, DPR:$Vm, VectorIndex64:$lane,
4924 complexrotateop:$rot),
4925 OpcodeStr, "f32", []>;
// VCMLA / VCADD instantiations; patterns are null_frag (intrinsic selection
// is handled elsewhere).
4929 defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla", null_frag>;
4930 defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd", null_frag>;
4931 defm VCMLA : N3VCP8ComplexTiedLane<0, "vcmla", null_frag>;
4933 // Vector Subtract Operations.
4935 // VSUB : Vector Subtract (integer and floating-point)
4936 defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
4937 "vsub", "i", sub, 0>;
4938 def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
4939 v2f32, v2f32, fsub, 0>;
4940 def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
4941 v4f32, v4f32, fsub, 0>;
4942 def VSUBhd : N3VD<0, 0, 0b11, 0b1101, 0, IIC_VBIND, "vsub", "f16",
4943 v4f16, v4f16, fsub, 0>,
4944 Requires<[HasNEON,HasFullFP16]>;
4945 def VSUBhq : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16",
4946 v8f16, v8f16, fsub, 0>,
4947 Requires<[HasNEON,HasFullFP16]>;
4948 // VSUBL : Vector Subtract Long (Q = D - D)
4949 defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
4950 "vsubl", "s", sub, sext, 0>;
4951 defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
4952 "vsubl", "u", sub, zext, 0>;
4953 // VSUBW : Vector Subtract Wide (Q = Q - D)
4954 defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
4955 defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
4956 // VHSUB : Vector Halving Subtract
4957 defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
4958 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4959 "vhsub", "s", int_arm_neon_vhsubs, 0>;
4960 defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
4961 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4962 "vhsub", "u", int_arm_neon_vhsubu, 0>;
4963 // VQSUB : Vector Saturing Subtract
4964 defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
4965 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4966 "vqsub", "s", int_arm_neon_vqsubs, 0>;
4967 defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
4968 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4969 "vqsub", "u", int_arm_neon_vqsubu, 0>;
4970 // VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
4971 defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>;
4972 // VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
4973 defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
4974 int_arm_neon_vrsubhn, 0>;
// Select VSUBHN for trunc(lshr(sub(a, b), half-width)): taking the high
// half of a widened subtraction is exactly what vsubhn computes.
4976 def : Pat<(v8i8 (trunc (NEONvshru (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
4977 (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
4978 def : Pat<(v4i16 (trunc (NEONvshru (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
4979 (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
4980 def : Pat<(v2i32 (trunc (NEONvshru (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
4981 (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;
4983 // Vector Comparisons.
// All FP compares produce an integer mask vector of the same lane count
// (e.g. v2f32 operands -> v2i32 result).
4985 // VCEQ : Vector Compare Equal
4986 defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
4987 IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>;
4988 def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
4990 def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
4992 def VCEQhd : N3VD<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16,
4994 Requires<[HasNEON, HasFullFP16]>;
4995 def VCEQhq : N3VQ<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16,
4997 Requires<[HasNEON, HasFullFP16]>;
// Compare-against-zero forms; the two-operand alias writes back into $Vd.
4999 let TwoOperandAliasConstraint = "$Vm = $Vd" in
5000 defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
5001 "$Vd, $Vm, #0", NEONvceqz>;
5003 // VCGE : Vector Compare Greater Than or Equal
5004 defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5005 IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
5006 defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5007 IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
5008 def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
5010 def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
5012 def VCGEhd : N3VD<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16,
5014 Requires<[HasNEON, HasFullFP16]>;
5015 def VCGEhq : N3VQ<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16,
5017 Requires<[HasNEON, HasFullFP16]>;
5019 let TwoOperandAliasConstraint = "$Vm = $Vd" in {
5020 defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
5021 "$Vd, $Vm, #0", NEONvcgez>;
5022 defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
5023 "$Vd, $Vm, #0", NEONvclez>;
5026 // VCGT : Vector Compare Greater Than
5027 defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5028 IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>;
5029 defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5030 IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>;
5031 def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
5033 def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
5035 def VCGThd : N3VD<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16,
5037 Requires<[HasNEON, HasFullFP16]>;
5038 def VCGThq : N3VQ<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16,
5040 Requires<[HasNEON, HasFullFP16]>;
5042 let TwoOperandAliasConstraint = "$Vm = $Vd" in {
5043 defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
5044 "$Vd, $Vm, #0", NEONvcgtz>;
5045 defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
5046 "$Vd, $Vm, #0", NEONvcltz>;
5049 // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
// Result types are integer mask vectors matching the FP input lane count
// (v2i32/v2f32, v4i32/v4f32, v4i16/v4f16, v8i16/v8f16).
5050 def VACGEfd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
5051 "f32", v2i32, v2f32, int_arm_neon_vacge, 0>;
5052 def VACGEfq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
5053 "f32", v4i32, v4f32, int_arm_neon_vacge, 0>;
5054 def VACGEhd : N3VDInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
5055 "f16", v4i16, v4f16, int_arm_neon_vacge, 0>,
5056 Requires<[HasNEON, HasFullFP16]>;
5057 def VACGEhq : N3VQInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
5058 "f16", v8i16, v8f16, int_arm_neon_vacge, 0>,
5059 Requires<[HasNEON, HasFullFP16]>;
5060 // VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
5061 def VACGTfd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
5062 "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>;
5063 def VACGTfq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
5064 "f32", v4i32, v4f32, int_arm_neon_vacgt, 0>;
5065 def VACGThd : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
5066 "f16", v4i16, v4f16, int_arm_neon_vacgt, 0>,
5067 Requires<[HasNEON, HasFullFP16]>;
// VACGThq: Q-register f16 absolute compare greater-than.
// FIX: the result type was v8f16; an FP absolute compare produces an
// integer mask vector, so the result must be v8i16 — consistent with
// VACGEhq (v8i16, v8f16) and VACGThd (v4i16, v4f16) above.
5068 def VACGThq : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
5069 "f16", v8i16, v8f16, int_arm_neon_vacgt, 0>,
5070 Requires<[HasNEON, HasFullFP16]>;
5071 // VTST : Vector Test Bits
5072 defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
5073 IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
// vaclt/vacle are assembler aliases for vacgt/vacge with the source
// operands swapped (a < b  <=>  b > a).
5075 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
5076 (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
5077 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
5078 (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
5079 def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
5080 (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
5081 def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
5082 (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
5083 let Predicates = [HasNEON, HasFullFP16] in {
5084 def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
5085 (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
5086 def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
5087 (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
5088 def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
5089 (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
5090 def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
5091 (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
// Two-operand alias forms: destination doubles as the second source.
5094 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
5095 (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
5096 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
5097 (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
5098 def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
5099 (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
5100 def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
5101 (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
5102 let Predicates = [HasNEON, HasFullFP16] in {
5103 def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
5104 (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
5105 def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
5106 (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
5107 def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
5108 (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
5109 def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
5110 (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
5113 // Vector Bitwise Operations.
// vnotd/vnotq: bitwise NOT expressed as XOR with an all-ones vector, for D
// and Q register widths respectively.
5115 def vnotd : PatFrag<(ops node:$in),
5116 (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
5117 def vnotq : PatFrag<(ops node:$in),
5118 (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;
5121 // VAND : Vector Bitwise AND
5122 def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
5123 v2i32, v2i32, and, 1>;
5124 def VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
5125 v4i32, v4i32, and, 1>;
5127 // VEOR : Vector Bitwise Exclusive OR
5128 def VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
5129 v2i32, v2i32, xor, 1>;
5130 def VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
5131 v4i32, v4i32, xor, 1>;
5133 // VORR : Vector Bitwise OR
5134 def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
5135 v2i32, v2i32, or, 1>;
5136 def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
5137 v4i32, v4i32, or, 1>;
// VORR (immediate): OR a splatted modified-immediate into the destination;
// $src is tied to $Vd. The Inst{...} = SIMM{...} lines copy the cmode-
// selector bits of the encoded immediate into the instruction encoding.
5139 def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
5140 (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
5142 "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
5144 (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
5145 let Inst{9} = SIMM{9};
5148 def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
5149 (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
5151 "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
5153 (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
5154 let Inst{10-9} = SIMM{10-9};
5157 def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
5158 (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
5160 "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
5162 (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
5163 let Inst{9} = SIMM{9};
5166 def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
5167 (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
5169 "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
5171 (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
5172 let Inst{10-9} = SIMM{10-9};
5176 // VBIC : Vector Bitwise Bit Clear (AND NOT)
// Register form: Vd = Vn & ~Vm, via the vnotd/vnotq PatFrags.
5177 let TwoOperandAliasConstraint = "$Vn = $Vd" in {
5178 def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
5179 (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
5180 "vbic", "$Vd, $Vn, $Vm", "",
5181 [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
5182 (vnotd DPR:$Vm))))]>;
5183 def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
5184 (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
5185 "vbic", "$Vd, $Vn, $Vm", "",
5186 [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
5187 (vnotq QPR:$Vm))))]>;
// Immediate forms: clear bits of $Vd given by a splatted modified
// immediate; structure parallels the VORRi* defs above.
5190 def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
5191 (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
5193 "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
5195 (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
5196 let Inst{9} = SIMM{9};
5199 def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
5200 (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
5202 "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
5204 (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
5205 let Inst{10-9} = SIMM{10-9};
5208 def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
5209 (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
5211 "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
5213 (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
5214 let Inst{9} = SIMM{9};
5217 def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
5218 (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
5220 "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
5222 (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
5223 let Inst{10-9} = SIMM{10-9};
5226 // VORN : Vector Bitwise OR NOT
// Vd = Vn | ~Vm, via the vnotd/vnotq PatFrags.
5227 def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
5228 (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
5229 "vorn", "$Vd, $Vn, $Vm", "",
5230 [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
5231 (vnotd DPR:$Vm))))]>;
5232 def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
5233 (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
5234 "vorn", "$Vd, $Vn, $Vm", "",
5235 [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
5236 (vnotq QPR:$Vm))))]>;
5238 // VMVN : Vector Bitwise NOT (Immediate)
// Materializes the complement of a modified immediate; rematerializable
// since it has no register inputs.
5240 let isReMaterializable = 1 in {
5242 def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
5243 (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
5244 "vmvn", "i16", "$Vd, $SIMM", "",
5245 [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
5246 let Inst{9} = SIMM{9};
5249 def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
5250 (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
5251 "vmvn", "i16", "$Vd, $SIMM", "",
5252 [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
5253 let Inst{9} = SIMM{9};
5256 def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
5257 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
5258 "vmvn", "i32", "$Vd, $SIMM", "",
5259 [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
5260 let Inst{11-8} = SIMM{11-8};
5263 def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
5264 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
5265 "vmvn", "i32", "$Vd, $SIMM", "",
5266 [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
5267 let Inst{11-8} = SIMM{11-8};
5271 // VMVN : Vector Bitwise NOT
5272 def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
5273 (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
5274 "vmvn", "$Vd, $Vm", "",
5275 [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
5276 def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
5277 (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
5278 "vmvn", "$Vd, $Vm", "",
5279 [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
5280 def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
5281 def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
5283 // VBSL : Vector Bitwise Select
// D-register form. Patterns map the int_arm_neon_vbsl intrinsic at every
// 64-bit element type onto the single VBSLd instruction, plus the open-coded
// (a & sel) | (b & ~sel) idiom.
5284 def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
5285 (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5286 N3RegFrm, IIC_VCNTiD,
5287 "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5289 (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
5290 def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
5291 (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
5292 (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
5293 Requires<[HasNEON]>;
5294 def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
5295 (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
5296 (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
5297 Requires<[HasNEON]>;
5298 def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
5299 (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
5300 (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
5301 Requires<[HasNEON]>;
5302 def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
5303 (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
5304 (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
5305 Requires<[HasNEON]>;
5306 def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
5307 (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
5308 (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
5309 Requires<[HasNEON]>;
5311 def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
5312 (and DPR:$Vm, (vnotd DPR:$Vd)))),
5313 (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
5314 Requires<[HasNEON]>;
5316 def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
5317 (and DPR:$Vm, (vnotd DPR:$Vd)))),
5318 (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
5319 Requires<[HasNEON]>;
5321 def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
5322 (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
5323 N3RegFrm, IIC_VCNTiQ,
5324 "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5326 (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
5328 def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
5329 (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
5330 (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
5331 Requires<[HasNEON]>;
5332 def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
5333 (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
5334 (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
5335 Requires<[HasNEON]>;
5336 def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
5337 (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
5338 (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
5339 Requires<[HasNEON]>;
5340 def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
5341 (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
5342 (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
5343 Requires<[HasNEON]>;
5344 def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
5345 (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
5346 (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
5347 Requires<[HasNEON]>;
5349 def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
5350 (and QPR:$Vm, (vnotq QPR:$Vd)))),
5351 (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
5352 Requires<[HasNEON]>;
5353 def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
5354 (and QPR:$Vm, (vnotq QPR:$Vd)))),
5355 (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
5356 Requires<[HasNEON]>;
// VBIF : Vector Bitwise Insert if False
//              like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
// No selection patterns: these are assembler/disassembler-only (see note at
// the bottom of this section).
def  VBIFd    : N3VX<1, 0, 0b11, 0b0001, 0, 1,
                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;
def  VBIFq    : N3VX<1, 0, 0b11, 0b0001, 1, 1,
                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;

// VBIT : Vector Bitwise Insert if True
//              like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
def  VBITd    : N3VX<1, 0, 0b10, 0b0001, 0, 1,
                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;
def  VBITq    : N3VX<1, 0, 0b10, 0b0001, 1, 1,
                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;

// VBIT/VBIF are not yet implemented.  The TwoAddress pass will not go looking
// for equivalent operations with different register constraints; it just
// inserts copies.
// Vector Absolute Differences.

// VABD : Vector Absolute Difference
defm VABDs    : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vabd", "s", int_arm_neon_vabds, 1>;
defm VABDu    : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vabd", "u", int_arm_neon_vabdu, 1>;
def  VABDfd   : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
                        "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def  VABDfq   : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                        "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
def  VABDhd   : N3VDInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBIND,
                        "vabd", "f16", v4f16, v4f16, int_arm_neon_vabds, 1>,
                Requires<[HasNEON, HasFullFP16]>;
def  VABDhq   : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                        "vabd", "f16", v8f16, v8f16, int_arm_neon_vabds, 1>,
                Requires<[HasNEON, HasFullFP16]>;

// VABDL : Vector Absolute Difference Long (Q = | D - D |)
defm VABDLs   : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
                               "vabdl", "s", int_arm_neon_vabds, zext, 1>;
defm VABDLu   : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
                               "vabdl", "u", int_arm_neon_vabdu, zext, 1>;

// Match abs(sub(zext, zext)) forms that legalization produces for the
// widening unsigned absolute difference.
def : Pat<(v8i16 (abs (sub (zext (v8i8 DPR:$opA)), (zext (v8i8 DPR:$opB))))),
          (VABDLuv8i16 DPR:$opA, DPR:$opB)>;
def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)), (zext (v4i16 DPR:$opB))))),
          (VABDLuv4i32 DPR:$opA, DPR:$opB)>;

// ISD::ABS is not legal for v2i64, so VABDL needs to be matched from the
// shift/xor pattern for ABS.

def abd_shr :
    PatFrag<(ops node:$in1, node:$in2, node:$shift),
            (NEONvshrs (sub (zext node:$in1),
                            (zext node:$in2)), (i32 $shift))>;

def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))),
               (v4i32 (bitconvert (v2i64 (add (sub (zext (v2i32 DPR:$opA)),
                                                   (zext (v2i32 DPR:$opB))),
                                         (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))))),
          (VABDLuv2i64 DPR:$opA, DPR:$opB)>;

// VABA : Vector Absolute Difference and Accumulate
defm VABAs    : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                             "vaba", "s", int_arm_neon_vabds, add>;
defm VABAu    : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                             "vaba", "u", int_arm_neon_vabdu, add>;

// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs   : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
                                 "vabal", "s", int_arm_neon_vabds, zext, add>;
defm VABALu   : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
                                 "vabal", "u", int_arm_neon_vabdu, zext, add>;
// Vector Maximum and Minimum.

// VMAX : Vector Maximum
defm VMAXs    : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmax", "s", smax, 1>;
defm VMAXu    : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmax", "u", umax, 1>;
def  VMAXfd   : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
                        "vmax", "f32",
                        v2f32, v2f32, fmaxnan, 1>;
def  VMAXfq   : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                        "vmax", "f32",
                        v4f32, v4f32, fmaxnan, 1>;
def  VMAXhd   : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND,
                        "vmax", "f16",
                        v4f16, v4f16, fmaxnan, 1>,
                Requires<[HasNEON, HasFullFP16]>;
def  VMAXhq   : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                        "vmax", "f16",
                        v8f16, v8f16, fmaxnan, 1>,
                Requires<[HasNEON, HasFullFP16]>;

// VMAXNM
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def VMAXNMNDf : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
                            N3RegFrm, NoItinerary, "vmaxnm", "f32",
                            v2f32, v2f32, fmaxnum, 1>,
                  Requires<[HasV8, HasNEON]>;
  def VMAXNMNQf : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
                            N3RegFrm, NoItinerary, "vmaxnm", "f32",
                            v4f32, v4f32, fmaxnum, 1>,
                  Requires<[HasV8, HasNEON]>;
  def VMAXNMNDh : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1,
                            N3RegFrm, NoItinerary, "vmaxnm", "f16",
                            v4f16, v4f16, fmaxnum, 1>,
                  Requires<[HasV8, HasNEON, HasFullFP16]>;
  def VMAXNMNQh : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1,
                            N3RegFrm, NoItinerary, "vmaxnm", "f16",
                            v8f16, v8f16, fmaxnum, 1>,
                  Requires<[HasV8, HasNEON, HasFullFP16]>;
}

// VMIN : Vector Minimum
defm VMINs    : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmin", "s", smin, 1>;
defm VMINu    : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmin", "u", umin, 1>;
def  VMINfd   : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
                        "vmin", "f32",
                        v2f32, v2f32, fminnan, 1>;
def  VMINfq   : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                        "vmin", "f32",
                        v4f32, v4f32, fminnan, 1>;
def  VMINhd   : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND,
                        "vmin", "f16",
                        v4f16, v4f16, fminnan, 1>,
                Requires<[HasNEON, HasFullFP16]>;
def  VMINhq   : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                        "vmin", "f16",
                        v8f16, v8f16, fminnan, 1>,
                Requires<[HasNEON, HasFullFP16]>;

// VMINNM
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def VMINNMNDf : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
                            N3RegFrm, NoItinerary, "vminnm", "f32",
                            v2f32, v2f32, fminnum, 1>,
                  Requires<[HasV8, HasNEON]>;
  def VMINNMNQf : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
                            N3RegFrm, NoItinerary, "vminnm", "f32",
                            v4f32, v4f32, fminnum, 1>,
                  Requires<[HasV8, HasNEON]>;
  def VMINNMNDh : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1,
                            N3RegFrm, NoItinerary, "vminnm", "f16",
                            v4f16, v4f16, fminnum, 1>,
                  Requires<[HasV8, HasNEON, HasFullFP16]>;
  def VMINNMNQh : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1,
                            N3RegFrm, NoItinerary, "vminnm", "f16",
                            v8f16, v8f16, fminnum, 1>,
                  Requires<[HasV8, HasNEON, HasFullFP16]>;
}
// Vector Pairwise Operations.

// VPADD : Vector Pairwise Add
def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i8",
                        v8i8, v8i8, int_arm_neon_vpadd, 0>;
def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i16",
                        v4i16, v4i16, int_arm_neon_vpadd, 0>;
def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i32",
                        v2i32, v2i32, int_arm_neon_vpadd, 0>;
def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
                        IIC_VPBIND, "vpadd", "f32",
                        v2f32, v2f32, int_arm_neon_vpadd, 0>;
def  VPADDh   : N3VDInt<1, 0, 0b01, 0b1101, 0, N3RegFrm,
                        IIC_VPBIND, "vpadd", "f16",
                        v4f16, v4f16, int_arm_neon_vpadd, 0>,
                Requires<[HasNEON, HasFullFP16]>;

// VPADDL : Vector Pairwise Add Long
defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
                             int_arm_neon_vpaddls>;
defm VPADDLu  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
                             int_arm_neon_vpaddlu>;

// VPADAL : Vector Pairwise Add and Accumulate Long
defm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
                              int_arm_neon_vpadals>;
defm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
                              int_arm_neon_vpadalu>;

// VPMAX : Vector Pairwise Maximum
def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
                        "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
def  VPMAXh   : N3VDInt<1, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
                        "f16", v4f16, v4f16, int_arm_neon_vpmaxs, 0>,
                Requires<[HasNEON, HasFullFP16]>;

// VPMIN : Vector Pairwise Minimum
def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
                        "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
def  VPMINh   : N3VDInt<1, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
                        "f16", v4f16, v4f16, int_arm_neon_vpmins, 0>,
                Requires<[HasNEON, HasFullFP16]>;
// Vector Reciprocal and Reciprocal Square Root Estimate and Step.

// VRECPE : Vector Reciprocal Estimate
def  VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAD, "vrecpe", "u32",
                        v2i32, v2i32, int_arm_neon_vrecpe>;
def  VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAQ, "vrecpe", "u32",
                        v4i32, v4i32, int_arm_neon_vrecpe>;
def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAD, "vrecpe", "f32",
                        v2f32, v2f32, int_arm_neon_vrecpe>;
def  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAQ, "vrecpe", "f32",
                        v4f32, v4f32, int_arm_neon_vrecpe>;
def  VRECPEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
                        IIC_VUNAD, "vrecpe", "f16",
                        v4f16, v4f16, int_arm_neon_vrecpe>,
                Requires<[HasNEON, HasFullFP16]>;
def  VRECPEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
                        IIC_VUNAQ, "vrecpe", "f16",
                        v8f16, v8f16, int_arm_neon_vrecpe>,
                Requires<[HasNEON, HasFullFP16]>;

// VRECPS : Vector Reciprocal Step
def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrecps", "f32",
                        v2f32, v2f32, int_arm_neon_vrecps, 1>;
def  VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrecps", "f32",
                        v4f32, v4f32, int_arm_neon_vrecps, 1>;
def  VRECPShd : N3VDInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrecps", "f16",
                        v4f16, v4f16, int_arm_neon_vrecps, 1>,
                Requires<[HasNEON, HasFullFP16]>;
def  VRECPShq : N3VQInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrecps", "f16",
                        v8f16, v8f16, int_arm_neon_vrecps, 1>,
                Requires<[HasNEON, HasFullFP16]>;

// VRSQRTE : Vector Reciprocal Square Root Estimate
def  VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAD, "vrsqrte", "u32",
                         v2i32, v2i32, int_arm_neon_vrsqrte>;
def  VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAQ, "vrsqrte", "u32",
                         v4i32, v4i32, int_arm_neon_vrsqrte>;
def  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAD, "vrsqrte", "f32",
                         v2f32, v2f32, int_arm_neon_vrsqrte>;
def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAQ, "vrsqrte", "f32",
                         v4f32, v4f32, int_arm_neon_vrsqrte>;
def  VRSQRTEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
                         IIC_VUNAD, "vrsqrte", "f16",
                         v4f16, v4f16, int_arm_neon_vrsqrte>,
                 Requires<[HasNEON, HasFullFP16]>;
def  VRSQRTEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
                         IIC_VUNAQ, "vrsqrte", "f16",
                         v8f16, v8f16, int_arm_neon_vrsqrte>,
                 Requires<[HasNEON, HasFullFP16]>;

// VRSQRTS : Vector Reciprocal Square Root Step
def  VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                         IIC_VRECSD, "vrsqrts", "f32",
                         v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
def  VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                         IIC_VRECSQ, "vrsqrts", "f32",
                         v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
def  VRSQRTShd : N3VDInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
                         IIC_VRECSD, "vrsqrts", "f16",
                         v4f16, v4f16, int_arm_neon_vrsqrts, 1>,
                 Requires<[HasNEON, HasFullFP16]>;
def  VRSQRTShq : N3VQInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
                         IIC_VRECSQ, "vrsqrts", "f16",
                         v8f16, v8f16, int_arm_neon_vrsqrts, 1>,
                 Requires<[HasNEON, HasFullFP16]>;
// VSHL : Vector Shift
defm VSHLs    : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                            "vshl", "s", int_arm_neon_vshifts>;
defm VSHLu    : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                            "vshl", "u", int_arm_neon_vshiftu>;

// VSHL : Vector Shift Left (Immediate)
defm VSHLi    : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;

// VSHR : Vector Shift Right (Immediate)
defm VSHRs    : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
                            NEONvshrs>;
defm VSHRu    : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
                            NEONvshru>;

// VSHLL : Vector Shift Left Long
defm VSHLLs   : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s",
  PatFrag<(ops node:$LHS, node:$RHS), (NEONvshl (sext node:$LHS), node:$RHS)>>;
defm VSHLLu   : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u",
  PatFrag<(ops node:$LHS, node:$RHS), (NEONvshl (zext node:$LHS), node:$RHS)>>;

// VSHLL : Vector Shift Left Long (with maximum shift count)
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
                ValueType OpTy, Operand ImmTy>
  : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
           ResTy, OpTy, ImmTy, null_frag> {
  let Inst{21-16} = op21_16;
  let DecoderMethod = "DecodeVSHLMaxInstruction";
}
def  VSHLLi8  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
                          v8i16, v8i8, imm8>;
def  VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
                          v4i32, v4i16, imm16>;
def  VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
                          v2i64, v2i32, imm32>;

// Select the max-shift-count form when the shift amount equals the source
// element size, for all three extension kinds.
def : Pat<(v8i16 (NEONvshl (zext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (NEONvshl (zext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (NEONvshl (zext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;
def : Pat<(v8i16 (NEONvshl (sext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (NEONvshl (sext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (NEONvshl (sext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;
def : Pat<(v8i16 (NEONvshl (anyext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (NEONvshl (anyext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (NEONvshl (anyext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;

// VSHRN : Vector Shift Right and Narrow
defm VSHRN    : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
                           PatFrag<(ops node:$Rn, node:$amt),
                                   (trunc (NEONvshrs node:$Rn, node:$amt))>>;

// The unsigned right shift narrows to the same instruction.
def : Pat<(v8i8 (trunc (NEONvshru (v8i16 QPR:$Vn), shr_imm8:$amt))),
          (VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>;
def : Pat<(v4i16 (trunc (NEONvshru (v4i32 QPR:$Vn), shr_imm16:$amt))),
          (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>;
def : Pat<(v2i32 (trunc (NEONvshru (v2i64 QPR:$Vn), shr_imm32:$amt))),
          (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>;
// VRSHL : Vector Rounding Shift
defm VRSHLs   : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "s", int_arm_neon_vrshifts>;
defm VRSHLu   : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "u", int_arm_neon_vrshiftu>;
// VRSHR : Vector Rounding Shift Right
defm VRSHRs   : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
                            NEONvrshrs>;
defm VRSHRu   : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
                            NEONvrshru>;

// VRSHRN : Vector Rounding Shift Right and Narrow
defm VRSHRN   : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
                           NEONvrshrn>;

// VQSHL : Vector Saturating Shift
defm VQSHLs   : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "s", int_arm_neon_vqshifts>;
defm VQSHLu   : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "u", int_arm_neon_vqshiftu>;
// VQSHL : Vector Saturating Shift Left (Immediate)
defm VQSHLsi  : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>;
defm VQSHLui  : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>;

// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
defm VQSHLsu  : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>;

// VQSHRN : Vector Saturating Shift Right and Narrow
defm VQSHRNs  : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
                           NEONvqshrns>;
defm VQSHRNu  : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
                           NEONvqshrnu>;

// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
defm VQSHRUN  : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
                           NEONvqshrnsu>;

// VQRSHL : Vector Saturating Rounding Shift
defm VQRSHLs  : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqrshl", "s", int_arm_neon_vqrshifts>;
defm VQRSHLu  : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqrshl", "u", int_arm_neon_vqrshiftu>;

// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
                           NEONvqrshrns>;
defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
                           NEONvqrshrnu>;

// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
                           NEONvqrshrnsu>;
// VSRA : Vector Shift Right and Accumulate
defm VSRAs    : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
defm VSRAu    : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
// VRSRA : Vector Rounding Shift Right and Accumulate
defm VRSRAs   : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
defm VRSRAu   : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;

// VSLI : Vector Shift Left and Insert
defm VSLI     : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;

// VSRI : Vector Shift Right and Insert
defm VSRI     : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;
// Vector Absolute and Saturating Absolute.

// VABS : Vector Absolute Value
defm VABS     : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
                           IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", abs>;
def  VABSfd   : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                     "vabs", "f32",
                     v2f32, v2f32, fabs>;
def  VABSfq   : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                     "vabs", "f32",
                     v4f32, v4f32, fabs>;
def  VABShd   : N2VD<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
                     "vabs", "f16",
                     v4f16, v4f16, fabs>,
                Requires<[HasNEON, HasFullFP16]>;
def  VABShq   : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
                     "vabs", "f16",
                     v8f16, v8f16, fabs>,
                Requires<[HasNEON, HasFullFP16]>;

// VQABS : Vector Saturating Absolute Value
defm VQABS    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
                           int_arm_neon_vqabs>;

// Vector Negate.

// Integer negate is matched as (0 - x).
def vnegd  : PatFrag<(ops node:$in),
                     (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
def vnegq  : PatFrag<(ops node:$in),
                     (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;

class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
        IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;

// VNEG : Vector Negate (integer)
def  VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
def  VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
def  VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
def  VNEGs8q  : VNEGQ<0b00, "vneg", "s8", v16i8>;
def  VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
def  VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;

// VNEG : Vector Negate (floating-point)
def  VNEGfd   : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
                    "vneg", "f32", "$Vd, $Vm", "",
                    [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
def  VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                    "vneg", "f32", "$Vd, $Vm", "",
                    [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
def  VNEGhd   : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 0, 0,
                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
                    "vneg", "f16", "$Vd, $Vm", "",
                    [(set DPR:$Vd, (v4f16 (fneg DPR:$Vm)))]>,
                Requires<[HasNEON, HasFullFP16]>;
def  VNEGhq   : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0,
                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                    "vneg", "f16", "$Vd, $Vm", "",
                    [(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>,
                Requires<[HasNEON, HasFullFP16]>;

def : Pat<(v8i8  (vnegd  DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vnegd  DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vnegd  DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vnegq  QPR:$src)), (VNEGs8q QPR:$src)>;
def : Pat<(v8i16 (vnegq  QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vnegq  QPR:$src)), (VNEGs32q QPR:$src)>;

// VQNEG : Vector Saturating Negate
defm VQNEG    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
                           int_arm_neon_vqneg>;
// Vector Bit Counting Operations.

// VCLS : Vector Count Leading Sign Bits
defm VCLS     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
                           IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
                           int_arm_neon_vcls>;
// VCLZ : Vector Count Leading Zeros
defm VCLZ     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
                           IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
                           ctlz>;
// VCNT : Vector Count One Bits
def  VCNTd    : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                        IIC_VCNTiD, "vcnt", "8",
                        v8i8, v8i8, ctpop>;
def  VCNTq    : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                        IIC_VCNTiQ, "vcnt", "8",
                        v16i8, v16i8, ctpop>;

// Vector Swap
// No selection patterns; both halves are tied inputs/outputs.
def  VSWPd    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
                     (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
                     NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                     []>;
def  VSWPq    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
                     (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
                     NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                     []>;
// Vector Move Operations.

// VMOV : Vector Move (Register)
// Encoded as VORR with identical source operands.
def : NEONInstAlias<"vmov${p} $Vd, $Vm",
                    (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmov${p} $Vd, $Vm",
                    (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

// VMOV : Vector Move (Immediate)

// Although VMOVs are not strictly speaking cheap, they are as expensive
// as their copies counterpart (VORR), so we should prefer rematerialization
// over splitting when it applies.
let isReMaterializable = 1, isAsCheapAsAMove=1 in {
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                         "vmov", "i8", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                         "vmov", "i8", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;

def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmov", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmov", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmov", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmov", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;

def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>;
def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
} // isReMaterializable, isAsCheapAsAMove
// Add support for bytes replication feature, so it could be GAS compatible.
multiclass NEONImmReplicateI8InstAlias<ValueType To> {
  // E.g. instructions below:
  // "vmov.i32 d0, #0xffffffff"
  // "vmov.i32 d0, #0xabababab"
  // "vmov.i16 d0, #0xabab"
  // are incorrect, but we could deal with such cases.
  // For last two instructions, for example, it should emit:
  // "vmov.i8 d0, #0xab"
  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                      (VMOVv8i8 DPR:$Vd, nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                      (VMOVv16i8 QPR:$Vd, nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>;
  // Also add same support for VMVN instructions. So instruction:
  // "vmvn.i32 d0, #0xabababab"
  // would be correctly emitted as:
  // "vmov.i8 d0, #0x54"
  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                      (VMOVv8i8 DPR:$Vd, nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                      (VMOVv16i8 QPR:$Vd, nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>;
}

defm : NEONImmReplicateI8InstAlias<i16>;
defm : NEONImmReplicateI8InstAlias<i32>;
defm : NEONImmReplicateI8InstAlias<i64>;

// Similar to above for types other than i8, e.g.:
// "vmov.i32 d0, #0xab00ab00" -> "vmov.i16 d0, #0xab00"
// "vmvn.i64 q0, #0xab000000ab000000" -> "vmvn.i32 q0, #0xab000000"
// In this case we do not canonicalize VMVN to VMOV
multiclass NEONImmReplicateInstAlias<ValueType From, NeonI V8, NeonI V16,
                                     NeonI NV8, NeonI NV16, ValueType To> {
  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                      (V8 DPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                      (V16 QPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                      (NV8 DPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                      (NV16 QPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
}

defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
                                 VMVNv4i16, VMVNv8i16, i32>;
defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
                                 VMVNv4i16, VMVNv8i16, i64>;
defm : NEONImmReplicateInstAlias<i32, VMOVv2i32, VMOVv4i32,
                                 VMVNv2i32, VMVNv4i32, i64>;
// TODO: add "VMOV <-> VMVN" conversion for cases like
// "vmov.i32 d0, #0xffaaffaa" -> "vmvn.i16 d0, #0x55"
// "vmvn.i32 d0, #0xaaffaaff" -> "vmov.i16 d0, #0xff00"
6057 // On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0"
6058 // require zero cycles to execute so they should be used wherever possible for
6059 // setting a register to zero.
6061 // Even without these pseudo-insts we would probably end up with the correct
6062 // instruction, but we could not mark the general ones with "isAsCheapAsAMove"
6063 // since they are sometimes rather expensive (in general).
6065 let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
6066 def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm,
6067 [(set DPR:$Vd, (v2i32 NEONimmAllZerosV))],
6068 (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>,
6070 def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm,
6071 [(set QPR:$Vd, (v4i32 NEONimmAllZerosV))],
6072 (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>,
6076 // VMOV : Vector Get Lane (move scalar to ARM core register)
6078 def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
6079 (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
6080 IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
6081 [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
6083 let Inst{21} = lane{2};
6084 let Inst{6-5} = lane{1-0};
6086 def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
6087 (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
6088 IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
6089 [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
6091 let Inst{21} = lane{1};
6092 let Inst{6} = lane{0};
6094 def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
6095 (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
6096 IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
6097 [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
6099 let Inst{21} = lane{2};
6100 let Inst{6-5} = lane{1-0};
6102 def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
6103 (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
6104 IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
6105 [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
6107 let Inst{21} = lane{1};
6108 let Inst{6} = lane{0};
6110 def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
6111 (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
6112 IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
6113 [(set GPR:$R, (extractelt (v2i32 DPR:$V),
6115 Requires<[HasVFP2, HasFastVGETLNi32]> {
6116 let Inst{21} = lane{0};
6118 // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
6119 def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
6120 (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
6121 (DSubReg_i8_reg imm:$lane))),
6122 (SubReg_i8_lane imm:$lane))>;
6123 def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
6124 (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
6125 (DSubReg_i16_reg imm:$lane))),
6126 (SubReg_i16_lane imm:$lane))>;
6127 def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
6128 (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
6129 (DSubReg_i8_reg imm:$lane))),
6130 (SubReg_i8_lane imm:$lane))>;
6131 def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
6132 (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
6133 (DSubReg_i16_reg imm:$lane))),
6134 (SubReg_i16_lane imm:$lane))>;
6135 def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
6136 (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
6137 (DSubReg_i32_reg imm:$lane))),
6138 (SubReg_i32_lane imm:$lane))>,
6139 Requires<[HasNEON, HasFastVGETLNi32]>;
6140 def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
6142 (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
6143 Requires<[HasNEON, HasSlowVGETLNi32]>;
6144 def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
6146 (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
6147 Requires<[HasNEON, HasSlowVGETLNi32]>;
6148 def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
6149 (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
6150 (SSubReg_f32_reg imm:$src2))>;
6151 def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
6152 (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
6153 (SSubReg_f32_reg imm:$src2))>;
6154 //def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
6155 // (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
6156 def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
6157 (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
6160 // VMOV : Vector Set Lane (move ARM core register to scalar)
6162 let Constraints = "$src1 = $V" in {
6163 def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
6164 (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
6165 IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
6166 [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
6167 GPR:$R, imm:$lane))]> {
6168 let Inst{21} = lane{2};
6169 let Inst{6-5} = lane{1-0};
6171 def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
6172 (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
6173 IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
6174 [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
6175 GPR:$R, imm:$lane))]> {
6176 let Inst{21} = lane{1};
6177 let Inst{6} = lane{0};
6179 def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
6180 (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
6181 IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
6182 [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
6183 GPR:$R, imm:$lane))]>,
6184 Requires<[HasVFP2]> {
6185 let Inst{21} = lane{0};
6186 // This instruction is equivalent as
6187 // $V = INSERT_SUBREG $src1, $R, translateImmToSubIdx($imm)
6188 let isInsertSubreg = 1;
6191 def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
6192 (v16i8 (INSERT_SUBREG QPR:$src1,
6193 (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
6194 (DSubReg_i8_reg imm:$lane))),
6195 GPR:$src2, (SubReg_i8_lane imm:$lane))),
6196 (DSubReg_i8_reg imm:$lane)))>;
6197 def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
6198 (v8i16 (INSERT_SUBREG QPR:$src1,
6199 (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
6200 (DSubReg_i16_reg imm:$lane))),
6201 GPR:$src2, (SubReg_i16_lane imm:$lane))),
6202 (DSubReg_i16_reg imm:$lane)))>;
6203 def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
6204 (v4i32 (INSERT_SUBREG QPR:$src1,
6205 (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
6206 (DSubReg_i32_reg imm:$lane))),
6207 GPR:$src2, (SubReg_i32_lane imm:$lane))),
6208 (DSubReg_i32_reg imm:$lane)))>;
6210 def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
6211 (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
6212 SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
6213 def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
6214 (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
6215 SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
6217 //def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
6218 // (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
6219 def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
6220 (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
6222 def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
6223 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
6224 def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
6225 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
6226 def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
6227 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
6229 def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
6230 (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
6231 def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
6232 (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
6233 def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
6234 (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
6236 def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
6237 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6238 (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
6240 def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
6241 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
6242 (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
6244 def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
6245 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
6246 (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
6249 // VDUP : Vector Duplicate (from ARM core register to all elements)
6251 class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
6252 : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
6253 IIC_VMOVIS, "vdup", Dt, "$V, $R",
6254 [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
6255 class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
6256 : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
6257 IIC_VMOVIS, "vdup", Dt, "$V, $R",
6258 [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
6260 def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>;
6261 def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
6262 def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>,
6263 Requires<[HasNEON, HasFastVDUP32]>;
6264 def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>;
6265 def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
6266 def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;
6268 // NEONvdup patterns for uarchs with fast VDUP.32.
6269 def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
6270 Requires<[HasNEON,HasFastVDUP32]>;
6271 def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;
6273 // NEONvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
6274 def : Pat<(v2i32 (NEONvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
6275 Requires<[HasNEON,HasSlowVDUP32]>;
6276 def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>,
6277 Requires<[HasNEON,HasSlowVDUP32]>;
6279 // VDUP : Vector Duplicate Lane (from scalar to all elements)
6281 class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
6282 ValueType Ty, Operand IdxTy>
6283 : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
6284 IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
6285 [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>;
6287 class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
6288 ValueType ResTy, ValueType OpTy, Operand IdxTy>
6289 : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
6290 IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
6291 [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm),
6292 VectorIndex32:$lane)))]>;
6294 // Inst{19-16} is partially specified depending on the element size.
6296 def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
6298 let Inst{19-17} = lane{2-0};
6300 def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
6302 let Inst{19-18} = lane{1-0};
6304 def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
6306 let Inst{19} = lane{0};
6308 def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
6310 let Inst{19-17} = lane{2-0};
6312 def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
6314 let Inst{19-18} = lane{1-0};
6316 def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
6318 let Inst{19} = lane{0};
6321 def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
6322 (VDUPLN32d DPR:$Vm, imm:$lane)>;
6324 def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
6325 (VDUPLN32q DPR:$Vm, imm:$lane)>;
6327 def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
6328 (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
6329 (DSubReg_i8_reg imm:$lane))),
6330 (SubReg_i8_lane imm:$lane)))>;
6331 def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
6332 (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
6333 (DSubReg_i16_reg imm:$lane))),
6334 (SubReg_i16_lane imm:$lane)))>;
6335 def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
6336 (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
6337 (DSubReg_i32_reg imm:$lane))),
6338 (SubReg_i32_lane imm:$lane)))>;
6339 def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
6340 (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
6341 (DSubReg_i32_reg imm:$lane))),
6342 (SubReg_i32_lane imm:$lane)))>;
6344 def : Pat<(v2f32 (NEONvdup (f32 SPR:$src))),
6345 (v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
6346 SPR:$src, ssub_0), (i32 0)))>;
6347 def : Pat<(v4f32 (NEONvdup (f32 SPR:$src))),
6348 (v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
6349 SPR:$src, ssub_0), (i32 0)))>;
6351 // VMOVN : Vector Narrowing Move
6352 defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
6353 "vmovn", "i", trunc>;
6354 // VQMOVN : Vector Saturating Narrowing Move
6355 defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
6356 "vqmovn", "s", int_arm_neon_vqmovns>;
6357 defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
6358 "vqmovn", "u", int_arm_neon_vqmovnu>;
6359 defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
6360 "vqmovun", "s", int_arm_neon_vqmovnsu>;
6361 // VMOVL : Vector Lengthening Move
6362 defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
6363 defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
6364 def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
6365 def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
6366 def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;
6368 // Vector Conversions.
6370 // VCVT : Vector Convert Between Floating-Point and Integers
6371 def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
6372 v2i32, v2f32, fp_to_sint>;
6373 def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
6374 v2i32, v2f32, fp_to_uint>;
6375 def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
6376 v2f32, v2i32, sint_to_fp>;
6377 def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
6378 v2f32, v2i32, uint_to_fp>;
6380 def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
6381 v4i32, v4f32, fp_to_sint>;
6382 def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
6383 v4i32, v4f32, fp_to_uint>;
6384 def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
6385 v4f32, v4i32, sint_to_fp>;
6386 def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
6387 v4f32, v4i32, uint_to_fp>;
6389 def VCVTh2sd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
6390 v4i16, v4f16, fp_to_sint>,
6391 Requires<[HasNEON, HasFullFP16]>;
6392 def VCVTh2ud : N2VD<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
6393 v4i16, v4f16, fp_to_uint>,
6394 Requires<[HasNEON, HasFullFP16]>;
6395 def VCVTs2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
6396 v4f16, v4i16, sint_to_fp>,
6397 Requires<[HasNEON, HasFullFP16]>;
6398 def VCVTu2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
6399 v4f16, v4i16, uint_to_fp>,
6400 Requires<[HasNEON, HasFullFP16]>;
6402 def VCVTh2sq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
6403 v8i16, v8f16, fp_to_sint>,
6404 Requires<[HasNEON, HasFullFP16]>;
6405 def VCVTh2uq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
6406 v8i16, v8f16, fp_to_uint>,
6407 Requires<[HasNEON, HasFullFP16]>;
6408 def VCVTs2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
6409 v8f16, v8i16, sint_to_fp>,
6410 Requires<[HasNEON, HasFullFP16]>;
6411 def VCVTu2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
6412 v8f16, v8i16, uint_to_fp>,
6413 Requires<[HasNEON, HasFullFP16]>;
6416 multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
6417 SDPatternOperator IntU> {
6418 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
6419 def SDf : N2VDIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
6420 "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
6421 def SQf : N2VQIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
6422 "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
6423 def UDf : N2VDIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
6424 "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
6425 def UQf : N2VQIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
6426 "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
6427 def SDh : N2VDIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
6428 "s16.f16", v4i16, v4f16, IntS>,
6429 Requires<[HasV8, HasNEON, HasFullFP16]>;
6430 def SQh : N2VQIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
6431 "s16.f16", v8i16, v8f16, IntS>,
6432 Requires<[HasV8, HasNEON, HasFullFP16]>;
6433 def UDh : N2VDIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
6434 "u16.f16", v4i16, v4f16, IntU>,
6435 Requires<[HasV8, HasNEON, HasFullFP16]>;
6436 def UQh : N2VQIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
6437 "u16.f16", v8i16, v8f16, IntU>,
6438 Requires<[HasV8, HasNEON, HasFullFP16]>;
6442 defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>;
6443 defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>;
6444 defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>;
6445 defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>;
6447 // VCVT : Vector Convert Between Floating-Point and Fixed-Point.
6448 let DecoderMethod = "DecodeVCVTD" in {
6449 def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
6450 v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
6451 def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
6452 v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
6453 def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
6454 v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
6455 def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
6456 v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
6457 let Predicates = [HasNEON, HasFullFP16] in {
6458 def VCVTh2xsd : N2VCvtD<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
6459 v4i16, v4f16, int_arm_neon_vcvtfp2fxs>;
6460 def VCVTh2xud : N2VCvtD<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
6461 v4i16, v4f16, int_arm_neon_vcvtfp2fxu>;
6462 def VCVTxs2hd : N2VCvtD<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
6463 v4f16, v4i16, int_arm_neon_vcvtfxs2fp>;
6464 def VCVTxu2hd : N2VCvtD<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
6465 v4f16, v4i16, int_arm_neon_vcvtfxu2fp>;
6466 } // Predicates = [HasNEON, HasFullFP16]
6469 let DecoderMethod = "DecodeVCVTQ" in {
6470 def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
6471 v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
6472 def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
6473 v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
6474 def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
6475 v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
6476 def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
6477 v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
6478 let Predicates = [HasNEON, HasFullFP16] in {
6479 def VCVTh2xsq : N2VCvtQ<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
6480 v8i16, v8f16, int_arm_neon_vcvtfp2fxs>;
6481 def VCVTh2xuq : N2VCvtQ<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
6482 v8i16, v8f16, int_arm_neon_vcvtfp2fxu>;
6483 def VCVTxs2hq : N2VCvtQ<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
6484 v8f16, v8i16, int_arm_neon_vcvtfxs2fp>;
6485 def VCVTxu2hq : N2VCvtQ<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
6486 v8f16, v8i16, int_arm_neon_vcvtfxu2fp>;
6487 } // Predicates = [HasNEON, HasFullFP16]
6490 def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
6491 (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
6492 def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
6493 (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
6494 def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
6495 (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
6496 def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
6497 (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
6499 def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
6500 (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
6501 def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
6502 (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
6503 def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
6504 (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
6505 def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
6506 (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
6508 def : NEONInstAlias<"vcvt${p}.s16.f16 $Dd, $Dm, #0",
6509 (VCVTh2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
6510 def : NEONInstAlias<"vcvt${p}.u16.f16 $Dd, $Dm, #0",
6511 (VCVTh2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
6512 def : NEONInstAlias<"vcvt${p}.f16.s16 $Dd, $Dm, #0",
6513 (VCVTs2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
6514 def : NEONInstAlias<"vcvt${p}.f16.u16 $Dd, $Dm, #0",
6515 (VCVTu2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
6517 def : NEONInstAlias<"vcvt${p}.s16.f16 $Qd, $Qm, #0",
6518 (VCVTh2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
6519 def : NEONInstAlias<"vcvt${p}.u16.f16 $Qd, $Qm, #0",
6520 (VCVTh2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
6521 def : NEONInstAlias<"vcvt${p}.f16.s16 $Qd, $Qm, #0",
6522 (VCVTs2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
6523 def : NEONInstAlias<"vcvt${p}.f16.u16 $Qd, $Qm, #0",
6524 (VCVTu2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
6527 // VCVT : Vector Convert Between Half-Precision and Single-Precision.
6528 def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
6529 IIC_VUNAQ, "vcvt", "f16.f32",
6530 v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
6531 Requires<[HasNEON, HasFP16]>;
6532 def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
6533 IIC_VUNAQ, "vcvt", "f32.f16",
6534 v4f32, v4i16, int_arm_neon_vcvthf2fp>,
6535 Requires<[HasNEON, HasFP16]>;
6539 // VREV64 : Vector Reverse elements within 64-bit doublewords
6541 class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6542 : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
6543 (ins DPR:$Vm), IIC_VMOVD,
6544 OpcodeStr, Dt, "$Vd, $Vm", "",
6545 [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>;
6546 class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6547 : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
6548 (ins QPR:$Vm), IIC_VMOVQ,
6549 OpcodeStr, Dt, "$Vd, $Vm", "",
6550 [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>;
6552 def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>;
6553 def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
6554 def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
6555 def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;
6557 def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>;
6558 def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
6559 def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
6560 def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;
6562 // VREV32 : Vector Reverse elements within 32-bit words
6564 class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6565 : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
6566 (ins DPR:$Vm), IIC_VMOVD,
6567 OpcodeStr, Dt, "$Vd, $Vm", "",
6568 [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>;
6569 class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6570 : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
6571 (ins QPR:$Vm), IIC_VMOVQ,
6572 OpcodeStr, Dt, "$Vd, $Vm", "",
6573 [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>;
6575 def VREV32d8 : VREV32D<0b00, "vrev32", "8", v8i8>;
6576 def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;
6578 def VREV32q8 : VREV32Q<0b00, "vrev32", "8", v16i8>;
6579 def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;
6581 // VREV16 : Vector Reverse elements within 16-bit halfwords
6583 class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6584 : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
6585 (ins DPR:$Vm), IIC_VMOVD,
6586 OpcodeStr, Dt, "$Vd, $Vm", "",
6587 [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;
6588 class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6589 : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
6590 (ins QPR:$Vm), IIC_VMOVQ,
6591 OpcodeStr, Dt, "$Vd, $Vm", "",
6592 [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;
6594 def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
6595 def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;
6597 // Other Vector Shuffles.
6599 // Aligned extractions: really just dropping registers
6601 class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
6602 : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
6603 (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>;
6605 def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;
6607 def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;
6609 def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;
6611 def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;
6613 def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
6616 // VEXT : Vector Extract
6619 // All of these have a two-operand InstAlias.
6620 let TwoOperandAliasConstraint = "$Vn = $Vd" in {
6621 class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
6622 : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
6623 (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
6624 IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
6625 [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
6626 (Ty DPR:$Vm), imm:$index)))]> {
6629 let Inst{10-8} = index{2-0};
6632 class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
6633 : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
6634 (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm,
6635 IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
6636 [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
6637 (Ty QPR:$Vm), imm:$index)))]> {
6639 let Inst{11-8} = index{3-0};
6643 def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
6644 let Inst{10-8} = index{2-0};
6646 def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
6647 let Inst{10-9} = index{1-0};
6650 def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
6651 let Inst{10} = index{0};
6652 let Inst{9-8} = 0b00;
6654 def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
6657 (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
6659 def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
6660 let Inst{11-8} = index{3-0};
6662 def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
6663 let Inst{11-9} = index{2-0};
6666 def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
6667 let Inst{11-10} = index{1-0};
6668 let Inst{9-8} = 0b00;
6670 def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
6671 let Inst{11} = index{0};
6672 let Inst{10-8} = 0b000;
6674 def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
6677 (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
6679 // VTRN : Vector Transpose
6681 def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
6682 def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
6683 def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;
6685 def VTRNq8 : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
6686 def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
6687 def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;
6689 // VUZP : Vector Unzip (Deinterleave)
6691 def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
6692 def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
6693 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
6694 def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
6695 (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
6697 def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
6698 def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
6699 def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;
6701 // VZIP : Vector Zip (Interleave)
6703 def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
6704 def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
6705 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
6706 def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
6707 (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
6709 def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
6710 def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
6711 def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
6713 // Vector Table Lookup and Table Extension.
6715 // VTBL : Vector Table Lookup
6716 let DecoderMethod = "DecodeTBLInstruction" in {
6718 : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
6719 (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
6720 "vtbl", "8", "$Vd, $Vn, $Vm", "",
6721 [(set DPR:$Vd, (v8i8 (NEONvtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
6723 let hasExtraSrcRegAllocReq = 1 in {
6725 : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
6726 (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
6727 "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
6729 : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
6730 (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
6731 "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
6733 : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
6734 (ins VecListFourD:$Vn, DPR:$Vm),
6736 "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
6737 } // hasExtraSrcRegAllocReq = 1
6740 : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
6742 : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;
6744 // VTBX : Vector Table Extension
6746 : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
6747 (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
6748 "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
6749 [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
6750 DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
6751 let hasExtraSrcRegAllocReq = 1 in {
6753 : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
6754 (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
6755 "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
6757 : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
6758 (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
6759 NVTBLFrm, IIC_VTBX3,
6760 "vtbx", "8", "$Vd, $Vn, $Vm",
6763 : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
6764 (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
6765 "vtbx", "8", "$Vd, $Vn, $Vm",
6767 } // hasExtraSrcRegAllocReq = 1
6770 : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
6771 IIC_VTBX3, "$orig = $dst", []>;
6773 : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
6774 IIC_VTBX4, "$orig = $dst", []>;
6775 } // DecoderMethod = "DecodeTBLInstruction"
6777 def : Pat<(v8i8 (NEONvtbl2 v8i8:$Vn0, v8i8:$Vn1, v8i8:$Vm)),
6778 (v8i8 (VTBL2 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
6781 def : Pat<(v8i8 (int_arm_neon_vtbx2 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
6783 (v8i8 (VTBX2 v8i8:$orig,
6784 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
6788 def : Pat<(v8i8 (int_arm_neon_vtbl3 v8i8:$Vn0, v8i8:$Vn1,
6789 v8i8:$Vn2, v8i8:$Vm)),
6790 (v8i8 (VTBL3Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
6793 (v8i8 (IMPLICIT_DEF)), dsub_3),
6795 def : Pat<(v8i8 (int_arm_neon_vtbx3 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
6796 v8i8:$Vn2, v8i8:$Vm)),
6797 (v8i8 (VTBX3Pseudo v8i8:$orig,
6798 (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
6801 (v8i8 (IMPLICIT_DEF)), dsub_3),
6804 def : Pat<(v8i8 (int_arm_neon_vtbl4 v8i8:$Vn0, v8i8:$Vn1,
6805 v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
6806 (v8i8 (VTBL4Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
6811 def : Pat<(v8i8 (int_arm_neon_vtbx4 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
6812 v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
6813 (v8i8 (VTBX4Pseudo v8i8:$orig,
6814 (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
6820 // VRINT : Vector Rounding
6821 multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
6822 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
6823 def Df : N2VDIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
6824 !strconcat("vrint", op), "f32",
6825 v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
6826 let Inst{9-7} = op9_7;
6828 def Qf : N2VQIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
6829 !strconcat("vrint", op), "f32",
6830 v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
6831 let Inst{9-7} = op9_7;
6833 def Dh : N2VDIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
6834 !strconcat("vrint", op), "f16",
6836 Requires<[HasV8, HasNEON, HasFullFP16]> {
6837 let Inst{9-7} = op9_7;
6839 def Qh : N2VQIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
6840 !strconcat("vrint", op), "f16",
6842 Requires<[HasV8, HasNEON, HasFullFP16]> {
6843 let Inst{9-7} = op9_7;
6847 def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
6848 (!cast<Instruction>(NAME#"Df") DPR:$Dd, DPR:$Dm)>;
6849 def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
6850 (!cast<Instruction>(NAME#"Qf") QPR:$Qd, QPR:$Qm)>;
6851 let Predicates = [HasNEON, HasFullFP16] in {
6852 def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Dd, $Dm"),
6853 (!cast<Instruction>(NAME#"Dh") DPR:$Dd, DPR:$Dm)>;
6854 def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Qd, $Qm"),
6855 (!cast<Instruction>(NAME#"Qh") QPR:$Qd, QPR:$Qm)>;
6859 defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
6860 defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
6861 defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
6862 defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
6863 defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
6864 defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;
6866 // Cryptography instructions
6867 let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
6868 DecoderNamespace = "v8Crypto", hasSideEffects = 0 in {
6869 class AES<string op, bit op7, bit op6, SDPatternOperator Int>
6870 : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
6871 !strconcat("aes", op), "8", v16i8, v16i8, Int>,
6872 Requires<[HasV8, HasCrypto]>;
6873 class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int>
6874 : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
6875 !strconcat("aes", op), "8", v16i8, v16i8, Int>,
6876 Requires<[HasV8, HasCrypto]>;
6877 class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
6878 SDPatternOperator Int>
6879 : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
6880 !strconcat("sha", op), "32", v4i32, v4i32, Int>,
6881 Requires<[HasV8, HasCrypto]>;
6882 class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
6883 SDPatternOperator Int>
6884 : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
6885 !strconcat("sha", op), "32", v4i32, v4i32, Int>,
6886 Requires<[HasV8, HasCrypto]>;
6887 class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int>
6888 : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
6889 !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>,
6890 Requires<[HasV8, HasCrypto]>;
6893 def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
6894 def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>;
6895 def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
6896 def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>;
6898 def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, null_frag>;
6899 def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
6900 def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
6901 def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, null_frag>;
6902 def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, null_frag>;
6903 def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, null_frag>;
6904 def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
6905 def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
6906 def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
6907 def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;
6909 def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)),
6910 (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG
6911 (SHA1H (SUBREG_TO_REG (i64 0),
6912 (f32 (COPY_TO_REGCLASS i32:$Rn, SPR)),
6916 def : Pat<(v4i32 (int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
6917 (SHA1C v4i32:$hash_abcd,
6918 (SUBREG_TO_REG (i64 0),
6919 (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
6923 def : Pat<(v4i32 (int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
6924 (SHA1M v4i32:$hash_abcd,
6925 (SUBREG_TO_REG (i64 0),
6926 (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
6930 def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
6931 (SHA1P v4i32:$hash_abcd,
6932 (SUBREG_TO_REG (i64 0),
6933 (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
6937 //===----------------------------------------------------------------------===//
6938 // NEON instructions for single-precision FP math
6939 //===----------------------------------------------------------------------===//
6941 class N2VSPat<SDNode OpNode, NeonI Inst>
6942 : NEONFPPat<(f32 (OpNode SPR:$a)),
6944 (v2f32 (COPY_TO_REGCLASS (Inst
6946 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
6947 SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;
6949 class N3VSPat<SDNode OpNode, NeonI Inst>
6950 : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
6952 (v2f32 (COPY_TO_REGCLASS (Inst
6954 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
6957 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
6958 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
6960 class N3VSPatFP16<SDNode OpNode, NeonI Inst>
6961 : NEONFPPat<(f16 (OpNode HPR:$a, HPR:$b)),
6963 (v4f16 (COPY_TO_REGCLASS (Inst
6965 (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
6968 (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
6969 HPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
6971 class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
6972 : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
6974 (v2f32 (COPY_TO_REGCLASS (Inst
6976 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
6979 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
6982 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
6983 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
6985 class NVCVTIFPat<SDNode OpNode, NeonI Inst>
6986 : NEONFPPat<(f32 (OpNode GPR:$a)),
6987 (f32 (EXTRACT_SUBREG
6990 (v2f32 (IMPLICIT_DEF)),
6991 (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))),
6993 class NVCVTFIPat<SDNode OpNode, NeonI Inst>
6994 : NEONFPPat<(i32 (OpNode SPR:$a)),
6995 (i32 (EXTRACT_SUBREG
6996 (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
7000 def : N3VSPat<fadd, VADDfd>;
7001 def : N3VSPat<fsub, VSUBfd>;
7002 def : N3VSPat<fmul, VMULfd>;
7003 def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
7004 Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
7005 def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
7006 Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
7007 def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
7008 Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
7009 def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
7010 Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
7011 def : N2VSPat<fabs, VABSfd>;
7012 def : N2VSPat<fneg, VNEGfd>;
7013 def : N3VSPatFP16<fmaxnan, VMAXhd>, Requires<[HasFullFP16]>;
7014 def : N3VSPatFP16<fminnan, VMINhd>, Requires<[HasFullFP16]>;
7015 def : N3VSPat<fmaxnan, VMAXfd>, Requires<[HasNEON]>;
7016 def : N3VSPat<fminnan, VMINfd>, Requires<[HasNEON]>;
7017 def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
7018 def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
7019 def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
7020 def : NVCVTIFPat<uint_to_fp, VCVTu2fd>;
7022 // NEON doesn't have any f64 conversions, so provide patterns to make
7023 // sure the VFP conversions match when extracting from a vector.
7024 def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
7025 (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
7026 def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
7027 (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
7028 def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
7029 (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
7030 def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
7031 (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
7034 // Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers.
7035 def : Pat<(f32 (bitconvert GPR:$a)),
7036 (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
7037 Requires<[HasNEON, DontUseVMOVSR]>;
7038 def : Pat<(arm_vmovsr GPR:$a),
7039 (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
7040 Requires<[HasNEON, DontUseVMOVSR]>;
7042 //===----------------------------------------------------------------------===//
7043 // Non-Instruction Patterns
7044 //===----------------------------------------------------------------------===//
7047 let Predicates = [IsLE] in {
7048 def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
7049 def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
7050 def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>;
7052 def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>;
7053 let Predicates = [IsLE] in {
7054 def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
7055 def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
7056 def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
7057 def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>;
7058 def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
7060 def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
7061 let Predicates = [IsLE] in {
7062 def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
7063 def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
7064 def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>;
7065 def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
7066 def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
7067 def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
7068 def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
7069 def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;
7070 def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>;
7071 def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
7073 def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>;
7074 let Predicates = [IsLE] in {
7075 def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
7076 def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
7077 def : Pat<(f64 (bitconvert (v4f16 DPR:$src))), (f64 DPR:$src)>;
7078 def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
7079 def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
7080 def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
7081 def : Pat<(v4f16 (bitconvert (f64 DPR:$src))), (v4f16 DPR:$src)>;
7082 def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
7084 def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
7085 let Predicates = [IsLE] in {
7086 def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
7087 def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;
7090 let Predicates = [IsLE] in {
7091 def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
7092 def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
7093 def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
7095 def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
7096 let Predicates = [IsLE] in {
7097 def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
7098 def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
7099 def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
7100 def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
7101 def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
7103 def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
7104 let Predicates = [IsLE] in {
7105 def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
7106 def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
7107 def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
7108 def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
7109 def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
7110 def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (v8f16 QPR:$src)>;
7111 def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
7112 def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
7113 def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
7114 def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
7115 def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
7116 def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
7118 def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
7119 let Predicates = [IsLE] in {
7120 def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
7121 def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
7122 def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
7124 def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
7125 let Predicates = [IsLE] in {
7126 def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
7127 def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
7128 def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 QPR:$src)>;
7129 def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
7130 def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
7133 let Predicates = [IsBE] in {
7134 // 64 bit conversions
7135 def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
7136 def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
7137 def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
7138 def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
7139 def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
7140 def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
7141 def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
7142 def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>;
7143 def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
7144 def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
7145 def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>;
7146 def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>;
7147 def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
7148 def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (VREV64d8 DPR:$src)>;
7149 def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (VREV32d8 DPR:$src)>;
7150 def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (VREV16d8 DPR:$src)>;
7151 def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (VREV64d8 DPR:$src)>;
7152 def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (VREV32d8 DPR:$src)>;
7153 def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
7154 def : Pat<(f64 (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>;
7155 def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
7156 def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
7157 def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
7158 def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>;
7159 def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
7160 def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
7161 def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
7163 // 128 bit conversions
7164 def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
7165 def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
7166 def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
7167 def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
7168 def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
7169 def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
7170 def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
7171 def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
7172 def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
7173 def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
7174 def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>;
7175 def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
7176 def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
7177 def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
7178 def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8 QPR:$src)>;
7179 def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8 QPR:$src)>;
7180 def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8 QPR:$src)>;
7181 def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8 QPR:$src)>;
7182 def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8 QPR:$src)>;
7183 def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
7184 def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
7185 def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>;
7186 def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
7187 def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
7188 def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
7189 def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
7190 def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>;
7191 def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
7192 def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
7195 // Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on Big Endian
7196 def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
7197 (VREV64q8 (VLD1q8 addrmode6:$addr))>, Requires<[IsBE]>;
7198 def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
7199 (VST1q8 addrmode6:$addr, (VREV64q8 QPR:$value))>, Requires<[IsBE]>;
7200 def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
7201 (VREV64q16 (VLD1q16 addrmode6:$addr))>, Requires<[IsBE]>;
7202 def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
7203 (VST1q16 addrmode6:$addr, (VREV64q16 QPR:$value))>, Requires<[IsBE]>;
7205 // Fold extracting an element out of a v2i32 into a vfp register.
7206 def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
7207 (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
7209 // Vector lengthening move with load, matching extending loads.
7211 // extload, zextload and sextload for a standard lengthening load. Example:
7212 // Lengthen_Single<"8", "i16", "8"> =
7213 // Pat<(v8i16 (extloadvi8 addrmode6:$addr))
7214 // (VMOVLuv8i16 (VLD1d8 addrmode6:$addr,
7215 // (f64 (IMPLICIT_DEF)), (i32 0)))>;
7216 multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
7217 let AddedComplexity = 10 in {
7218 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7219 (!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)),
7220 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
7221 (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
7223 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7224 (!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)),
7225 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
7226 (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
7228 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7229 (!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)),
7230 (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
7231 (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
7235 // extload, zextload and sextload for a lengthening load which only uses
7236 // half the lanes available. Example:
7237 // Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> =
7238 // Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
7239 // (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
7240 // (f64 (IMPLICIT_DEF)), (i32 0))),
7242 multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
7243 string InsnLanes, string InsnTy> {
7244 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7245 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
7246 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
7247 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7249 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7250 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
7251 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
7252 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7254 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7255 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
7256 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
7257 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7261 // The following class definition is basically a copy of the
7262 // Lengthen_HalfSingle definition above, however with an additional parameter
7263 // "RevLanes" to select the correct VREV32dXX instruction. This is to convert
7264 // data loaded by VLD1LN into proper vector format in big endian mode.
7265 multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy, string SrcTy,
7266 string InsnLanes, string InsnTy, string RevLanes> {
7267 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7268 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
7269 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
7270 (!cast<Instruction>("VREV32d" # RevLanes)
7271 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7273 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7274 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
7275 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
7276 (!cast<Instruction>("VREV32d" # RevLanes)
7277 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7279 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7280 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
7281 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
7282 (!cast<Instruction>("VREV32d" # RevLanes)
7283 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7287 // extload, zextload and sextload for a lengthening load followed by another
7288 // lengthening load, to quadruple the initial length.
7290 // Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
7291 // Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr))
7292 // (EXTRACT_SUBREG (VMOVLuv4i32
7293 // (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
7294 // (f64 (IMPLICIT_DEF)),
7298 multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
7299 string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
7301 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7302 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
7303 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7304 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7305 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7307 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7308 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
7309 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7310 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7311 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7313 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7314 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
7315 (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
7316 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
7317 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7321 // The following class definition is basically a copy of the
7322 // Lengthen_Double definition above, however with an additional parameter
7323 // "RevLanes" to select the correct VREV32dXX instruction. This is to convert
7324 // data loaded by VLD1LN into proper vector format in big endian mode.
7325 multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy, string SrcTy,
7326 string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
7327 string Insn2Ty, string RevLanes> {
7328 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7329 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
7330 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7331 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7332 (!cast<Instruction>("VREV32d" # RevLanes)
7333 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7335 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7336 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
7337 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7338 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7339 (!cast<Instruction>("VREV32d" # RevLanes)
7340 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7342 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7343 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
7344 (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
7345 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
7346 (!cast<Instruction>("VREV32d" # RevLanes)
7347 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7351 // extload, zextload and sextload for a lengthening load followed by another
7352 // lengthening load, to quadruple the initial length, but which ends up only
7353 // requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
7355 // Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
7356 // Pat<(v2i32 (extloadvi8 addrmode6:$addr))
7357 // (EXTRACT_SUBREG (VMOVLuv4i32
7358 // (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
7359 // (f64 (IMPLICIT_DEF)), (i32 0))),
7362 multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
7363 string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
7365 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7366 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
7367 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7368 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7369 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7372 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7373 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
7374 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7375 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7376 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7379 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7380 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
7381 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
7382 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
7383 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7388 // The following class definition is basically a copy of the
7389 // Lengthen_HalfDouble definition above, however with an additional VREV16d8
7390 // instruction to convert data loaded by VLD1LN into proper vector format
7391 // in big endian mode.
7392 multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy, string SrcTy,
7393 string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
7395 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7396 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
7397 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7398 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7399 (!cast<Instruction>("VREV16d8")
7400 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7403 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7404 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
7405 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7406 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7407 (!cast<Instruction>("VREV16d8")
7408 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7411 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7412 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
7413 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
7414 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
7415 (!cast<Instruction>("VREV16d8")
7416 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7421 defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
7422 defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
7423 defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64
7425 let Predicates = [IsLE] in {
7426 defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
7427 defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32
7429 // Double lengthening - v4i8 -> v4i16 -> v4i32
7430 defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
7431 // v2i8 -> v2i16 -> v2i32
7432 defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
7433 // v2i16 -> v2i32 -> v2i64
7434 defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
7437 let Predicates = [IsBE] in {
7438 defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">; // v4i8 -> v4i16
7439 defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32
7441 // Double lengthening - v4i8 -> v4i16 -> v4i32
7442 defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8">;
7443 // v2i8 -> v2i16 -> v2i32
7444 defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32">;
7445 // v2i16 -> v2i32 -> v2i64
7446 defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64", "16">;
7449 // Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
7450 let Predicates = [IsLE] in {
7451 def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
7452 (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
7453 (VLD1LNd16 addrmode6:$addr,
7454 (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
7455 def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
7456 (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
7457 (VLD1LNd16 addrmode6:$addr,
7458 (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
7459 def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
7460 (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
7461 (VLD1LNd16 addrmode6:$addr,
7462 (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
// The following patterns are basically a copy of the patterns above,
// however with an additional VREV16d instruction to convert data
// loaded by VLD1LN into proper vector format in big endian mode.
let Predicates = [IsBE] in {
  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
          (!cast<Instruction>("VREV16d8")
            (VLD1LNd16 addrmode6:$addr,
                       (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
          (!cast<Instruction>("VREV16d8")
            (VLD1LNd16 addrmode6:$addr,
                       (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
        (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
          (!cast<Instruction>("VREV16d8")
            (VLD1LNd16 addrmode6:$addr,
                       (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
// NOTE(review): closing brace restored -- it was dropped from this copy,
// leaving the `let Predicates = [IsBE] in {` block unterminated.
}
// A Q register is the concatenation of two adjacent D registers, so a
// concat_vectors of two 64-bit vectors reduces to a single REG_SEQUENCE
// placing the operands in the low (dsub_0) and high (dsub_1) halves.
def : Pat<(v2i64 (concat_vectors DPR:$Dn, DPR:$Dm)),
(REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v4i32 (concat_vectors DPR:$Dn, DPR:$Dm)),
(REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v8i16 (concat_vectors DPR:$Dn, DPR:$Dm)),
(REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v16i8 (concat_vectors DPR:$Dn, DPR:$Dm)),
(REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v4f32 (concat_vectors DPR:$Dn, DPR:$Dm)),
(REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
//===----------------------------------------------------------------------===//
// Assembler aliases
// Pre-UAL VFP mnemonics: fmdhr/fmdlr move a GPR into the high (lane 1) /
// low (lane 0) half of a D register, i.e. VMOV (scalar) via VSETLNi32.
def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
(VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
(VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;
// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
(VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
(VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
(VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
(VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
(VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
(VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
(VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
(VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
// ... two-operand aliases
// NOTE(review): two-operand ("$Vdn, $Vm") forms are provided for
// vand/veor/vorr but not vbic -- confirm this omission is intentional.
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
(VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
(VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
(VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
(VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
(VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
(VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
// "vand.iN $Vd, #imm" has no direct encoding; it is accepted as an alias
// for VBIC with the complemented immediate. The nImmSplatNotI16/I32
// operand classes match the written immediate in that complemented form.
def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
(VBICiv4i16 DPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
(VBICiv2i32 DPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
(VBICiv8i16 QPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
(VBICiv4i32 QPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
// VLD1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// NOTE(review): the "pred:$p)>;" continuation lines of the non-register
// forms were truncated in this copy and have been restored to mirror the
// complete register-writeback forms below.
def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                       pred:$p)>;
def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;

def VLD1LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VLD1LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                       pred:$p)>;
def VLD1LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VLD1LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
// VST1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// NOTE(review): truncated "pred:$p)>;" closers restored to mirror the
// complete register-writeback forms below.
def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                       pred:$p)>;
def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;

def VST1LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VST1LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                       pred:$p)>;
def VST1LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VST1LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
// VLD2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// NOTE(review): truncated "pred:$p)>;" closers restored to mirror the
// complete forms in this section (e.g. VLD2LNdAsm_32 and the register
// forms below).
def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
                  (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                       pred:$p)>;
def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
                  (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
                  (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                       pred:$p)>;

def VLD2LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
                  (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                       pred:$p)>;
def VLD2LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VLD2LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
                  (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                       pred:$p)>;
def VLD2LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VLD2LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
                  (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                       pred:$p)>;
def VLD2LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
// VST2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// NOTE(review): truncated "pred:$p)>;" closers restored to mirror the
// complete register-writeback forms below.
def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
                  (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                       pred:$p)>;
def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
                  (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                       pred:$p)>;
def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
                  (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                       pred:$p)>;

def VST2LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
                  (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                       pred:$p)>;
def VST2LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VST2LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
                  (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                       pred:$p)>;
def VST2LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VST2LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
                  (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                       pred:$p)>;
def VST2LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
// VLD3 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// NOTE(review): truncated "pred:$p)>;" closers restored to mirror the
// complete register-writeback forms below.
def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;

def VLD3DUPdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
// VLD3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// NOTE(review): truncated "pred:$p)>;" closers restored to mirror the
// complete register-writeback forms below.
def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
                (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
                (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
                (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
                (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
                (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;

def VLD3LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
                (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VLD3LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
                (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VLD3LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
                (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VLD3LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
                (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VLD3LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
                (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VLD3LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
                (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                (ins VecListThreeDHWordIndexed:$list,
                     addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                (ins VecListThreeQHWordIndexed:$list,
                     addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                     rGPR:$Rm, pred:$p)>;
// VLD3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// Every form takes a 64-bit-aligned address (addrmode6align64); d forms use
// VecListThreeD register lists, q forms use VecListThreeQ lists.
def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
(ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
(ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
(ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
(ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
(ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
(ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
(ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
(ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
(ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
(ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
(ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
(ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
(ins VecListThreeD:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
(ins VecListThreeD:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
(ins VecListThreeD:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
(ins VecListThreeQ:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
(ins VecListThreeQ:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
(ins VecListThreeQ:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
// VST3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// NOTE(review): truncated "pred:$p)>;" closers restored to mirror the
// complete register-writeback forms below.
def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
                (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
                (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
                (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
                (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
                (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;

def VST3LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
                (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VST3LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
                (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VST3LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
                (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VST3LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
                (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VST3LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
                (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                     pred:$p)>;
def VST3LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
                (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
                (ins VecListThreeDHWordIndexed:$list,
                     addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
                (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
                (ins VecListThreeQHWordIndexed:$list,
                     addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
                (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                     rGPR:$Rm, pred:$p)>;
// VST3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// Every form takes a 64-bit-aligned address (addrmode6align64); d forms use
// VecListThreeD register lists, q forms use VecListThreeQ lists.
def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
(ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
(ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
(ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
(ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
(ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
(ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
(ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
(ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
(ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
(ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
(ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
(ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
(ins VecListThreeD:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
(ins VecListThreeD:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
(ins VecListThreeD:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
(ins VecListThreeQ:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
(ins VecListThreeQ:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
(ins VecListThreeQ:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
// VLD4 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// NOTE(review): truncated "pred:$p)>;" closers restored to mirror the
// complete register-writeback forms below.
def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;
def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;

def VLD4DUPdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;
def VLD4DUPqWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;
def VLD4DUPdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourDAllLanes:$list,
                    addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQAllLanes:$list,
                    addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;
// VLD4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// NOTE(review): truncated "pred:$p)>;" closers restored to mirror the
// complete register-writeback forms below.
def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
                 (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                      pred:$p)>;
def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
                 (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                      pred:$p)>;
def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
                 (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                      pred:$p)>;
def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
                 (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                      pred:$p)>;
def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
                 (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                      pred:$p)>;

def VLD4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
                 (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                      pred:$p)>;
def VLD4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
                 (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                      pred:$p)>;
def VLD4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
                 (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                      pred:$p)>;
def VLD4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
                 (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                      pred:$p)>;
def VLD4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
                 (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                      pred:$p)>;
def VLD4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
                 (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                      rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                 (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                      rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                 (ins VecListFourDWordIndexed:$list,
                      addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                 (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                      rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                 (ins VecListFourQWordIndexed:$list,
                      addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
// VLD4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;

// Writeback forms with a fixed (instruction-implied) post-increment ("$addr!").
def VLD4dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;

// Writeback forms with a register ($Rm) post-increment.
def VLD4dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
// VST4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
                    (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                         pred:$p)>;
def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
                    (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                         pred:$p)>;
def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
                    (ins VecListFourDWordIndexed:$list,
                         addrmode6align64or128:$addr, pred:$p)>;
def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
                    (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                         pred:$p)>;
def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
                    (ins VecListFourQWordIndexed:$list,
                         addrmode6align64or128:$addr, pred:$p)>;

// Writeback forms with a fixed (instruction-implied) post-increment ("$addr!").
def VST4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
                    (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                         pred:$p)>;
def VST4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
                    (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                         pred:$p)>;
def VST4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
                    (ins VecListFourDWordIndexed:$list,
                         addrmode6align64or128:$addr, pred:$p)>;
def VST4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
                    (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                         pred:$p)>;
def VST4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
                    (ins VecListFourQWordIndexed:$list,
                         addrmode6align64or128:$addr, pred:$p)>;

// Writeback forms with a register ($Rm) post-increment.
def VST4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
                    (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                         rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                    (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                         rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                    (ins VecListFourDWordIndexed:$list,
                         addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                    (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                         rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                    (ins VecListFourQWordIndexed:$list,
                         addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
// VST4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;

// Writeback forms with a fixed (instruction-implied) post-increment ("$addr!").
def VST4dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;

// Writeback forms with a register ($Rm) post-increment.
def VST4dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
// VMOV/VMVN takes an optional datatype suffix. vmov Dd, Dm is encoded as
// VORR with both source operands the same register.
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                          (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                          (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                          (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                          (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>;
// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
// (The original comment said VCLT/VCGT, a copy-paste from the section below;
// these aliases clearly map vcle onto VCGE.)
// D-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
                    (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
                    (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
                    (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
                    (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
                    (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
                    (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
                    (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vcle${p}.f16 $Dd, $Dn, $Dm",
                    (VCGEhd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
                    (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
                    (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
                    (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
                    (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
                    (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
                    (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
                    (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vcle${p}.f16 $Qd, $Qn, $Qm",
                    (VCGEhq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
// D-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
                    (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
                    (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
                    (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
                    (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
                    (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
                    (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
                    (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vclt${p}.f16 $Dd, $Dn, $Dm",
                    (VCGThd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
                    (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
                    (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
                    (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
                    (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
                    (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
                    (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
                    (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vclt${p}.f16 $Qd, $Qn, $Qm",
                    (VCGThq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
// VSWP allows, but does not require, a type suffix.
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                          (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                          (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VBIF, VBIT, and VBSL allow, but do not require, a type suffix.
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                          (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                          (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                          (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                          (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
// "vmov Rd, #-imm" can be handled via "vmvn" (and vice versa): the negated
// immediate's complement is representable in the other instruction's encoding.
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                    (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                    (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                    (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                    (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
// 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
// these should restrict to just the Q register variants, but the register
// classes are enough to match correctly regardless, so we keep it simple
// and just use MnemonicAlias.
def : NEONMnemonicAlias<"vbicq", "vbic">;
def : NEONMnemonicAlias<"vandq", "vand">;
def : NEONMnemonicAlias<"veorq", "veor">;
def : NEONMnemonicAlias<"vorrq", "vorr">;

def : NEONMnemonicAlias<"vmovq", "vmov">;
def : NEONMnemonicAlias<"vmvnq", "vmvn">;
// Explicit versions for floating point so that the FPImm variants get
// handled early. The parser gets confused otherwise.
def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;

def : NEONMnemonicAlias<"vaddq", "vadd">;
def : NEONMnemonicAlias<"vsubq", "vsub">;

def : NEONMnemonicAlias<"vminq", "vmin">;
def : NEONMnemonicAlias<"vmaxq", "vmax">;

def : NEONMnemonicAlias<"vmulq", "vmul">;

def : NEONMnemonicAlias<"vabsq", "vabs">;

def : NEONMnemonicAlias<"vshlq", "vshl">;
def : NEONMnemonicAlias<"vshrq", "vshr">;

def : NEONMnemonicAlias<"vcvtq", "vcvt">;

def : NEONMnemonicAlias<"vcleq", "vcle">;
def : NEONMnemonicAlias<"vceqq", "vceq">;

def : NEONMnemonicAlias<"vzipq", "vzip">;
def : NEONMnemonicAlias<"vswpq", "vswp">;

def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">;
def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">;
// Alias for loading floating point immediates that aren't representable
// using the vmov.f32 encoding but the bitpattern is representable using
// the .i32 encoding.
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                     (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                     (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;