//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// NEON-specific Operands.
//===----------------------------------------------------------------------===//
// NEON "modified immediate" operands: encodable splat constants for
// VMOV/VBIC/VORR-style instructions. Each operand pairs a printer with the
// AsmOperandClass that the asm parser uses to classify the token.
def nModImm : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
}

def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
def nImmSplatI8 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI8AsmOperand;
}
def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
def nImmSplatI16 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI16AsmOperand;
}
def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
def nImmSplatI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI32AsmOperand;
}
def nImmSplatNotI16AsmOperand : AsmOperandClass { let Name = "NEONi16splatNot"; }
def nImmSplatNotI16 : Operand<i32> {
  let ParserMatchClass = nImmSplatNotI16AsmOperand;
}
def nImmSplatNotI32AsmOperand : AsmOperandClass { let Name = "NEONi32splatNot"; }
def nImmSplatNotI32 : Operand<i32> {
  let ParserMatchClass = nImmSplatNotI32AsmOperand;
}
def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
def nImmVMOVI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32AsmOperand;
}
// Asm operand classes for byte-replicated immediates (e.g. 0xABABABAB),
// accepted by the vmov/vmvn aliases. The Predicate/Render methods live in
// ARMAsmParser.
def nImmVMOVI16AsmOperandByteReplicate :
  AsmOperandClass {
  let Name = "NEONi16vmovByteReplicate";
  let PredicateMethod = "isNEONi16ByteReplicate";
  let RenderMethod = "addNEONvmovByteReplicateOperands";
}
def nImmVMOVI32AsmOperandByteReplicate :
  AsmOperandClass {
  let Name = "NEONi32vmovByteReplicate";
  let PredicateMethod = "isNEONi32ByteReplicate";
  let RenderMethod = "addNEONvmovByteReplicateOperands";
}
def nImmVMVNI16AsmOperandByteReplicate :
  AsmOperandClass {
  let Name = "NEONi16invByteReplicate";
  let PredicateMethod = "isNEONi16ByteReplicate";
  let RenderMethod = "addNEONinvByteReplicateOperands";
}
def nImmVMVNI32AsmOperandByteReplicate :
  AsmOperandClass {
  let Name = "NEONi32invByteReplicate";
  let PredicateMethod = "isNEONi32ByteReplicate";
  let RenderMethod = "addNEONinvByteReplicateOperands";
}
// Operand wrappers binding the byte-replicate asm classes to the NEON
// modified-immediate printer.
def nImmVMOVI16ByteReplicate : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI16AsmOperandByteReplicate;
}
def nImmVMOVI32ByteReplicate : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32AsmOperandByteReplicate;
}
def nImmVMVNI16ByteReplicate : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMVNI16AsmOperandByteReplicate;
}
def nImmVMVNI32ByteReplicate : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMVNI32AsmOperandByteReplicate;
}
def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
def nImmVMOVI32Neg : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32NegAsmOperand;
}
// Floating-point immediate for VMOV.f32, printed/parsed as an FP literal.
def nImmVMOVF32 : Operand<i32> {
  let PrintMethod = "printFPImmOperand";
  let ParserMatchClass = FPImmOperand;
}
def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
def nImmSplatI64 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI64AsmOperand;
}
// Lane-index operands; the ImmLeaf predicate bounds the index by the number
// of lanes of the given element size in a 64-bit D register.
def VectorIndex8Operand  : AsmOperandClass { let Name = "VectorIndex8"; }
def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 8;
}]> {
  let ParserMatchClass = VectorIndex8Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 4;
}]> {
  let ParserMatchClass = VectorIndex16Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 2;
}]> {
  let ParserMatchClass = VectorIndex32Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
// Register list of one D register.
def VecListOneDAsmOperand : AsmOperandClass {
  let Name = "VecListOneD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
  let ParserMatchClass = VecListOneDAsmOperand;
}
// Register list of two sequential D registers.
def VecListDPairAsmOperand : AsmOperandClass {
  let Name = "VecListDPair";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
  let ParserMatchClass = VecListDPairAsmOperand;
}
// Register list of three sequential D registers.
def VecListThreeDAsmOperand : AsmOperandClass {
  let Name = "VecListThreeD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
  let ParserMatchClass = VecListThreeDAsmOperand;
}
// Register list of four sequential D registers.
def VecListFourDAsmOperand : AsmOperandClass {
  let Name = "VecListFourD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
  let ParserMatchClass = VecListFourDAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpaced";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
  let ParserMatchClass = VecListDPairSpacedAsmOperand;
}
// Register list of three D registers spaced by 2 (three Q registers).
def VecListThreeQAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
  let ParserMatchClass = VecListThreeQAsmOperand;
}
// Register list of four D registers spaced by 2 (four Q registers).
def VecListFourQAsmOperand : AsmOperandClass {
  let Name = "VecListFourQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
  let ParserMatchClass = VecListFourQAsmOperand;
}
// Register list of one D register, with "all lanes" subscripting.
def VecListOneDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListOneDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
  let ParserMatchClass = VecListOneDAllLanesAsmOperand;
}
// Register list of two D registers, with "all lanes" subscripting.
def VecListDPairAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairAllLanes : RegisterOperand<DPair,
                                           "printVectorListTwoAllLanes"> {
  let ParserMatchClass = VecListDPairAllLanesAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpacedAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpacedAllLanes : RegisterOperand<DPair,
                                                 "printVectorListTwoSpacedAllLanes"> {
  let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
}
// Register list of three D registers, with "all lanes" subscripting.
def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeDAllLanes : RegisterOperand<DPR,
                                            "printVectorListThreeAllLanes"> {
  let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
}
// Register list of three D registers spaced by 2 (three sequential Q regs).
def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQAllLanes : RegisterOperand<DPR,
                                            "printVectorListThreeSpacedAllLanes"> {
  let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
}
// Register list of four D registers, with "all lanes" subscripting.
def VecListFourDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
  let ParserMatchClass = VecListFourDAllLanesAsmOperand;
}
// Register list of four D registers spaced by 2 (four sequential Q regs).
def VecListFourQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQAllLanes : RegisterOperand<DPR,
                                           "printVectorListFourSpacedAllLanes"> {
  let ParserMatchClass = VecListFourQAllLanesAsmOperand;
}
// Register list of one D register, with byte lane subscripting.
def VecListOneDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListOneDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of two D registers with byte lane subscripting.
def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of two Q registers with half-word lane subscripting.
def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of three D registers with byte lane subscripting.
def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of three Q registers with half-word lane subscripting.
def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of four D registers with byte lane subscripting.
def VecListFourDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of four Q registers with half-word lane subscripting.
def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// PatFrags selecting loads/stores by their known alignment, used to pick
// between NEON and VFP ld/st forms.
def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() >= 8;
}]>;
def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() >= 8;
}]>;
def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 4;
}]>;
def word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 4;
}]>;
def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 2;
}]>;
def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 2;
}]>;
def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 1;
}]>;
def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 1;
}]>;
def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() < 4;
}]>;
def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                    (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() < 4;
}]>;
//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

def SDTARMVCMP    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
def SDTARMVCMPZ   : SDTypeProfile<1, 1, []>;

def NEONvceq      : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
def NEONvceqz     : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
def NEONvcge      : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
def NEONvcgez     : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
def NEONvclez     : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
def NEONvcgeu     : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt      : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
def NEONvcgtz     : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
def NEONvcltz     : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
def NEONvcgtu     : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst      : SDNode<"ARMISD::VTST", SDTARMVCMP>;
// Types for vector shift by immediates. The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types. The "SHINS" version is for shift and insert operations.
def SDTARMVSH     : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;
def SDTARMVSHX    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def SDTARMVSHINS  : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

def NEONvshl      : SDNode<"ARMISD::VSHL", SDTARMVSH>;
def NEONvshrs     : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
def NEONvshru     : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
def NEONvshrn     : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;

def NEONvrshrs    : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
def NEONvrshru    : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
def NEONvrshrn    : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;

def NEONvqshls    : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
def NEONvqshlu    : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
def NEONvqshlsu   : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
def NEONvqshrns   : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
def NEONvqshrnu   : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
def NEONvqshrnsu  : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;

def NEONvqrshrns  : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
def NEONvqrshrnu  : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;

def NEONvsli      : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
def NEONvsri      : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;
def SDTARMVGETLN  : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;

def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def NEONvmovImm   : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
def NEONvmvnImm   : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;

def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;
def NEONvorrImm   : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
def NEONvbicImm   : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;

def NEONvbsl      : SDNode<"ARMISD::VBSL",
                           SDTypeProfile<1, 3, [SDTCisVec<0>,
                                                SDTCisSameAs<0, 1>,
                                                SDTCisSameAs<0, 2>,
                                                SDTCisSameAs<0, 3>]>>;
def NEONvdup      : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;

// VDUPLANE can produce a quad-register result from a double-register source,
// so the result is not constrained to match the source.
def NEONvduplane  : SDNode<"ARMISD::VDUPLANE",
                           SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                                SDTCisVT<2, i32>]>>;

def SDTARMVEXT    : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext      : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

def SDTARMVSHUF   : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def NEONvrev64    : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
def NEONvrev32    : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
def NEONvrev16    : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;

def SDTARMVSHUF2  : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>,
                                         SDTCisSameAs<0, 3>]>;
def NEONzip       : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp       : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn       : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

def SDTARMVMULL   : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                         SDTCisSameAs<1, 2>]>;
def NEONvmulls    : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
def NEONvmullu    : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;

def SDTARMVTBL1   : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
                                         SDTCisVT<2, v8i8>]>;
def SDTARMVTBL2   : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
                                         SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>;
def NEONvtbl1     : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>;
def NEONvtbl2     : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>;
// Match a VMOVIMM whose decoded modified-immediate is an all-zeros vector.
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 32 && EltVal == 0);
}]>;

// Match a VMOVIMM whose decoded modified-immediate is an all-ones vector.
def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 8 && EltVal == 0xff);
}]>;
//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
def VLDMQIA
  : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
                   IIC_fpLoad_m, "",
                   [(set DPair:$dst, (v2f64 (word_alignedload GPR:$Rn)))]>;

// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQIA
  : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
                   IIC_fpStore_m, "",
                   [(word_alignedstore (v2f64 DPair:$src), GPR:$Rn)]>;
// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VLDQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;

class VLDQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;

class VLDQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src), itin,
                "$src = $dst">;
class VLDQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb, $src = $dst">;
let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {

// VLD1 : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1,
          "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD1]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
class VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x2,
          "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}

def VLD1d8  : VLD1D<{0,0,0,?}, "8",  addrmode6align64>;
def VLD1d16 : VLD1D<{0,1,0,?}, "16", addrmode6align64>;
def VLD1d32 : VLD1D<{1,0,0,?}, "32", addrmode6align64>;
def VLD1d64 : VLD1D<{1,1,0,?}, "64", addrmode6align64>;

def VLD1q8  : VLD1Q<{0,0,?,?}, "8",  addrmode6align64or128>;
def VLD1q16 : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>;
def VLD1q32 : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>;
def VLD1q64 : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>;
// ...with address register writeback:
multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}
multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

defm VLD1d8wb  : VLD1DWB<{0,0,0,?}, "8",  addrmode6align64>;
defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>;
defm VLD1q8wb  : VLD1QWB<{0,0,?,?}, "8",  addrmode6align64or128>;
defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
// ...with 3 registers
class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt,
          "$Vd, $Rn", "", []>, Sched<[WriteVLD3]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VLD1d8T  : VLD1D3<{0,0,0,?}, "8",  addrmode6align64>;
def VLD1d16T : VLD1D3<{0,1,0,?}, "16", addrmode6align64>;
def VLD1d32T : VLD1D3<{1,0,0,?}, "32", addrmode6align64>;
def VLD1d64T : VLD1D3<{1,1,0,?}, "64", addrmode6align64>;

defm VLD1d8Twb  : VLD1D3WB<{0,0,0,?}, "8",  addrmode6align64>;
defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>;

def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
// ...with 4 registers
class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt,
          "$Vd, $Rn", "", []>, Sched<[WriteVLD4]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VLD1d8Q  : VLD1D4<{0,0,?,?}, "8",  addrmode6align64or128or256>;
def VLD1d16Q : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
def VLD1d32Q : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
def VLD1d64Q : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;

defm VLD1d8Qwb  : VLD1D4WB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;

def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
828 // VLD2 : Vector Load (multiple 2-element structures)
// Base class: destination register list, itinerary and address mode are
// parameters so the same class covers D-pair, four-D and spaced variants.
829 class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
830 InstrItinClass itin, Operand AddrMode>
831 : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
832 (ins AddrMode:$Rn), itin,
833 "vld2", Dt, "$Vd, $Rn", "", []> {
835 let Inst{5-4} = Rn{5-4};
836 let DecoderMethod = "DecodeVLDST2Instruction";
// Two-register (D-pair) forms, opcode 0b1000.
839 def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2,
840 addrmode6align64or128>, Sched<[WriteVLD2]>;
841 def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2,
842 addrmode6align64or128>, Sched<[WriteVLD2]>;
843 def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2,
844 addrmode6align64or128>, Sched<[WriteVLD2]>;
// Four-register (Q-sized) forms, opcode 0b0011.
846 def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2,
847 addrmode6align64or128or256>, Sched<[WriteVLD4]>;
848 def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2,
849 addrmode6align64or128or256>, Sched<[WriteVLD4]>;
850 def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2,
851 addrmode6align64or128or256>, Sched<[WriteVLD4]>;
853 def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
854 def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
855 def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
857 // ...with address register writeback:
// Same _fixed/_register writeback pattern as VLD1D3WB above.
858 multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
859 RegisterOperand VdTy, InstrItinClass itin, Operand AddrMode> {
860 def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
861 (ins AddrMode:$Rn), itin,
862 "vld2", Dt, "$Vd, $Rn!",
863 "$Rn.addr = $wb", []> {
864 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
865 let Inst{5-4} = Rn{5-4};
866 let DecoderMethod = "DecodeVLDST2Instruction";
868 def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
869 (ins AddrMode:$Rn, rGPR:$Rm), itin,
870 "vld2", Dt, "$Vd, $Rn, $Rm",
871 "$Rn.addr = $wb", []> {
872 let Inst{5-4} = Rn{5-4};
873 let DecoderMethod = "DecodeVLDST2Instruction";
877 defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u,
878 addrmode6align64or128>, Sched<[WriteVLD2]>;
879 defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u,
880 addrmode6align64or128>, Sched<[WriteVLD2]>;
881 defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u,
882 addrmode6align64or128>, Sched<[WriteVLD2]>;
884 defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u,
885 addrmode6align64or128or256>, Sched<[WriteVLD4]>;
886 defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u,
887 addrmode6align64or128or256>, Sched<[WriteVLD4]>;
888 defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u,
889 addrmode6align64or128or256>, Sched<[WriteVLD4]>;
891 def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
892 def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
893 def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
894 def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
895 def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
896 def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
898 // ...with double-spaced registers
// Opcode 0b1001 selects the every-other-D-register (spaced) list form.
899 def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2,
900 addrmode6align64or128>, Sched<[WriteVLD2]>;
901 def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2,
902 addrmode6align64or128>, Sched<[WriteVLD2]>;
903 def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2,
904 addrmode6align64or128>, Sched<[WriteVLD2]>;
905 defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u,
906 addrmode6align64or128>, Sched<[WriteVLD2]>;
907 defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u,
908 addrmode6align64or128>, Sched<[WriteVLD2]>;
909 defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u,
910 addrmode6align64or128>, Sched<[WriteVLD2]>;
912 // VLD3 : Vector Load (multiple 3-element structures)
// Three explicit D destinations; the asm list syntax is built in the class.
913 class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
914 : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
915 (ins addrmode6:$Rn), IIC_VLD3,
916 "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []>, Sched<[WriteVLD3]> {
919 let DecoderMethod = "DecodeVLDST3Instruction";
922 def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">;
923 def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">;
924 def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">;
926 def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
927 def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
928 def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
930 // ...with address register writeback:
// Unlike the multiclass writeback forms above, this uses a single class with
// an am6offset $Rm operand covering both fixed and register increments.
931 class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
932 : NLdSt<0, 0b10, op11_8, op7_4,
933 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
934 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
935 "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
936 "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
938 let DecoderMethod = "DecodeVLDST3Instruction";
941 def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">;
942 def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
943 def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;
945 def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
946 def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
947 def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
949 // ...with double-spaced registers:
950 def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">;
951 def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">;
952 def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">;
953 def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">;
954 def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
955 def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;
957 def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
958 def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
959 def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
961 // ...alternate versions to be allocated odd register numbers:
962 def VLD3q8oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
963 def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
964 def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
966 def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
967 def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
968 def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
970 // VLD4 : Vector Load (multiple 4-element structures)
// Same layout as the VLD3 section above, with a fourth destination register.
971 class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
972 : NLdSt<0, 0b10, op11_8, op7_4,
973 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
974 (ins addrmode6:$Rn), IIC_VLD4,
975 "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []>,
978 let Inst{5-4} = Rn{5-4};
979 let DecoderMethod = "DecodeVLDST4Instruction";
982 def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">;
983 def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">;
984 def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">;
986 def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
987 def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
988 def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
990 // ...with address register writeback:
991 class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
992 : NLdSt<0, 0b10, op11_8, op7_4,
993 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
994 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
995 "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
996 "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
997 let Inst{5-4} = Rn{5-4};
998 let DecoderMethod = "DecodeVLDST4Instruction";
1001 def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">;
1002 def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
1003 def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;
1005 def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1006 def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1007 def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1009 // ...with double-spaced registers:
1010 def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">;
1011 def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">;
1012 def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">;
1013 def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">;
1014 def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
1015 def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;
1017 def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1018 def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1019 def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1021 // ...alternate versions to be allocated odd register numbers:
1022 def VLD4q8oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
1023 def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
1024 def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
1026 def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1027 def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1028 def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
1030 } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
1032 // Classes for VLD*LN pseudo-instructions with multi-register operands.
1033 // These are expanded to real instructions after register allocation.
// Each pair below is a plain form and a "WB" form that also produces the
// updated base address in $wb; the Q/QQ/QQQQ prefix gives the register width.
// All of them tie $src to $dst so the untouched lanes are preserved.
1034 class VLDQLNPseudo<InstrItinClass itin>
1035 : PseudoNLdSt<(outs QPR:$dst),
1036 (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
1037 itin, "$src = $dst">;
1038 class VLDQLNWBPseudo<InstrItinClass itin>
1039 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
1040 (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
1041 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
1042 class VLDQQLNPseudo<InstrItinClass itin>
1043 : PseudoNLdSt<(outs QQPR:$dst),
1044 (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
1045 itin, "$src = $dst">;
1046 class VLDQQLNWBPseudo<InstrItinClass itin>
1047 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
1048 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
1049 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
1050 class VLDQQQQLNPseudo<InstrItinClass itin>
1051 : PseudoNLdSt<(outs QQQQPR:$dst),
1052 (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
1053 itin, "$src = $dst">;
1054 class VLDQQQQLNWBPseudo<InstrItinClass itin>
1055 : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
1056 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
1057 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
1059 // VLD1LN : Vector Load (single element to one lane)
// Loads one element into lane $lane of $Vd, keeping the other lanes ($src is
// tied to $Vd). Selected via the vector_insert pattern below.
1060 class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
1062 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
1063 (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
1064 IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
1066 [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
1067 (i32 (LoadOp addrmode6:$Rn)),
1070 let DecoderMethod = "DecodeVLD1LN";
// Same as VLD1LN but with the addrmode6oneL32 address operand (32-bit form).
1072 class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
1074 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
1075 (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
1076 IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
1078 [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
1079 (i32 (LoadOp addrmode6oneL32:$Rn)),
1080 imm:$lane))]>, Sched<[WriteVLD1]> {
1082 let DecoderMethod = "DecodeVLD1LN";
// Q-register pseudo with the matching vector_insert pattern attached.
1084 class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln>,
1085 Sched<[WriteVLD1]> {
1086 let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
1087 (i32 (LoadOp addrmode6:$addr)),
// Lane index goes into different Inst bits depending on element size.
1091 def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
1092 let Inst{7-5} = lane{2-0};
1094 def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
1095 let Inst{7-6} = lane{1-0};
1096 let Inst{5-4} = Rn{5-4};
1098 def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
1099 let Inst{7} = lane{0};
1100 let Inst{5-4} = Rn{5-4};
1103 def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>;
1104 def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
1105 def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;
// f32 lane inserts reuse the 32-bit integer lane-load instructions.
1107 def : Pat<(vector_insert (v2f32 DPR:$src),
1108 (f32 (load addrmode6:$addr)), imm:$lane),
1109 (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
1110 def : Pat<(vector_insert (v4f32 QPR:$src),
1111 (f32 (load addrmode6:$addr)), imm:$lane),
1112 (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
1114 let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
1116 // ...with address register writeback:
// Lane load with base-register update; no ISel pattern (selected manually).
1117 class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1118 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb),
1119 (ins addrmode6:$Rn, am6offset:$Rm,
1120 DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
1121 "\\{$Vd[$lane]\\}, $Rn$Rm",
1122 "$src = $Vd, $Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
1123 let DecoderMethod = "DecodeVLD1LN";
1126 def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
1127 let Inst{7-5} = lane{2-0};
1129 def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
1130 let Inst{7-6} = lane{1-0};
1131 let Inst{4} = Rn{4};
1133 def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
1134 let Inst{7} = lane{0};
1135 let Inst{5} = Rn{4};
1136 let Inst{4} = Rn{4};
1139 def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
1140 def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
1141 def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
1143 // VLD2LN : Vector Load (single 2-element structure to one lane)
// Loads lane $lane of two D registers; both sources are tied so untouched
// lanes are preserved.
1144 class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
1145 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
1146 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
1147 IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
1148 "$src1 = $Vd, $src2 = $dst2", []>, Sched<[WriteVLD1]> {
1150 let Inst{4} = Rn{4};
1151 let DecoderMethod = "DecodeVLD2LN";
1154 def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> {
1155 let Inst{7-5} = lane{2-0};
1157 def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
1158 let Inst{7-6} = lane{1-0};
1160 def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
1161 let Inst{7} = lane{0};
1164 def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
1165 def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
1166 def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
1168 // ...with double-spaced registers:
1169 def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
1170 let Inst{7-6} = lane{1-0};
1172 def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
1173 let Inst{7} = lane{0};
1176 def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
1177 def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
1179 // ...with address register writeback:
1180 class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1181 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
1182 (ins addrmode6:$Rn, am6offset:$Rm,
1183 DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
1184 "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
1185 "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
1186 let Inst{4} = Rn{4};
1187 let DecoderMethod = "DecodeVLD2LN";
1190 def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
1191 let Inst{7-5} = lane{2-0};
1193 def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
1194 let Inst{7-6} = lane{1-0};
1196 def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
1197 let Inst{7} = lane{0};
1200 def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
1201 def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
1202 def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
1204 def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
1205 let Inst{7-6} = lane{1-0};
1207 def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
1208 let Inst{7} = lane{0};
1211 def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
1212 def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
1214 // VLD3LN : Vector Load (single 3-element structure to one lane)
// Same shape as VLD2LN with a third tied destination register.
1215 class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
1216 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
1217 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
1218 nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
1219 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
1220 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []>, Sched<[WriteVLD2]> {
1222 let DecoderMethod = "DecodeVLD3LN";
1225 def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> {
1226 let Inst{7-5} = lane{2-0};
1228 def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
1229 let Inst{7-6} = lane{1-0};
1231 def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
1232 let Inst{7} = lane{0};
1235 def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
1236 def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
1237 def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
1239 // ...with double-spaced registers:
1240 def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
1241 let Inst{7-6} = lane{1-0};
1243 def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
1244 let Inst{7} = lane{0};
1247 def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
1248 def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
1250 // ...with address register writeback:
1251 class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1252 : NLdStLn<1, 0b10, op11_8, op7_4,
1253 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
1254 (ins addrmode6:$Rn, am6offset:$Rm,
1255 DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
1256 IIC_VLD3lnu, "vld3", Dt,
1257 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
1258 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
1259 []>, Sched<[WriteVLD2]> {
1260 let DecoderMethod = "DecodeVLD3LN";
1263 def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
1264 let Inst{7-5} = lane{2-0};
1266 def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
1267 let Inst{7-6} = lane{1-0};
1269 def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
1270 let Inst{7} = lane{0};
1273 def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1274 def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1275 def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1277 def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
1278 let Inst{7-6} = lane{1-0};
1280 def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
1281 let Inst{7} = lane{0};
1284 def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1285 def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1287 // VLD4LN : Vector Load (single 4-element structure to one lane)
// Same shape as VLD3LN with a fourth tied destination register.
1288 class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
1289 : NLdStLn<1, 0b10, op11_8, op7_4,
1290 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
1291 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
1292 nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
1293 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
1294 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>,
1295 Sched<[WriteVLD2]> {
1297 let Inst{4} = Rn{4};
1298 let DecoderMethod = "DecodeVLD4LN";
1301 def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> {
1302 let Inst{7-5} = lane{2-0};
1304 def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
1305 let Inst{7-6} = lane{1-0};
1307 def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
1308 let Inst{7} = lane{0};
1309 let Inst{5} = Rn{5};
1312 def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1313 def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1314 def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1316 // ...with double-spaced registers:
1317 def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
1318 let Inst{7-6} = lane{1-0};
1320 def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
1321 let Inst{7} = lane{0};
1322 let Inst{5} = Rn{5};
1325 def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1326 def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1328 // ...with address register writeback:
1329 class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1330 : NLdStLn<1, 0b10, op11_8, op7_4,
1331 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
1332 (ins addrmode6:$Rn, am6offset:$Rm,
1333 DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
1334 IIC_VLD4lnu, "vld4", Dt,
1335 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
1336 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
1338 let Inst{4} = Rn{4};
1339 let DecoderMethod = "DecodeVLD4LN" ;
1342 def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
1343 let Inst{7-5} = lane{2-0};
1345 def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
1346 let Inst{7-6} = lane{1-0};
1348 def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
1349 let Inst{7} = lane{0};
1350 let Inst{5} = Rn{5};
1353 def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1354 def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1355 def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1357 def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
1358 let Inst{7-6} = lane{1-0};
1360 def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
1361 let Inst{7} = lane{0};
1362 let Inst{5} = Rn{5};
1365 def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1366 def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1368 } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
1370 // VLD1DUP : Vector Load (single element to all lanes)
// Loads one element and replicates it into every lane of one D register;
// selected by the NEONvdup-of-load pattern.
1371 class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
1373 : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
1375 IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
1376 [(set VecListOneDAllLanes:$Vd,
1377 (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]>,
1378 Sched<[WriteVLD2]> {
1380 let Inst{4} = Rn{4};
1381 let DecoderMethod = "DecodeVLD1DupInstruction";
1383 def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8,
1384 addrmode6dupalignNone>;
1385 def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16,
1386 addrmode6dupalign16>;
1387 def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load,
1388 addrmode6dupalign32>;
// f32 splat-loads reuse the 32-bit integer dup-load instruction.
1390 def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
1391 (VLD1DUPd32 addrmode6:$addr)>;
// Q-register form: replicates into both D registers of a pair.
1393 class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
1395 : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
1396 (ins AddrMode:$Rn), IIC_VLD1dup,
1397 "vld1", Dt, "$Vd, $Rn", "",
1398 [(set VecListDPairAllLanes:$Vd,
1399 (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
1401 let Inst{4} = Rn{4};
1402 let DecoderMethod = "DecodeVLD1DupInstruction";
1405 def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8,
1406 addrmode6dupalignNone>;
1407 def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16,
1408 addrmode6dupalign16>;
1409 def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load,
1410 addrmode6dupalign32>;
1412 def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
1413 (VLD1DUPq32 addrmode6:$addr)>;
1415 let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
1416 // ...with address register writeback:
// One-D-register dup-load with base update; _fixed/_register as elsewhere.
1417 multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1418 def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
1419 (outs VecListOneDAllLanes:$Vd, GPR:$wb),
1420 (ins AddrMode:$Rn), IIC_VLD1dupu,
1421 "vld1", Dt, "$Vd, $Rn!",
1422 "$Rn.addr = $wb", []> {
1423 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1424 let Inst{4} = Rn{4};
1425 let DecoderMethod = "DecodeVLD1DupInstruction";
1427 def _register : NLdSt<1, 0b10, 0b1100, op7_4,
1428 (outs VecListOneDAllLanes:$Vd, GPR:$wb),
1429 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
1430 "vld1", Dt, "$Vd, $Rn, $Rm",
1431 "$Rn.addr = $wb", []> {
1432 let Inst{4} = Rn{4};
1433 let DecoderMethod = "DecodeVLD1DupInstruction";
// D-pair dup-load with base update.
1436 multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1437 def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
1438 (outs VecListDPairAllLanes:$Vd, GPR:$wb),
1439 (ins AddrMode:$Rn), IIC_VLD1dupu,
1440 "vld1", Dt, "$Vd, $Rn!",
1441 "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
1442 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1443 let Inst{4} = Rn{4};
1444 let DecoderMethod = "DecodeVLD1DupInstruction";
1446 def _register : NLdSt<1, 0b10, 0b1100, op7_4,
1447 (outs VecListDPairAllLanes:$Vd, GPR:$wb),
1448 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
1449 "vld1", Dt, "$Vd, $Rn, $Rm",
1450 "$Rn.addr = $wb", []> {
1451 let Inst{4} = Rn{4};
1452 let DecoderMethod = "DecodeVLD1DupInstruction";
1456 defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8", addrmode6dupalignNone>;
1457 defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>;
1458 defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>;
1460 defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8", addrmode6dupalignNone>;
1461 defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>;
1462 defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>;
1464 // VLD2DUP : Vector Load (single 2-element structure to all lanes)
1465 class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy, Operand AddrMode>
1466 : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
1467 (ins AddrMode:$Rn), IIC_VLD2dup,
1468 "vld2", Dt, "$Vd, $Rn", "", []> {
1470 let Inst{4} = Rn{4};
1471 let DecoderMethod = "DecodeVLD2DupInstruction";
1474 def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes,
1475 addrmode6dupalign16>;
1476 def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes,
1477 addrmode6dupalign32>;
1478 def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes,
1479 addrmode6dupalign64>;
1481 // HACK this one, VLD2DUPd8x2 must be changed at the same time with VLD2b8 or
1482 // "vld2.8 {d0[], d2[]}, [r4:32]" will become "vld2.8 {d0, d2}, [r4:32]".
1483 // ...with double-spaced registers
1484 def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes,
1485 addrmode6dupalign16>;
1486 def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
1487 addrmode6dupalign32>;
1488 def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
1489 addrmode6dupalign64>;
1491 // ...with address register writeback:
1492 multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
1494 def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
1495 (outs VdTy:$Vd, GPR:$wb),
1496 (ins AddrMode:$Rn), IIC_VLD2dupu,
1497 "vld2", Dt, "$Vd, $Rn!",
1498 "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
1499 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1500 let Inst{4} = Rn{4};
1501 let DecoderMethod = "DecodeVLD2DupInstruction";
1503 def _register : NLdSt<1, 0b10, 0b1101, op7_4,
1504 (outs VdTy:$Vd, GPR:$wb),
1505 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu,
1506 "vld2", Dt, "$Vd, $Rn, $Rm",
1507 "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
1508 let Inst{4} = Rn{4};
1509 let DecoderMethod = "DecodeVLD2DupInstruction";
1513 defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes,
1514 addrmode6dupalign16>;
1515 defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes,
1516 addrmode6dupalign32>;
1517 defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes,
1518 addrmode6dupalign64>;
1520 defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes,
1521 addrmode6dupalign16>;
1522 defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
1523 addrmode6dupalign32>;
1524 defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
1525 addrmode6dupalign64>;
1527 // VLD3DUP : Vector Load (single 3-element structure to all lanes)
1528 class VLD3DUP<bits<4> op7_4, string Dt>
1529 : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
1530 (ins addrmode6dup:$Rn), IIC_VLD3dup,
1531 "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []>,
1532 Sched<[WriteVLD2]> {
1535 let DecoderMethod = "DecodeVLD3DupInstruction";
1538 def VLD3DUPd8 : VLD3DUP<{0,0,0,?}, "8">;
1539 def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
1540 def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;
1542 def VLD3DUPd8Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1543 def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1544 def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1546 // ...with double-spaced registers (not used for codegen):
1547 def VLD3DUPq8 : VLD3DUP<{0,0,1,?}, "8">;
1548 def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
1549 def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;
1551 // ...with address register writeback:
1552 class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode>
1553 : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
1554 (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu,
1555 "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
1556 "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
1558 let DecoderMethod = "DecodeVLD3DupInstruction";
1561 def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8", addrmode6dupalign64>;
1562 def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>;
1563 def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>;
1565 def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8", addrmode6dupalign64>;
1566 def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>;
1567 def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>;
1569 def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
1570 def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
1571 def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
1573 // VLD4DUP : Vector Load (single 4-element structure to all lanes)
1574 class VLD4DUP<bits<4> op7_4, string Dt>
1575 : NLdSt<1, 0b10, 0b1111, op7_4,
1576 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
1577 (ins addrmode6dup:$Rn), IIC_VLD4dup,
1578 "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
1580 let Inst{4} = Rn{4};
1581 let DecoderMethod = "DecodeVLD4DupInstruction";
// The 32-bit forms route an extra address bit into Inst{6}.
1584 def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">;
1585 def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
1586 def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
1588 def VLD4DUPd8Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1589 def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1590 def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1592 // ...with double-spaced registers (not used for codegen):
1593 def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">;
1594 def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
1595 def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
1597 // ...with address register writeback:
1598 class VLD4DUPWB<bits<4> op7_4, string Dt>
1599 : NLdSt<1, 0b10, 0b1111, op7_4,
1600 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
1601 (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
1602 "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
1603 "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
1604 let Inst{4} = Rn{4};
1605 let DecoderMethod = "DecodeVLD4DupInstruction";
1608 def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">;
1609 def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
1610 def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
1612 def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}, "8">;
1613 def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
1614 def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
1616 def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
1617 def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
1618 def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
1620 } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
1622 let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
1624 // Classes for VST* pseudo-instructions with multi-register operands.
1625 // These are expanded to real instructions after register allocation.
// Naming scheme: Q = one Q register source, QQ = 2-Q (4-D) super-register,
// QQQQ = 4-Q (8-D) super-register. The WB variants produce the updated base
// address in $wb, tied to $addr; "fixed" takes no offset operand while
// "register" takes an rGPR offset.
1626 class VSTQPseudo<InstrItinClass itin>
1627 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
1628 class VSTQWBPseudo<InstrItinClass itin>
1629 : PseudoNLdSt<(outs GPR:$wb),
1630 (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
1631 "$addr.addr = $wb">;
1632 class VSTQWBfixedPseudo<InstrItinClass itin>
1633 : PseudoNLdSt<(outs GPR:$wb),
1634 (ins addrmode6:$addr, QPR:$src), itin,
1635 "$addr.addr = $wb">;
1636 class VSTQWBregisterPseudo<InstrItinClass itin>
1637 : PseudoNLdSt<(outs GPR:$wb),
1638 (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin,
1639 "$addr.addr = $wb">;
1640 class VSTQQPseudo<InstrItinClass itin>
1641 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
1642 class VSTQQWBPseudo<InstrItinClass itin>
1643 : PseudoNLdSt<(outs GPR:$wb),
1644 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
1645 "$addr.addr = $wb">;
1646 class VSTQQWBfixedPseudo<InstrItinClass itin>
1647 : PseudoNLdSt<(outs GPR:$wb),
1648 (ins addrmode6:$addr, QQPR:$src), itin,
1649 "$addr.addr = $wb">;
1650 class VSTQQWBregisterPseudo<InstrItinClass itin>
1651 : PseudoNLdSt<(outs GPR:$wb),
1652 (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin,
1653 "$addr.addr = $wb">;
1655 class VSTQQQQPseudo<InstrItinClass itin>
1656 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
1657 class VSTQQQQWBPseudo<InstrItinClass itin>
1658 : PseudoNLdSt<(outs GPR:$wb),
1659 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
1660 "$addr.addr = $wb">;
1662 // VST1 : Vector Store (multiple single elements)
// VST1D stores one D register; VST1Q stores a D-register pair. Low bits of
// the addressing-mode operand are forwarded into the encoding (Inst{4} /
// Inst{5-4}).
1663 class VST1D<bits<4> op7_4, string Dt, Operand AddrMode>
1664 : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd),
1665 IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST1]> {
1667 let Inst{4} = Rn{4};
1668 let DecoderMethod = "DecodeVLDST1Instruction";
1670 class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode>
1671 : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd),
1672 IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST2]> {
1674 let Inst{5-4} = Rn{5-4};
1675 let DecoderMethod = "DecodeVLDST1Instruction";
// One def per element size; the braced bits select the size encoding.
1678 def VST1d8 : VST1D<{0,0,0,?}, "8", addrmode6align64>;
1679 def VST1d16 : VST1D<{0,1,0,?}, "16", addrmode6align64>;
1680 def VST1d32 : VST1D<{1,0,0,?}, "32", addrmode6align64>;
1681 def VST1d64 : VST1D<{1,1,0,?}, "64", addrmode6align64>;
1683 def VST1q8 : VST1Q<{0,0,?,?}, "8", addrmode6align64or128>;
1684 def VST1q16 : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>;
1685 def VST1q32 : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>;
1686 def VST1q64 : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>;
1688 // ...with address register writeback:
// Each multiclass produces a _fixed variant (post-increment by the access
// size; Rm is forced to 0b1101) and a _register variant (post-increment by
// a general-purpose register $Rm).
1689 multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1690 def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
1691 (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u,
1692 "vst1", Dt, "$Vd, $Rn!",
1693 "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
1694 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1695 let Inst{4} = Rn{4};
1696 let DecoderMethod = "DecodeVLDST1Instruction";
1698 def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
1699 (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd),
1701 "vst1", Dt, "$Vd, $Rn, $Rm",
1702 "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
1703 let Inst{4} = Rn{4};
1704 let DecoderMethod = "DecodeVLDST1Instruction";
1707 multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1708 def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
1709 (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
1710 "vst1", Dt, "$Vd, $Rn!",
1711 "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
1712 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1713 let Inst{5-4} = Rn{5-4};
1714 let DecoderMethod = "DecodeVLDST1Instruction";
1716 def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
1717 (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd),
1719 "vst1", Dt, "$Vd, $Rn, $Rm",
1720 "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
1721 let Inst{5-4} = Rn{5-4};
1722 let DecoderMethod = "DecodeVLDST1Instruction";
1726 defm VST1d8wb : VST1DWB<{0,0,0,?}, "8", addrmode6align64>;
1727 defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>;
1728 defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>;
1729 defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>;
1731 defm VST1q8wb : VST1QWB<{0,0,?,?}, "8", addrmode6align64or128>;
1732 defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
1733 defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
1734 defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
1736 // ...with 3 registers
// VST1 of three consecutive D registers (VecListThreeD), with plain and
// writeback (fixed/register) forms.
1737 class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode>
1738 : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
1739 (ins AddrMode:$Rn, VecListThreeD:$Vd),
1740 IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST3]> {
1742 let Inst{4} = Rn{4};
1743 let DecoderMethod = "DecodeVLDST1Instruction";
1745 multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
1746 def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
1747 (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
1748 "vst1", Dt, "$Vd, $Rn!",
1749 "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
1750 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1751 let Inst{5-4} = Rn{5-4};
1752 let DecoderMethod = "DecodeVLDST1Instruction";
1754 def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
1755 (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
1757 "vst1", Dt, "$Vd, $Rn, $Rm",
1758 "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
1759 let Inst{5-4} = Rn{5-4};
1760 let DecoderMethod = "DecodeVLDST1Instruction";
1764 def VST1d8T : VST1D3<{0,0,0,?}, "8", addrmode6align64>;
1765 def VST1d16T : VST1D3<{0,1,0,?}, "16", addrmode6align64>;
1766 def VST1d32T : VST1D3<{1,0,0,?}, "32", addrmode6align64>;
1767 def VST1d64T : VST1D3<{1,1,0,?}, "64", addrmode6align64>;
1769 defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8", addrmode6align64>;
1770 defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>;
1771 defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>;
1772 defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>;
1774 def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1775 def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
1776 def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
1778 // ...with 4 registers
// VST1 of four consecutive D registers (VecListFourD), with plain and
// writeback (fixed/register) forms.
1779 class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode>
1780 : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
1781 (ins AddrMode:$Rn, VecListFourD:$Vd),
1782 IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
1783 []>, Sched<[WriteVST4]> {
1785 let Inst{5-4} = Rn{5-4};
1786 let DecoderMethod = "DecodeVLDST1Instruction";
1788 multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
1789 def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
1790 (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
1791 "vst1", Dt, "$Vd, $Rn!",
1792 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
1793 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1794 let Inst{5-4} = Rn{5-4};
1795 let DecoderMethod = "DecodeVLDST1Instruction";
1797 def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
1798 (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
1800 "vst1", Dt, "$Vd, $Rn, $Rm",
1801 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
1802 let Inst{5-4} = Rn{5-4};
1803 let DecoderMethod = "DecodeVLDST1Instruction";
1807 def VST1d8Q : VST1D4<{0,0,?,?}, "8", addrmode6align64or128or256>;
1808 def VST1d16Q : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
1809 def VST1d32Q : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
1810 def VST1d64Q : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;
1812 defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>;
1813 defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
1814 defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
1815 defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;
1817 def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1818 def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
1819 def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
1821 // VST2 : Vector Store (multiple 2-element structures)
// Common class for all vst2 forms; the register-list operand type (VdTy)
// distinguishes d (adjacent pair), q (four D regs), and b (spaced pair).
1822 class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
1823 InstrItinClass itin, Operand AddrMode>
1824 : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins AddrMode:$Rn, VdTy:$Vd),
1825 itin, "vst2", Dt, "$Vd, $Rn", "", []> {
1827 let Inst{5-4} = Rn{5-4};
1828 let DecoderMethod = "DecodeVLDST2Instruction";
1831 def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2,
1832 addrmode6align64or128>, Sched<[WriteVST2]>;
1833 def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2,
1834 addrmode6align64or128>, Sched<[WriteVST2]>;
1835 def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2,
1836 addrmode6align64or128>, Sched<[WriteVST2]>;
1838 def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2,
1839 addrmode6align64or128or256>, Sched<[WriteVST4]>;
1840 def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2,
1841 addrmode6align64or128or256>, Sched<[WriteVST4]>;
1842 def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2,
1843 addrmode6align64or128or256>, Sched<[WriteVST4]>;
1845 def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
1846 def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
1847 def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
1849 // ...with address register writeback:
// _fixed: post-increment by the access size (Rm forced to 0b1101).
// _register: post-increment by a general-purpose register $Rm.
1850 multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
1851 RegisterOperand VdTy, Operand AddrMode> {
1852 def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
1853 (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u,
1854 "vst2", Dt, "$Vd, $Rn!",
1855 "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
1856 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1857 let Inst{5-4} = Rn{5-4};
1858 let DecoderMethod = "DecodeVLDST2Instruction";
1860 def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
1861 (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
1862 "vst2", Dt, "$Vd, $Rn, $Rm",
1863 "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
1864 let Inst{5-4} = Rn{5-4};
1865 let DecoderMethod = "DecodeVLDST2Instruction";
1868 multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1869 def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
1870 (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u,
1871 "vst2", Dt, "$Vd, $Rn!",
1872 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
1873 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1874 let Inst{5-4} = Rn{5-4};
1875 let DecoderMethod = "DecodeVLDST2Instruction";
1877 def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
1878 (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
1880 "vst2", Dt, "$Vd, $Rn, $Rm",
1881 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
1882 let Inst{5-4} = Rn{5-4};
1883 let DecoderMethod = "DecodeVLDST2Instruction";
1887 defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair,
1888 addrmode6align64or128>;
1889 defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair,
1890 addrmode6align64or128>;
1891 defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair,
1892 addrmode6align64or128>;
1894 defm VST2q8wb : VST2QWB<{0,0,?,?}, "8", addrmode6align64or128or256>;
1895 defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>;
1896 defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>;
1898 def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1899 def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1900 def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1901 def VST2q8PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1902 def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1903 def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1905 // ...with double-spaced registers
// "b" variants use VecListDPairSpaced (every-other-register pairs); opcode
// field 0b1001 instead of 0b1000.
1906 def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2,
1907 addrmode6align64or128>;
1908 def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2,
1909 addrmode6align64or128>;
1910 def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2,
1911 addrmode6align64or128>;
1912 defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced,
1913 addrmode6align64or128>;
1914 defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced,
1915 addrmode6align64or128>;
1916 defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced,
1917 addrmode6align64or128>;
1919 // VST3 : Vector Store (multiple 3-element structures)
// Stores three D registers as interleaved 3-element structures. op11_8 is
// 0b0100 for adjacent registers and 0b0101 for the double-spaced (q) forms.
1920 class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
1921 : NLdSt<0, 0b00, op11_8, op7_4, (outs),
1922 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
1923 "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []>, Sched<[WriteVST3]> {
1925 let Inst{4} = Rn{4};
1926 let DecoderMethod = "DecodeVLDST3Instruction";
1929 def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">;
1930 def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
1931 def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;
1933 def VST3d8Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
1934 def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
1935 def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
1937 // ...with address register writeback:
1938 class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1939 : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
1940 (ins addrmode6:$Rn, am6offset:$Rm,
1941 DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
1942 "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
1943 "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
1944 let Inst{4} = Rn{4};
1945 let DecoderMethod = "DecodeVLDST3Instruction";
1948 def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">;
1949 def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
1950 def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;
1952 def VST3d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
1953 def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
1954 def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
1956 // ...with double-spaced registers:
1957 def VST3q8 : VST3D<0b0101, {0,0,0,?}, "8">;
1958 def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">;
1959 def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">;
1960 def VST3q8_UPD : VST3DWB<0b0101, {0,0,0,?}, "8">;
1961 def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
1962 def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;
1964 def VST3q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
1965 def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
1966 def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
1968 // ...alternate versions to be allocated odd register numbers:
1969 def VST3q8oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
1970 def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
1971 def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
1973 def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
1974 def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
1975 def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
1977 // VST4 : Vector Store (multiple 4-element structures)
// Stores four D registers as interleaved 4-element structures. op11_8 is
// 0b0000 for adjacent registers and 0b0001 for the double-spaced (q) forms.
1978 class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
1979 : NLdSt<0, 0b00, op11_8, op7_4, (outs),
1980 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
1981 IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
1982 "", []>, Sched<[WriteVST4]> {
1984 let Inst{5-4} = Rn{5-4};
1985 let DecoderMethod = "DecodeVLDST4Instruction";
1988 def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">;
1989 def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
1990 def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;
1992 def VST4d8Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
1993 def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
1994 def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
1996 // ...with address register writeback:
1997 class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1998 : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
1999 (ins addrmode6:$Rn, am6offset:$Rm,
2000 DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
2001 "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
2002 "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
2003 let Inst{5-4} = Rn{5-4};
2004 let DecoderMethod = "DecodeVLDST4Instruction";
2007 def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">;
2008 def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
2009 def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;
2011 def VST4d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2012 def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2013 def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2015 // ...with double-spaced registers:
2016 def VST4q8 : VST4D<0b0001, {0,0,?,?}, "8">;
2017 def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">;
2018 def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">;
2019 def VST4q8_UPD : VST4DWB<0b0001, {0,0,?,?}, "8">;
2020 def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
2021 def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;
2023 def VST4q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2024 def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2025 def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2027 // ...alternate versions to be allocated odd register numbers:
2028 def VST4q8oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2029 def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2030 def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2032 def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2033 def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2034 def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2036 } // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
2038 // Classes for VST*LN pseudo-instructions with multi-register operands.
2039 // These are expanded to real instructions after register allocation.
// Single-lane variants of the VST pseudo classes above: each takes an extra
// nohash_imm $lane operand selecting the lane to store.
2040 class VSTQLNPseudo<InstrItinClass itin>
2041 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
2043 class VSTQLNWBPseudo<InstrItinClass itin>
2044 : PseudoNLdSt<(outs GPR:$wb),
2045 (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
2046 nohash_imm:$lane), itin, "$addr.addr = $wb">;
2047 class VSTQQLNPseudo<InstrItinClass itin>
2048 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
2050 class VSTQQLNWBPseudo<InstrItinClass itin>
2051 : PseudoNLdSt<(outs GPR:$wb),
2052 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
2053 nohash_imm:$lane), itin, "$addr.addr = $wb">;
2054 class VSTQQQQLNPseudo<InstrItinClass itin>
2055 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
2057 class VSTQQQQLNWBPseudo<InstrItinClass itin>
2058 : PseudoNLdSt<(outs GPR:$wb),
2059 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
2060 nohash_imm:$lane), itin, "$addr.addr = $wb">;
2062 // VST1LN : Vector Store (single element from one lane)
// Selection pattern stores ExtractOp(Vd, lane) through StoreOp; the lane
// number feeds encoding bits in each size-specific def below.
2063 class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
2064 PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
2065 : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
2066 (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
2067 IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
2068 [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]>,
2069 Sched<[WriteVST1]> {
2071 let DecoderMethod = "DecodeVST1LN";
// Q-register pseudo: same store pattern but extracting from a QPR source.
2073 class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
2074 : VSTQLNPseudo<IIC_VST1ln>, Sched<[WriteVST1]> {
2075 let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
2079 def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
2080 NEONvgetlaneu, addrmode6> {
2081 let Inst{7-5} = lane{2-0};
2083 def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
2084 NEONvgetlaneu, addrmode6> {
2085 let Inst{7-6} = lane{1-0};
2086 let Inst{4} = Rn{4};
2089 def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
2091 let Inst{7} = lane{0};
2092 let Inst{5-4} = Rn{5-4};
2095 def VST1LNq8Pseudo : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
2096 def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
2097 def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;
// Float lane stores reuse the i32 lane-store instructions.
2099 def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
2100 (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
2101 def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
2102 (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
2104 // ...with address register writeback:
// Post-indexed single-lane store: pattern yields the updated base in $wb.
2105 class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
2106 PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
2107 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2108 (ins AdrMode:$Rn, am6offset:$Rm,
2109 DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
2110 "\\{$Vd[$lane]\\}, $Rn$Rm",
2112 [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
2113 AdrMode:$Rn, am6offset:$Rm))]>,
2114 Sched<[WriteVST1]> {
2115 let DecoderMethod = "DecodeVST1LN";
2117 class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
2118 : VSTQLNWBPseudo<IIC_VST1lnu>, Sched<[WriteVST1]> {
2119 let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
2120 addrmode6:$addr, am6offset:$offset))];
2123 def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
2124 NEONvgetlaneu, addrmode6> {
2125 let Inst{7-5} = lane{2-0};
2127 def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
2128 NEONvgetlaneu, addrmode6> {
2129 let Inst{7-6} = lane{1-0};
2130 let Inst{4} = Rn{4};
2132 def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
2133 extractelt, addrmode6oneL32> {
2134 let Inst{7} = lane{0};
2135 let Inst{5-4} = Rn{5-4};
2138 def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
2139 def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>;
2140 def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
2142 let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
2144 // VST2LN : Vector Store (single 2-element structure from one lane)
// Stores lane $lane from two D registers as one 2-element structure; no
// selection pattern (pattern list is empty), matched via pseudos instead.
2145 class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
2146 : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
2147 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
2148 IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
2149 "", []>, Sched<[WriteVST1]> {
2151 let Inst{4} = Rn{4};
2152 let DecoderMethod = "DecodeVST2LN";
// Per-size defs route the lane number into the proper encoding bits.
2155 def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> {
2156 let Inst{7-5} = lane{2-0};
2158 def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
2159 let Inst{7-6} = lane{1-0};
2161 def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
2162 let Inst{7} = lane{0};
2165 def VST2LNd8Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2166 def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2167 def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2169 // ...with double-spaced registers:
2170 def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
2171 let Inst{7-6} = lane{1-0};
2172 let Inst{4} = Rn{4};
2174 def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
2175 let Inst{7} = lane{0};
2176 let Inst{4} = Rn{4};
2179 def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2180 def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2182 // ...with address register writeback:
2183 class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2184 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2185 (ins addrmode6:$Rn, am6offset:$Rm,
2186 DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
2187 "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
2188 "$Rn.addr = $wb", []> {
2189 let Inst{4} = Rn{4};
2190 let DecoderMethod = "DecodeVST2LN";
2193 def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
2194 let Inst{7-5} = lane{2-0};
2196 def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
2197 let Inst{7-6} = lane{1-0};
2199 def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
2200 let Inst{7} = lane{0};
2203 def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2204 def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2205 def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2207 def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
2208 let Inst{7-6} = lane{1-0};
2210 def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
2211 let Inst{7} = lane{0};
2214 def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2215 def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2217 // VST3LN : Vector Store (single 3-element structure from one lane)
// Stores lane $lane from three D registers as one 3-element structure; no
// selection pattern, matched via pseudos.
2218 class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
2219 : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
2220 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
2221 nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
2222 "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []>,
2223 Sched<[WriteVST2]> {
2225 let DecoderMethod = "DecodeVST3LN";
2228 def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> {
2229 let Inst{7-5} = lane{2-0};
2231 def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
2232 let Inst{7-6} = lane{1-0};
2234 def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
2235 let Inst{7} = lane{0};
2238 def VST3LNd8Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
2239 def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
2240 def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
2242 // ...with double-spaced registers:
2243 def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
2244 let Inst{7-6} = lane{1-0};
2246 def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
2247 let Inst{7} = lane{0};
2250 def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
2251 def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
2253 // ...with address register writeback:
2254 class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2255 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2256 (ins addrmode6:$Rn, am6offset:$Rm,
2257 DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
2258 IIC_VST3lnu, "vst3", Dt,
2259 "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
2260 "$Rn.addr = $wb", []> {
2261 let DecoderMethod = "DecodeVST3LN";
2264 def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
2265 let Inst{7-5} = lane{2-0};
2267 def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
2268 let Inst{7-6} = lane{1-0};
2270 def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
2271 let Inst{7} = lane{0};
2274 def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2275 def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2276 def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2278 def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
2279 let Inst{7-6} = lane{1-0};
2281 def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
2282 let Inst{7} = lane{0};
2285 def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2286 def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2288 // VST4LN : Vector Store (single 4-element structure from one lane)
// Stores lane $lane from four D registers as one 4-element structure; no
// selection pattern, matched via pseudos.
2289 class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
2290 : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
2291 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
2292 nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
2293 "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
2294 "", []>, Sched<[WriteVST2]> {
2296 let Inst{4} = Rn{4};
2297 let DecoderMethod = "DecodeVST4LN";
2300 def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> {
2301 let Inst{7-5} = lane{2-0};
2303 def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
2304 let Inst{7-6} = lane{1-0};
2306 def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
2307 let Inst{7} = lane{0};
2308 let Inst{5} = Rn{5};
2311 def VST4LNd8Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2312 def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2313 def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2315 // ...with double-spaced registers:
2316 def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
2317 let Inst{7-6} = lane{1-0};
2319 def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
2320 let Inst{7} = lane{0};
2321 let Inst{5} = Rn{5};
2324 def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2325 def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2327 // ...with address register writeback:
2328 class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2329 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2330 (ins addrmode6:$Rn, am6offset:$Rm,
2331 DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
2332 IIC_VST4lnu, "vst4", Dt,
2333 "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
2334 "$Rn.addr = $wb", []> {
2335 let Inst{4} = Rn{4};
2336 let DecoderMethod = "DecodeVST4LN";
2339 def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
2340 let Inst{7-5} = lane{2-0};
2342 def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
2343 let Inst{7-6} = lane{1-0};
2345 def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
2346 let Inst{7} = lane{0};
2347 let Inst{5} = Rn{5};
2350 def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2351 def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2352 def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2354 def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
2355 let Inst{7-6} = lane{1-0};
2357 def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
2358 let Inst{7} = lane{0};
2359 let Inst{5} = Rn{5};
2362 def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2363 def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2365 } // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
2367 // Use vld1/vst1 for unaligned f64 load / store
// On little-endian targets an f64 can be accessed as smaller elements with
// vld1/vst1, which tolerate lower alignment.  Big-endian targets instead
// select the 64-bit element form for any non-word-aligned access, since
// element-size reinterpretation would swap lanes there.
2368 def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
2369 (VLD1d16 addrmode6:$addr)>, Requires<[IsLE]>;
2370 def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
2371 (VST1d16 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
2372 def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
2373 (VLD1d8 addrmode6:$addr)>, Requires<[IsLE]>;
2374 def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
2375 (VST1d8 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
2376 def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
2377 (VLD1d64 addrmode6:$addr)>, Requires<[IsBE]>;
2378 def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
2379 (VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>;
2381 // Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64
2382 // load / store if it's legal.
// Doubleword-aligned v2f64 accesses are endian-neutral (64-bit elements); the
// narrower-element forms are little-endian only, matching the f64 patterns
// above.
2383 def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
2384 (VLD1q64 addrmode6:$addr)>;
2385 def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
2386 (VST1q64 addrmode6:$addr, QPR:$value)>;
2387 def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
2388 (VLD1q32 addrmode6:$addr)>, Requires<[IsLE]>;
2389 def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
2390 (VST1q32 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
2391 def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
2392 (VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>;
2393 def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
2394 (VST1q16 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
2395 def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
2396 (VLD1q8 addrmode6:$addr)>, Requires<[IsLE]>;
2397 def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
2398 (VST1q8 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
2400 //===----------------------------------------------------------------------===//
2401 // NEON pattern fragments
2402 //===----------------------------------------------------------------------===//
2404 // Extract D sub-registers of Q registers.
// Each transform maps a vector element index to the dsub_N index containing
// it, dividing by the number of elements per D register (8/4/2/1 for
// i8/i16/i32/f64).  The asserts guard the assumption that dsub indices are
// consecutive integers.
2405 def DSubReg_i8_reg : SDNodeXForm<imm, [{
2406 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
2407 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, SDLoc(N),
2410 def DSubReg_i16_reg : SDNodeXForm<imm, [{
2411 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
2412 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, SDLoc(N),
2415 def DSubReg_i32_reg : SDNodeXForm<imm, [{
2416 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
2417 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, SDLoc(N),
2420 def DSubReg_f64_reg : SDNodeXForm<imm, [{
2421 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
2422 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), SDLoc(N),
2426 // Extract S sub-registers of Q/D registers.
// One f32 element per S register, so the element index maps directly.
2427 def SSubReg_f32_reg : SDNodeXForm<imm, [{
2428 assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
2429 return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), SDLoc(N),
2433 // Translate lane numbers from Q registers to D subregs.
// Masking keeps only the lane index within one D register (Q lane modulo
// elements-per-D: 8/4/2 for i8/i16/i32); the bits masked off select which D
// subreg, handled by the DSubReg_* transforms above.
2434 def SubReg_i8_lane : SDNodeXForm<imm, [{
2435 return CurDAG->getTargetConstant(N->getZExtValue() & 7, SDLoc(N), MVT::i32);
2437 def SubReg_i16_lane : SDNodeXForm<imm, [{
2438 return CurDAG->getTargetConstant(N->getZExtValue() & 3, SDLoc(N), MVT::i32);
2440 def SubReg_i32_lane : SDNodeXForm<imm, [{
2441 return CurDAG->getTargetConstant(N->getZExtValue() & 1, SDLoc(N), MVT::i32);
2444 //===----------------------------------------------------------------------===//
2445 // Instruction Classes
2446 //===----------------------------------------------------------------------===//
2448 // Basic 2-register operations: double- and quad-register.
// N2VD/N2VQ wrap N2V with a single-node SelectionDAG pattern:
// $Vd = OpNode($Vm).  ResTy/OpTy allow result and operand element types to
// differ; D forms use the IIC_VUNAD itinerary, Q forms IIC_VUNAQ.
2449 class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2450 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
2451 string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
2452 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
2453 (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "",
2454 [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
2455 class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2456 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
2457 string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
2458 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
2459 (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "",
2460 [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;
2462 // Basic 2-register intrinsics, both double- and quad-register.
// Same shape as N2VD/N2VQ but matching an intrinsic (SDPatternOperator) and
// taking the itinerary as a parameter instead of hard-coding it.
2463 class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2464 bits<2> op17_16, bits<5> op11_7, bit op4,
2465 InstrItinClass itin, string OpcodeStr, string Dt,
2466 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2467 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
2468 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2469 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
2470 class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2471 bits<2> op17_16, bits<5> op11_7, bit op4,
2472 InstrItinClass itin, string OpcodeStr, string Dt,
2473 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2474 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
2475 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2476 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
2478 // Same as above, but not predicated.
// Built on N2Vnp (the unpredicated NEON 2-register base); used by
// instructions that do not accept an IT-block predicate.
2479 class N2VDIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
2480 InstrItinClass itin, string OpcodeStr, string Dt,
2481 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2482 : N2Vnp<op19_18, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm),
2483 itin, OpcodeStr, Dt,
2484 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
2486 class N2VQIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
2487 InstrItinClass itin, string OpcodeStr, string Dt,
2488 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2489 : N2Vnp<op19_18, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm),
2490 itin, OpcodeStr, Dt,
2491 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
2493 // Similar to NV2VQIntnp with some more encoding bits exposed (crypto).
// op6 is a class parameter here rather than fixed to 1.
2494 class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
2495 bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
2496 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2497 : N2Vnp<op19_18, op17_16, op10_8, op7, op6, (outs QPR:$Vd), (ins QPR:$Vm),
2498 itin, OpcodeStr, Dt,
2499 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
2501 // Same as N2VQIntXnp but with Vd as a src register.
// $src is tied to $Vd, so the intrinsic sees the previous destination value
// as its first operand (read-modify-write, e.g. crypto hash updates).
2502 class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
2503 bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
2504 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2505 : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
2506 (outs QPR:$Vd), (ins QPR:$src, QPR:$Vm),
2507 itin, OpcodeStr, Dt,
2508 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> {
2509 let Constraints = "$src = $Vd";
2512 // Narrow 2-register operations.
// Q-register source, D-register result (element width halves).
2513 class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2514 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
2515 InstrItinClass itin, string OpcodeStr, string Dt,
2516 ValueType TyD, ValueType TyQ, SDNode OpNode>
2517 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
2518 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2519 [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;
2521 // Narrow 2-register intrinsics.
// Intrinsic flavor of N2VN.
2522 class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2523 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
2524 InstrItinClass itin, string OpcodeStr, string Dt,
2525 ValueType TyD, ValueType TyQ, SDPatternOperator IntOp>
2526 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
2527 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2528 [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;
2530 // Long 2-register operations (currently only used for VMOVL).
// D-register source, Q-register result (element width doubles).
2531 class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2532 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
2533 InstrItinClass itin, string OpcodeStr, string Dt,
2534 ValueType TyQ, ValueType TyD, SDNode OpNode>
2535 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
2536 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2537 [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;
2539 // Long 2-register intrinsics.
// Intrinsic flavor of N2VL.
2540 class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2541 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
2542 InstrItinClass itin, string OpcodeStr, string Dt,
2543 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
2544 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
2545 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2546 [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;
2548 // 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// Both registers are updated in place ($src1/$src2 tied to $Vd/$Vm), so no
// selection pattern is attached; these are matched by custom lowering.
2549 class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
2550 : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
2551 (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
2552 OpcodeStr, Dt, "$Vd, $Vm",
2553 "$src1 = $Vd, $src2 = $Vm", []>;
2554 class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
2555 InstrItinClass itin, string OpcodeStr, string Dt>
2556 : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
2557 (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
2558 "$src1 = $Vd, $src2 = $Vm", []>;
2560 // Basic 3-register operations: double- and quad-register.
// $Vd = OpNode($Vn, $Vm) on D registers.  Commutable is forwarded to
// isCommutable so the scheduler/ISel may swap operands when profitable.
2561 class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2562 InstrItinClass itin, string OpcodeStr, string Dt,
2563 ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
2564 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2565 (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2566 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2567 [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
2568 // All of these have a two-operand InstAlias.
2569 let TwoOperandAliasConstraint = "$Vn = $Vd";
2570 let isCommutable = Commutable;
2572 // Same as N3VD but no data type.
// Built on N3VX, which omits the data-type suffix from the assembly string.
2573 class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2574 InstrItinClass itin, string OpcodeStr,
2575 ValueType ResTy, ValueType OpTy,
2576 SDNode OpNode, bit Commutable>
2577 : N3VX<op24, op23, op21_20, op11_8, 0, op4,
2578 (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2579 OpcodeStr, "$Vd, $Vn, $Vm", "",
2580 [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{
2581 // All of these have a two-operand InstAlias.
2582 let TwoOperandAliasConstraint = "$Vn = $Vd";
2583 let isCommutable = Commutable;
// Scalar (by-lane) forms: the second operand is one lane of $Vm, broadcast
// via NEONvduplane.  32-bit lanes index the low 16 D registers (DPR_VFP2);
// 16-bit lanes are restricted to the low 8 (DPR_8) by the encoding.
// Not commutable: only the $Vm operand may be a lane.
2586 class N3VDSL<bits<2> op21_20, bits<4> op11_8,
2587 InstrItinClass itin, string OpcodeStr, string Dt,
2588 ValueType Ty, SDNode ShOp>
2589 : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
2590 (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2591 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2593 (Ty (ShOp (Ty DPR:$Vn),
2594 (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
2595 // All of these have a two-operand InstAlias.
2596 let TwoOperandAliasConstraint = "$Vn = $Vd";
2597 let isCommutable = 0;
2599 class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
2600 string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
2601 : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
2602 (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2603 NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
2605 (Ty (ShOp (Ty DPR:$Vn),
2606 (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
2607 // All of these have a two-operand InstAlias.
2608 let TwoOperandAliasConstraint = "$Vn = $Vd";
2609 let isCommutable = 0;
// Q-register counterparts of N3VD/N3VDX: $Vd = OpNode($Vn, $Vm) on QPR.
2612 class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2613 InstrItinClass itin, string OpcodeStr, string Dt,
2614 ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
2615 : N3V<op24, op23, op21_20, op11_8, 1, op4,
2616 (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2617 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2618 [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
2619 // All of these have a two-operand InstAlias.
2620 let TwoOperandAliasConstraint = "$Vn = $Vd";
2621 let isCommutable = Commutable;
// No data-type suffix variant (N3VX base).
2623 class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2624 InstrItinClass itin, string OpcodeStr,
2625 ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
2626 : N3VX<op24, op23, op21_20, op11_8, 1, op4,
2627 (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2628 OpcodeStr, "$Vd, $Vn, $Vm", "",
2629 [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
2630 // All of these have a two-operand InstAlias.
2631 let TwoOperandAliasConstraint = "$Vn = $Vd";
2632 let isCommutable = Commutable;
// By-lane forms with a Q-register first operand and result; the lane still
// comes from a D register (DPR_VFP2 for 32-bit lanes, DPR_8 for 16-bit).
2634 class N3VQSL<bits<2> op21_20, bits<4> op11_8,
2635 InstrItinClass itin, string OpcodeStr, string Dt,
2636 ValueType ResTy, ValueType OpTy, SDNode ShOp>
2637 : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
2638 (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2639 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2640 [(set (ResTy QPR:$Vd),
2641 (ResTy (ShOp (ResTy QPR:$Vn),
2642 (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
2644 // All of these have a two-operand InstAlias.
2645 let TwoOperandAliasConstraint = "$Vn = $Vd";
2646 let isCommutable = 0;
2648 class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
2649 ValueType ResTy, ValueType OpTy, SDNode ShOp>
2650 : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
2651 (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2652 NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
2653 [(set (ResTy QPR:$Vd),
2654 (ResTy (ShOp (ResTy QPR:$Vn),
2655 (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
2657 // All of these have a two-operand InstAlias.
2658 let TwoOperandAliasConstraint = "$Vn = $Vd";
2659 let isCommutable = 0;
2662 // Basic 3-register intrinsics, both double- and quad-register.
// Intrinsic flavor of N3VD; the instruction Format f is a parameter.
2663 class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2664 Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2665 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
2666 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2667 (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
2668 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2669 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
2670 // All of these have a two-operand InstAlias.
2671 let TwoOperandAliasConstraint = "$Vn = $Vd";
2672 let isCommutable = Commutable;
// Unpredicated variant (N3Vnp base).
2675 class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
2676 bit op4, Format f, InstrItinClass itin, string OpcodeStr,
2677 string Dt, ValueType ResTy, ValueType OpTy,
2678 SDPatternOperator IntOp, bit Commutable>
2679 : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
2680 (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
2681 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
// By-lane intrinsic forms (D-register).
2683 class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2684 string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
2685 : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
2686 (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2687 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2689 (Ty (IntOp (Ty DPR:$Vn),
2690 (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
2692 let isCommutable = 0;
2695 class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2696 string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
2697 : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
2698 (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2699 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2701 (Ty (IntOp (Ty DPR:$Vn),
2702 (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
2703 let isCommutable = 0;
// "Shifted" operand-order variant: assembly and pattern take $Vm before $Vn,
// and the two-operand alias ties $Vm (not $Vn) to $Vd.
2705 class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2706 Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2707 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2708 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2709 (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
2710 OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
2711 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
2712 let TwoOperandAliasConstraint = "$Vm = $Vd";
2713 let isCommutable = 0;
// Q-register intrinsic forms, predicated and (N3Vnp-based) unpredicated.
2716 class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2717 Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2718 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
2719 : N3V<op24, op23, op21_20, op11_8, 1, op4,
2720 (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
2721 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2722 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
2723 // All of these have a two-operand InstAlias.
2724 let TwoOperandAliasConstraint = "$Vn = $Vd";
2725 let isCommutable = Commutable;
2728 class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
2729 bit op4, Format f, InstrItinClass itin, string OpcodeStr,
2730 string Dt, ValueType ResTy, ValueType OpTy,
2731 SDPatternOperator IntOp, bit Commutable>
2732 : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
2733 (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt,
2734 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
2736 // Same as N3VQIntnp but with Vd as a src register.
// Read-modify-write: $src is tied to $Vd and passed as the intrinsic's first
// operand (used by the 3-argument crypto intrinsics).
2737 class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
2738 bit op4, Format f, InstrItinClass itin, string OpcodeStr,
2739 string Dt, ValueType ResTy, ValueType OpTy,
2740 SDPatternOperator IntOp, bit Commutable>
2741 : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
2742 (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm),
2743 f, itin, OpcodeStr, Dt,
2744 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
2745 (OpTy QPR:$Vm))))]> {
2746 let Constraints = "$src = $Vd";
// Q-register by-lane intrinsic forms and the operand-swapped (Sh) variant.
2749 class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2750 string OpcodeStr, string Dt,
2751 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2752 : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
2753 (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2754 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2755 [(set (ResTy QPR:$Vd),
2756 (ResTy (IntOp (ResTy QPR:$Vn),
2757 (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
2759 let isCommutable = 0;
2761 class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2762 string OpcodeStr, string Dt,
2763 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2764 : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
2765 (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2766 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2767 [(set (ResTy QPR:$Vd),
2768 (ResTy (IntOp (ResTy QPR:$Vn),
2769 (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
2771 let isCommutable = 0;
// "Shifted" order: $Vm precedes $Vn in assembly and the pattern, and the
// two-operand alias ties $Vm to $Vd (cf. N3VDIntSh).
2773 class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2774 Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2775 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2776 : N3V<op24, op23, op21_20, op11_8, 1, op4,
2777 (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
2778 OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
2779 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
2780 let TwoOperandAliasConstraint = "$Vm = $Vd";
2781 let isCommutable = 0;
2784 // Multiply-Add/Sub operations: double- and quad-register.
// $Vd = OpNode($src1, MulOp($Vn, $Vm)) with $src1 tied to $Vd
// (accumulate/subtract into the destination, e.g. VMLA/VMLS).
2785 class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2786 InstrItinClass itin, string OpcodeStr, string Dt,
2787 ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
2788 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2789 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2790 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2791 [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
2792 (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;
// By-lane forms: the multiplier's second operand is a broadcast lane of $Vm.
2794 class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2795 string OpcodeStr, string Dt,
2796 ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
2797 : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
2799 (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2801 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2803 (Ty (ShOp (Ty DPR:$src1),
2805 (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
2807 class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2808 string OpcodeStr, string Dt,
2809 ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
2810 : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
2812 (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2814 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2816 (Ty (ShOp (Ty DPR:$src1),
2818 (Ty (NEONvduplane (Ty DPR_8:$Vm),
// Q-register accumulate form.
2821 class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2822 InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
2823 SDPatternOperator MulOp, SDPatternOperator OpNode>
2824 : N3V<op24, op23, op21_20, op11_8, 1, op4,
2825 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2826 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2827 [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
2828 (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
// Q-register by-lane multiply-accumulate/subtract forms; lane operand comes
// from a D register (DPR_VFP2 for 32-bit lanes, DPR_8 for 16-bit).
2829 class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2830 string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
2831 SDPatternOperator MulOp, SDPatternOperator ShOp>
2832 : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
2834 (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2836 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2837 [(set (ResTy QPR:$Vd),
2838 (ResTy (ShOp (ResTy QPR:$src1),
2839 (ResTy (MulOp QPR:$Vn,
2840 (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
2842 class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2843 string OpcodeStr, string Dt,
2844 ValueType ResTy, ValueType OpTy,
2845 SDPatternOperator MulOp, SDPatternOperator ShOp>
2846 : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
2848 (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2850 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2851 [(set (ResTy QPR:$Vd),
2852 (ResTy (ShOp (ResTy QPR:$src1),
2853 (ResTy (MulOp QPR:$Vn,
2854 (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
2857 // Neon Intrinsic-Op instructions (VABA): double- and quad-register.
// $Vd = OpNode($src1, IntOp($Vn, $Vm)); the intrinsic result is combined
// with the tied accumulator by a plain SDNode (e.g. add for VABA).
2858 class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2859 InstrItinClass itin, string OpcodeStr, string Dt,
2860 ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
2861 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2862 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2863 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2864 [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
2865 (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
2866 class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2867 InstrItinClass itin, string OpcodeStr, string Dt,
2868 ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
2869 : N3V<op24, op23, op21_20, op11_8, 1, op4,
2870 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2871 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2872 [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
2873 (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;
2875 // Neon 3-argument intrinsics, both double- and quad-register.
2876 // The destination register is also used as the first source operand register.
// Unlike N3VDIntOp, here the intrinsic itself takes all three operands,
// including the tied accumulator $src1.
2877 class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2878 InstrItinClass itin, string OpcodeStr, string Dt,
2879 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2880 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2881 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2882 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2883 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
2884 (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
2885 class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2886 InstrItinClass itin, string OpcodeStr, string Dt,
2887 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2888 : N3V<op24, op23, op21_20, op11_8, 1, op4,
2889 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2890 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2891 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
2892 (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
2894 // Long Multiply-Add/Sub operations.
// Q-register accumulator tied to $Vd; multiplier operands are D registers
// (widening multiply, e.g. VMLAL/VMLSL).
2895 class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2896 InstrItinClass itin, string OpcodeStr, string Dt,
2897 ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
2898 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2899 (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2900 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2901 [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
2902 (TyQ (MulOp (TyD DPR:$Vn),
2903 (TyD DPR:$Vm)))))]>;
// By-lane long multiply-accumulate forms.
2904 class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
2905 InstrItinClass itin, string OpcodeStr, string Dt,
2906 ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
2907 : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
2908 (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2910 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2912 (OpNode (TyQ QPR:$src1),
2913 (TyQ (MulOp (TyD DPR:$Vn),
2914 (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
2916 class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
2917 InstrItinClass itin, string OpcodeStr, string Dt,
2918 ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
2919 : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
2920 (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2922 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2924 (OpNode (TyQ QPR:$src1),
2925 (TyQ (MulOp (TyD DPR:$Vn),
2926 (TyD (NEONvduplane (TyD DPR_8:$Vm),
2929 // Long Intrinsic-Op vector operations with explicit extend (VABAL).
// $Vd = OpNode($src1, ExtOp(IntOp($Vn, $Vm))): the D-sized intrinsic result
// is widened by ExtOp before being combined with the Q accumulator.
2930 class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2931 InstrItinClass itin, string OpcodeStr, string Dt,
2932 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
2934 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2935 (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2936 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2937 [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
2938 (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
2939 (TyD DPR:$Vm)))))))]>;
2941 // Neon Long 3-argument intrinsic. The destination register is
2942 // a quad-register and is also used as the first source operand register.
2943 class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2944 InstrItinClass itin, string OpcodeStr, string Dt,
2945 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
2946 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2947 (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2948 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2950 (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
// By-lane variants of the long 3-argument intrinsic.
2951 class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2952 string OpcodeStr, string Dt,
2953 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2954 : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
2956 (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2958 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2959 [(set (ResTy QPR:$Vd),
2960 (ResTy (IntOp (ResTy QPR:$src1),
2962 (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
2964 class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
2965 InstrItinClass itin, string OpcodeStr, string Dt,
2966 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2967 : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
2969 (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2971 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2972 [(set (ResTy QPR:$Vd),
2973 (ResTy (IntOp (ResTy QPR:$src1),
2975 (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
2978 // Narrowing 3-register intrinsics.
// Q-register sources, D-register result (element width halves).
2979 class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2980 string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
2981 SDPatternOperator IntOp, bit Commutable>
2982 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2983 (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
2984 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2985 [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
2986 let isCommutable = Commutable;
2989 // Long 3-register operations.
// D-register sources, Q-register result (element width doubles).
2990 class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2991 InstrItinClass itin, string OpcodeStr, string Dt,
2992 ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
2993 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2994 (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2995 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2996 [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
2997 let isCommutable = Commutable;
// By-lane long forms.
3000 class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
3001 InstrItinClass itin, string OpcodeStr, string Dt,
3002 ValueType TyQ, ValueType TyD, SDNode OpNode>
3003 : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
3004 (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
3005 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
3007 (TyQ (OpNode (TyD DPR:$Vn),
3008 (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
3009 class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
3010 InstrItinClass itin, string OpcodeStr, string Dt,
3011 ValueType TyQ, ValueType TyD, SDNode OpNode>
3012 : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
3013 (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
3014 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
3016 (TyQ (OpNode (TyD DPR:$Vn),
3017 (TyD (NEONvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;
3019 // Long 3-register operations with explicitly extended operands.
// Both D operands are widened by ExtOp before OpNode combines them, so a
// plain (non-widening) SDNode can model instructions like VADDL/VSUBL.
3020 class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3021 InstrItinClass itin, string OpcodeStr, string Dt,
3022 ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
3024 : N3V<op24, op23, op21_20, op11_8, 0, op4,
3025 (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
3026 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3027 [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
3028 (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
3029 let isCommutable = Commutable;
3032 // Long 3-register intrinsics with explicit extend (VABDL).
// The D-sized intrinsic result is widened by ExtOp to fill the Q result.
3033 class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3034 InstrItinClass itin, string OpcodeStr, string Dt,
3035 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
3037 : N3V<op24, op23, op21_20, op11_8, 0, op4,
3038 (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
3039 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3040 [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
3041 (TyD DPR:$Vm))))))]> {
3042 let isCommutable = Commutable;
3045 // Long 3-register intrinsics.
3046 class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3047 InstrItinClass itin, string OpcodeStr, string Dt,
3048 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, bit Commutable>
3049 : N3V<op24, op23, op21_20, op11_8, 0, op4,
3050 (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
3051 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3052 [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
3053 let isCommutable = Commutable;
3056 // Same as above, but not predicated.
// Long 3-register intrinsic, not predicated (built on N3Vnp):
//   Qd = ResTy(IntOp(OpTy Dn, OpTy Dm)) — widens two D-register operands
//   into a Q-register result via the given intrinsic.
// NOTE(review): the Commutable parameter is declared but not bound to
// isCommutable in this class body — presumably consumed elsewhere or kept
// for interface parity with N3VLInt; confirm before relying on it.
3057 class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
3058 bit op4, InstrItinClass itin, string OpcodeStr,
3059 string Dt, ValueType ResTy, ValueType OpTy,
3060 SDPatternOperator IntOp, bit Commutable>
3061 : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
3062 (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
3063 [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
3065 class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
3066 string OpcodeStr, string Dt,
3067 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3068 : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
3069 (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
3070 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
3071 [(set (ResTy QPR:$Vd),
3072 (ResTy (IntOp (OpTy DPR:$Vn),
3073 (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
3075 class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
3076 InstrItinClass itin, string OpcodeStr, string Dt,
3077 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3078 : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
3079 (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
3080 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
3081 [(set (ResTy QPR:$Vd),
3082 (ResTy (IntOp (OpTy DPR:$Vn),
3083 (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
3086 // Wide 3-register operations.
3087 class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3088 string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
3089 SDNode OpNode, SDNode ExtOp, bit Commutable>
3090 : N3V<op24, op23, op21_20, op11_8, 0, op4,
3091 (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
3092 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3093 [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
3094 (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
3095 // All of these have a two-operand InstAlias.
3096 let TwoOperandAliasConstraint = "$Vn = $Vd";
3097 let isCommutable = Commutable;
3100 // Pairwise long 2-register intrinsics, both double- and quad-register.
// Pairwise long 2-register intrinsic, double-register form:
//   Dd = ResTy(IntOp(OpTy Dm)) — single D-register source, D-register result.
3101 class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
3102 bits<2> op17_16, bits<5> op11_7, bit op4,
3103 string OpcodeStr, string Dt,
3104 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3105 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
3106 (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
3107 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
// Pairwise long 2-register intrinsic, quad-register form:
//   Qd = ResTy(IntOp(OpTy Qm)) — differs from N2VDPLInt only in the Q-bit
//   (operand 6 of N2V is 1) and the QPR register class.
3108 class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
3109 bits<2> op17_16, bits<5> op11_7, bit op4,
3110 string OpcodeStr, string Dt,
3111 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3112 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
3113 (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
3114 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
3116 // Pairwise long 2-register accumulate intrinsics,
3117 // both double- and quad-register.
3118 // The destination register is also used as the first source operand register.
// Pairwise long 2-register accumulate intrinsic, double-register form:
//   Dd = ResTy(IntOp(ResTy src1, OpTy Dm)), with the accumulator tied to the
//   destination via the "$src1 = $Vd" constraint (destination doubles as the
//   first source operand).
3119 class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
3120 bits<2> op17_16, bits<5> op11_7, bit op4,
3121 string OpcodeStr, string Dt,
3122 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3123 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
3124 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
3125 OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
3126 [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
// Pairwise long 2-register accumulate intrinsic, quad-register form:
//   Qd = ResTy(IntOp(ResTy src1, OpTy Qm)), accumulator tied via
//   "$src1 = $Vd"; uses the quad-register itinerary IIC_VPALiQ.
3127 class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
3128 bits<2> op17_16, bits<5> op11_7, bit op4,
3129 string OpcodeStr, string Dt,
3130 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3131 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
3132 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
3133 OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
3134 [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;
3136 // Shift by immediate,
3137 // both double- and quad-register.
3138 let TwoOperandAliasConstraint = "$Vm = $Vd" in {
// Shift by immediate, double-register form:
//   Dd = Ty(OpNode(Ty Dm, i32 imm)) — the shift amount comes in through the
//   ImmTy operand class; itinerary and format are caller-supplied.
// (Defined inside the surrounding "$Vm = $Vd" TwoOperandAliasConstraint let.)
3139 class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3140 Format f, InstrItinClass itin, Operand ImmTy,
3141 string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
3142 : N2VImm<op24, op23, op11_8, op7, 0, op4,
3143 (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
3144 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3145 [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
// Shift by immediate, quad-register form:
//   Qd = Ty(OpNode(Ty Qm, i32 imm)) — identical to N2VDSh except the Q-bit
//   (operand 5 of N2VImm is 1) and the QPR register class.
3146 class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3147 Format f, InstrItinClass itin, Operand ImmTy,
3148 string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
3149 : N2VImm<op24, op23, op11_8, op7, 1, op4,
3150 (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
3151 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3152 [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
3155 // Long shift by immediate.
// Long shift by immediate: Qd = ResTy(OpNode(OpTy Dm, imm)) — widens a
// D-register source into a Q-register result (VSHLL-style encodings).
// NOTE(review): the immediate appears in the pattern as bare ImmTy:$SIMM,
// without the explicit (i32 ...) wrapper used by N2VNSh below — presumably
// intentional for the SDPatternOperator involved; confirm if touching this.
3156 class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
3157 string OpcodeStr, string Dt,
3158 ValueType ResTy, ValueType OpTy, Operand ImmTy,
3159 SDPatternOperator OpNode>
3160 : N2VImm<op24, op23, op11_8, op7, op6, op4,
3161 (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
3162 IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3163 [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm), ImmTy:$SIMM)))]>;
3165 // Narrow shift by immediate.
// Narrow shift by immediate: Dd = ResTy(OpNode(OpTy Qm, i32 imm)) — narrows
// a Q-register source into a D-register result, using the right-shift
// operand format N2RegVShRFrm.
3166 class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
3167 InstrItinClass itin, string OpcodeStr, string Dt,
3168 ValueType ResTy, ValueType OpTy, Operand ImmTy,
3169 SDPatternOperator OpNode>
3170 : N2VImm<op24, op23, op11_8, op7, op6, op4,
3171 (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
3172 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3173 [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
3174 (i32 ImmTy:$SIMM))))]>;
3176 // Shift right by immediate and accumulate,
3177 // both double- and quad-register.
3178 let TwoOperandAliasConstraint = "$Vm = $Vd" in {
// Shift right by immediate and accumulate, double-register form (VSRA-style):
//   Dd = Ty(src1 + ShOp(Dm, i32 imm)), with the accumulator tied to the
//   destination via "$src1 = $Vd".
// (Defined inside the surrounding "$Vm = $Vd" TwoOperandAliasConstraint let.)
3179 class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3180 Operand ImmTy, string OpcodeStr, string Dt,
3181 ValueType Ty, SDNode ShOp>
3182 : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
3183 (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
3184 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
3185 [(set DPR:$Vd, (Ty (add DPR:$src1,
3186 (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
// Shift right by immediate and accumulate, quad-register form:
//   Qd = Ty(src1 + ShOp(Qm, i32 imm)), accumulator tied via "$src1 = $Vd".
// NOTE(review): uses itinerary IIC_VPALiD like the D form — no IIC_VPALiQ
// here, unlike N2VQPLInt2 above; confirm whether that is intentional.
3187 class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3188 Operand ImmTy, string OpcodeStr, string Dt,
3189 ValueType Ty, SDNode ShOp>
3190 : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
3191 (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
3192 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
3193 [(set QPR:$Vd, (Ty (add QPR:$src1,
3194 (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
3197 // Shift by immediate and insert,
3198 // both double- and quad-register.
3199 let TwoOperandAliasConstraint = "$Vm = $Vd" in {
// Shift by immediate and insert, double-register form (VSLI/VSRI-style):
//   Dd = Ty(ShOp(src1, Dm, i32 imm)) — ShOp is ternary, taking the tied
//   destination ("$src1 = $Vd") as the value shifted bits are inserted into.
// (Defined inside the surrounding "$Vm = $Vd" TwoOperandAliasConstraint let.)
3200 class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3201 Operand ImmTy, Format f, string OpcodeStr, string Dt,
3202 ValueType Ty,SDNode ShOp>
3203 : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
3204 (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
3205 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
3206 [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
// Shift by immediate and insert, quad-register form:
//   Qd = Ty(ShOp(src1, Qm, i32 imm)), tied via "$src1 = $Vd"; uses the
//   quad itinerary IIC_VSHLiQ.
3207 class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3208 Operand ImmTy, Format f, string OpcodeStr, string Dt,
3209 ValueType Ty,SDNode ShOp>
3210 : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
3211 (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
3212 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
3213 [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
3216 // Convert, with fractional bits immediate,
3217 // both double- and quad-register.
// Fixed-point convert with fractional-bits immediate, double-register form:
//   Dd = ResTy(IntOp(OpTy Dm, i32 imm)) — the immediate is constrained by
//   the neon_vcvt_imm32 operand class; NVCVTFrm format, IIC_VUNAD itinerary.
3218 class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3219 string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
3220 SDPatternOperator IntOp>
3221 : N2VImm<op24, op23, op11_8, op7, 0, op4,
3222 (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
3223 IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3224 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
// Fixed-point convert with fractional-bits immediate, quad-register form:
//   Qd = ResTy(IntOp(OpTy Qm, i32 imm)) — same as N2VCvtD but with the
//   Q-bit set, QPR operands, and the IIC_VUNAQ itinerary.
3225 class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3226 string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
3227 SDPatternOperator IntOp>
3228 : N2VImm<op24, op23, op11_8, op7, 1, op4,
3229 (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
3230 IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3231 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;
3233 //===----------------------------------------------------------------------===//
3235 //===----------------------------------------------------------------------===//
3237 // Abbreviations used in multiclass suffixes:
3238 // Q = quarter int (8 bit) elements
3239 // H = half int (16 bit) elements
3240 // S = single int (32 bit) elements
3241 // D = double int (64 bit) elements
3243 // Neon 2-register vector operations and intrinsics.
3245 // Neon 2-register comparisons.
3246 // source operand element sizes of 8, 16 and 32 bits:
3247 multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3248 bits<5> op11_7, bit op4, string opc, string Dt,
3249 string asm, SDNode OpNode> {
3250 // 64-bit vector types.
3251 def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
3252 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3253 opc, !strconcat(Dt, "8"), asm, "",
3254 [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>;
3255 def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
3256 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3257 opc, !strconcat(Dt, "16"), asm, "",
3258 [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>;
3259 def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
3260 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3261 opc, !strconcat(Dt, "32"), asm, "",
3262 [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>;
3263 def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
3264 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3265 opc, "f32", asm, "",
3266 [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
3267 let Inst{10} = 1; // overwrite F = 1
3269 def v4f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
3270 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3271 opc, "f16", asm, "",
3272 [(set DPR:$Vd, (v4i16 (OpNode (v4f16 DPR:$Vm))))]>,
3273 Requires<[HasNEON,HasFullFP16]> {
3274 let Inst{10} = 1; // overwrite F = 1
3277 // 128-bit vector types.
3278 def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
3279 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3280 opc, !strconcat(Dt, "8"), asm, "",
3281 [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>;
3282 def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
3283 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3284 opc, !strconcat(Dt, "16"), asm, "",
3285 [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>;
3286 def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
3287 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3288 opc, !strconcat(Dt, "32"), asm, "",
3289 [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>;
3290 def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
3291 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3292 opc, "f32", asm, "",
3293 [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
3294 let Inst{10} = 1; // overwrite F = 1
3296 def v8f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
3297 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3298 opc, "f16", asm, "",
3299 [(set QPR:$Vd, (v8i16 (OpNode (v8f16 QPR:$Vm))))]>,
3300 Requires<[HasNEON,HasFullFP16]> {
3301 let Inst{10} = 1; // overwrite F = 1
3306 // Neon 2-register vector intrinsics,
3307 // element sizes of 8, 16 and 32 bits:
3308 multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3309 bits<5> op11_7, bit op4,
3310 InstrItinClass itinD, InstrItinClass itinQ,
3311 string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3312 // 64-bit vector types.
3313 def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3314 itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
3315 def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3316 itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
3317 def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3318 itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;
3320 // 128-bit vector types.
3321 def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3322 itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
3323 def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3324 itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
3325 def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3326 itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
3330 // Neon Narrowing 2-register vector operations,
3331 // source operand element sizes of 16, 32 and 64 bits:
3332 multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3333 bits<5> op11_7, bit op6, bit op4,
3334 InstrItinClass itin, string OpcodeStr, string Dt,
3336 def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
3337 itin, OpcodeStr, !strconcat(Dt, "16"),
3338 v8i8, v8i16, OpNode>;
3339 def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
3340 itin, OpcodeStr, !strconcat(Dt, "32"),
3341 v4i16, v4i32, OpNode>;
3342 def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
3343 itin, OpcodeStr, !strconcat(Dt, "64"),
3344 v2i32, v2i64, OpNode>;
3347 // Neon Narrowing 2-register vector intrinsics,
3348 // source operand element sizes of 16, 32 and 64 bits:
3349 multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3350 bits<5> op11_7, bit op6, bit op4,
3351 InstrItinClass itin, string OpcodeStr, string Dt,
3352 SDPatternOperator IntOp> {
3353 def v8i8 : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
3354 itin, OpcodeStr, !strconcat(Dt, "16"),
3355 v8i8, v8i16, IntOp>;
3356 def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
3357 itin, OpcodeStr, !strconcat(Dt, "32"),
3358 v4i16, v4i32, IntOp>;
3359 def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
3360 itin, OpcodeStr, !strconcat(Dt, "64"),
3361 v2i32, v2i64, IntOp>;
3365 // Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
3366 // source operand element sizes of 16, 32 and 64 bits:
3367 multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
3368 string OpcodeStr, string Dt, SDNode OpNode> {
3369 def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
3370 OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
3371 def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
3372 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
3373 def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
3374 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
3378 // Neon 3-register vector operations.
3380 // First with only element sizes of 8, 16 and 32 bits:
3381 multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3382 InstrItinClass itinD16, InstrItinClass itinD32,
3383 InstrItinClass itinQ16, InstrItinClass itinQ32,
3384 string OpcodeStr, string Dt,
3385 SDNode OpNode, bit Commutable = 0> {
3386 // 64-bit vector types.
3387 def v8i8 : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
3388 OpcodeStr, !strconcat(Dt, "8"),
3389 v8i8, v8i8, OpNode, Commutable>;
3390 def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
3391 OpcodeStr, !strconcat(Dt, "16"),
3392 v4i16, v4i16, OpNode, Commutable>;
3393 def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
3394 OpcodeStr, !strconcat(Dt, "32"),
3395 v2i32, v2i32, OpNode, Commutable>;
3397 // 128-bit vector types.
3398 def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
3399 OpcodeStr, !strconcat(Dt, "8"),
3400 v16i8, v16i8, OpNode, Commutable>;
3401 def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
3402 OpcodeStr, !strconcat(Dt, "16"),
3403 v8i16, v8i16, OpNode, Commutable>;
3404 def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
3405 OpcodeStr, !strconcat(Dt, "32"),
3406 v4i32, v4i32, OpNode, Commutable>;
3409 multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
3410 def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
3411 def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
3412 def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
3413 def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
3414 v4i32, v2i32, ShOp>;
3417 // ....then also with element size 64 bits:
3418 multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
3419 InstrItinClass itinD, InstrItinClass itinQ,
3420 string OpcodeStr, string Dt,
3421 SDNode OpNode, bit Commutable = 0>
3422 : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
3423 OpcodeStr, Dt, OpNode, Commutable> {
3424 def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
3425 OpcodeStr, !strconcat(Dt, "64"),
3426 v1i64, v1i64, OpNode, Commutable>;
3427 def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
3428 OpcodeStr, !strconcat(Dt, "64"),
3429 v2i64, v2i64, OpNode, Commutable>;
3433 // Neon 3-register vector intrinsics.
3435 // First with only element sizes of 16 and 32 bits:
3436 multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3437 InstrItinClass itinD16, InstrItinClass itinD32,
3438 InstrItinClass itinQ16, InstrItinClass itinQ32,
3439 string OpcodeStr, string Dt,
3440 SDPatternOperator IntOp, bit Commutable = 0> {
3441 // 64-bit vector types.
3442 def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
3443 OpcodeStr, !strconcat(Dt, "16"),
3444 v4i16, v4i16, IntOp, Commutable>;
3445 def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
3446 OpcodeStr, !strconcat(Dt, "32"),
3447 v2i32, v2i32, IntOp, Commutable>;
3449 // 128-bit vector types.
3450 def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
3451 OpcodeStr, !strconcat(Dt, "16"),
3452 v8i16, v8i16, IntOp, Commutable>;
3453 def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
3454 OpcodeStr, !strconcat(Dt, "32"),
3455 v4i32, v4i32, IntOp, Commutable>;
3457 multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3458 InstrItinClass itinD16, InstrItinClass itinD32,
3459 InstrItinClass itinQ16, InstrItinClass itinQ32,
3460 string OpcodeStr, string Dt,
3461 SDPatternOperator IntOp> {
3462 // 64-bit vector types.
3463 def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
3464 OpcodeStr, !strconcat(Dt, "16"),
3465 v4i16, v4i16, IntOp>;
3466 def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
3467 OpcodeStr, !strconcat(Dt, "32"),
3468 v2i32, v2i32, IntOp>;
3470 // 128-bit vector types.
3471 def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
3472 OpcodeStr, !strconcat(Dt, "16"),
3473 v8i16, v8i16, IntOp>;
3474 def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
3475 OpcodeStr, !strconcat(Dt, "32"),
3476 v4i32, v4i32, IntOp>;
3479 multiclass N3VIntSL_HS<bits<4> op11_8,
3480 InstrItinClass itinD16, InstrItinClass itinD32,
3481 InstrItinClass itinQ16, InstrItinClass itinQ32,
3482 string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3483 def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
3484 OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
3485 def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
3486 OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
3487 def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
3488 OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
3489 def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
3490 OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
3493 // ....then also with element size of 8 bits:
3494 multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3495 InstrItinClass itinD16, InstrItinClass itinD32,
3496 InstrItinClass itinQ16, InstrItinClass itinQ32,
3497 string OpcodeStr, string Dt,
3498 SDPatternOperator IntOp, bit Commutable = 0>
3499 : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
3500 OpcodeStr, Dt, IntOp, Commutable> {
3501 def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
3502 OpcodeStr, !strconcat(Dt, "8"),
3503 v8i8, v8i8, IntOp, Commutable>;
3504 def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
3505 OpcodeStr, !strconcat(Dt, "8"),
3506 v16i8, v16i8, IntOp, Commutable>;
3508 multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3509 InstrItinClass itinD16, InstrItinClass itinD32,
3510 InstrItinClass itinQ16, InstrItinClass itinQ32,
3511 string OpcodeStr, string Dt,
3512 SDPatternOperator IntOp>
3513 : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
3514 OpcodeStr, Dt, IntOp> {
3515 def v8i8 : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
3516 OpcodeStr, !strconcat(Dt, "8"),
3518 def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
3519 OpcodeStr, !strconcat(Dt, "8"),
3520 v16i8, v16i8, IntOp>;
3524 // ....then also with element size of 64 bits:
3525 multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3526 InstrItinClass itinD16, InstrItinClass itinD32,
3527 InstrItinClass itinQ16, InstrItinClass itinQ32,
3528 string OpcodeStr, string Dt,
3529 SDPatternOperator IntOp, bit Commutable = 0>
3530 : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
3531 OpcodeStr, Dt, IntOp, Commutable> {
3532 def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
3533 OpcodeStr, !strconcat(Dt, "64"),
3534 v1i64, v1i64, IntOp, Commutable>;
3535 def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
3536 OpcodeStr, !strconcat(Dt, "64"),
3537 v2i64, v2i64, IntOp, Commutable>;
3539 multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3540 InstrItinClass itinD16, InstrItinClass itinD32,
3541 InstrItinClass itinQ16, InstrItinClass itinQ32,
3542 string OpcodeStr, string Dt,
3543 SDPatternOperator IntOp>
3544 : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
3545 OpcodeStr, Dt, IntOp> {
3546 def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
3547 OpcodeStr, !strconcat(Dt, "64"),
3548 v1i64, v1i64, IntOp>;
3549 def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
3550 OpcodeStr, !strconcat(Dt, "64"),
3551 v2i64, v2i64, IntOp>;
3554 // Neon Narrowing 3-register vector intrinsics,
3555 // source operand element sizes of 16, 32 and 64 bits:
3556 multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
3557 string OpcodeStr, string Dt,
3558 SDPatternOperator IntOp, bit Commutable = 0> {
3559 def v8i8 : N3VNInt<op24, op23, 0b00, op11_8, op4,
3560 OpcodeStr, !strconcat(Dt, "16"),
3561 v8i8, v8i16, IntOp, Commutable>;
3562 def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
3563 OpcodeStr, !strconcat(Dt, "32"),
3564 v4i16, v4i32, IntOp, Commutable>;
3565 def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
3566 OpcodeStr, !strconcat(Dt, "64"),
3567 v2i32, v2i64, IntOp, Commutable>;
3571 // Neon Long 3-register vector operations.
3573 multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3574 InstrItinClass itin16, InstrItinClass itin32,
3575 string OpcodeStr, string Dt,
3576 SDNode OpNode, bit Commutable = 0> {
3577 def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
3578 OpcodeStr, !strconcat(Dt, "8"),
3579 v8i16, v8i8, OpNode, Commutable>;
3580 def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
3581 OpcodeStr, !strconcat(Dt, "16"),
3582 v4i32, v4i16, OpNode, Commutable>;
3583 def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
3584 OpcodeStr, !strconcat(Dt, "32"),
3585 v2i64, v2i32, OpNode, Commutable>;
3588 multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
3589 InstrItinClass itin, string OpcodeStr, string Dt,
3591 def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
3592 !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
3593 def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
3594 !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
3597 multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3598 InstrItinClass itin16, InstrItinClass itin32,
3599 string OpcodeStr, string Dt,
3600 SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
3601 def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
3602 OpcodeStr, !strconcat(Dt, "8"),
3603 v8i16, v8i8, OpNode, ExtOp, Commutable>;
3604 def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
3605 OpcodeStr, !strconcat(Dt, "16"),
3606 v4i32, v4i16, OpNode, ExtOp, Commutable>;
3607 def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
3608 OpcodeStr, !strconcat(Dt, "32"),
3609 v2i64, v2i32, OpNode, ExtOp, Commutable>;
3612 // Neon Long 3-register vector intrinsics.
3614 // First with only element sizes of 16 and 32 bits:
3615 multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
3616 InstrItinClass itin16, InstrItinClass itin32,
3617 string OpcodeStr, string Dt,
3618 SDPatternOperator IntOp, bit Commutable = 0> {
3619 def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
3620 OpcodeStr, !strconcat(Dt, "16"),
3621 v4i32, v4i16, IntOp, Commutable>;
3622 def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
3623 OpcodeStr, !strconcat(Dt, "32"),
3624 v2i64, v2i32, IntOp, Commutable>;
3627 multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
3628 InstrItinClass itin, string OpcodeStr, string Dt,
3629 SDPatternOperator IntOp> {
3630 def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
3631 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
3632 def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
3633 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
3636 // ....then also with element size of 8 bits:
3637 multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3638 InstrItinClass itin16, InstrItinClass itin32,
3639 string OpcodeStr, string Dt,
3640 SDPatternOperator IntOp, bit Commutable = 0>
3641 : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
3642 IntOp, Commutable> {
3643 def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
3644 OpcodeStr, !strconcat(Dt, "8"),
3645 v8i16, v8i8, IntOp, Commutable>;
3648 // ....with explicit extend (VABDL).
3649 multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3650 InstrItinClass itin, string OpcodeStr, string Dt,
3651 SDPatternOperator IntOp, SDNode ExtOp, bit Commutable = 0> {
3652 def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
3653 OpcodeStr, !strconcat(Dt, "8"),
3654 v8i16, v8i8, IntOp, ExtOp, Commutable>;
3655 def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
3656 OpcodeStr, !strconcat(Dt, "16"),
3657 v4i32, v4i16, IntOp, ExtOp, Commutable>;
3658 def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
3659 OpcodeStr, !strconcat(Dt, "32"),
3660 v2i64, v2i32, IntOp, ExtOp, Commutable>;
3664 // Neon Wide 3-register vector intrinsics,
3665 // source operand element sizes of 8, 16 and 32 bits:
3666 multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3667 string OpcodeStr, string Dt,
3668 SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
3669 def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
3670 OpcodeStr, !strconcat(Dt, "8"),
3671 v8i16, v8i8, OpNode, ExtOp, Commutable>;
3672 def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
3673 OpcodeStr, !strconcat(Dt, "16"),
3674 v4i32, v4i16, OpNode, ExtOp, Commutable>;
3675 def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
3676 OpcodeStr, !strconcat(Dt, "32"),
3677 v2i64, v2i32, OpNode, ExtOp, Commutable>;
3681 // Neon Multiply-Op vector operations,
3682 // element sizes of 8, 16 and 32 bits:
3683 multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3684 InstrItinClass itinD16, InstrItinClass itinD32,
3685 InstrItinClass itinQ16, InstrItinClass itinQ32,
3686 string OpcodeStr, string Dt, SDNode OpNode> {
3687 // 64-bit vector types.
3688 def v8i8 : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
3689 OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
3690 def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
3691 OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
3692 def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
3693 OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;
3695 // 128-bit vector types.
3696 def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
3697 OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
3698 def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
3699 OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
3700 def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
3701 OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
3704 multiclass N3VMulOpSL_HS<bits<4> op11_8,
3705 InstrItinClass itinD16, InstrItinClass itinD32,
3706 InstrItinClass itinQ16, InstrItinClass itinQ32,
3707 string OpcodeStr, string Dt, SDPatternOperator ShOp> {
3708 def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
3709 OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
3710 def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
3711 OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
3712 def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
3713 OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
3715 def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
3716 OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
3720 // Neon Intrinsic-Op vector operations,
3721 // element sizes of 8, 16 and 32 bits:
3722 multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3723 InstrItinClass itinD, InstrItinClass itinQ,
3724 string OpcodeStr, string Dt, SDPatternOperator IntOp,
3726 // 64-bit vector types.
3727 def v8i8 : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
3728 OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
3729 def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
3730 OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
3731 def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
3732 OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;
3734 // 128-bit vector types.
3735 def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
3736 OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
3737 def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
3738 OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
3739 def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
3740 OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
3743 // Neon 3-argument intrinsics,
3744 // element sizes of 16 and 32 bits:
3745 multiclass N3VInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
3746 InstrItinClass itinD16, InstrItinClass itinD32,
3747 InstrItinClass itinQ16, InstrItinClass itinQ32,
3748 string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3749 // 64-bit vector types.
3750 def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD16,
3751 OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
3752 def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD32,
3753 OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;
3755 // 128-bit vector types.
3756 def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ16,
3757 OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
3758 def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ32,
3759 OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
3762 // element sizes of 8, 16 and 32 bits:
// Multiclass: extends N3VInt3_HS (16/32-bit defs inherited via the
// multiclass inheritance below) with the 8-bit element variants
// (size field 0b00) for both D and Q registers.
3763 multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3764 InstrItinClass itinD16, InstrItinClass itinD32,
3765 InstrItinClass itinQ16, InstrItinClass itinQ32,
3766 string OpcodeStr, string Dt, SDPatternOperator IntOp>
3767 :N3VInt3_HS <op24, op23, op11_8, op4, itinD16, itinD32,
3768 itinQ16, itinQ32, OpcodeStr, Dt, IntOp>{
3769 // 64-bit vector types.
3770 def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD16,
3771 OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
3772 // 128-bit vector types.
3773 def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ16,
3774 OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
3777 // Neon Long Multiply-Op vector operations,
3778 // element sizes of 8, 16 and 32 bits:
// Multiclass: long multiply-accumulate style ops (Q-sized result from two
// D-sized operands), e.g. VMLAL/VMLSL. Result element width is double the
// operand width: v8i8->v8i16, v4i16->v4i32, v2i32->v2i64.
// NOTE(review): numbering jumps 3781->3783 — the parameter tail (presumably
// "SDNode OpNode> {") is elided in this copy; confirm against upstream.
3779 multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3780 InstrItinClass itin16, InstrItinClass itin32,
3781 string OpcodeStr, string Dt, SDNode MulOp,
3783 def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
3784 !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
3785 def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
3786 !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
3787 def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
3788 !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
// Multiclass: scalar (by-lane) variants of the long multiply-op
// instructions, 16- and 32-bit elements only (8-bit has no lane form).
// The 16-bit variant uses the dedicated N3VLMulOpSL16 class.
3791 multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
3792 string Dt, SDNode MulOp, SDNode OpNode> {
3793 def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
3794 !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
3795 def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
3796 !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
3800 // Neon Long 3-argument intrinsics.
3802 // First with only element sizes of 16 and 32 bits:
// Multiclass: long 3-argument intrinsic instructions (Q accumulator,
// D operands); result elements are double the operand width.
3803 multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
3804 InstrItinClass itin16, InstrItinClass itin32,
3805 string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3806 def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
3807 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
3808 def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
3809 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
// Multiclass: scalar (by-lane) variants of the long 3-argument intrinsics,
// 16- and 32-bit elements only. 16-bit uses N3VLInt3SL16.
3812 multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
3813 string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3814 def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
3815 OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
3816 def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
3817 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
3820 // ....then also with element size of 8 bits:
// Multiclass: adds the 8-bit element variant (size field 0b00) on top of
// the 16/32-bit defs inherited from N3VLInt3_HS.
3821 multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3822 InstrItinClass itin16, InstrItinClass itin32,
3823 string OpcodeStr, string Dt, SDPatternOperator IntOp>
3824 : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
3825 def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
3826 OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
3829 // ....with explicit extend (VABAL).
// Multiclass: long 3-argument intrinsics with an explicit extend node
// (ExtOp) plus a combining node (OpNode); used for VABAL-style patterns.
// One def per element size 8/16/32, all sharing a single itinerary.
3830 multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3831 InstrItinClass itin, string OpcodeStr, string Dt,
3832 SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode> {
3833 def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
3834 OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
3835 IntOp, ExtOp, OpNode>;
3836 def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
3837 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
3838 IntOp, ExtOp, OpNode>;
3839 def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
3840 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
3841 IntOp, ExtOp, OpNode>;
3845 // Neon Pairwise long 2-register intrinsics,
3846 // element sizes of 8, 16 and 32 bits:
// Multiclass: pairwise-long 2-register intrinsics (e.g. VPADDL). Each def's
// result type has half the lane count and double the lane width of its
// operand (v8i8->v4i16, v4i16->v2i32, v2i32->v1i64, and the Q analogues).
3847 multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3848 bits<5> op11_7, bit op4,
3849 string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3850 // 64-bit vector types.
3851 def v8i8 : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3852 OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
3853 def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3854 OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
3855 def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3856 OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
3858 // 128-bit vector types.
3859 def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3860 OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
3861 def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3862 OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
3863 def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3864 OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
3868 // Neon Pairwise long 2-register accumulate intrinsics,
3869 // element sizes of 8, 16 and 32 bits:
// Multiclass: accumulating variant of N2VPLInt_QHS (e.g. VPADAL) — same
// widening type pattern, using the N2VDPLInt2/N2VQPLInt2 base classes.
3870 multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3871 bits<5> op11_7, bit op4,
3872 string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3873 // 64-bit vector types.
3874 def v8i8 : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3875 OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
3876 def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3877 OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
3878 def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3879 OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
3881 // 128-bit vector types.
3882 def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3883 OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
3884 def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3885 OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
3886 def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3887 OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
3891 // Neon 2-register vector shift by immediate,
3892 // with f of either N2RegVShLFrm or N2RegVShRFrm
3893 // element sizes of 8, 16, 32 and 64 bits:
// Multiclass: left-shift-by-immediate (N2RegVShLFrm) over 8/16/32/64-bit
// elements for D and Q registers. The element size is encoded into imm6 by
// pinning the leading bits of Inst{21-19} per size (the "let Inst" lines);
// the 64-bit variant instead sets the dedicated L bit (the `1` operand).
// NOTE(review): numbering jumps (e.g. 3895->3897) — the parameter tail and
// per-def closing braces appear elided in this copy; confirm upstream.
3894 multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
3895 InstrItinClass itin, string OpcodeStr, string Dt,
3897 // 64-bit vector types.
3898 def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3899 OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
3900 let Inst{21-19} = 0b001; // imm6 = 001xxx
3902 def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3903 OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
3904 let Inst{21-20} = 0b01; // imm6 = 01xxxx
3906 def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3907 OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
3908 let Inst{21} = 0b1; // imm6 = 1xxxxx
3910 def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
3911 OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
3914 // 128-bit vector types.
3915 def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3916 OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
3917 let Inst{21-19} = 0b001; // imm6 = 001xxx
3919 def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3920 OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
3921 let Inst{21-20} = 0b01; // imm6 = 01xxxx
3923 def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3924 OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
3925 let Inst{21} = 0b1; // imm6 = 1xxxxx
3927 def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
3928 OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
// Multiclass: right-shift-by-immediate counterpart of N2VShL_QHSD
// (N2RegVShRFrm). Uses the size-checked shr_imm8/16/32/64 immediate operand
// classes instead of a raw i32imm; same imm6 leading-bit encoding scheme.
// The baseOpc string parameter is unused in the visible defs — presumably
// for assembly alias generation; confirm against upstream.
3931 multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
3932 InstrItinClass itin, string OpcodeStr, string Dt,
3933 string baseOpc, SDNode OpNode> {
3934 // 64-bit vector types.
3935 def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
3936 OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
3937 let Inst{21-19} = 0b001; // imm6 = 001xxx
3939 def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
3940 OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
3941 let Inst{21-20} = 0b01; // imm6 = 01xxxx
3943 def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
3944 OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
3945 let Inst{21} = 0b1; // imm6 = 1xxxxx
3947 def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
3948 OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
3951 // 128-bit vector types.
3952 def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
3953 OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
3954 let Inst{21-19} = 0b001; // imm6 = 001xxx
3956 def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
3957 OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
3958 let Inst{21-20} = 0b01; // imm6 = 01xxxx
3960 def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
3961 OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
3962 let Inst{21} = 0b1; // imm6 = 1xxxxx
3964 def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
3965 OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
3969 // Neon Shift-Accumulate vector operations,
3970 // element sizes of 8, 16, 32 and 64 bits:
// Multiclass: shift-and-accumulate instructions (e.g. VSRA/VRSRA) over all
// four element sizes for D and Q registers, with shr_imm* right-shift
// immediates and the same imm6 leading-bit encoding as the shift classes.
3971 multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
3972 string OpcodeStr, string Dt, SDNode ShOp> {
3973 // 64-bit vector types.
3974 def v8i8 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
3975 OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
3976 let Inst{21-19} = 0b001; // imm6 = 001xxx
3978 def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
3979 OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
3980 let Inst{21-20} = 0b01; // imm6 = 01xxxx
3982 def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
3983 OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
3984 let Inst{21} = 0b1; // imm6 = 1xxxxx
3986 def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
3987 OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
3990 // 128-bit vector types.
3991 def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
3992 OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
3993 let Inst{21-19} = 0b001; // imm6 = 001xxx
3995 def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
3996 OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
3997 let Inst{21-20} = 0b01; // imm6 = 01xxxx
3999 def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
4000 OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
4001 let Inst{21} = 0b1; // imm6 = 1xxxxx
4003 def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
4004 OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
4008 // Neon Shift-Insert vector operations,
4009 // with f of either N2RegVShLFrm or N2RegVShRFrm
4010 // element sizes of 8, 16, 32 and 64 bits:
// Multiclass: left shift-insert (VSLI) — N2RegVShLFrm form, NEONvsli node,
// raw i32imm shift amount. Data-type suffixes are the plain "8"/"16"/...
// strings (no Dt parameter, unlike the shift multiclasses above).
// NOTE(review): parameter tail (presumably "string OpcodeStr> {") is elided
// between lines 4011 and 4013 in this copy — confirm upstream.
4011 multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
4013 // 64-bit vector types.
4014 def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
4015 N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> {
4016 let Inst{21-19} = 0b001; // imm6 = 001xxx
4018 def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
4019 N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> {
4020 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4022 def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
4023 N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> {
4024 let Inst{21} = 0b1; // imm6 = 1xxxxx
4026 def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
4027 N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>;
4030 // 128-bit vector types.
4031 def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
4032 N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> {
4033 let Inst{21-19} = 0b001; // imm6 = 001xxx
4035 def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
4036 N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> {
4037 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4039 def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
4040 N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> {
4041 let Inst{21} = 0b1; // imm6 = 1xxxxx
4043 def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
4044 N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>;
// Multiclass: right shift-insert (VSRI) counterpart of N2VShInsL_QHSD —
// N2RegVShRFrm form, NEONvsri node, size-checked shr_imm* immediates.
// NOTE(review): parameter tail elided between lines 4047 and 4049 in this
// copy — confirm upstream.
4047 multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
4049 // 64-bit vector types.
4050 def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
4051 N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> {
4052 let Inst{21-19} = 0b001; // imm6 = 001xxx
4054 def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
4055 N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> {
4056 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4058 def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
4059 N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> {
4060 let Inst{21} = 0b1; // imm6 = 1xxxxx
4062 def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
4063 N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>;
4066 // 128-bit vector types.
4067 def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
4068 N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> {
4069 let Inst{21-19} = 0b001; // imm6 = 001xxx
4071 def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
4072 N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> {
4073 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4075 def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
4076 N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> {
4077 let Inst{21} = 0b1; // imm6 = 1xxxxx
4079 def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
4080 N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>;
4084 // Neon Shift Long operations,
4085 // element sizes of 8, 16, 32 bits:
// Multiclass: shift-long (widening) instructions — Q-sized result from a
// D-sized operand. Shift-amount immediate ranges (imm1_7/imm1_15/imm1_31)
// match the source element width; same imm6 leading-bit encoding scheme.
4086 multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
4087 bit op4, string OpcodeStr, string Dt,
4088 SDPatternOperator OpNode> {
4089 def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
4090 OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> {
4091 let Inst{21-19} = 0b001; // imm6 = 001xxx
4093 def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
4094 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> {
4095 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4097 def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
4098 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> {
4099 let Inst{21} = 0b1; // imm6 = 1xxxxx
4103 // Neon Shift Narrow operations,
4104 // element sizes of 16, 32, 64 bits:
// Multiclass: shift-narrow instructions — D-sized result from a Q-sized
// operand. Note the Dt suffix names the SOURCE element width ("16"/"32"/
// "64") while the def name and types reflect the narrowed result.
4105 multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
4106 bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
4107 SDPatternOperator OpNode> {
4108 def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
4109 OpcodeStr, !strconcat(Dt, "16"),
4110 v8i8, v8i16, shr_imm8, OpNode> {
4111 let Inst{21-19} = 0b001; // imm6 = 001xxx
4113 def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
4114 OpcodeStr, !strconcat(Dt, "32"),
4115 v4i16, v4i32, shr_imm16, OpNode> {
4116 let Inst{21-20} = 0b01; // imm6 = 01xxxx
4118 def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
4119 OpcodeStr, !strconcat(Dt, "64"),
4120 v2i32, v2i64, shr_imm32, OpNode> {
4121 let Inst{21} = 0b1; // imm6 = 1xxxxx
4125 //===----------------------------------------------------------------------===//
4126 // Instruction Definitions.
4127 //===----------------------------------------------------------------------===//
4129 // Vector Add Operations.
4131 // VADD : Vector Add (integer and floating-point)
// NOTE(review): continuation lines are elided at several points in this copy
// (e.g. line 4133 after the VADD defm header) — confirm against upstream.
4132 defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
4134 def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
4135 v2f32, v2f32, fadd, 1>;
4136 def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
4137 v4f32, v4f32, fadd, 1>;
// Half-precision (f16) variants are gated on the FullFP16 extension.
4138 def VADDhd : N3VD<0, 0, 0b01, 0b1101, 0, IIC_VBIND, "vadd", "f16",
4139 v4f16, v4f16, fadd, 1>,
4140 Requires<[HasNEON,HasFullFP16]>;
4141 def VADDhq : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16",
4142 v8f16, v8f16, fadd, 1>,
4143 Requires<[HasNEON,HasFullFP16]>;
4144 // VADDL : Vector Add Long (Q = D + D)
4145 defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
4146 "vaddl", "s", add, sext, 1>;
4147 defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
4148 "vaddl", "u", add, zext, 1>;
4149 // VADDW : Vector Add Wide (Q = Q + D)
4150 defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
4151 defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
4152 // VHADD : Vector Halving Add
4153 defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
4154 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4155 "vhadd", "s", int_arm_neon_vhadds, 1>;
4156 defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
4157 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4158 "vhadd", "u", int_arm_neon_vhaddu, 1>;
4159 // VRHADD : Vector Rounding Halving Add
4160 defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
4161 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4162 "vrhadd", "s", int_arm_neon_vrhadds, 1>;
4163 defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
4164 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4165 "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
4166 // VQADD : Vector Saturating Add
4167 defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
4168 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4169 "vqadd", "s", int_arm_neon_vqadds, 1>;
4170 defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
4171 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4172 "vqadd", "u", int_arm_neon_vqaddu, 1>;
4173 // VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
4174 defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>;
4175 // VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
4176 defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
4177 int_arm_neon_vraddhn, 1>;
// Select VADDHN for the generic DAG form: truncate of a right-shifted sum
// by half the element width (the high-half narrowing idiom).
4179 def : Pat<(v8i8 (trunc (NEONvshru (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
4180 (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
4181 def : Pat<(v4i16 (trunc (NEONvshru (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
4182 (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
4183 def : Pat<(v2i32 (trunc (NEONvshru (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
4184 (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
4186 // Vector Multiply Operations.
4188 // VMUL : Vector Multiply (integer, polynomial and floating-point)
4189 defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
4190 IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
// Polynomial multiply uses the vmulp intrinsic on 8-bit elements only.
4191 def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
4192 "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
4193 def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
4194 "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
4195 def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
4196 v2f32, v2f32, fmul, 1>;
4197 def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
4198 v4f32, v4f32, fmul, 1>;
4199 def VMULhd : N3VD<1, 0, 0b01, 0b1101, 1, IIC_VFMULD, "vmul", "f16",
4200 v4f16, v4f16, fmul, 1>,
4201 Requires<[HasNEON,HasFullFP16]>;
4202 def VMULhq : N3VQ<1, 0, 0b01, 0b1101, 1, IIC_VFMULQ, "vmul", "f16",
4203 v8f16, v8f16, fmul, 1>,
4204 Requires<[HasNEON,HasFullFP16]>;
// Scalar (by-lane) multiply variants.
// NOTE(review): continuation lines elided at 4208 and 4212 in this copy.
4205 defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>;
4206 def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
4207 def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
4209 def VMULslhd : N3VDSL16<0b01, 0b1001, "vmul", "f16", v4f16, fmul>,
4210 Requires<[HasNEON,HasFullFP16]>;
4211 def VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16", v8f16,
4213 Requires<[HasNEON,HasFullFP16]>;
// Select the by-lane forms for a multiply whose second operand is a
// duplicated lane of a Q register: extract the containing D subregister
// and index the lane within it.
4215 def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
4216 (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
4217 (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
4218 (v4i16 (EXTRACT_SUBREG QPR:$src2,
4219 (DSubReg_i16_reg imm:$lane))),
4220 (SubReg_i16_lane imm:$lane)))>;
4221 def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
4222 (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
4223 (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
4224 (v2i32 (EXTRACT_SUBREG QPR:$src2,
4225 (DSubReg_i32_reg imm:$lane))),
4226 (SubReg_i32_lane imm:$lane)))>;
4227 def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
4228 (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
4229 (v4f32 (VMULslfq (v4f32 QPR:$src1),
4230 (v2f32 (EXTRACT_SUBREG QPR:$src2,
4231 (DSubReg_i32_reg imm:$lane))),
4232 (SubReg_i32_lane imm:$lane)))>;
// Multiply by a duplicated scalar FP register: insert the scalar into a
// vector register and use the by-lane form (result lines elided here).
4235 def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
4237 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
4239 def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
4241 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
4245 // VQDMULH : Vector Saturating Doubling Multiply Returning High Half
4246 defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
4247 IIC_VMULi16Q, IIC_VMULi32Q,
4248 "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
4249 defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
4250 IIC_VMULi16Q, IIC_VMULi32Q,
4251 "vqdmulh", "s", int_arm_neon_vqdmulh>;
// By-lane selection patterns for vqdmulh with a duplicated Q-register lane
// (the "imm:$lane)))," continuation lines are elided in this copy).
4252 def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
4253 (v8i16 (NEONvduplane (v8i16 QPR:$src2),
4255 (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
4256 (v4i16 (EXTRACT_SUBREG QPR:$src2,
4257 (DSubReg_i16_reg imm:$lane))),
4258 (SubReg_i16_lane imm:$lane)))>;
4259 def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
4260 (v4i32 (NEONvduplane (v4i32 QPR:$src2),
4262 (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
4263 (v2i32 (EXTRACT_SUBREG QPR:$src2,
4264 (DSubReg_i32_reg imm:$lane))),
4265 (SubReg_i32_lane imm:$lane)))>;
4267 // VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
4268 defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
4269 IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
4270 "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
4271 defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
4272 IIC_VMULi16Q, IIC_VMULi32Q,
4273 "vqrdmulh", "s", int_arm_neon_vqrdmulh>;
// Matching by-lane patterns for the rounding variant.
4274 def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
4275 (v8i16 (NEONvduplane (v8i16 QPR:$src2),
4277 (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
4278 (v4i16 (EXTRACT_SUBREG QPR:$src2,
4279 (DSubReg_i16_reg imm:$lane))),
4280 (SubReg_i16_lane imm:$lane)))>;
4281 def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
4282 (v4i32 (NEONvduplane (v4i32 QPR:$src2),
4284 (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
4285 (v2i32 (EXTRACT_SUBREG QPR:$src2,
4286 (DSubReg_i32_reg imm:$lane))),
4287 (SubReg_i32_lane imm:$lane)))>;
4289 // VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
// The let-block routes Thumb2 encoding fixups through the NEON data-
// processing post-encoder and places decoding in the NEONData namespace.
// NOTE(review): closing brace of this let-block is not visible in this copy.
4290 let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
4291 DecoderNamespace = "NEONData" in {
4292 defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
4293 "vmull", "s", NEONvmulls, 1>;
4294 defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
4295 "vmull", "u", NEONvmullu, 1>;
4296 def VMULLp8 : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
4297 v8i16, v8i8, int_arm_neon_vmullp, 1>;
// 64x64->128-bit polynomial multiply requires ARMv8 + Crypto extension.
4298 def VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
4299 "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>,
4300 Requires<[HasV8, HasCrypto]>;
4302 defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
4303 defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;
4305 // VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
4306 defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
4307 "vqdmull", "s", int_arm_neon_vqdmull, 1>;
4308 defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
4309 "vqdmull", "s", int_arm_neon_vqdmull>;
4311 // Vector Multiply-Accumulate and Multiply-Subtract Operations.
4313 // VMLA : Vector Multiply Accumulate (integer and floating-point)
4314 defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
4315 IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
// FP VMLA variants are only selected when separate multiply+add is allowed
// (UseFPVMLx) and fused multiply-accumulate is not preferred.
4316 def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
4317 v2f32, fmul_su, fadd_mlx>,
4318 Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
4319 def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
4320 v4f32, fmul_su, fadd_mlx>,
4321 Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
4322 def VMLAhd : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16",
4323 v4f16, fmul_su, fadd_mlx>,
4324 Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
4325 def VMLAhq : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16",
4326 v8f16, fmul_su, fadd_mlx>,
4327 Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
4328 defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
4329 IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
4330 def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
4331 v2f32, fmul_su, fadd_mlx>,
4332 Requires<[HasNEON, UseFPVMLx]>;
4333 def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
4334 v4f32, v2f32, fmul_su, fadd_mlx>,
4335 Requires<[HasNEON, UseFPVMLx]>;
4336 def VMLAslhd : N3VDMulOpSL16<0b01, 0b0001, IIC_VMACD, "vmla", "f16",
4338 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4339 def VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, IIC_VMACQ, "vmla", "f16",
4340 v8f16, v4f16, fmul, fadd>,
4341 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
// Select by-lane VMLA for add(x, mul(y, duplane(z))) DAG shapes.
4343 def : Pat<(v8i16 (add (v8i16 QPR:$src1),
4344 (mul (v8i16 QPR:$src2),
4345 (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
4346 (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
4347 (v4i16 (EXTRACT_SUBREG QPR:$src3,
4348 (DSubReg_i16_reg imm:$lane))),
4349 (SubReg_i16_lane imm:$lane)))>;
4351 def : Pat<(v4i32 (add (v4i32 QPR:$src1),
4352 (mul (v4i32 QPR:$src2),
4353 (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
4354 (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
4355 (v2i32 (EXTRACT_SUBREG QPR:$src3,
4356 (DSubReg_i32_reg imm:$lane))),
4357 (SubReg_i32_lane imm:$lane)))>;
4359 def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
4360 (fmul_su (v4f32 QPR:$src2),
4361 (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
4362 (v4f32 (VMLAslfq (v4f32 QPR:$src1),
4364 (v2f32 (EXTRACT_SUBREG QPR:$src3,
4365 (DSubReg_i32_reg imm:$lane))),
4366 (SubReg_i32_lane imm:$lane)))>,
4367 Requires<[HasNEON, UseFPVMLx]>;
4369 // VMLAL : Vector Multiply Accumulate Long (Q += D * D)
4370 defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
4371 "vmlal", "s", NEONvmulls, add>;
4372 defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
4373 "vmlal", "u", NEONvmullu, add>;
4375 defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
4376 defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;
// ARMv8.1-A VQRDMLAH: modeled as vqadds(acc, vqrdmulh(a, b)) since there is
// no dedicated intrinsic here; the patterns below match that composition.
// NOTE(review): this copy has many elided continuation lines (accumulator
// operands, lane immediates, null_frag arguments) — confirm each pattern
// against upstream before relying on the exact DAG shapes.
4378 let Predicates = [HasNEON, HasV8_1a] in {
4379 // v8.1a Neon Rounding Double Multiply-Op vector operations,
4380 // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate Long
4382 defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D,
4383 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
4385 def : Pat<(v4i16 (int_arm_neon_vqadds
4387 (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
4388 (v4i16 DPR:$Vm))))),
4389 (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
4390 def : Pat<(v2i32 (int_arm_neon_vqadds
4392 (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
4393 (v2i32 DPR:$Vm))))),
4394 (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
4395 def : Pat<(v8i16 (int_arm_neon_vqadds
4397 (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
4398 (v8i16 QPR:$Vm))))),
4399 (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
4400 def : Pat<(v4i32 (int_arm_neon_vqadds
4402 (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
4403 (v4i32 QPR:$Vm))))),
4404 (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
// Scalar (by-lane) forms and their selection patterns.
4406 defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D,
4407 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
4409 def : Pat<(v4i16 (int_arm_neon_vqadds
4411 (v4i16 (int_arm_neon_vqrdmulh
4413 (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
4415 (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm,
4417 def : Pat<(v2i32 (int_arm_neon_vqadds
4419 (v2i32 (int_arm_neon_vqrdmulh
4421 (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
4423 (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
4425 def : Pat<(v8i16 (int_arm_neon_vqadds
4427 (v8i16 (int_arm_neon_vqrdmulh
4429 (v8i16 (NEONvduplane (v8i16 QPR:$src3),
4431 (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1),
4433 (v4i16 (EXTRACT_SUBREG
4435 (DSubReg_i16_reg imm:$lane))),
4436 (SubReg_i16_lane imm:$lane)))>;
4437 def : Pat<(v4i32 (int_arm_neon_vqadds
4439 (v4i32 (int_arm_neon_vqrdmulh
4441 (v4i32 (NEONvduplane (v4i32 QPR:$src3),
4443 (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1),
4445 (v2i32 (EXTRACT_SUBREG
4447 (DSubReg_i32_reg imm:$lane))),
4448 (SubReg_i32_lane imm:$lane)))>;
4450 // VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract Long
// Subtracting counterpart of VQRDMLAH: matched as
// vqsubs(acc, vqrdmulh(a, b)). Same elision caveat as the VQRDMLAH
// patterns above — confirm exact DAG shapes against upstream.
4452 defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D,
4453 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
4455 def : Pat<(v4i16 (int_arm_neon_vqsubs
4457 (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
4458 (v4i16 DPR:$Vm))))),
4459 (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
4460 def : Pat<(v2i32 (int_arm_neon_vqsubs
4462 (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
4463 (v2i32 DPR:$Vm))))),
4464 (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
4465 def : Pat<(v8i16 (int_arm_neon_vqsubs
4467 (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
4468 (v8i16 QPR:$Vm))))),
4469 (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
4470 def : Pat<(v4i32 (int_arm_neon_vqsubs
4472 (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
4473 (v4i32 QPR:$Vm))))),
4474 (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
// Scalar (by-lane) forms and their selection patterns.
4476 defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D,
4477 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
4479 def : Pat<(v4i16 (int_arm_neon_vqsubs
4481 (v4i16 (int_arm_neon_vqrdmulh
4483 (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
4485 (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>;
4486 def : Pat<(v2i32 (int_arm_neon_vqsubs
4488 (v2i32 (int_arm_neon_vqrdmulh
4490 (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
4492 (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
4494 def : Pat<(v8i16 (int_arm_neon_vqsubs
4496 (v8i16 (int_arm_neon_vqrdmulh
4498 (v8i16 (NEONvduplane (v8i16 QPR:$src3),
4500 (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1),
4502 (v4i16 (EXTRACT_SUBREG
4504 (DSubReg_i16_reg imm:$lane))),
4505 (SubReg_i16_lane imm:$lane)))>;
4506 def : Pat<(v4i32 (int_arm_neon_vqsubs
4508 (v4i32 (int_arm_neon_vqrdmulh
4510 (v4i32 (NEONvduplane (v4i32 QPR:$src3),
4512 (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1),
4514 (v2i32 (EXTRACT_SUBREG
4516 (DSubReg_i32_reg imm:$lane))),
4517 (SubReg_i32_lane imm:$lane)))>;
4519 // VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
// Instructions use null_frag (no direct intrinsic); selection happens via
// the explicit vqadds(acc, vqdmull(...)) patterns below, including by-lane
// variants. Some continuation lines are elided in this copy.
4520 defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
4521 "vqdmlal", "s", null_frag>;
4522 defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>;
4524 def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
4525 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4526 (v4i16 DPR:$Vm))))),
4527 (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
4528 def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
4529 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4530 (v2i32 DPR:$Vm))))),
4531 (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
4532 def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
4533 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4534 (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
4536 (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
4537 def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
4538 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4539 (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
4541 (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
4543 // VMLS : Vector Multiply Subtract (integer and floating-point)
4544 defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
4545 IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
// FP forms are only selected when FP VMLx is profitable and fused MAC is
// not in use (see Requires); the f16 forms additionally need HasFullFP16.
4546 def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
4547 v2f32, fmul_su, fsub_mlx>,
4548 Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
4549 def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
4550 v4f32, fmul_su, fsub_mlx>,
4551 Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
4552 def VMLShd : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16",
4554 Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
4555 def VMLShq : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16",
4557 Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
// Lane-indexed (scalar) variants.
4558 defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
4559 IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
4560 def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
4561 v2f32, fmul_su, fsub_mlx>,
4562 Requires<[HasNEON, UseFPVMLx]>;
4563 def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
4564 v4f32, v2f32, fmul_su, fsub_mlx>,
4565 Requires<[HasNEON, UseFPVMLx]>;
4566 def VMLSslhd : N3VDMulOpSL16<0b01, 0b0101, IIC_VMACD, "vmls", "f16",
4568 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4569 def VMLSslhq : N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16",
4570 v8f16, v4f16, fmul, fsub>,
4571 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
// Match sub(acc, mul(x, lane-splat)) on Q registers: extract the D
// subregister containing the lane and use the indexed VMLSsl forms.
4573 def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
4574 (mul (v8i16 QPR:$src2),
4575 (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
4576 (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
4577 (v4i16 (EXTRACT_SUBREG QPR:$src3,
4578 (DSubReg_i16_reg imm:$lane))),
4579 (SubReg_i16_lane imm:$lane)))>;
4581 def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
4582 (mul (v4i32 QPR:$src2),
4583 (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
4584 (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
4585 (v2i32 (EXTRACT_SUBREG QPR:$src3,
4586 (DSubReg_i32_reg imm:$lane))),
4587 (SubReg_i32_lane imm:$lane)))>;
// Same folding for the f32 fused-off multiply-subtract form.
4589 def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
4590 (fmul_su (v4f32 QPR:$src2),
4591 (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
4592 (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
4593 (v2f32 (EXTRACT_SUBREG QPR:$src3,
4594 (DSubReg_i32_reg imm:$lane))),
4595 (SubReg_i32_lane imm:$lane)))>,
4596 Requires<[HasNEON, UseFPVMLx]>;
4598 // VMLSL : Vector Multiply Subtract Long (Q -= D * D)
4599 defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
4600 "vmlsl", "s", NEONvmulls, sub>;
4601 defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
4602 "vmlsl", "u", NEONvmullu, sub>;
4604 defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
4605 defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;
4607 // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
4608 defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
4609 "vqdmlsl", "s", null_frag>;
4610 defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>;
// Selection patterns: vqsubs(acc, vqdmull(a, b)) -> VQDMLSL; lane-splat
// operands map onto the indexed VQDMLSLsl variants.
4612 def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
4613 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4614 (v4i16 DPR:$Vm))))),
4615 (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
4616 def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
4617 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4618 (v2i32 DPR:$Vm))))),
4619 (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
4620 def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
4621 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4622 (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
4624 (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
4625 def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
4626 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4627 (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
4629 (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
4631 // Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
// All fused forms require VFP4 (or FullFP16 for f16) and UseFusedMAC.
4632 def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
4633 v2f32, fmul_su, fadd_mlx>,
4634 Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
4636 def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
4637 v4f32, fmul_su, fadd_mlx>,
4638 Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
4639 def VFMAhd : N3VDMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACD, "vfma", "f16",
4641 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
4643 def VFMAhq : N3VQMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACQ, "vfma", "f16",
4645 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
4647 // Fused Vector Multiply Subtract (floating-point)
4648 def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
4649 v2f32, fmul_su, fsub_mlx>,
4650 Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
4651 def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
4652 v4f32, fmul_su, fsub_mlx>,
4653 Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
4654 def VFMShd : N3VDMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACD, "vfms", "f16",
4656 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
4657 def VFMShq : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16",
4659 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
4661 // Match @llvm.fma.* intrinsics
// Note the operand rotation: the ISD fma is (Vn, Vm, acc) while the
// instruction takes the accumulator first. VFMS matches fma with a
// negated first multiplicand.
4662 def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
4663 (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
4664 Requires<[HasVFP4]>;
4665 def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
4666 (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
4667 Requires<[HasVFP4]>;
4668 def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
4669 (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
4670 Requires<[HasVFP4]>;
4671 def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
4672 (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
4673 Requires<[HasVFP4]>;
4675 // Vector Subtract Operations.
4677 // VSUB : Vector Subtract (integer and floating-point)
4678 defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
4679 "vsub", "i", sub, 0>;
4680 def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
4681 v2f32, v2f32, fsub, 0>;
4682 def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
4683 v4f32, v4f32, fsub, 0>;
4684 def VSUBhd : N3VD<0, 0, 0b11, 0b1101, 0, IIC_VBIND, "vsub", "f16",
4685 v4f16, v4f16, fsub, 0>,
4686 Requires<[HasNEON,HasFullFP16]>;
4687 def VSUBhq : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16",
4688 v8f16, v8f16, fsub, 0>,
4689 Requires<[HasNEON,HasFullFP16]>;
4690 // VSUBL : Vector Subtract Long (Q = D - D)
4691 defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
4692 "vsubl", "s", sub, sext, 0>;
4693 defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
4694 "vsubl", "u", sub, zext, 0>;
4695 // VSUBW : Vector Subtract Wide (Q = Q - D)
4696 defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
4697 defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
4698 // VHSUB : Vector Halving Subtract
4699 defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
4700 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4701 "vhsub", "s", int_arm_neon_vhsubs, 0>;
4702 defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
4703 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4704 "vhsub", "u", int_arm_neon_vhsubu, 0>;
4705 // VQSUB : Vector Saturing Subtract
4706 defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
4707 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4708 "vqsub", "s", int_arm_neon_vqsubs, 0>;
4709 defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
4710 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4711 "vqsub", "u", int_arm_neon_vqsubu, 0>;
4712 // VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
4713 defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>;
4714 // VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
4715 defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
4716 int_arm_neon_vrsubhn, 0>;
// Select trunc(lshr(sub(a, b), half-width)) directly to VSUBHN.
4718 def : Pat<(v8i8 (trunc (NEONvshru (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
4719 (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
4720 def : Pat<(v4i16 (trunc (NEONvshru (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
4721 (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
4722 def : Pat<(v2i32 (trunc (NEONvshru (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
4723 (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;
4725 // Vector Comparisons.
// FP compares use integer result element types (e.g. v2i32 result for a
// v2f32 compare) since lanes are all-ones/all-zero masks.
4727 // VCEQ : Vector Compare Equal
4728 defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
4729 IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>;
4730 def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
4732 def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
4734 def VCEQhd : N3VD<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16,
4736 Requires<[HasNEON, HasFullFP16]>;
4737 def VCEQhq : N3VQ<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16,
4739 Requires<[HasNEON, HasFullFP16]>;
4741 let TwoOperandAliasConstraint = "$Vm = $Vd" in
4742 defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
4743 "$Vd, $Vm, #0", NEONvceqz>;
4745 // VCGE : Vector Compare Greater Than or Equal
4746 defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
4747 IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
4748 defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
4749 IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
4750 def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
4752 def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
4754 def VCGEhd : N3VD<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16,
4756 Requires<[HasNEON, HasFullFP16]>;
4757 def VCGEhq : N3VQ<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16,
4759 Requires<[HasNEON, HasFullFP16]>;
4761 let TwoOperandAliasConstraint = "$Vm = $Vd" in {
4762 defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
4763 "$Vd, $Vm, #0", NEONvcgez>;
4764 defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
4765 "$Vd, $Vm, #0", NEONvclez>;
4768 // VCGT : Vector Compare Greater Than
4769 defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
4770 IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>;
4771 defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
4772 IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>;
4773 def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
4775 def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
4777 def VCGThd : N3VD<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16,
4779 Requires<[HasNEON, HasFullFP16]>;
4780 def VCGThq : N3VQ<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16,
4782 Requires<[HasNEON, HasFullFP16]>;
4784 let TwoOperandAliasConstraint = "$Vm = $Vd" in {
4785 defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
4786 "$Vd, $Vm, #0", NEONvcgtz>;
4787 defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
4788 "$Vd, $Vm, #0", NEONvcltz>;
4791 // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
// Like the other compares, results are integer mask vectors, so the
// destination element type is integer (v2i32/v4i32/v4i16/v8i16).
4792 def VACGEfd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
4793 "f32", v2i32, v2f32, int_arm_neon_vacge, 0>;
4794 def VACGEfq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
4795 "f32", v4i32, v4f32, int_arm_neon_vacge, 0>;
4796 def VACGEhd : N3VDInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
4797 "f16", v4i16, v4f16, int_arm_neon_vacge, 0>,
4798 Requires<[HasNEON, HasFullFP16]>;
4799 def VACGEhq : N3VQInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
4800 "f16", v8i16, v8f16, int_arm_neon_vacge, 0>,
4801 Requires<[HasNEON, HasFullFP16]>;
4802 // VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
4803 def VACGTfd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
4804 "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>;
4805 def VACGTfq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
4806 "f32", v4i32, v4f32, int_arm_neon_vacgt, 0>;
4807 def VACGThd : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
4808 "f16", v4i16, v4f16, int_arm_neon_vacgt, 0>,
4809 Requires<[HasNEON, HasFullFP16]>;
4810 def VACGThq : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
// Compare instructions produce integer mask vectors: the destination type
// must be v8i16 (was v8f16), matching VACGEhq and the D-register VACGThd.
4811 "f16", v8i16, v8f16, int_arm_neon_vacgt, 0>,
4812 Requires<[HasNEON, HasFullFP16]>;
4813 // VTST : Vector Test Bits
4814 defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
4815 IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
// vaclt/vacle are assembler-only aliases for vacgt/vacge with the source
// operands swapped (note $Vm and $Vn are exchanged in the result).
4817 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
4818 (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
4819 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
4820 (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
4821 def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
4822 (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
4823 def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
4824 (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
4825 let Predicates = [HasNEON, HasFullFP16] in {
4826 def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
4827 (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
4828 def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
4829 (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
4830 def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
4831 (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
4832 def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
4833 (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
// Two-operand alias forms: "$Vd, $Vm" with $Vd reused as the first source.
4836 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
4837 (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
4838 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
4839 (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
4840 def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
4841 (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
4842 def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
4843 (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
4844 let Predicates = [HasNEON, HasFullFP16] in {
4845 def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
4846 (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
4847 def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
4848 (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
4849 def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
4850 (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
4851 def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
4852 (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
4855 // Vector Bitwise Operations.
// Bitwise NOT expressed as xor with an all-ones vector; used by VBIC/VORN/
// VMVN/VBSL patterns below.
4857 def vnotd : PatFrag<(ops node:$in),
4858 (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
4859 def vnotq : PatFrag<(ops node:$in),
4860 (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;
// Bitwise ops are element-size agnostic; they are modeled on v2i32/v4i32.
4863 // VAND : Vector Bitwise AND
4864 def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
4865 v2i32, v2i32, and, 1>;
4866 def VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
4867 v4i32, v4i32, and, 1>;
4869 // VEOR : Vector Bitwise Exclusive OR
4870 def VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
4871 v2i32, v2i32, xor, 1>;
4872 def VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
4873 v4i32, v4i32, xor, 1>;
4875 // VORR : Vector Bitwise OR
4876 def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
4877 v2i32, v2i32, or, 1>;
4878 def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
4879 v4i32, v4i32, or, 1>;
// VORR with a modified-immediate splat; SIMM bits feed the cmode encoding.
4881 def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
4882 (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
4884 "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
4886 (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
4887 let Inst{9} = SIMM{9};
4890 def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
4891 (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
4893 "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
4895 (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
4896 let Inst{10-9} = SIMM{10-9};
4899 def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
4900 (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
4902 "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
4904 (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
4905 let Inst{9} = SIMM{9};
4908 def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
4909 (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
4911 "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
4913 (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
4914 let Inst{10-9} = SIMM{10-9};
4918 // VBIC : Vector Bitwise Bit Clear (AND NOT)
// Modeled as and(Vn, not(Vm)) using the vnotd/vnotq fragments above.
4919 let TwoOperandAliasConstraint = "$Vn = $Vd" in {
4920 def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
4921 (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
4922 "vbic", "$Vd, $Vn, $Vm", "",
4923 [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
4924 (vnotd DPR:$Vm))))]>;
4925 def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
4926 (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
4927 "vbic", "$Vd, $Vn, $Vm", "",
4928 [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
4929 (vnotq QPR:$Vm))))]>;
// VBIC with a modified-immediate splat; SIMM bits feed the cmode encoding.
4932 def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
4933 (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
4935 "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
4937 (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
4938 let Inst{9} = SIMM{9};
4941 def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
4942 (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
4944 "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
4946 (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
4947 let Inst{10-9} = SIMM{10-9};
4950 def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
4951 (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
4953 "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
4955 (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
4956 let Inst{9} = SIMM{9};
4959 def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
4960 (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
4962 "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
4964 (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
4965 let Inst{10-9} = SIMM{10-9};
4968 // VORN : Vector Bitwise OR NOT
// Modeled as or(Vn, not(Vm)).
4969 def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
4970 (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
4971 "vorn", "$Vd, $Vn, $Vm", "",
4972 [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
4973 (vnotd DPR:$Vm))))]>;
4974 def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
4975 (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
4976 "vorn", "$Vd, $Vn, $Vm", "",
4977 [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
4978 (vnotq QPR:$Vm))))]>;
4980 // VMVN : Vector Bitwise NOT (Immediate)
// The immediate forms materialize a constant, so they are rematerializable.
4982 let isReMaterializable = 1 in {
4984 def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
4985 (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
4986 "vmvn", "i16", "$Vd, $SIMM", "",
4987 [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
4988 let Inst{9} = SIMM{9};
4991 def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
4992 (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
4993 "vmvn", "i16", "$Vd, $SIMM", "",
4994 [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
4995 let Inst{9} = SIMM{9};
4998 def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
4999 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
5000 "vmvn", "i32", "$Vd, $SIMM", "",
5001 [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
5002 let Inst{11-8} = SIMM{11-8};
5005 def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
5006 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
5007 "vmvn", "i32", "$Vd, $SIMM", "",
5008 [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
5009 let Inst{11-8} = SIMM{11-8};
5013 // VMVN : Vector Bitwise NOT
5014 def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
5015 (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
5016 "vmvn", "$Vd, $Vm", "",
5017 [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
5018 def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
5019 (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
5020 "vmvn", "$Vd, $Vm", "",
5021 [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
5022 def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
5023 def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
5025 // VBSL : Vector Bitwise Select
// The intrinsic is defined for all element types; the extra Pat defs below
// map each type onto the single v2i32/v4i32-modeled instruction.
5026 def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
5027 (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5028 N3RegFrm, IIC_VCNTiD,
5029 "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5031 (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
5032 def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
5033 (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
5034 (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
5035 Requires<[HasNEON]>;
5036 def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
5037 (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
5038 (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
5039 Requires<[HasNEON]>;
5040 def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
5041 (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
5042 (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
5043 Requires<[HasNEON]>;
5044 def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
5045 (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
5046 (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
5047 Requires<[HasNEON]>;
5048 def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
5049 (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
5050 (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
5051 Requires<[HasNEON]>;
// Recognize the open-coded select: (Vn & Vd) | (Vm & ~Vd) -> VBSL.
5053 def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
5054 (and DPR:$Vm, (vnotd DPR:$Vd)))),
5055 (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
5056 Requires<[HasNEON]>;
5058 def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
5059 (and DPR:$Vm, (vnotd DPR:$Vd)))),
5060 (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
5061 Requires<[HasNEON]>;
5063 def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
5064 (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
5065 N3RegFrm, IIC_VCNTiQ,
5066 "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5068 (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
5070 def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
5071 (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
5072 (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
5073 Requires<[HasNEON]>;
5074 def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
5075 (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
5076 (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
5077 Requires<[HasNEON]>;
5078 def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
5079 (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
5080 (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
5081 Requires<[HasNEON]>;
5082 def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
5083 (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
5084 (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
5085 Requires<[HasNEON]>;
5086 def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
5087 (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
5088 (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
5089 Requires<[HasNEON]>;
5091 def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
5092 (and QPR:$Vm, (vnotq QPR:$Vd)))),
5093 (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
5094 Requires<[HasNEON]>;
5095 def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
5096 (and QPR:$Vm, (vnotq QPR:$Vd)))),
5097 (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
5098 Requires<[HasNEON]>;
5100 // VBIF : Vector Bitwise Insert if False
5101 // like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
5102 // FIXME: This instruction's encoding MAY NOT BE correct.
// Assembler/disassembler only: no selection patterns are attached here.
5103 def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
5104 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5105 N3RegFrm, IIC_VBINiD,
5106 "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5108 def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,
5109 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
5110 N3RegFrm, IIC_VBINiQ,
5111 "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5114 // VBIT : Vector Bitwise Insert if True
5115 // like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
5116 // FIXME: This instruction's encoding MAY NOT BE correct.
5117 def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
5118 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5119 N3RegFrm, IIC_VBINiD,
5120 "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5122 def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
5123 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
5124 N3RegFrm, IIC_VBINiQ,
5125 "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5128 // VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking
5129 // for equivalent operations with different register constraints; it just
5132 // Vector Absolute Differences.
5134 // VABD : Vector Absolute Difference
5135 defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
5136 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5137 "vabd", "s", int_arm_neon_vabds, 1>;
5138 defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
5139 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5140 "vabd", "u", int_arm_neon_vabdu, 1>;
5141 def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
5142 "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
5143 def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
5144 "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
5145 def VABDhd : N3VDInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBIND,
5146 "vabd", "f16", v4f16, v4f16, int_arm_neon_vabds, 1>,
5147 Requires<[HasNEON, HasFullFP16]>;
5148 def VABDhq : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ,
5149 "vabd", "f16", v8f16, v8f16, int_arm_neon_vabds, 1>,
5150 Requires<[HasNEON, HasFullFP16]>;
5152 // VABDL : Vector Absolute Difference Long (Q = | D - D |)
// zext of the D-sized result is correct for both signed and unsigned
// variants since an absolute difference is non-negative.
5153 defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
5154 "vabdl", "s", int_arm_neon_vabds, zext, 1>;
5155 defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
5156 "vabdl", "u", int_arm_neon_vabdu, zext, 1>;
// abd_shr: arithmetic shift of the widened difference; the patterns below
// recognize the xor/add expansion of |a - b| and select VABDLu directly.
5159 PatFrag<(ops node:$in1, node:$in2, node:$shift),
5160 (NEONvshrs (sub (zext node:$in1),
5161 (zext node:$in2)), (i32 $shift))>;
5163 def : Pat<(xor (v4i32 (bitconvert (v8i16 (abd_shr (v8i8 DPR:$opA), (v8i8 DPR:$opB), 15)))),
5164 (v4i32 (bitconvert (v8i16 (add (sub (zext (v8i8 DPR:$opA)),
5165 (zext (v8i8 DPR:$opB))),
5166 (v8i16 (abd_shr (v8i8 DPR:$opA), (v8i8 DPR:$opB), 15))))))),
5167 (VABDLuv8i16 DPR:$opA, DPR:$opB)>;
5169 def : Pat<(xor (v4i32 (abd_shr (v4i16 DPR:$opA), (v4i16 DPR:$opB), 31)),
5170 (v4i32 (add (sub (zext (v4i16 DPR:$opA)),
5171 (zext (v4i16 DPR:$opB))),
5172 (abd_shr (v4i16 DPR:$opA), (v4i16 DPR:$opB), 31)))),
5173 (VABDLuv4i32 DPR:$opA, DPR:$opB)>;
5175 def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))),
5176 (v4i32 (bitconvert (v2i64 (add (sub (zext (v2i32 DPR:$opA)),
5177 (zext (v2i32 DPR:$opB))),
5178 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))))),
5179 (VABDLuv2i64 DPR:$opA, DPR:$opB)>;
5181 // VABA : Vector Absolute Difference and Accumulate
5182 defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
5183 "vaba", "s", int_arm_neon_vabds, add>;
5184 defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
5185 "vaba", "u", int_arm_neon_vabdu, add>;
5187 // VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
5188 defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
5189 "vabal", "s", int_arm_neon_vabds, zext, add>;
5190 defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
5191 "vabal", "u", int_arm_neon_vabdu, zext, add>;
5193 // Vector Maximum and Minimum.
5195 // VMAX : Vector Maximum
5196 defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
5197 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5198 "vmax", "s", smax, 1>;
5199 defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
5200 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5201 "vmax", "u", umax, 1>;
5202 def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
5204 v2f32, v2f32, fmaxnan, 1>;
5205 def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
5207 v4f32, v4f32, fmaxnan, 1>;
5208 def VMAXhd : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND,
5210 v4f16, v4f16, fmaxnan, 1>,
5211 Requires<[HasNEON, HasFullFP16]>;
5212 def VMAXhq : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
5214 v8f16, v8f16, fmaxnan, 1>,
5215 Requires<[HasNEON, HasFullFP16]>;
// VMAXNM/VMINNM are ARMv8-only; they need the Thumb2 v8 post-encoder and
// their own decoder namespace.
5218 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
5219 def VMAXNMNDf : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
5220 N3RegFrm, NoItinerary, "vmaxnm", "f32",
5221 v2f32, v2f32, fmaxnum, 1>,
5222 Requires<[HasV8, HasNEON]>;
5223 def VMAXNMNQf : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
5224 N3RegFrm, NoItinerary, "vmaxnm", "f32",
5225 v4f32, v4f32, fmaxnum, 1>,
5226 Requires<[HasV8, HasNEON]>;
5227 def VMAXNMNDh : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1,
5228 N3RegFrm, NoItinerary, "vmaxnm", "f16",
5229 v4f16, v4f16, fmaxnum, 1>,
5230 Requires<[HasV8, HasNEON, HasFullFP16]>;
5231 def VMAXNMNQh : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1,
5232 N3RegFrm, NoItinerary, "vmaxnm", "f16",
5233 v8f16, v8f16, fmaxnum, 1>,
5234 Requires<[HasV8, HasNEON, HasFullFP16]>;
5237 // VMIN : Vector Minimum
5238 defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
5239 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5240 "vmin", "s", smin, 1>;
5241 defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
5242 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5243 "vmin", "u", umin, 1>;
5244 def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
5246 v2f32, v2f32, fminnan, 1>;
5247 def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
5249 v4f32, v4f32, fminnan, 1>;
5250 def VMINhd : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND,
5252 v4f16, v4f16, fminnan, 1>,
5253 Requires<[HasNEON, HasFullFP16]>;
5254 def VMINhq : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ,
5256 v8f16, v8f16, fminnan, 1>,
5257 Requires<[HasNEON, HasFullFP16]>;
5260 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
5261 def VMINNMNDf : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
5262 N3RegFrm, NoItinerary, "vminnm", "f32",
5263 v2f32, v2f32, fminnum, 1>,
5264 Requires<[HasV8, HasNEON]>;
5265 def VMINNMNQf : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
5266 N3RegFrm, NoItinerary, "vminnm", "f32",
5267 v4f32, v4f32, fminnum, 1>,
5268 Requires<[HasV8, HasNEON]>;
5269 def VMINNMNDh : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1,
5270 N3RegFrm, NoItinerary, "vminnm", "f16",
5271 v4f16, v4f16, fminnum, 1>,
5272 Requires<[HasV8, HasNEON, HasFullFP16]>;
5273 def VMINNMNQh : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1,
5274 N3RegFrm, NoItinerary, "vminnm", "f16",
5275 v8f16, v8f16, fminnum, 1>,
5276 Requires<[HasV8, HasNEON, HasFullFP16]>;
5279 // Vector Pairwise Operations.
// Pairwise operations only exist on D registers.
5281 // VPADD : Vector Pairwise Add
5282 def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
5284 v8i8, v8i8, int_arm_neon_vpadd, 0>;
5285 def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
5287 v4i16, v4i16, int_arm_neon_vpadd, 0>;
5288 def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
5290 v2i32, v2i32, int_arm_neon_vpadd, 0>;
5291 def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
5292 IIC_VPBIND, "vpadd", "f32",
5293 v2f32, v2f32, int_arm_neon_vpadd, 0>;
5294 def VPADDh : N3VDInt<1, 0, 0b01, 0b1101, 0, N3RegFrm,
5295 IIC_VPBIND, "vpadd", "f16",
5296 v4f16, v4f16, int_arm_neon_vpadd, 0>,
5297 Requires<[HasNEON, HasFullFP16]>;
5299 // VPADDL : Vector Pairwise Add Long
5300 defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
5301 int_arm_neon_vpaddls>;
5302 defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
5303 int_arm_neon_vpaddlu>;
5305 // VPADAL : Vector Pairwise Add and Accumulate Long
5306 defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
5307 int_arm_neon_vpadals>;
5308 defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
5309 int_arm_neon_vpadalu>;
5311 // VPMAX : Vector Pairwise Maximum
5312 def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5313 "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
5314 def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5315 "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
5316 def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5317 "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
5318 def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5319 "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
5320 def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5321 "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
5322 def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5323 "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
5324 def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
5325 "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
5326 def VPMAXh : N3VDInt<1, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
5327 "f16", v4f16, v4f16, int_arm_neon_vpmaxs, 0>,
5328 Requires<[HasNEON, HasFullFP16]>;
5330 // VPMIN : Vector Pairwise Minimum
5331 def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5332 "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
5333 def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5334 "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
5335 def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5336 "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
5337 def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5338 "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
5339 def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5340 "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
5341 def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5342 "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
5343 def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
5344 "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
5345 def VPMINh : N3VDInt<1, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
5346 "f16", v4f16, v4f16, int_arm_neon_vpmins, 0>,
5347 Requires<[HasNEON, HasFullFP16]>;
5349 // Vector Reciprocal and Reciprocal Square Root Estimate and Step.
// Estimate instructions (VRECPE/VRSQRTE) exist in both integer (u32) and
// floating-point forms; the step instructions (VRECPS/VRSQRTS) are FP-only
// and are commutative (last template argument is 1).
5351 // VRECPE : Vector Reciprocal Estimate
5352 def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
5353 IIC_VUNAD, "vrecpe", "u32",
5354 v2i32, v2i32, int_arm_neon_vrecpe>;
5355 def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
5356 IIC_VUNAQ, "vrecpe", "u32",
5357 v4i32, v4i32, int_arm_neon_vrecpe>;
5358 def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
5359 IIC_VUNAD, "vrecpe", "f32",
5360 v2f32, v2f32, int_arm_neon_vrecpe>;
5361 def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
5362 IIC_VUNAQ, "vrecpe", "f32",
5363 v4f32, v4f32, int_arm_neon_vrecpe>;
5364 def VRECPEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
5365 IIC_VUNAD, "vrecpe", "f16",
5366 v4f16, v4f16, int_arm_neon_vrecpe>,
5367 Requires<[HasNEON, HasFullFP16]>;
5368 def VRECPEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
5369 IIC_VUNAQ, "vrecpe", "f16",
5370 v8f16, v8f16, int_arm_neon_vrecpe>,
5371 Requires<[HasNEON, HasFullFP16]>;
5373 // VRECPS : Vector Reciprocal Step
5374 def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
5375 IIC_VRECSD, "vrecps", "f32",
5376 v2f32, v2f32, int_arm_neon_vrecps, 1>;
5377 def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
5378 IIC_VRECSQ, "vrecps", "f32",
5379 v4f32, v4f32, int_arm_neon_vrecps, 1>;
5380 def VRECPShd : N3VDInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
5381 IIC_VRECSD, "vrecps", "f16",
5382 v4f16, v4f16, int_arm_neon_vrecps, 1>,
5383 Requires<[HasNEON, HasFullFP16]>;
5384 def VRECPShq : N3VQInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
5385 IIC_VRECSQ, "vrecps", "f16",
5386 v8f16, v8f16, int_arm_neon_vrecps, 1>,
5387 Requires<[HasNEON, HasFullFP16]>;
5389 // VRSQRTE : Vector Reciprocal Square Root Estimate
// Same encoding layout as VRECPE but with op4..0 = 0b01001 / 0b01011.
5390 def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
5391 IIC_VUNAD, "vrsqrte", "u32",
5392 v2i32, v2i32, int_arm_neon_vrsqrte>;
5393 def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
5394 IIC_VUNAQ, "vrsqrte", "u32",
5395 v4i32, v4i32, int_arm_neon_vrsqrte>;
5396 def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
5397 IIC_VUNAD, "vrsqrte", "f32",
5398 v2f32, v2f32, int_arm_neon_vrsqrte>;
5399 def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
5400 IIC_VUNAQ, "vrsqrte", "f32",
5401 v4f32, v4f32, int_arm_neon_vrsqrte>;
5402 def VRSQRTEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
5403 IIC_VUNAD, "vrsqrte", "f16",
5404 v4f16, v4f16, int_arm_neon_vrsqrte>,
5405 Requires<[HasNEON, HasFullFP16]>;
5406 def VRSQRTEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
5407 IIC_VUNAQ, "vrsqrte", "f16",
5408 v8f16, v8f16, int_arm_neon_vrsqrte>,
5409 Requires<[HasNEON, HasFullFP16]>;
5411 // VRSQRTS : Vector Reciprocal Square Root Step
5412 def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
5413 IIC_VRECSD, "vrsqrts", "f32",
5414 v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
5415 def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
5416 IIC_VRECSQ, "vrsqrts", "f32",
5417 v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
5418 def VRSQRTShd : N3VDInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
5419 IIC_VRECSD, "vrsqrts", "f16",
5420 v4f16, v4f16, int_arm_neon_vrsqrts, 1>,
5421 Requires<[HasNEON, HasFullFP16]>;
5422 def VRSQRTShq : N3VQInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
5423 IIC_VRECSQ, "vrsqrts", "f16",
5424 v8f16, v8f16, int_arm_neon_vrsqrts, 1>,
5425 Requires<[HasNEON, HasFullFP16]>;
5429 // VSHL : Vector Shift
// Register-controlled shifts (shift amount comes from a vector register).
5430 defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
5431 IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
5432 "vshl", "s", int_arm_neon_vshifts>;
5433 defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
5434 IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
5435 "vshl", "u", int_arm_neon_vshiftu>;
5437 // VSHL : Vector Shift Left (Immediate)
5438 defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;
5440 // VSHR : Vector Shift Right (Immediate)
5441 defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
5443 defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
5446 // VSHLL : Vector Shift Left Long
// Lengthening shifts: the operand is sign- or zero-extended before shifting,
// as expressed by the sext/zext wrapped inside the PatFrags below.
5447 defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s",
5448 PatFrag<(ops node:$LHS, node:$RHS), (NEONvshl (sext node:$LHS), node:$RHS)>>;
5449 defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u",
5450 PatFrag<(ops node:$LHS, node:$RHS), (NEONvshl (zext node:$LHS), node:$RHS)>>;
5452 // VSHLL : Vector Shift Left Long (with maximum shift count)
// Separate encoding for the shift == element-size case; op21_16 fully fixes
// the immediate field, so the instruction carries no selectable pattern
// (null_frag) and is matched only by the explicit Pats below.
5453 class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
5454 bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
5455 ValueType OpTy, Operand ImmTy>
5456 : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
5457 ResTy, OpTy, ImmTy, null_frag> {
5458 let Inst{21-16} = op21_16;
5459 let DecoderMethod = "DecodeVSHLMaxInstruction";
5461 def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
5463 def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
5464 v4i32, v4i16, imm16>;
5465 def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
5466 v2i64, v2i32, imm32>;
// Select the max-shift VSHLL encodings for shift-by-element-size, for both
// zero- and sign-extended operands (result bits are identical either way).
5468 def : Pat<(v8i16 (NEONvshl (zext (v8i8 DPR:$Rn)), (i32 8))),
5469 (VSHLLi8 DPR:$Rn, 8)>;
5470 def : Pat<(v4i32 (NEONvshl (zext (v4i16 DPR:$Rn)), (i32 16))),
5471 (VSHLLi16 DPR:$Rn, 16)>;
5472 def : Pat<(v2i64 (NEONvshl (zext (v2i32 DPR:$Rn)), (i32 32))),
5473 (VSHLLi32 DPR:$Rn, 32)>;
5474 def : Pat<(v8i16 (NEONvshl (sext (v8i8 DPR:$Rn)), (i32 8))),
5475 (VSHLLi8 DPR:$Rn, 8)>;
5476 def : Pat<(v4i32 (NEONvshl (sext (v4i16 DPR:$Rn)), (i32 16))),
5477 (VSHLLi16 DPR:$Rn, 16)>;
5478 def : Pat<(v2i64 (NEONvshl (sext (v2i32 DPR:$Rn)), (i32 32))),
5479 (VSHLLi32 DPR:$Rn, 32)>;
5481 // VSHRN : Vector Shift Right and Narrow
// Canonical form uses the signed shift; the extra Pats below also match the
// unsigned shift, since truncation makes the shift sign irrelevant here.
5482 defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
5483 PatFrag<(ops node:$Rn, node:$amt),
5484 (trunc (NEONvshrs node:$Rn, node:$amt))>>;
5486 def : Pat<(v8i8 (trunc (NEONvshru (v8i16 QPR:$Vn), shr_imm8:$amt))),
5487 (VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>;
5488 def : Pat<(v4i16 (trunc (NEONvshru (v4i32 QPR:$Vn), shr_imm16:$amt))),
5489 (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>;
5490 def : Pat<(v2i32 (trunc (NEONvshru (v2i64 QPR:$Vn), shr_imm32:$amt))),
5491 (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>;
5493 // VRSHL : Vector Rounding Shift
5494 defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
5495 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5496 "vrshl", "s", int_arm_neon_vrshifts>;
5497 defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
5498 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5499 "vrshl", "u", int_arm_neon_vrshiftu>;
5500 // VRSHR : Vector Rounding Shift Right
5501 defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
5503 defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
5506 // VRSHRN : Vector Rounding Shift Right and Narrow
5507 defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
5510 // VQSHL : Vector Saturating Shift
5511 defm VQSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
5512 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5513 "vqshl", "s", int_arm_neon_vqshifts>;
5514 defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
5515 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5516 "vqshl", "u", int_arm_neon_vqshiftu>;
5517 // VQSHL : Vector Saturating Shift Left (Immediate)
5518 defm VQSHLsi : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>;
5519 defm VQSHLui : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>;
5521 // VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
// Signed input, unsigned saturated result — hence the "s" type suffix.
5522 defm VQSHLsu : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>;
5524 // VQSHRN : Vector Saturating Shift Right and Narrow
5525 defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
5527 defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
5530 // VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
5531 defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
5534 // VQRSHL : Vector Saturating Rounding Shift
5535 defm VQRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
5536 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5537 "vqrshl", "s", int_arm_neon_vqrshifts>;
5538 defm VQRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
5539 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5540 "vqrshl", "u", int_arm_neon_vqrshiftu>;
5542 // VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
5543 defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
5545 defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
5548 // VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
5549 defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
5552 // VSRA : Vector Shift Right and Accumulate
5553 defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
5554 defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
5555 // VRSRA : Vector Rounding Shift Right and Accumulate
5556 defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
5557 defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;
5559 // VSLI : Vector Shift Left and Insert
5560 defm VSLI : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;
5562 // VSRI : Vector Shift Right and Insert
5563 defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;
5565 // Vector Absolute and Saturating Absolute.
5567 // VABS : Vector Absolute Value
5568 defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
5569 IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", abs>;
5570 def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
5572 v2f32, v2f32, fabs>;
5573 def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
5575 v4f32, v4f32, fabs>;
5576 def VABShd : N2VD<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
5578 v4f16, v4f16, fabs>,
5579 Requires<[HasNEON, HasFullFP16]>;
5580 def VABShq : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
5582 v8f16, v8f16, fabs>,
5583 Requires<[HasNEON, HasFullFP16]>;
5585 // VQABS : Vector Saturating Absolute Value
5586 defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
5587 IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
5588 int_arm_neon_vqabs>;
// vnegd/vnegq match integer negation expressed as (0 - x), with the
// all-zeros vector bitconverted to the matching element type.
5592 def vnegd : PatFrag<(ops node:$in),
5593 (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
5594 def vnegq : PatFrag<(ops node:$in),
5595 (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;
// Integer VNEG templates for 64-bit (D) and 128-bit (Q) registers; the
// element size is the only varying encoding field.
5597 class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
5598 : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
5599 IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
5600 [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
5601 class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
5602 : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
5603 IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
5604 [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;
5606 // VNEG : Vector Negate (integer)
5607 def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>;
5608 def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
5609 def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
5610 def VNEGs8q : VNEGQ<0b00, "vneg", "s8", v16i8>;
5611 def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
5612 def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;
5614 // VNEG : Vector Negate (floating-point)
5615 def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
5616 (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
5617 "vneg", "f32", "$Vd, $Vm", "",
5618 [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
5619 def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
5620 (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
5621 "vneg", "f32", "$Vd, $Vm", "",
5622 [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
5623 def VNEGhd : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 0, 0,
5624 (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
5625 "vneg", "f16", "$Vd, $Vm", "",
5626 [(set DPR:$Vd, (v4f16 (fneg DPR:$Vm)))]>,
5627 Requires<[HasNEON, HasFullFP16]>;
5628 def VNEGhq : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0,
5629 (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
5630 "vneg", "f16", "$Vd, $Vm", "",
5631 [(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>,
5632 Requires<[HasNEON, HasFullFP16]>;
// Redundant-looking Pats: select each element width's VNEG for any vnegd/vnegq
// match, regardless of which typed def carried the pattern.
5634 def : Pat<(v8i8 (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
5635 def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
5636 def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>;
5637 def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
5638 def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
5639 def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
5641 // VQNEG : Vector Saturating Negate
5642 defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
5643 IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
5644 int_arm_neon_vqneg>;
5646 // Vector Bit Counting Operations.
5648 // VCLS : Vector Count Leading Sign Bits
5649 defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
5650 IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
5652 // VCLZ : Vector Count Leading Zeros
5653 defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
5654 IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
5656 // VCNT : Vector Count One Bits
// Population count per byte; selected directly from the generic ctpop node.
5657 def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
5658 IIC_VCNTiD, "vcnt", "8",
5660 def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
5661 IIC_VCNTiQ, "vcnt", "8",
5662 v16i8, v16i8, ctpop>;
// VSWP exchanges two registers in place; both operands are tied
// ($in1 = $Vd, $in2 = $Vm) and there is no selection pattern.
5665 def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
5666 (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
5667 NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
5669 def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
5670 (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
5671 NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
5674 // Vector Move Operations.
5676 // VMOV : Vector Move (Register)
// Register-to-register vmov is an assembler alias for VORR with identical
// source operands.
5677 def : NEONInstAlias<"vmov${p} $Vd, $Vm",
5678 (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
5679 def : NEONInstAlias<"vmov${p} $Vd, $Vm",
5680 (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
5682 // VMOV : Vector Move (Immediate)
5684 // Although VMOVs are not strictly speaking cheap, they are as expensive
5685 // as their copies counterpart (VORR), so we should prefer rematerialization
5686 // over splitting when it applies.
5687 let isReMaterializable = 1, isAsCheapAsAMove=1 in {
5688 def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
5689 (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
5690 "vmov", "i8", "$Vd, $SIMM", "",
5691 [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
5692 def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
5693 (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
5694 "vmov", "i8", "$Vd, $SIMM", "",
5695 [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;
// i16 forms leave one cmode bit (?) open; it is filled from the encoded
// immediate via the Inst{9} override below.
5697 def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
5698 (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
5699 "vmov", "i16", "$Vd, $SIMM", "",
5700 [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
5701 let Inst{9} = SIMM{9};
5704 def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
5705 (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
5706 "vmov", "i16", "$Vd, $SIMM", "",
5707 [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
5708 let Inst{9} = SIMM{9};
// i32 forms leave the whole cmode field open; filled from SIMM{11-8}.
5711 def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
5712 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
5713 "vmov", "i32", "$Vd, $SIMM", "",
5714 [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
5715 let Inst{11-8} = SIMM{11-8};
5718 def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
5719 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
5720 "vmov", "i32", "$Vd, $SIMM", "",
5721 [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
5722 let Inst{11-8} = SIMM{11-8};
5725 def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
5726 (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
5727 "vmov", "i64", "$Vd, $SIMM", "",
5728 [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
5729 def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
5730 (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
5731 "vmov", "i64", "$Vd, $SIMM", "",
5732 [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;
5734 def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
5735 (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
5736 "vmov", "f32", "$Vd, $SIMM", "",
5737 [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>;
5738 def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
5739 (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
5740 "vmov", "f32", "$Vd, $SIMM", "",
5741 [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
5742 } // isReMaterializable, isAsCheapAsAMove
5744 // Add support for bytes replication feature, so it could be GAS compatible.
5745 // E.g. instructions below:
5746 // "vmov.i32 d0, 0xffffffff"
5747 // "vmov.i32 d0, 0xabababab"
5748 // "vmov.i16 d0, 0xabab"
5749 // are incorrect, but we could deal with such cases.
5750 // For last two instructions, for example, it should emit:
5751 // "vmov.i8 d0, 0xab"
5752 def : NEONInstAlias<"vmov${p}.i16 $Vd, $Vm",
5753 (VMOVv8i8 DPR:$Vd, nImmVMOVI16ByteReplicate:$Vm, pred:$p)>;
5754 def : NEONInstAlias<"vmov${p}.i32 $Vd, $Vm",
5755 (VMOVv8i8 DPR:$Vd, nImmVMOVI32ByteReplicate:$Vm, pred:$p)>;
5756 def : NEONInstAlias<"vmov${p}.i16 $Vd, $Vm",
5757 (VMOVv16i8 QPR:$Vd, nImmVMOVI16ByteReplicate:$Vm, pred:$p)>;
5758 def : NEONInstAlias<"vmov${p}.i32 $Vd, $Vm",
5759 (VMOVv16i8 QPR:$Vd, nImmVMOVI32ByteReplicate:$Vm, pred:$p)>;
5761 // Also add same support for VMVN instructions. So instruction:
5762 // "vmvn.i32 d0, 0xabababab"
5764 // "vmov.i8 d0, 0x54"
5765 def : NEONInstAlias<"vmvn${p}.i16 $Vd, $Vm",
5766 (VMOVv8i8 DPR:$Vd, nImmVMVNI16ByteReplicate:$Vm, pred:$p)>;
5767 def : NEONInstAlias<"vmvn${p}.i32 $Vd, $Vm",
5768 (VMOVv8i8 DPR:$Vd, nImmVMVNI32ByteReplicate:$Vm, pred:$p)>;
5769 def : NEONInstAlias<"vmvn${p}.i16 $Vd, $Vm",
5770 (VMOVv16i8 QPR:$Vd, nImmVMVNI16ByteReplicate:$Vm, pred:$p)>;
5771 def : NEONInstAlias<"vmvn${p}.i32 $Vd, $Vm",
5772 (VMOVv16i8 QPR:$Vd, nImmVMVNI32ByteReplicate:$Vm, pred:$p)>;
5774 // On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0"
5775 // require zero cycles to execute so they should be used wherever possible for
5776 // setting a register to zero.
5778 // Even without these pseudo-insts we would probably end up with the correct
5779 // instruction, but we could not mark the general ones with "isAsCheapAsAMove"
5780 // since they are sometimes rather expensive (in general).
5782 let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
5783 def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm,
5784 [(set DPR:$Vd, (v2i32 NEONimmAllZerosV))],
5785 (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>,
5787 def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm,
5788 [(set QPR:$Vd, (v4i32 NEONimmAllZerosV))],
5789 (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>,
5793 // VMOV : Vector Get Lane (move scalar to ARM core register)
// Sign/zero extension of sub-word lanes is expressed by NEONvgetlanes vs
// NEONvgetlaneu; lane-number bits are scattered into Inst{21} and Inst{6-5}.
5795 def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
5796 (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
5797 IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
5798 [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
5800 let Inst{21} = lane{2};
5801 let Inst{6-5} = lane{1-0};
5803 def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
5804 (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
5805 IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
5806 [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
5808 let Inst{21} = lane{1};
5809 let Inst{6} = lane{0};
5811 def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
5812 (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
5813 IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
5814 [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
5816 let Inst{21} = lane{2};
5817 let Inst{6-5} = lane{1-0};
5819 def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
5820 (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
5821 IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
5822 [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
5824 let Inst{21} = lane{1};
5825 let Inst{6} = lane{0};
// 32-bit lanes need no extension; gated on HasFastVGETLNi32 — slow-uarch
// alternatives are provided by the EXTRACT_SUBREG patterns below.
5827 def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
5828 (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
5829 IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
5830 [(set GPR:$R, (extractelt (v2i32 DPR:$V),
5832 Requires<[HasVFP2, HasFastVGETLNi32]> {
5833 let Inst{21} = lane{0};
5835 // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
// Q-register extracts: narrow to the containing D register first, then use
// the D-register instruction with the lane renumbered within that D reg.
5836 def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
5837 (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
5838 (DSubReg_i8_reg imm:$lane))),
5839 (SubReg_i8_lane imm:$lane))>;
5840 def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
5841 (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
5842 (DSubReg_i16_reg imm:$lane))),
5843 (SubReg_i16_lane imm:$lane))>;
5844 def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
5845 (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
5846 (DSubReg_i8_reg imm:$lane))),
5847 (SubReg_i8_lane imm:$lane))>;
5848 def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
5849 (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
5850 (DSubReg_i16_reg imm:$lane))),
5851 (SubReg_i16_lane imm:$lane))>;
5852 def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
5853 (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
5854 (DSubReg_i32_reg imm:$lane))),
5855 (SubReg_i32_lane imm:$lane))>,
5856 Requires<[HasNEON, HasFastVGETLNi32]>;
5857 def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
5859 (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
5860 Requires<[HasNEON, HasSlowVGETLNi32]>;
5861 def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
5863 (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
5864 Requires<[HasNEON, HasSlowVGETLNi32]>;
5865 def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
5866 (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
5867 (SSubReg_f32_reg imm:$src2))>;
5868 def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
5869 (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
5870 (SSubReg_f32_reg imm:$src2))>;
5871 //def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
5872 // (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
5873 def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
5874 (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
5877 // VMOV : Vector Set Lane (move ARM core register to scalar)
// Read-modify-write: the destination register is tied to an input so the
// untouched lanes are preserved ($src1 = $V constraint).
5879 let Constraints = "$src1 = $V" in {
5880 def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
5881 (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
5882 IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
5883 [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
5884 GPR:$R, imm:$lane))]> {
5885 let Inst{21} = lane{2};
5886 let Inst{6-5} = lane{1-0};
5888 def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
5889 (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
5890 IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
5891 [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
5892 GPR:$R, imm:$lane))]> {
5893 let Inst{21} = lane{1};
5894 let Inst{6} = lane{0};
5896 def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
5897 (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
5898 IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
5899 [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
5900 GPR:$R, imm:$lane))]>,
5901 Requires<[HasVFP2]> {
5902 let Inst{21} = lane{0};
5903 // This instruction is equivalent as
5904 // $V = INSERT_SUBREG $src1, $R, translateImmToSubIdx($imm)
5905 let isInsertSubreg = 1;
// Q-register inserts: extract the containing D register, set the lane there,
// then re-insert the D register into the Q register.
5908 def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
5909 (v16i8 (INSERT_SUBREG QPR:$src1,
5910 (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
5911 (DSubReg_i8_reg imm:$lane))),
5912 GPR:$src2, (SubReg_i8_lane imm:$lane))),
5913 (DSubReg_i8_reg imm:$lane)))>;
5914 def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
5915 (v8i16 (INSERT_SUBREG QPR:$src1,
5916 (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
5917 (DSubReg_i16_reg imm:$lane))),
5918 GPR:$src2, (SubReg_i16_lane imm:$lane))),
5919 (DSubReg_i16_reg imm:$lane)))>;
5920 def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
5921 (v4i32 (INSERT_SUBREG QPR:$src1,
5922 (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
5923 (DSubReg_i32_reg imm:$lane))),
5924 GPR:$src2, (SubReg_i32_lane imm:$lane))),
5925 (DSubReg_i32_reg imm:$lane)))>;
// Float lane inserts go through S sub-registers of the VFP2-overlapping
// register classes rather than a VSETLN instruction.
5927 def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
5928 (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
5929 SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
5930 def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
5931 (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
5932 SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
5934 //def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
5935 // (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
5936 def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
5937 (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
// scalar_to_vector: only lane 0 is defined; the rest of the register is
// left undefined via IMPLICIT_DEF.
5939 def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
5940 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
5941 def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
5942 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
5943 def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
5944 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
5946 def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
5947 (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
5948 def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
5949 (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
5950 def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
5951 (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
5953 def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
5954 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
5955 (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
5957 def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
5958 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
5959 (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
5961 def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
5962 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
5963 (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
5966 // VDUP : Vector Duplicate (from ARM core register to all elements)
5968 class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
5969 : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
5970 IIC_VMOVIS, "vdup", Dt, "$V, $R",
5971 [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
5972 class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
5973 : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
5974 IIC_VMOVIS, "vdup", Dt, "$V, $R",
5975 [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
// The 32-bit D form is gated on HasFastVDUP32; slow-VDUP.32 uarchs use the
// VMOVDRR patterns further below instead.
5977 def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>;
5978 def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
5979 def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>,
5980 Requires<[HasNEON, HasFastVDUP32]>;
5981 def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>;
5982 def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
5983 def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;
5985 // NEONvdup patterns for uarchs with fast VDUP.32.
5986 def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
5987 Requires<[HasNEON,HasFastVDUP32]>;
5988 def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;
5990 // NEONvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
5991 def : Pat<(v2i32 (NEONvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
5992 Requires<[HasNEON,HasSlowVDUP32]>;
5993 def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>,
5994 Requires<[HasNEON,HasSlowVDUP32]>;
5996 // VDUP : Vector Duplicate Lane (from scalar to all elements)
5998 class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
5999 ValueType Ty, Operand IdxTy>
6000 : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
6001 IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
6002 [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>;
6004 class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
6005 ValueType ResTy, ValueType OpTy, Operand IdxTy>
6006 : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
6007 IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
6008 [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm),
6009 VectorIndex32:$lane)))]>;
6011 // Inst{19-16} is partially specified depending on the element size.
6013 def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
6015 let Inst{19-17} = lane{2-0};
6017 def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
6019 let Inst{19-18} = lane{1-0};
6021 def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
6023 let Inst{19} = lane{0};
6025 def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
6027 let Inst{19-17} = lane{2-0};
6029 def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
6031 let Inst{19-18} = lane{1-0};
6033 def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
6035 let Inst{19} = lane{0};
6038 def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
6039 (VDUPLN32d DPR:$Vm, imm:$lane)>;
6041 def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
6042 (VDUPLN32q DPR:$Vm, imm:$lane)>;
// Duplicating a lane of a Q register: narrow to the containing D register
// and renumber the lane within it, as with the VGETLN/VSETLN Q patterns.
6044 def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
6045 (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
6046 (DSubReg_i8_reg imm:$lane))),
6047 (SubReg_i8_lane imm:$lane)))>;
6048 def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
6049 (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
6050 (DSubReg_i16_reg imm:$lane))),
6051 (SubReg_i16_lane imm:$lane)))>;
6052 def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
6053 (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
6054 (DSubReg_i32_reg imm:$lane))),
6055 (SubReg_i32_lane imm:$lane)))>;
6056 def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
6057 (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
6058 (DSubReg_i32_reg imm:$lane))),
6059 (SubReg_i32_lane imm:$lane)))>;
// Duplicate an f32 already in an S register: place it in lane 0 of an
// undefined D register, then vdup that lane.
6061 def : Pat<(v2f32 (NEONvdup (f32 SPR:$src))),
6062 (v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
6063 SPR:$src, ssub_0), (i32 0)))>;
6064 def : Pat<(v4f32 (NEONvdup (f32 SPR:$src))),
6065 (v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
6066 SPR:$src, ssub_0), (i32 0)))>;
6068 // VMOVN : Vector Narrowing Move
6069 defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
6070 "vmovn", "i", trunc>;
6071 // VQMOVN : Vector Saturating Narrowing Move
// Three flavors: signed (vqmovn.s), unsigned (vqmovn.u), and
// signed-to-unsigned (vqmovun.s), each mapped to its own intrinsic.
6072 defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
6073 "vqmovn", "s", int_arm_neon_vqmovns>;
6074 defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
6075 "vqmovn", "u", int_arm_neon_vqmovnu>;
6076 defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
6077 "vqmovun", "s", int_arm_neon_vqmovnsu>;
6078 // VMOVL : Vector Lengthening Move
6079 defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
6080 defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
// anyext can be lowered with either widening move; the unsigned form is used.
6081 def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
6082 def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
6083 def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;
6085 // Vector Conversions.
6087 // VCVT : Vector Convert Between Floating-Point and Integers
// f32 <-> s32/u32, D-register (64-bit) forms.
6088 def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
6089 v2i32, v2f32, fp_to_sint>;
6090 def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
6091 v2i32, v2f32, fp_to_uint>;
6092 def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
6093 v2f32, v2i32, sint_to_fp>;
6094 def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
6095 v2f32, v2i32, uint_to_fp>;
// f32 <-> s32/u32, Q-register (128-bit) forms.
6097 def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
6098 v4i32, v4f32, fp_to_sint>;
6099 def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
6100 v4i32, v4f32, fp_to_uint>;
6101 def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
6102 v4f32, v4i32, sint_to_fp>;
6103 def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
6104 v4f32, v4i32, uint_to_fp>;
// f16 <-> s16/u16 forms; these require the ARMv8.2 full-FP16 extension.
6106 def VCVTh2sd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
6107 v4i16, v4f16, fp_to_sint>,
6108 Requires<[HasNEON, HasFullFP16]>;
6109 def VCVTh2ud : N2VD<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
6110 v4i16, v4f16, fp_to_uint>,
6111 Requires<[HasNEON, HasFullFP16]>;
6112 def VCVTs2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
6113 v4f16, v4i16, sint_to_fp>,
6114 Requires<[HasNEON, HasFullFP16]>;
6115 def VCVTu2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
6116 v4f16, v4i16, uint_to_fp>,
6117 Requires<[HasNEON, HasFullFP16]>;
6119 def VCVTh2sq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
6120 v8i16, v8f16, fp_to_sint>,
6121 Requires<[HasNEON, HasFullFP16]>;
6122 def VCVTh2uq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
6123 v8i16, v8f16, fp_to_uint>,
6124 Requires<[HasNEON, HasFullFP16]>;
6125 def VCVTs2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
6126 v8f16, v8i16, sint_to_fp>,
6127 Requires<[HasNEON, HasFullFP16]>;
6128 def VCVTu2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
6129 v8f16, v8i16, uint_to_fp>,
6130 Requires<[HasNEON, HasFullFP16]>;
// VCVT{A,N,P,M} : ARMv8 float->int conversions with an explicit rounding
// mode encoded in the instruction ('a'way, to-'n'earest, toward 'p'lus /
// 'm'inus infinity). op10_8 selects the rounding mode; IntS/IntU are the
// signed/unsigned intrinsics. Instantiated for f32 (D/Q) and, with the
// full-FP16 extension, for f16 (D/Q).
6133 multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
6134 SDPatternOperator IntU> {
6135 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
6136 def SDf : N2VDIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
6137 "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
6138 def SQf : N2VQIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
6139 "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
6140 def UDf : N2VDIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
6141 "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
6142 def UQf : N2VQIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
6143 "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
6144 def SDh : N2VDIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
6145 "s16.f16", v4i16, v4f16, IntS>,
6146 Requires<[HasV8, HasNEON, HasFullFP16]>;
6147 def SQh : N2VQIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
6148 "s16.f16", v8i16, v8f16, IntS>,
6149 Requires<[HasV8, HasNEON, HasFullFP16]>;
6150 def UDh : N2VDIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
6151 "u16.f16", v4i16, v4f16, IntU>,
6152 Requires<[HasV8, HasNEON, HasFullFP16]>;
6153 def UQh : N2VQIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
6154 "u16.f16", v8i16, v8f16, IntU>,
6155 Requires<[HasV8, HasNEON, HasFullFP16]>;
6159 defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>;
6160 defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>;
6161 defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>;
6162 defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>;
6164 // VCVT : Vector Convert Between Floating-Point and Fixed-Point.
// The fixed-point fraction-bit count is an immediate operand; the custom
// decoders below validate/extract it. f16 forms need full FP16.
6165 let DecoderMethod = "DecodeVCVTD" in {
6166 def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
6167 v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
6168 def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
6169 v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
6170 def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
6171 v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
6172 def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
6173 v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
6174 let Predicates = [HasNEON, HasFullFP16] in {
6175 def VCVTh2xsd : N2VCvtD<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
6176 v4i16, v4f16, int_arm_neon_vcvtfp2fxs>;
6177 def VCVTh2xud : N2VCvtD<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
6178 v4i16, v4f16, int_arm_neon_vcvtfp2fxu>;
6179 def VCVTxs2hd : N2VCvtD<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
6180 v4f16, v4i16, int_arm_neon_vcvtfxs2fp>;
6181 def VCVTxu2hd : N2VCvtD<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
6182 v4f16, v4i16, int_arm_neon_vcvtfxu2fp>;
6183 } // Predicates = [HasNEON, HasFullFP16]
6186 let DecoderMethod = "DecodeVCVTQ" in {
6187 def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
6188 v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
6189 def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
6190 v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
6191 def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
6192 v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
6193 def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
6194 v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
6195 let Predicates = [HasNEON, HasFullFP16] in {
6196 def VCVTh2xsq : N2VCvtQ<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
6197 v8i16, v8f16, int_arm_neon_vcvtfp2fxs>;
6198 def VCVTh2xuq : N2VCvtQ<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
6199 v8i16, v8f16, int_arm_neon_vcvtfp2fxu>;
6200 def VCVTxs2hq : N2VCvtQ<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
6201 v8f16, v8i16, int_arm_neon_vcvtfxs2fp>;
6202 def VCVTxu2hq : N2VCvtQ<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
6203 v8f16, v8i16, int_arm_neon_vcvtfxu2fp>;
6204 } // Predicates = [HasNEON, HasFullFP16]
// Assembly aliases: "vcvt ... #0" (zero fraction bits) is accepted and
// mapped onto the plain (non-fixed-point) conversion instructions.
6207 def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
6208 (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
6209 def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
6210 (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
6211 def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
6212 (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
6213 def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
6214 (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
6216 def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
6217 (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
6218 def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
6219 (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
6220 def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
6221 (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
6222 def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
6223 (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
6225 def : NEONInstAlias<"vcvt${p}.s16.f16 $Dd, $Dm, #0",
6226 (VCVTh2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
6227 def : NEONInstAlias<"vcvt${p}.u16.f16 $Dd, $Dm, #0",
6228 (VCVTh2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
6229 def : NEONInstAlias<"vcvt${p}.f16.s16 $Dd, $Dm, #0",
6230 (VCVTs2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
6231 def : NEONInstAlias<"vcvt${p}.f16.u16 $Dd, $Dm, #0",
6232 (VCVTu2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
6234 def : NEONInstAlias<"vcvt${p}.s16.f16 $Qd, $Qm, #0",
6235 (VCVTh2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
6236 def : NEONInstAlias<"vcvt${p}.u16.f16 $Qd, $Qm, #0",
6237 (VCVTh2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
6238 def : NEONInstAlias<"vcvt${p}.f16.s16 $Qd, $Qm, #0",
6239 (VCVTs2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
6240 def : NEONInstAlias<"vcvt${p}.f16.u16 $Qd, $Qm, #0",
6241 (VCVTu2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
6244 // VCVT : Vector Convert Between Half-Precision and Single-Precision.
// Requires only the (older) half-precision conversion extension, not
// the full ARMv8.2 FP16 arithmetic extension.
6245 def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
6246 IIC_VUNAQ, "vcvt", "f16.f32",
6247 v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
6248 Requires<[HasNEON, HasFP16]>;
6249 def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
6250 IIC_VUNAQ, "vcvt", "f32.f16",
6251 v4f32, v4i16, int_arm_neon_vcvthf2fp>,
6252 Requires<[HasNEON, HasFP16]>;
6256 // VREV64 : Vector Reverse elements within 64-bit doublewords
// op19_18 encodes the element size (0b00=8, 0b01=16, 0b10=32 bits).
6258 class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6259 : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
6260 (ins DPR:$Vm), IIC_VMOVD,
6261 OpcodeStr, Dt, "$Vd, $Vm", "",
6262 [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>;
6263 class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6264 : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
6265 (ins QPR:$Vm), IIC_VMOVQ,
6266 OpcodeStr, Dt, "$Vd, $Vm", "",
6267 [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>;
6269 def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>;
6270 def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
6271 def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
// f32 reversals reuse the i32 instruction (bit pattern is identical).
6272 def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;
6274 def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>;
6275 def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
6276 def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
6277 def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;
6279 // VREV32 : Vector Reverse elements within 32-bit words
6281 class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6282 : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
6283 (ins DPR:$Vm), IIC_VMOVD,
6284 OpcodeStr, Dt, "$Vd, $Vm", "",
6285 [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>;
6286 class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6287 : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
6288 (ins QPR:$Vm), IIC_VMOVQ,
6289 OpcodeStr, Dt, "$Vd, $Vm", "",
6290 [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>;
// Only 8- and 16-bit element sizes are meaningful within a 32-bit word.
6292 def VREV32d8 : VREV32D<0b00, "vrev32", "8", v8i8>;
6293 def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;
6295 def VREV32q8 : VREV32Q<0b00, "vrev32", "8", v16i8>;
6296 def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;
6298 // VREV16 : Vector Reverse elements within 16-bit halfwords
6300 class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6301 : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
6302 (ins DPR:$Vm), IIC_VMOVD,
6303 OpcodeStr, Dt, "$Vd, $Vm", "",
6304 [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;
6305 class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6306 : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
6307 (ins QPR:$Vm), IIC_VMOVQ,
6308 OpcodeStr, Dt, "$Vd, $Vm", "",
6309 [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;
// Only 8-bit elements fit within a 16-bit halfword.
6311 def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
6312 def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;
6314 // Other Vector Shuffles.
6316 // Aligned extractions: really just dropping registers
// Extracting an aligned subvector from a Q register is a no-op register
// rename: pick the right D subregister via the LaneCVT transform.
6318 class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
6319 : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
6320 (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>;
6322 def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;
6324 def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;
6326 def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;
6328 def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;
6330 def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
6333 // VEXT : Vector Extract
6336 // All of these have a two-operand InstAlias.
6337 let TwoOperandAliasConstraint = "$Vn = $Vd" in {
// The index immediate is a byte offset scaled per element size; each
// concrete def below overrides the Inst{11-8}/Inst{10-8} slice so only
// the bits valid for that element size are encoded.
6338 class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
6339 : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
6340 (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
6341 IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
6342 [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
6343 (Ty DPR:$Vm), imm:$index)))]> {
6346 let Inst{10-8} = index{2-0};
6349 class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
6350 : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
6351 (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm,
6352 IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
6353 [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
6354 (Ty QPR:$Vm), imm:$index)))]> {
6356 let Inst{11-8} = index{3-0};
6360 def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
6361 let Inst{10-8} = index{2-0};
6363 def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
6364 let Inst{10-9} = index{1-0};
6367 def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
6368 let Inst{10} = index{0};
6369 let Inst{9-8} = 0b00;
6371 def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
6374 (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
6376 def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
6377 let Inst{11-8} = index{3-0};
6379 def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
6380 let Inst{11-9} = index{2-0};
6383 def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
6384 let Inst{11-10} = index{1-0};
6385 let Inst{9-8} = 0b00;
6387 def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
6388 let Inst{11} = index{0};
6389 let Inst{10-8} = 0b000;
6391 def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
6394 (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
6396 // VTRN : Vector Transpose
6398 def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
6399 def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
6400 def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;
6402 def VTRNq8 : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
6403 def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
6404 def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;
6406 // VUZP : Vector Unzip (Deinterleave)
6408 def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
6409 def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
6410 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
6411 def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
6412 (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
6414 def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
6415 def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
6416 def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;
6418 // VZIP : Vector Zip (Interleave)
6420 def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
6421 def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
6422 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
// (With 32-bit elements in a 64-bit register, zip, unzip and transpose
// all perform the same swap, so a single instruction suffices.)
6423 def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
6424 (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
6426 def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
6427 def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
6428 def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
6430 // Vector Table Lookup and Table Extension.
6432 // VTBL : Vector Table Lookup
// NOTE(review): several def headers in this region are elided from this
// chunk; the anonymous ": N3V<...>" bodies below belong to the VTBL1-4 /
// VTBX1-4 definitions — confirm against the full file.
6433 let DecoderMethod = "DecodeTBLInstruction" in {
6435 : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
6436 (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
6437 "vtbl", "8", "$Vd, $Vn, $Vm", "",
6438 [(set DPR:$Vd, (v8i8 (NEONvtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
// Multi-register table operands need extra register-allocation care.
6440 let hasExtraSrcRegAllocReq = 1 in {
6442 : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
6443 (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
6444 "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
6446 : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
6447 (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
6448 "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
6450 : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
6451 (ins VecListFourD:$Vn, DPR:$Vm),
6453 "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
6454 } // hasExtraSrcRegAllocReq = 1
// Pseudos taking a QQPR table tuple; expanded after register allocation.
6457 : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
6459 : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;
6461 // VTBX : Vector Table Extension
// Unlike VTBL, out-of-range indices leave $orig's byte unchanged, hence
// the "$orig = $Vd" tied-operand constraint.
6463 : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
6464 (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
6465 "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
6466 [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
6467 DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
6468 let hasExtraSrcRegAllocReq = 1 in {
6470 : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
6471 (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
6472 "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
6474 : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
6475 (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
6476 NVTBLFrm, IIC_VTBX3,
6477 "vtbx", "8", "$Vd, $Vn, $Vm",
6480 : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
6481 (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
6482 "vtbx", "8", "$Vd, $Vn, $Vm",
6484 } // hasExtraSrcRegAllocReq = 1
6487 : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
6488 IIC_VTBX3, "$orig = $dst", []>;
6490 : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
6491 IIC_VTBX4, "$orig = $dst", []>;
6492 } // DecoderMethod = "DecodeTBLInstruction"
// Build the multi-register table operand with REG_SEQUENCE; unused tuple
// slots are filled with IMPLICIT_DEF.
6494 def : Pat<(v8i8 (NEONvtbl2 v8i8:$Vn0, v8i8:$Vn1, v8i8:$Vm)),
6495 (v8i8 (VTBL2 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
6498 def : Pat<(v8i8 (int_arm_neon_vtbx2 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
6500 (v8i8 (VTBX2 v8i8:$orig,
6501 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
6505 def : Pat<(v8i8 (int_arm_neon_vtbl3 v8i8:$Vn0, v8i8:$Vn1,
6506 v8i8:$Vn2, v8i8:$Vm)),
6507 (v8i8 (VTBL3Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
6510 (v8i8 (IMPLICIT_DEF)), dsub_3),
6512 def : Pat<(v8i8 (int_arm_neon_vtbx3 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
6513 v8i8:$Vn2, v8i8:$Vm)),
6514 (v8i8 (VTBX3Pseudo v8i8:$orig,
6515 (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
6518 (v8i8 (IMPLICIT_DEF)), dsub_3),
6521 def : Pat<(v8i8 (int_arm_neon_vtbl4 v8i8:$Vn0, v8i8:$Vn1,
6522 v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
6523 (v8i8 (VTBL4Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
6528 def : Pat<(v8i8 (int_arm_neon_vtbx4 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
6529 v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
6530 (v8i8 (VTBX4Pseudo v8i8:$orig,
6531 (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
6537 // VRINT : Vector Rounding
// ARMv8 round-to-integral in floating-point: op9_7 selects the rounding
// variant ('n'earest, e'x'act, 'a'way, toward 'z'ero, 'm'inus / 'p'lus
// infinity). f16 forms require the full-FP16 extension. The aliases accept
// the redundant two-type spelling "vrintX.f32.f32" / "vrintX.f16.f16".
6538 multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
6539 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
6540 def Df : N2VDIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
6541 !strconcat("vrint", op), "f32",
6542 v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
6543 let Inst{9-7} = op9_7;
6545 def Qf : N2VQIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
6546 !strconcat("vrint", op), "f32",
6547 v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
6548 let Inst{9-7} = op9_7;
6550 def Dh : N2VDIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
6551 !strconcat("vrint", op), "f16",
6553 Requires<[HasV8, HasNEON, HasFullFP16]> {
6554 let Inst{9-7} = op9_7;
6556 def Qh : N2VQIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
6557 !strconcat("vrint", op), "f16",
6559 Requires<[HasV8, HasNEON, HasFullFP16]> {
6560 let Inst{9-7} = op9_7;
6564 def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
6565 (!cast<Instruction>(NAME#"Df") DPR:$Dd, DPR:$Dm)>;
6566 def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
6567 (!cast<Instruction>(NAME#"Qf") QPR:$Qd, QPR:$Qm)>;
6568 let Predicates = [HasNEON, HasFullFP16] in {
6569 def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Dd, $Dm"),
6570 (!cast<Instruction>(NAME#"Dh") DPR:$Dd, DPR:$Dm)>;
6571 def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Qd, $Qm"),
6572 (!cast<Instruction>(NAME#"Qh") QPR:$Qd, QPR:$Qm)>;
6576 defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
6577 defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
6578 defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
6579 defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
6580 defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
6581 defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;
6583 // Cryptography instructions
// ARMv8 Crypto extension: AES (8-bit elements, Q regs) and SHA-1/SHA-256
// (32-bit elements, Q regs). The *2Op classes tie the destination as an
// accumulating source operand.
6584 let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
6585 DecoderNamespace = "v8Crypto", hasSideEffects = 0 in {
6586 class AES<string op, bit op7, bit op6, SDPatternOperator Int>
6587 : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
6588 !strconcat("aes", op), "8", v16i8, v16i8, Int>,
6589 Requires<[HasV8, HasCrypto]>;
6590 class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int>
6591 : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
6592 !strconcat("aes", op), "8", v16i8, v16i8, Int>,
6593 Requires<[HasV8, HasCrypto]>;
6594 class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
6595 SDPatternOperator Int>
6596 : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
6597 !strconcat("sha", op), "32", v4i32, v4i32, Int>,
6598 Requires<[HasV8, HasCrypto]>;
6599 class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
6600 SDPatternOperator Int>
6601 : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
6602 !strconcat("sha", op), "32", v4i32, v4i32, Int>,
6603 Requires<[HasV8, HasCrypto]>;
6604 class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int>
6605 : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
6606 !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>,
6607 Requires<[HasV8, HasCrypto]>;
6610 def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
6611 def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>;
6612 def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
6613 def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>;
// SHA1H/SHA1C/SHA1M/SHA1P use null_frag: their intrinsics take/return an
// i32 hash element, so they are selected by the explicit patterns below
// rather than by the instruction's own pattern field.
6615 def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, null_frag>;
6616 def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
6617 def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
6618 def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, null_frag>;
6619 def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, null_frag>;
6620 def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, null_frag>;
6621 def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
6622 def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
6623 def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
6624 def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;
// Bridge the scalar i32 hash element into/out of the vector register file.
6626 def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)),
6627 (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG
6628 (SHA1H (SUBREG_TO_REG (i64 0),
6629 (f32 (COPY_TO_REGCLASS i32:$Rn, SPR)),
6633 def : Pat<(v4i32 (int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
6634 (SHA1C v4i32:$hash_abcd,
6635 (SUBREG_TO_REG (i64 0),
6636 (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
6640 def : Pat<(v4i32 (int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
6641 (SHA1M v4i32:$hash_abcd,
6642 (SUBREG_TO_REG (i64 0),
6643 (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
6647 def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
6648 (SHA1P v4i32:$hash_abcd,
6649 (SUBREG_TO_REG (i64 0),
6650 (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
6654 //===----------------------------------------------------------------------===//
6655 // NEON instructions for single-precision FP math
6656 //===----------------------------------------------------------------------===//
// These pattern classes let scalar f32 arithmetic be done on the NEON
// pipeline (UseNEONForFP): the scalar lives in lane 0 of a D register
// restricted to DPR_VFP2 so the lane aliases an SPR.
6658 class N2VSPat<SDNode OpNode, NeonI Inst>
6659 : NEONFPPat<(f32 (OpNode SPR:$a)),
6661 (v2f32 (COPY_TO_REGCLASS (Inst
6663 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
6664 SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;
6666 class N3VSPat<SDNode OpNode, NeonI Inst>
6667 : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
6669 (v2f32 (COPY_TO_REGCLASS (Inst
6671 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
6674 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
6675 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
6677 class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
6678 : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
6680 (v2f32 (COPY_TO_REGCLASS (Inst
6682 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
6685 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
6688 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
6689 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
6691 class NVCVTIFPat<SDNode OpNode, NeonI Inst>
6692 : NEONFPPat<(f32 (OpNode GPR:$a)),
6693 (f32 (EXTRACT_SUBREG
6696 (v2f32 (IMPLICIT_DEF)),
6697 (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))),
6699 class NVCVTFIPat<SDNode OpNode, NeonI Inst>
6700 : NEONFPPat<(i32 (OpNode SPR:$a)),
6701 (i32 (EXTRACT_SUBREG
6702 (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
6706 def : N3VSPat<fadd, VADDfd>;
6707 def : N3VSPat<fsub, VSUBfd>;
6708 def : N3VSPat<fmul, VMULfd>;
// Fused vs. non-fused multiply-accumulate are mutually exclusive via the
// UseFusedMAC / DontUseFusedMAC predicates.
6709 def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
6710 Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
6711 def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
6712 Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
6713 def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
6714 Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
6715 def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
6716 Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
6717 def : N2VSPat<fabs, VABSfd>;
6718 def : N2VSPat<fneg, VNEGfd>;
6719 def : N3VSPat<fmaxnan, VMAXfd>, Requires<[HasNEON]>;
6720 def : N3VSPat<fminnan, VMINfd>, Requires<[HasNEON]>;
6721 def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
6722 def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
6723 def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
6724 def : NVCVTIFPat<uint_to_fp, VCVTu2fd>;
6726 // NEON doesn't have any f64 conversions, so provide patterns to make
6727 // sure the VFP conversions match when extracting from a vector.
6728 def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
6729 (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
6730 def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
6731 (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
6732 def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
6733 (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
6734 def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
6735 (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
6738 // Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers.
6739 def : Pat<(f32 (bitconvert GPR:$a)),
6740 (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
6741 Requires<[HasNEON, DontUseVMOVSR]>;
6743 //===----------------------------------------------------------------------===//
6744 // Non-Instruction Patterns
6745 //===----------------------------------------------------------------------===//
// Bitconverts between vector types of the same total width. On
// little-endian targets they are pure register renames (no code); on
// big-endian targets a VREV of the appropriate element size is required
// because lane numbering differs between the two element sizes.
6748 let Predicates = [IsLE] in {
6749 def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
6750 def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
6751 def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>;
// f64<->v1i64 (and the other same-element-size pairs below) are free on
// both endiannesses, so they sit outside the IsLE blocks.
6753 def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>;
6754 let Predicates = [IsLE] in {
6755 def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
6756 def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
6757 def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
6758 def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>;
6759 def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
6761 def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
6762 let Predicates = [IsLE] in {
6763 def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
6764 def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
6765 def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>;
6766 def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
6767 def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
6768 def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
6769 def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
6770 def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;
6771 def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>;
6772 def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
6774 def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>;
6775 let Predicates = [IsLE] in {
6776 def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
6777 def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
6778 def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
6779 def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
6780 def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
6781 def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
6783 def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
6784 let Predicates = [IsLE] in {
6785 def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
6786 def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;
// 128-bit (Q register) versions of the same conversions.
6789 let Predicates = [IsLE] in {
6790 def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
6791 def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
6792 def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
6794 def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
6795 let Predicates = [IsLE] in {
6796 def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
6797 def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
6798 def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
6799 def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
6800 def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
6802 def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
6803 let Predicates = [IsLE] in {
6804 def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
6805 def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
6806 def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
6807 def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
6808 def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
6809 def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
6810 def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
6811 def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
6812 def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
6813 def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
6814 def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
6816 def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
6817 let Predicates = [IsLE] in {
6818 def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
6819 def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
6820 def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
6822 def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
6823 let Predicates = [IsLE] in {
6824 def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
6825 def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
6826 def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
6827 def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
// Big-endian: a bitconvert between different element sizes must reverse
// lanes; the VREV width is the larger of the two element sizes.
6830 let Predicates = [IsBE] in {
6831 // 64 bit conversions
6832 def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
6833 def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
6834 def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
6835 def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
6836 def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
6837 def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
6838 def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
6839 def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>;
6840 def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
6841 def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
6842 def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>;
6843 def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>;
6844 def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
6845 def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (VREV64d8 DPR:$src)>;
6846 def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (VREV32d8 DPR:$src)>;
6847 def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (VREV16d8 DPR:$src)>;
6848 def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (VREV64d8 DPR:$src)>;
6849 def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (VREV32d8 DPR:$src)>;
6850 def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
6851 def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
6852 def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
6853 def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
6854 def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>;
6855 def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
6856 def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
6857 def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
6859 // 128 bit conversions
6860 def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
6861 def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
6862 def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
6863 def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
6864 def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
6865 def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
6866 def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
6867 def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
6868 def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
6869 def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
6870 def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>;
6871 def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
6872 def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
6873 def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8 QPR:$src)>;
6874 def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8 QPR:$src)>;
6875 def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8 QPR:$src)>;
6876 def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8 QPR:$src)>;
6877 def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8 QPR:$src)>;
6878 def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
6879 def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
6880 def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
6881 def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
6882 def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
6883 def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
6884 def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
6885 def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
6888 // Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on Big Endian
6889 def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
6890 (VREV64q8 (VLD1q8 addrmode6:$addr))>, Requires<[IsBE]>;
6891 def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
6892 (VST1q8 addrmode6:$addr, (VREV64q8 QPR:$value))>, Requires<[IsBE]>;
6893 def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
6894 (VREV64q16 (VLD1q16 addrmode6:$addr))>, Requires<[IsBE]>;
6895 def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
6896 (VST1q16 addrmode6:$addr, (VREV64q16 QPR:$value))>, Requires<[IsBE]>;
6898 // Fold extracting an element out of a v2i32 into a vfp register.
6899 def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
6900 (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
6902 // Vector lengthening move with load, matching extending loads.
6904 // extload, zextload and sextload for a standard lengthening load. Example:
6905 // Lengthen_Single<"8", "i16", "8"> =
6906 // Pat<(v8i16 (extloadvi8 addrmode6:$addr))
6907 // (VMOVLuv8i16 (VLD1d8 addrmode6:$addr,
6908 // (f64 (IMPLICIT_DEF)), (i32 0)))>;
6909 multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
6910 let AddedComplexity = 10 in {
6911 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
6912 (!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)),
6913 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
6914 (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
6916 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
6917 (!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)),
6918 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
6919 (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
6921 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
6922 (!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)),
6923 (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
6924 (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
6928 // extload, zextload and sextload for a lengthening load which only uses
6929 // half the lanes available. Example:
6930 // Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> =
6931 // Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
6932 // (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
6933 // (f64 (IMPLICIT_DEF)), (i32 0))),
6935 multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
6936 string InsnLanes, string InsnTy> {
6937 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
6938 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
6939 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
6940 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
6942 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
6943 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
6944 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
6945 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
6947 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
6948 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
6949 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
6950 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
6954 // The following class definition is basically a copy of the
6955 // Lengthen_HalfSingle definition above, however with an additional parameter
6956 // "RevLanes" to select the correct VREV32dXX instruction. This is to convert
6957 // data loaded by VLD1LN into proper vector format in big endian mode.
6958 multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy, string SrcTy,
6959 string InsnLanes, string InsnTy, string RevLanes> {
6960 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
6961 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
6962 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
6963 (!cast<Instruction>("VREV32d" # RevLanes)
6964 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
6966 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
6967 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
6968 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
6969 (!cast<Instruction>("VREV32d" # RevLanes)
6970 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
6972 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
6973 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
6974 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
6975 (!cast<Instruction>("VREV32d" # RevLanes)
6976 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
6980 // extload, zextload and sextload for a lengthening load followed by another
6981 // lengthening load, to quadruple the initial length.
6983 // Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
6984 // Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr))
6985 // (EXTRACT_SUBREG (VMOVLuv4i32
6986 // (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
6987 // (f64 (IMPLICIT_DEF)),
6991 multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
6992 string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
6994 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
6995 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
6996 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
6997 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
6998 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7000 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7001 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
7002 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7003 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7004 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7006 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7007 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
7008 (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
7009 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
7010 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7014 // The following class definition is basically a copy of the
7015 // Lengthen_Double definition above, however with an additional parameter
7016 // "RevLanes" to select the correct VREV32dXX instruction. This is to convert
7017 // data loaded by VLD1LN into proper vector format in big endian mode.
7018 multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy, string SrcTy,
7019 string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
7020 string Insn2Ty, string RevLanes> {
7021 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7022 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
7023 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7024 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7025 (!cast<Instruction>("VREV32d" # RevLanes)
7026 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7028 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7029 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
7030 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7031 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7032 (!cast<Instruction>("VREV32d" # RevLanes)
7033 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7035 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7036 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
7037 (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
7038 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
7039 (!cast<Instruction>("VREV32d" # RevLanes)
7040 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7044 // extload, zextload and sextload for a lengthening load followed by another
7045 // lengthening load, to quadruple the initial length, but which ends up only
7046 // requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
7048 // Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
7049 // Pat<(v2i32 (extloadvi8 addrmode6:$addr))
7050 // (EXTRACT_SUBREG (VMOVLuv4i32
7051 // (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
7052 // (f64 (IMPLICIT_DEF)), (i32 0))),
7055 multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
7056 string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
7058 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7059 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
7060 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7061 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7062 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7065 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7066 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
7067 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7068 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7069 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7072 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7073 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
7074 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
7075 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
7076 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7081 // The following class definition is basically a copy of the
7082 // Lengthen_HalfDouble definition above, however with an additional VREV16d8
7083 // instruction to convert data loaded by VLD1LN into proper vector format
7084 // in big endian mode.
7085 multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy, string SrcTy,
7086 string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
7088 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7089 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
7090 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7091 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7092 (!cast<Instruction>("VREV16d8")
7093 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7096 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7097 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
7098 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7099 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7100 (!cast<Instruction>("VREV16d8")
7101 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7104 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7105 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
7106 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
7107 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
7108 (!cast<Instruction>("VREV16d8")
7109 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7114 defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
7115 defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
7116 defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64
7118 let Predicates = [IsLE] in {
7119 defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
7120 defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32
7122 // Double lengthening - v4i8 -> v4i16 -> v4i32
7123 defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
7124 // v2i8 -> v2i16 -> v2i32
7125 defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
7126 // v2i16 -> v2i32 -> v2i64
7127 defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
7130 let Predicates = [IsBE] in {
7131 defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">; // v4i8 -> v4i16
7132 defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32
7134 // Double lengthening - v4i8 -> v4i16 -> v4i32
7135 defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8">;
7136 // v2i8 -> v2i16 -> v2i32
7137 defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32">;
7138 // v2i16 -> v2i32 -> v2i64
7139 defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64", "16">;
7142 // Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
7143 let Predicates = [IsLE] in {
7144 def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
7145 (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
7146 (VLD1LNd16 addrmode6:$addr,
7147 (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
7148 def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
7149 (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
7150 (VLD1LNd16 addrmode6:$addr,
7151 (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
7152 def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
7153 (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
7154 (VLD1LNd16 addrmode6:$addr,
7155 (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
7157 // The following patterns are basically a copy of the patterns above,
7158 // however with an additional VREV16d instruction to convert data
7159 // loaded by VLD1LN into proper vector format in big endian mode.
7160 let Predicates = [IsBE] in {
7161 def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
7162 (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
7163 (!cast<Instruction>("VREV16d8")
7164 (VLD1LNd16 addrmode6:$addr,
7165 (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
7166 def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
7167 (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
7168 (!cast<Instruction>("VREV16d8")
7169 (VLD1LNd16 addrmode6:$addr,
7170 (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
7171 def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
7172 (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
7173 (!cast<Instruction>("VREV16d8")
7174 (VLD1LNd16 addrmode6:$addr,
7175 (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
7178 def : Pat<(v2i64 (concat_vectors DPR:$Dn, DPR:$Dm)),
7179 (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
7180 def : Pat<(v4i32 (concat_vectors DPR:$Dn, DPR:$Dm)),
7181 (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
7182 def : Pat<(v8i16 (concat_vectors DPR:$Dn, DPR:$Dm)),
7183 (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
7184 def : Pat<(v16i8 (concat_vectors DPR:$Dn, DPR:$Dm)),
7185 (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
7186 def : Pat<(v4f32 (concat_vectors DPR:$Dn, DPR:$Dm)),
7187 (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
7189 //===----------------------------------------------------------------------===//
7190 // Assembler aliases
7193 def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
7194 (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
7195 def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
7196 (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;
7198 // VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
7199 defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
7200 (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
7201 defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
7202 (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
7203 defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
7204 (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
7205 defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
7206 (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
7207 defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
7208 (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
7209 defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
7210 (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
7211 defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
7212 (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
7213 defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
7214 (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
7215 // ... two-operand aliases
7216 defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
7217 (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
7218 defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
7219 (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
7220 defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
7221 (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
7222 defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
7223 (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
7224 defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
7225 (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
7226 defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
7227 (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
7229 def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
7230 (VBICiv4i16 DPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
7231 def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
7232 (VBICiv2i32 DPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
7233 def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
7234 (VBICiv8i16 QPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
7235 def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
7236 (VBICiv4i32 QPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
7239 // VLD1 single-lane pseudo-instructions. These need special handling for
7240 // the lane index that an InstAlias can't handle, so we use these instead.
7241 def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
7242 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
7244 def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
7245 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
7247 def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
7248 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
7251 def VLD1LNdWB_fixed_Asm_8 :
7252 NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
7253 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
7255 def VLD1LNdWB_fixed_Asm_16 :
7256 NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
7257 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
7259 def VLD1LNdWB_fixed_Asm_32 :
7260 NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
7261 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
7263 def VLD1LNdWB_register_Asm_8 :
7264 NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
7265 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
7266 rGPR:$Rm, pred:$p)>;
7267 def VLD1LNdWB_register_Asm_16 :
7268 NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
7269 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
7270 rGPR:$Rm, pred:$p)>;
7271 def VLD1LNdWB_register_Asm_32 :
7272 NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
7273 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
7274 rGPR:$Rm, pred:$p)>;
7277 // VST1 single-lane pseudo-instructions. These need special handling for
7278 // the lane index that an InstAlias can't handle, so we use these instead.
7279 def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
7280 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
7282 def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
7283 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
7285 def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
7286 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
7289 def VST1LNdWB_fixed_Asm_8 :
7290 NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
7291 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
7293 def VST1LNdWB_fixed_Asm_16 :
7294 NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
7295 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
7297 def VST1LNdWB_fixed_Asm_32 :
7298 NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
7299 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
7301 def VST1LNdWB_register_Asm_8 :
7302 NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
7303 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
7304 rGPR:$Rm, pred:$p)>;
7305 def VST1LNdWB_register_Asm_16 :
7306 NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
7307 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
7308 rGPR:$Rm, pred:$p)>;
7309 def VST1LNdWB_register_Asm_32 :
7310 NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
7311 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
7312 rGPR:$Rm, pred:$p)>;
7314 // VLD2 single-lane pseudo-instructions. These need special handling for
7315 // the lane index that an InstAlias can't handle, so we use these instead.
7316 def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
7317 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
7319 def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
7320 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
7322 def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
7323 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
7324 def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
7325 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
7327 def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
7328 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
7331 def VLD2LNdWB_fixed_Asm_8 :
7332 NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
7333 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
7335 def VLD2LNdWB_fixed_Asm_16 :
7336 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
7337 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
7339 def VLD2LNdWB_fixed_Asm_32 :
7340 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
7341 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
7343 def VLD2LNqWB_fixed_Asm_16 :
7344 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
7345 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
7347 def VLD2LNqWB_fixed_Asm_32 :
7348 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
7349 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
7351 def VLD2LNdWB_register_Asm_8 :
7352 NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
7353 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
7354 rGPR:$Rm, pred:$p)>;
7355 def VLD2LNdWB_register_Asm_16 :
7356 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
7357 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
7358 rGPR:$Rm, pred:$p)>;
7359 def VLD2LNdWB_register_Asm_32 :
7360 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
7361 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
7362 rGPR:$Rm, pred:$p)>;
7363 def VLD2LNqWB_register_Asm_16 :
7364 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
7365 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
7366 rGPR:$Rm, pred:$p)>;
7367 def VLD2LNqWB_register_Asm_32 :
7368 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
7369 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
7370 rGPR:$Rm, pred:$p)>;
7373 // VST2 single-lane pseudo-instructions. These need special handling for
7374 // the lane index that an InstAlias can't handle, so we use these instead.
7375 def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
7376 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
7378 def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
7379 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
7381 def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
7382 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
7384 def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
7385 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
7387 def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
7388 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
7391 def VST2LNdWB_fixed_Asm_8 :
7392 NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
7393 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
7395 def VST2LNdWB_fixed_Asm_16 :
7396 NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
7397 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
7399 def VST2LNdWB_fixed_Asm_32 :
7400 NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
7401 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
7403 def VST2LNqWB_fixed_Asm_16 :
7404 NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
7405 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
7407 def VST2LNqWB_fixed_Asm_32 :
7408 NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
7409 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
7411 def VST2LNdWB_register_Asm_8 :
7412 NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
7413 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
7414 rGPR:$Rm, pred:$p)>;
7415 def VST2LNdWB_register_Asm_16 :
7416 NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm",
7417 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
7418 rGPR:$Rm, pred:$p)>;
7419 def VST2LNdWB_register_Asm_32 :
7420 NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
7421 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
7422 rGPR:$Rm, pred:$p)>;
7423 def VST2LNqWB_register_Asm_16 :
7424 NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm",
7425 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
7426 rGPR:$Rm, pred:$p)>;
7427 def VST2LNqWB_register_Asm_32 :
7428 NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
7429 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
7430 rGPR:$Rm, pred:$p)>;
7432 // VLD3 all-lanes pseudo-instructions. These need special handling for
7433 // the lane index that an InstAlias can't handle, so we use these instead.
7434 def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
7435 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
7437 def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
7438 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
7440 def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
7441 (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;

def VLD3DUPdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
// VLD3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;

def VLD3LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeDHWordIndexed:$list,
                    addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeQHWordIndexed:$list,
                    addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
// VLD3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

def VLD3dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
                (ins VecListThreeD:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                (ins VecListThreeD:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                (ins VecListThreeD:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
                (ins VecListThreeQ:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                (ins VecListThreeQ:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                (ins VecListThreeQ:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
// VST3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
               (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
               (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
               (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
               (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
               (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;

def VST3LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
               (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
               (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
               (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeDHWordIndexed:$list,
                    addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeQHWordIndexed:$list,
                    addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
// VST3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

def VST3dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
                (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
                (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
                (ins VecListThreeD:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
                (ins VecListThreeD:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
                (ins VecListThreeD:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
                (ins VecListThreeQ:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
                (ins VecListThreeQ:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
                (ins VecListThreeQ:$list, addrmode6align64:$addr,
                     rGPR:$Rm, pred:$p)>;
// VLD4 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;
def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;

def VLD4DUPdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;
def VLD4DUPqWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;
def VLD4DUPdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourDAllLanes:$list,
                    addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQAllLanes:$list,
                    addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;
// VLD4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;

def VLD4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VLD4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VLD4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VLD4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourDWordIndexed:$list,
                    addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQWordIndexed:$list,
                    addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
// VLD4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;

def VLD4dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
// VST4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;

def VST4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VST4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VST4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VST4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourDWordIndexed:$list,
                    addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQWordIndexed:$list,
                    addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
// VST4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;

def VST4dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
// VMOV/VMVN takes an optional datatype suffix
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                         (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                         (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                         (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                         (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>;
// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
// D-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
                    (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
                    (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
                    (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
                    (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
                    (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
                    (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
                    (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vcle${p}.f16 $Dd, $Dn, $Dm",
                    (VCGEhd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
                    (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
                    (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
                    (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
                    (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
                    (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
                    (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
                    (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vcle${p}.f16 $Qd, $Qn, $Qm",
                    (VCGEhq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
// D-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
                    (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
                    (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
                    (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
                    (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
                    (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
                    (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
                    (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vclt${p}.f16 $Dd, $Dn, $Dm",
                    (VCGThd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
                    (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
                    (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
                    (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
                    (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
                    (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
                    (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
                    (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vclt${p}.f16 $Qd, $Qn, $Qm",
                    (VCGThq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
8161 // VSWP allows, but does not require, a type suffix.
// NEONDTAnyInstAlias expands to aliases that accept any data-type suffix
// (or none) on the mnemonic; one expansion each for the D- and Q-register
// forms of VSWP.
8162 defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
8163 (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
8164 defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
8165 (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;
8167 // VBIF, VBIT, and VBSL allow, but do not require, a type suffix.
// As with VSWP above, NEONDTAnyInstAlias accepts any (or no) data-type
// suffix on the mnemonic. Three-operand bitwise-select family; D-register
// expansions first, then the Q-register expansions.
8168 defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
8169 (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
8170 defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
8171 (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
8172 defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
8173 (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
8174 defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
8175 (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
8176 defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
8177 (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
8178 defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
8179 (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
8181 // "vmov Rd, #-imm" can be handled via "vmvn".
// nImmVMOVI32Neg matches immediates whose bitwise complement is a valid
// modified-immediate, so each alias encodes as the opposite instruction:
// vmov.i32 with such an immediate becomes VMVN, and vmvn.i32 becomes VMOV.
// D-register (v2i32) and Q-register (v4i32) forms of each.
8182 def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
8183 (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
8184 def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
8185 (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
8186 def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
8187 (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
8188 def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
8189 (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
8191 // 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
8192 // these should restrict to just the Q register variants, but the register
8193 // classes are enough to match correctly regardless, so we keep it simple
8194 // and just use MnemonicAlias.
// Each alias simply strips the legacy 'q' suffix; operand register classes
// then select the correct D/Q encoding. NOTE(review): the parser is
// order-sensitive here (see the f32/f64 comment below), so do not reorder.
8195 def : NEONMnemonicAlias<"vbicq", "vbic">;
8196 def : NEONMnemonicAlias<"vandq", "vand">;
8197 def : NEONMnemonicAlias<"veorq", "veor">;
8198 def : NEONMnemonicAlias<"vorrq", "vorr">;
8200 def : NEONMnemonicAlias<"vmovq", "vmov">;
8201 def : NEONMnemonicAlias<"vmvnq", "vmvn">;
8202 // Explicit versions for floating point so that the FPImm variants get
8203 // handled early. The parser gets confused otherwise.
8204 def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
8205 def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;
8207 def : NEONMnemonicAlias<"vaddq", "vadd">;
8208 def : NEONMnemonicAlias<"vsubq", "vsub">;
8210 def : NEONMnemonicAlias<"vminq", "vmin">;
8211 def : NEONMnemonicAlias<"vmaxq", "vmax">;
8213 def : NEONMnemonicAlias<"vmulq", "vmul">;
8215 def : NEONMnemonicAlias<"vabsq", "vabs">;
8217 def : NEONMnemonicAlias<"vshlq", "vshl">;
8218 def : NEONMnemonicAlias<"vshrq", "vshr">;
8220 def : NEONMnemonicAlias<"vcvtq", "vcvt">;
8222 def : NEONMnemonicAlias<"vcleq", "vcle">;
8223 def : NEONMnemonicAlias<"vceqq", "vceq">;
8225 def : NEONMnemonicAlias<"vzipq", "vzip">;
8226 def : NEONMnemonicAlias<"vswpq", "vswp">;
8228 def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">;
8229 def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">;
8232 // Alias for loading floating point immediates that aren't representable
8233 // using the vmov.f32 encoding but the bitpattern is representable using
8234 // the .i32 encoding.
// The nImmVMOVI32 operand does the matching: when the f32 immediate's raw
// bit pattern is a valid .i32 modified-immediate, the alias emits the
// integer VMOV form instead. Q-register (v4i32) then D-register (v2i32).
8235 def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
8236 (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
8237 def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
8238 (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;