1 //===--- arm_neon_incl.td - ARM NEON compiler interface ------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines data structures shared by arm_neon.td and arm_fp16.td.
11 // It contains base operation classes, operations, instructions, instruction
14 //===----------------------------------------------------------------------===//
16 // Each intrinsic is a subclass of the Inst class. An intrinsic can either
17 // generate a __builtin_* call or it can expand to a set of generic operations.
19 // The operations are subclasses of Operation providing a list of DAGs, the
20 // last of which is the return value. The available DAG nodes are documented
23 //===----------------------------------------------------------------------===//
25 // The base Operation class. All operations must subclass this.
26 class Operation<list<dag> ops=[]> {
30 // An operation with a single DAG; that DAG is therefore the return value.
31 class Op<dag op> : Operation<[op]>;
32 // Shorthand for Operation - takes a mandatory list of DAGs (no default, unlike
33 // Operation itself). The last of these will be the return value.
34 class LOp<list<dag> ops> : Operation<ops>;
36 // These defs and classes are used internally to implement the SetTheory
37 // expansion and should be ignored.
38 foreach Index = 0-63 in
42 //===----------------------------------------------------------------------===//
43 // Available operations
44 //===----------------------------------------------------------------------===//
46 // DAG arguments can either be operations (documented below) or variables.
47 // Variables are prefixed with '$'. There are variables for each input argument,
48 // with the name $pN, where N starts at zero. So the zero'th argument will be
49 // $p0, the first $p1 etc.
51 // op - Binary or unary operator, depending on the number of arguments. The
52 // operator itself is just treated as a raw string and is not checked.
53 // example: (op "+", $p0, $p1) -> "__p0 + __p1".
54 // (op "-", $p0) -> "-__p0"
56 // call - Invoke another intrinsic. The input types are type checked and
57 // disambiguated. If there is no intrinsic defined that takes
58 // the given types (or if there is a type ambiguity) an error is
59 // generated at tblgen time. The name of the intrinsic is the raw
60 // name as given to the Inst class (not mangled).
61 // example: (call "vget_high", $p0) -> "vgetq_high_s16(__p0)"
62 // (assuming $p0 has type int16x8_t).
64 // cast - Perform a cast to a different type. This gets emitted as a static
65 // C-style cast. For a pure reinterpret cast (T x = *(T*)&y), use
68 // The syntax is (cast MOD* VAL). The last argument is the value to
69 // cast, preceded by a sequence of type modifiers. The target type
70 // starts off as the type of VAL, and is modified by MOD in sequence.
71 // The available modifiers are:
72 // - $X - Take the type of parameter/variable X. For example:
73 // (cast $p0, $p1) would cast $p1 to the type of $p0.
74 // - "R" - The return type.
75 // - A typedef string - A NEON or stdint.h type that is then parsed.
76 // for example: (cast "uint32x4_t", $p0).
77 // - "U" - Make the type unsigned.
78 // - "S" - Make the type signed.
79 // - "H" - Halve the number of lanes in the type.
80 // - "D" - Double the number of lanes in the type.
81 // - "8" - Convert type to an equivalent vector of 8-bit signed
83 // example: (cast "R", "U", $p0) -> "(uint32x4_t)__p0" (assuming the return
84 // value is of type "int32x4_t").
85 // (cast $p0, "D", "8", $p1) -> "(int8x16_t)__p1" (assuming __p0
86 // has type float64x1_t or any other vector type of 64 bits).
87 // (cast "int32_t", $p2) -> "(int32_t)__p2"
89 // bitcast - Same as "cast", except a reinterpret-cast is produced:
90 // (bitcast "T", $p0) -> "*(T*)&__p0".
91 // The VAL argument is saved to a temporary so it can be used
94 // dup - Take a scalar argument and create a vector by duplicating it into
95 // all lanes. The type of the vector is the base type of the intrinsic.
96 // example: (dup $p1) -> "(uint32x2_t) {__p1, __p1}" (assuming the base type
99 // splat - Take a vector and a lane index, and return a vector of the same type
100 // containing repeated instances of the source vector at the lane index.
101 // example: (splat $p0, $p1) ->
102 // "__builtin_shufflevector(__p0, __p0, __p1, __p1, __p1, __p1)"
103 // (assuming __p0 has four elements).
105 // save_temp - Create a temporary (local) variable. The variable takes a name
106 // based on the zero'th parameter and can be referenced using
107 // that name in subsequent DAGs in the same
108 // operation. The scope of a temp is the operation. If a variable
109 // with the given name already exists, an error will be given at
111 // example: [(save_temp $var, (call "foo", $p0)),
112 // (op "+", $var, $p1)] ->
113 // "int32x2_t __var = foo(__p0); return __var + __p1;"
115 // name_replace - Return the name of the current intrinsic with the first
116 // argument replaced by the second argument. Raises an error if
117 // the first argument does not exist in the intrinsic name.
118 // example: (call (name_replace "_high_", "_"), $p0) (to call the non-high
119 // version of this intrinsic).
121 // literal - Create a literal piece of code. The code is treated as a raw
122 // string, and must be given a type. The type is a stdint.h or
123 // NEON intrinsic type as given to (cast).
124 // example: (literal "int32_t", "0")
126 // shuffle - Create a vector shuffle. The syntax is (shuffle ARG0, ARG1, MASK).
127 // The MASK argument is a set of elements. The elements are generated
128 // from the two special defs "mask0" and "mask1". "mask0" expands to
129 // the lane indices in sequence for ARG0, and "mask1" expands to
130 // the lane indices in sequence for ARG1. They can be used as-is, e.g.
132 // (shuffle $p0, $p1, mask0) -> $p0
133 // (shuffle $p0, $p1, mask1) -> $p1
135 // or, more usefully, they can be manipulated using the SetTheory
136 // operators plus some extra operators defined in the NEON emitter.
137 // The operators are described below.
138 // example: (shuffle $p0, $p1, (add (highhalf mask0), (highhalf mask1))) ->
139 // A concatenation of the high halves of the input vectors.
142 // add, interleave, decimate: These set operators are vanilla SetTheory
143 // operators and take their normal definition.
147 // rotl - Rotate set left by a number of elements.
148 // example: (rotl mask0, 3) -> [3, 4, 5, 6, 0, 1, 2]
150 // rotr - Rotate set right by a number of elements.
151 // example: (rotr mask0, 3) -> [4, 5, 6, 0, 1, 2, 3]
153 // highhalf - Take only the high half of the input.
154 // example: (highhalf mask0) -> [4, 5, 6, 7] (assuming mask0 had 8 elements)
156 // lowhalf - Take only the low half of the input.
157 // example: (lowhalf mask0) -> [0, 1, 2, 3] (assuming mask0 had 8 elements)
159 // rev - Perform a variable-width reversal of the elements. The zero'th argument
160 // is a width in bits to reverse. The lanes this maps to are determined
161 // based on the element width of the underlying type.
162 // example: (rev 32, mask0) -> [3, 2, 1, 0, 7, 6, 5, 4] (if 8-bit elements)
163 // example: (rev 32, mask0) -> [1, 0, 3, 2] (if 16-bit elements)
165 // mask0 - The initial sequence of lanes for shuffle ARG0
166 def mask0 : MaskExpand;
167 // mask1 - The initial sequence of lanes for shuffle ARG1
168 def mask1 : MaskExpand;
// OP_NONE - An Operation with no DAGs (it takes the default, empty list).
170 def OP_NONE : Operation;
171 def OP_UNAVAILABLE : Operation {
175 //===----------------------------------------------------------------------===//
176 // Instruction definitions
177 //===----------------------------------------------------------------------===//
179 // Every intrinsic subclasses "Inst". An intrinsic has a name, a prototype and
180 // a sequence of typespecs.
182 // The name is the base name of the intrinsic, for example "vget_lane". This is
183 // then mangled by the tblgen backend to add type information ("vget_lane_s16").
185 // A typespec is a sequence of uppercase characters (modifiers) followed by one
186 // lowercase character. A typespec encodes a particular "base type" of the
189 // An example typespec is "Qs" - quad-size short - int16x8_t. The available
190 // typespec codes are given below.
192 // The string given to an Inst class is a sequence of typespecs. The intrinsic
193 // is instantiated for every typespec in the sequence. For example "sdQsQd".
195 // The prototype is a string that defines the return type of the intrinsic
196 // and the type of each argument. The return type and every argument gets a
197 // "modifier" that can change in some way the "base type" of the intrinsic.
199 // The modifier 'd' means "default" and does not modify the base type in any
200 // way. The available modifiers are given below.
213 // Typespec modifiers
214 // ------------------
215 // S: scalar, only used for function mangling.
218 // H: 128b without mangling 'q'
221 // Prototype modifiers
222 // -------------------
223 // prototype: return (arg, arg, ...)
226 // t: best-fit integer (int/poly args)
227 // x: signed integer (int/float args)
228 // u: unsigned integer (int/float args)
229 // f: float (int args)
230 // F: double (int args)
231 // H: half (int args)
233 // g: default, ignore 'Q' size modifier.
234 // j: default, force 'Q' size modifier.
235 // w: double width elements, same num elts
236 // n: double width elements, half num elts
237 // h: half width elements, double num elts
238 // q: half width elements, quad num elts
239 // e: half width elements, double num elts, unsigned
240 // m: half width elements, same num elts
242 // l: constant uint64
243 // s: scalar of element type
244 // z: scalar of half width element type, signed
245 // r: scalar of double width element type, signed
246 // a: scalar of element type (splat to vector type)
247 // b: scalar of unsigned integer/long type (int/float args)
248 // $: scalar of signed integer/long type (int/float args)
249 // y: scalar of float
250 // o: scalar of double
251 // k: default elt width, double num elts
252 // 2,3,4: array of default vectors
253 // B,C,D: array of default elts, force 'Q' size modifier.
255 // c: const pointer type
256 // 7: vector of 8-bit elements, ignore 'Q' size modifier
257 // 8: vector of 8-bit elements, same width as default type
258 // 9: vector of 8-bit elements, force 'Q' size modifier
260 // Every intrinsic subclasses Inst.
261 class Inst <string n, string p, string t, Operation o> {
263 string Prototype = p;
265 string ArchGuard = "";
267 Operation Operation = o;
268 bit CartesianProductOfTypes = 0;
269 bit BigEndianSafe = 0;
271 bit isScalarShift = 0;
272 bit isScalarNarrowShift = 0;
274 // For immediate checks: the immediate will be assumed to specify the lane of
275 // a Q register. Only used for intrinsics which end up calling polymorphic
279 // Certain intrinsics have different names than their representative
280 // instructions. This field allows us to handle this correctly when we
281 // are generating tests.
282 string InstName = "";
284 // Certain intrinsics even though they are not a WOpInst or LOpInst,
285 // generate a WOpInst/LOpInst instruction (see below for definition
286 // of a WOpInst/LOpInst). For testing purposes we need to know
287 // this. Ex: vset_lane which outputs vmov instructions.
288 bit isHiddenWInst = 0;
289 bit isHiddenLInst = 0;
292 // The following instruction classes are implemented via builtins.
293 // These declarations are used to generate Builtins.def:
295 // SInst: Instruction with signed/unsigned suffix (e.g., "s8", "u8", "p8")
296 // IInst: Instruction with generic integer suffix (e.g., "i8")
297 // WInst: Instruction with only bit size suffix (e.g., "8")
// All three classes pass OP_NONE as the Operation argument of Inst.
298 class SInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
299 class IInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
300 class WInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
302 // The following instruction classes are implemented via operators
303 // instead of builtins. As such these declarations are only used for
304 // the purpose of generating tests.
306 // SOpInst: Instruction with signed/unsigned suffix (e.g., "s8",
308 // IOpInst: Instruction with generic integer suffix (e.g., "i8").
309 // WOpInst: Instruction with bit size only suffix (e.g., "8").
310 // LOpInst: Logical instruction with no bit size suffix.
311 // NoTestOpInst: Intrinsic that has no corresponding instruction.
// Each class below forwards its Operation argument 'o' unchanged to Inst.
312 class SOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
313 class IOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
314 class WOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
315 class LOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
316 class NoTestOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}