contrib/llvm/tools/clang/include/clang/Basic/arm_neon_incl.td

   1 //===--- arm_neon_incl.td - ARM NEON compiler interface ------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 //  This file defines data structures shared by arm_neon.td and arm_fp16.td.
  11 //  It contains base operation classes, operations, instructions, instruction
  12 //  modifiers, etc.
  13 //
  14 //===----------------------------------------------------------------------===//
  15 //
  16 // Each intrinsic is a subclass of the Inst class. An intrinsic can either
  17 // generate a __builtin_* call or it can expand to a set of generic operations.
  18 //
  19 // The operations are subclasses of Operation providing a list of DAGs, the
  20 // last of which is the return value. The available DAG nodes are documented
  21 // below.
  22 //
  23 //===----------------------------------------------------------------------===//
  24
  25 // The base Operation class. All operations must subclass this.
  26 class Operation<list<dag> ops=[]> {
  27   list<dag> Ops = ops;  // The DAGs of this operation; the last is the result.
  28   bit Unavailable = 0;  // Clear by default; OP_UNAVAILABLE sets this to 1.
  29 }
  30 // An operation that only contains a single DAG, which is its return value.
  31 class Op<dag op> : Operation<[op]>;
  32 // Shorthand for Operation - takes a list of DAGs. The last of these will
  33 // be the return value.
  34 class LOp<list<dag> ops> : Operation<ops>;
  35
  36 // These defs (sv0 to sv63) and the MaskExpand class are used internally to
  37 // implement the SetTheory expansion and should be ignored.
  38 foreach Index = 0-63 in
  39   def sv##Index;
  40 class MaskExpand;
  41
  42 //===----------------------------------------------------------------------===//
  43 // Available operations
  44 //===----------------------------------------------------------------------===//
  45
  46 // DAG arguments can either be operations (documented below) or variables.
  47 // Variables are prefixed with '$'. There are variables for each input argument,
  48 // with the name $pN, where N starts at zero. So the zero'th argument will be
  49 // $p0, the first $p1 etc.
  50
  51 // op - Binary or unary operator, depending on the number of operands. The
  52 //      operator itself is just treated as a raw string and is not checked.
  53 // example: (op "+", $p0, $p1) -> "__p0 + __p1"
  54 //          (op "-", $p0)      -> "-__p0"
  55 def op;
  56 // call - Invoke another intrinsic. The input types are type checked and
  57 //        disambiguated. If there is no intrinsic defined that takes
  58 //        the given types (or if there is a type ambiguity), an error is
  59 //        generated at tblgen time. The name of the intrinsic to call is the
  60 //        raw name as given to the Inst class (not mangled).
  61 // example: (call "vget_high", $p0) -> "vgetq_high_s16(__p0)"
  62 //            (assuming $p0 has type int16x8_t).
  63 def call;
  64 // cast - Perform a cast to a different type. This gets emitted as a static
  65 //        C-style cast. For a pure reinterpret cast (T x = *(T*)&y), use
  66 //        "bitcast".
  67 //
  68 //        The syntax is (cast MOD* VAL). The last argument is the value to
  69 //        cast, preceded by a sequence of type modifiers. The target type
  70 //        starts off as the type of VAL, and is modified by MOD in sequence.
  71 //        The available modifiers are:
  72 //          - $X  - Take the type of parameter/variable X. For example:
  73 //                  (cast $p0, $p1) would cast $p1 to the type of $p0.
  74 //          - "R" - Take the return type of the intrinsic.
  75 //          - A typedef string - A NEON or stdint.h type that is then parsed,
  76 //                               for example: (cast "uint32x4_t", $p0).
  77 //          - "U" - Make the type unsigned.
  78 //          - "S" - Make the type signed.
  79 //          - "H" - Halve the number of lanes in the type.
  80 //          - "D" - Double the number of lanes in the type.
  81 //          - "8" - Convert type to an equivalent vector of 8-bit signed
  82 //                  integers.
  83 // example: (cast "R", "U", $p0) -> "(uint32x4_t)__p0" (assuming the return
  84 //           value is of type "int32x4_t").
  85 //          (cast $p0, "D", "8", $p1) -> "(int8x16_t)__p1" (assuming __p0
  86 //           has type float64x1_t or any other vector type of 64 bits).
  87 //          (cast "int32_t", $p2) -> "(int32_t)__p2"
  88 def cast;
  89 // bitcast - Same as "cast", except that a reinterpret-cast is produced:
  90 //             (bitcast "T", $p0) -> "*(T*)&__p0".
  91 //           The VAL argument is saved to a temporary so that it can be used
  92 //           as an l-value.
  93 def bitcast;
  94 // dup - Take a single scalar argument and create a vector by duplicating it
  95 //       into all lanes. The vector type is the base type of the intrinsic.
  96 // example: (dup $p1) -> "(uint32x2_t) {__p1, __p1}" (assuming the base type
  97 //          is uint32x2_t).
  98 def dup;
  99 // dup_typed - Take a vector and a scalar argument, and create a new vector of
 100 //             the vector's type by duplicating the scalar value into all lanes.
 101 // example: (dup_typed $p1, $p2) -> "(float16x4_t) {__p2, __p2, __p2, __p2}"
 102 //          (assuming __p1 is float16x4_t, and __p2 is a compatible scalar).
 103 def dup_typed;
 104 // splat - Take a vector and a lane index, and return a vector of the same type
 105 //         containing repeated copies of the source vector's element at that lane.
 106 // example: (splat $p0, $p1) ->
 107 //            "__builtin_shufflevector(__p0, __p0, __p1, __p1, __p1, __p1)"
 108 //          (assuming __p0 has four elements).
 109 def splat;
 110 // save_temp - Create a temporary (local) variable. The variable takes a name
 111 //             based on the zero'th parameter and can be referenced using
 112 //             that name in subsequent DAGs in the same
 113 //             operation. The scope of a temp is the operation. If a variable
 114 //             with the given name already exists, an error will be given at
 115 //             tblgen time.
 116 // example: [(save_temp $var, (call "foo", $p0)),
 117 //           (op "+", $var, $p1)] ->
 118 //              "int32x2_t __var = foo(__p0); return __var + __p1;"
 119 def save_temp;
 120 // name_replace - Return the name of the current intrinsic with the first
 121 //                argument replaced by the second argument. Raises an error if
 122 //                the first argument does not occur in the intrinsic name.
 123 // example: (call (name_replace "_high_", "_"), $p0) (to call the non-high
 124 //            version of this intrinsic).
 125 def name_replace;
 126 // literal - Create a literal piece of code. The code is treated as a raw
 127 //           string, and must be given a type. The type is a stdint.h or
 128 //           NEON intrinsic type, in the same form as given to "cast".
 129 // example: (literal "int32_t", "0")
 130 def literal;
 131 // shuffle - Create a vector shuffle. The syntax is (shuffle ARG0, ARG1, MASK).
 132 //           The MASK argument is a set of lane indices. These are generated
 133 //           from the two special defs "mask0" and "mask1". "mask0" expands to
 134 //           the lane indices in sequence for ARG0, and "mask1" expands to
 135 //           the lane indices in sequence for ARG1. They can be used as-is, e.g.
 136 //
 137 //             (shuffle $p0, $p1, mask0) -> $p0
 138 //             (shuffle $p0, $p1, mask1) -> $p1
 139 //
 140 //           or, more usefully, they can be manipulated using the SetTheory
 141 //           operators plus some extra operators defined in the NEON emitter.
 142 //           The operators are described below.
 143 // example: (shuffle $p0, $p1, (add (highhalf mask0), (highhalf mask1))) ->
 144 //            A concatenation of the high halves of the input vectors.
 145 def shuffle;
 146
 147 // add, interleave, decimate: These are vanilla SetTheory operators and
 148 // take their normal definition.
 149 def add;
 150 def interleave;
 151 def decimate;
 152 // rotl - Rotate set left by a number of elements.
 153 // example: (rotl mask0, 3) -> [3, 4, 5, 6, 0, 1, 2] (if mask0 has 7 elements)
 154 def rotl;
 155 // rotr - Rotate set right by a number of elements.
 156 // example: (rotr mask0, 3) -> [4, 5, 6, 0, 1, 2, 3] (if mask0 has 7 elements)
 157 def rotr;
 158 // highhalf - Take only the high half of the input set.
 159 // example: (highhalf mask0) -> [4, 5, 6, 7] (assuming mask0 had 8 elements)
 160 def highhalf;
 161 // lowhalf - Take only the low half of the input set.
 162 // example: (lowhalf mask0) -> [0, 1, 2, 3] (assuming mask0 had 8 elements)
 163 def lowhalf;
 164 // rev - Perform a variable-width reversal of the elements. The zero'th argument
 165 //       is a width in bits to reverse. The lanes this maps to are determined
 166 //       based on the element width of the underlying type.
 167 // example: (rev 32, mask0) -> [3, 2, 1, 0, 7, 6, 5, 4] (if 8-bit elements)
 168 // example: (rev 32, mask0) -> [1, 0, 3, 2]             (if 16-bit elements)
 169 def rev;
 170 // mask0 - The initial sequence of lanes for shuffle ARG0.
 171 def mask0 : MaskExpand;
 172 // mask1 - The initial sequence of lanes for shuffle ARG1.
 173 def mask1 : MaskExpand;
 174
 175 def OP_NONE  : Operation;  // Empty DAG list; used by the builtin-based classes.
 176 def OP_UNAVAILABLE : Operation {  // Empty DAG list with Unavailable set.
 177   let Unavailable = 1;
 178 }
 179
 180 //===----------------------------------------------------------------------===//
 181 // Instruction definitions
 182 //===----------------------------------------------------------------------===//
 183
 184 // Every intrinsic subclasses "Inst". An intrinsic has a name, a prototype and
 185 // a sequence of typespecs.
 186 //
 187 // The name is the base name of the intrinsic, for example "vget_lane". This is
 188 // then mangled by the tblgen backend to add type information ("vget_lane_s16").
 189 //
 190 // A typespec is a sequence of uppercase characters (modifiers) followed by one
 191 // lowercase character. A typespec encodes a particular "base type" of the
 192 // intrinsic.
 193 //
 194 // An example typespec is "Qs" - quad-size short - int16x8_t. The available
 195 // typespec codes are given below.
 196 //
 197 // The string given to an Inst class is a sequence of typespecs. The intrinsic
 198 // is instantiated for every typespec in the sequence. For example "sdQsQd".
 199 //
 200 // The prototype is a string that defines the return type of the intrinsic
 201 // and the type of each argument. The return type and every argument gets a
 202 // "modifier" that can change in some way the "base type" of the intrinsic.
 203 //
 204 // The modifier 'd' means "default" and does not modify the base type in any
 205 // way. The available modifiers are given below.
 206 //
 207 // Typespecs
 208 // ---------
 209 // c: char
 210 // s: short
 211 // i: int
 212 // l: long
 213 // k: 128-bit long
 214 // f: float
 215 // h: half-float
 216 // d: double
 217 //
 218 // Typespec modifiers
 219 // ------------------
 220 // S: scalar, only used for function mangling.
 221 // U: unsigned
 222 // Q: 128b
 223 // H: 128b without mangling 'q'
 224 // P: polynomial
 225 //
 226 // Prototype modifiers
 227 // -------------------
 228 // prototype: return (arg, arg, ...)
 229 //
 230 // v: void
 231 // t: best-fit integer (int/poly args)
 232 // x: signed integer   (int/float args)
 233 // u: unsigned integer (int/float args)
 234 // f: float (int args)
 235 // F: double (int args)
 236 // H: half (int args)
 237 // 0: half (int args), ignore 'Q' size modifier.
 238 // 1: half (int args), force 'Q' size modifier.
 239 // d: default
 240 // g: default, ignore 'Q' size modifier.
 241 // j: default, force 'Q' size modifier.
 242 // w: double width elements, same num elts
 243 // n: double width elements, half num elts
 244 // h: half width elements, double num elts
 245 // q: half width elements, quad num elts
 246 // e: half width elements, double num elts, unsigned
 247 // m: half width elements, same num elts
 248 // i: constant int
 249 // l: constant uint64
 250 // s: scalar of element type
 251 // z: scalar of half width element type, signed
 252 // r: scalar of double width element type, signed
 253 // a: scalar of element type (splat to vector type)
 254 // b: scalar of unsigned integer/long type (int/float args)
 255 // $: scalar of signed integer/long type (int/float args)
 256 // y: scalar of float
 257 // o: scalar of double
 258 // k: default elt width, double num elts
 259 // 2,3,4: array of default vectors
 260 // B,C,D: array of default elts, force 'Q' size modifier.
 261 // p: pointer type
 262 // c: const pointer type
 263 // 7: vector of 8-bit elements, ignore 'Q' size modifier
 264 // 8: vector of 8-bit elements, same width as default type
 265 // 9: vector of 8-bit elements, force 'Q' size modifier
 266
 267 // Every intrinsic subclasses Inst (n: name, p: prototype, t: types, o: op).
 268 class Inst <string n, string p, string t, Operation o> {
 269   string Name = n;       // Base name; the tblgen backend mangles it later.
 270   string Prototype = p;  // Modifiers for the return type and each argument.
 271   string Types = t;      // Sequence of typespecs to instantiate for.
 272   string ArchGuard = "";
 273
 274   Operation Operation = o;  // OP_NONE for the builtin-based classes.
 275   bit CartesianProductOfTypes = 0;
 276   bit BigEndianSafe = 0;
 277   bit isShift = 0;
 278   bit isScalarShift = 0;
 279   bit isScalarNarrowShift = 0;
 280   bit isVCVT_N = 0;
 281   // For immediate checks: the immediate will be assumed to specify the lane of
 282   // a Q register. Only used for intrinsics which end up calling polymorphic
 283   // builtins.
 284   bit isLaneQ = 0;
 285
 286   // Certain intrinsics have different names than their representative
 287   // instructions. This field allows us to handle this correctly when we
 288   // are generating tests.
 289   string InstName = "";
 290
 291   // Certain intrinsics, even though they are not a WOpInst or LOpInst,
 292   // generate a WOpInst/LOpInst instruction (see below for definition
 293   // of a WOpInst/LOpInst). For testing purposes we need to know
 294   // this. Ex: vset_lane which outputs vmov instructions.
 295   bit isHiddenWInst = 0;
 296   bit isHiddenLInst = 0;
 297 }
 298
 299 // The following instruction classes are implemented via builtins (OP_NONE).
 300 // These declarations are used to generate Builtins.def:
 301 //
 302 // SInst: Instruction with signed/unsigned suffix (e.g., "s8", "u8", "p8")
 303 // IInst: Instruction with generic integer suffix (e.g., "i8")
 304 // WInst: Instruction with only bit size suffix (e.g., "8")
 305 class SInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
 306 class IInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
 307 class WInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
 308
 309 // The following instruction classes are implemented via operators
 310 // instead of builtins. As such these declarations are only used for
 311 // the purpose of generating tests.
 312 //
 313 // SOpInst:       Instruction with signed/unsigned suffix (e.g., "s8",
 314 //                "u8", "p8").
 315 // IOpInst:       Instruction with generic integer suffix (e.g., "i8").
 316 // WOpInst:       Instruction with only bit size suffix (e.g., "8").
 317 // LOpInst:       Logical instruction with no bit size suffix.
 318 // NoTestOpInst:  Intrinsic that has no corresponding instruction.
 319 class SOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
 320 class IOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
 321 class WOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
 322 class LOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}
 323 class NoTestOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {}