1 //===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This tablegen backend is responsible for emitting arm_neon.h, which includes
11 // a declaration and definition of each function specified by the ARM NEON
12 // compiler interface. See ARM document DUI0348B.
14 // Each NEON instruction is implemented in terms of 1 or more functions which
15 // are suffixed with the element type of the input vectors. Functions may be
16 // implemented in terms of generic vector operations such as +, *, -, etc. or
17 // by calling a __builtin_-prefixed function which will be handled by clang's
20 // Additional validation code can be generated by this file when runHeader() is
21 // called, rather than the normal run() entry point. A complete set of tests
22 // for Neon intrinsics can be generated by calling the runTests() entry point.
24 //===----------------------------------------------------------------------===//
26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/SmallString.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/StringExtras.h"
30 #include "llvm/ADT/StringMap.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/TableGen/Error.h"
33 #include "llvm/TableGen/Record.h"
34 #include "llvm/TableGen/TableGenBackend.h"
155 OpScalarQDMulHiLaneQ,
156 OpScalarQRDMulHiLane,
157 OpScalarQRDMulHiLaneQ,
164 ClassI, // generic integer instruction, e.g., "i8" suffix
165 ClassS, // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
166 ClassW, // width-specific instruction, e.g., "8" suffix
167 ClassB, // bitcast arguments with enum argument to specify type
168 ClassL, // Logical instructions which are op instructions
169 // but we need to not emit any suffix for in our
171 ClassNoTest // Instructions which we do not test since they are
172 // not TRUE instructions.
175 /// NeonTypeFlags - Flags to identify the types for overloaded Neon
176 /// builtins. These must be kept in sync with the flags in
177 /// include/clang/Basic/TargetBuiltins.h.
179 class NeonTypeFlags {
201 NeonTypeFlags(unsigned F) : Flags(F) {}
202 NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad) : Flags(ET) {
204 Flags |= UnsignedFlag;
209 uint32_t getFlags() const { return Flags; }
211 } // end anonymous namespace
215 RecordKeeper &Records;
216 StringMap<OpKind> OpMap;
217 DenseMap<Record*, ClassKind> ClassMap;
220 NeonEmitter(RecordKeeper &R) : Records(R) {
221 OpMap["OP_NONE"] = OpNone;
222 OpMap["OP_UNAVAILABLE"] = OpUnavailable;
223 OpMap["OP_ADD"] = OpAdd;
224 OpMap["OP_ADDL"] = OpAddl;
225 OpMap["OP_ADDLHi"] = OpAddlHi;
226 OpMap["OP_ADDW"] = OpAddw;
227 OpMap["OP_ADDWHi"] = OpAddwHi;
228 OpMap["OP_SUB"] = OpSub;
229 OpMap["OP_SUBL"] = OpSubl;
230 OpMap["OP_SUBLHi"] = OpSublHi;
231 OpMap["OP_SUBW"] = OpSubw;
232 OpMap["OP_SUBWHi"] = OpSubwHi;
233 OpMap["OP_MUL"] = OpMul;
234 OpMap["OP_MLA"] = OpMla;
235 OpMap["OP_MLAL"] = OpMlal;
236 OpMap["OP_MULLHi"] = OpMullHi;
237 OpMap["OP_MULLHi_N"] = OpMullHiN;
238 OpMap["OP_MLALHi"] = OpMlalHi;
239 OpMap["OP_MLALHi_N"] = OpMlalHiN;
240 OpMap["OP_MLS"] = OpMls;
241 OpMap["OP_MLSL"] = OpMlsl;
242 OpMap["OP_MLSLHi"] = OpMlslHi;
243 OpMap["OP_MLSLHi_N"] = OpMlslHiN;
244 OpMap["OP_MUL_N"] = OpMulN;
245 OpMap["OP_MLA_N"] = OpMlaN;
246 OpMap["OP_MLS_N"] = OpMlsN;
247 OpMap["OP_FMLA_N"] = OpFMlaN;
248 OpMap["OP_FMLS_N"] = OpFMlsN;
249 OpMap["OP_MLAL_N"] = OpMlalN;
250 OpMap["OP_MLSL_N"] = OpMlslN;
251 OpMap["OP_MUL_LN"]= OpMulLane;
252 OpMap["OP_MULX_LN"]= OpMulXLane;
253 OpMap["OP_MULL_LN"] = OpMullLane;
254 OpMap["OP_MULLHi_LN"] = OpMullHiLane;
255 OpMap["OP_MLA_LN"]= OpMlaLane;
256 OpMap["OP_MLS_LN"]= OpMlsLane;
257 OpMap["OP_MLAL_LN"] = OpMlalLane;
258 OpMap["OP_MLALHi_LN"] = OpMlalHiLane;
259 OpMap["OP_MLSL_LN"] = OpMlslLane;
260 OpMap["OP_MLSLHi_LN"] = OpMlslHiLane;
261 OpMap["OP_QDMULL_LN"] = OpQDMullLane;
262 OpMap["OP_QDMULLHi_LN"] = OpQDMullHiLane;
263 OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
264 OpMap["OP_QDMLALHi_LN"] = OpQDMlalHiLane;
265 OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
266 OpMap["OP_QDMLSLHi_LN"] = OpQDMlslHiLane;
267 OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
268 OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
269 OpMap["OP_FMS_LN"] = OpFMSLane;
270 OpMap["OP_FMS_LNQ"] = OpFMSLaneQ;
271 OpMap["OP_TRN1"] = OpTrn1;
272 OpMap["OP_ZIP1"] = OpZip1;
273 OpMap["OP_UZP1"] = OpUzp1;
274 OpMap["OP_TRN2"] = OpTrn2;
275 OpMap["OP_ZIP2"] = OpZip2;
276 OpMap["OP_UZP2"] = OpUzp2;
277 OpMap["OP_EQ"] = OpEq;
278 OpMap["OP_GE"] = OpGe;
279 OpMap["OP_LE"] = OpLe;
280 OpMap["OP_GT"] = OpGt;
281 OpMap["OP_LT"] = OpLt;
282 OpMap["OP_NEG"] = OpNeg;
283 OpMap["OP_NOT"] = OpNot;
284 OpMap["OP_AND"] = OpAnd;
285 OpMap["OP_OR"] = OpOr;
286 OpMap["OP_XOR"] = OpXor;
287 OpMap["OP_ANDN"] = OpAndNot;
288 OpMap["OP_ORN"] = OpOrNot;
289 OpMap["OP_CAST"] = OpCast;
290 OpMap["OP_CONC"] = OpConcat;
291 OpMap["OP_HI"] = OpHi;
292 OpMap["OP_LO"] = OpLo;
293 OpMap["OP_DUP"] = OpDup;
294 OpMap["OP_DUP_LN"] = OpDupLane;
295 OpMap["OP_SEL"] = OpSelect;
296 OpMap["OP_REV16"] = OpRev16;
297 OpMap["OP_REV32"] = OpRev32;
298 OpMap["OP_REV64"] = OpRev64;
299 OpMap["OP_XTN"] = OpXtnHi;
300 OpMap["OP_SQXTUN"] = OpSqxtunHi;
301 OpMap["OP_QXTN"] = OpQxtnHi;
302 OpMap["OP_VCVT_NA_HI"] = OpFcvtnHi;
303 OpMap["OP_VCVT_EX_HI"] = OpFcvtlHi;
304 OpMap["OP_VCVTX_HI"] = OpFcvtxnHi;
305 OpMap["OP_REINT"] = OpReinterpret;
306 OpMap["OP_ADDHNHi"] = OpAddhnHi;
307 OpMap["OP_RADDHNHi"] = OpRAddhnHi;
308 OpMap["OP_SUBHNHi"] = OpSubhnHi;
309 OpMap["OP_RSUBHNHi"] = OpRSubhnHi;
310 OpMap["OP_ABDL"] = OpAbdl;
311 OpMap["OP_ABDLHi"] = OpAbdlHi;
312 OpMap["OP_ABA"] = OpAba;
313 OpMap["OP_ABAL"] = OpAbal;
314 OpMap["OP_ABALHi"] = OpAbalHi;
315 OpMap["OP_QDMULLHi"] = OpQDMullHi;
316 OpMap["OP_QDMULLHi_N"] = OpQDMullHiN;
317 OpMap["OP_QDMLALHi"] = OpQDMlalHi;
318 OpMap["OP_QDMLALHi_N"] = OpQDMlalHiN;
319 OpMap["OP_QDMLSLHi"] = OpQDMlslHi;
320 OpMap["OP_QDMLSLHi_N"] = OpQDMlslHiN;
321 OpMap["OP_DIV"] = OpDiv;
322 OpMap["OP_LONG_HI"] = OpLongHi;
323 OpMap["OP_NARROW_HI"] = OpNarrowHi;
324 OpMap["OP_MOVL_HI"] = OpMovlHi;
325 OpMap["OP_COPY_LN"] = OpCopyLane;
326 OpMap["OP_COPYQ_LN"] = OpCopyQLane;
327 OpMap["OP_COPY_LNQ"] = OpCopyLaneQ;
328 OpMap["OP_SCALAR_MUL_LN"]= OpScalarMulLane;
329 OpMap["OP_SCALAR_MUL_LNQ"]= OpScalarMulLaneQ;
330 OpMap["OP_SCALAR_MULX_LN"]= OpScalarMulXLane;
331 OpMap["OP_SCALAR_MULX_LNQ"]= OpScalarMulXLaneQ;
332 OpMap["OP_SCALAR_VMULX_LN"]= OpScalarVMulXLane;
333 OpMap["OP_SCALAR_VMULX_LNQ"]= OpScalarVMulXLaneQ;
334 OpMap["OP_SCALAR_QDMULL_LN"] = OpScalarQDMullLane;
335 OpMap["OP_SCALAR_QDMULL_LNQ"] = OpScalarQDMullLaneQ;
336 OpMap["OP_SCALAR_QDMULH_LN"] = OpScalarQDMulHiLane;
337 OpMap["OP_SCALAR_QDMULH_LNQ"] = OpScalarQDMulHiLaneQ;
338 OpMap["OP_SCALAR_QRDMULH_LN"] = OpScalarQRDMulHiLane;
339 OpMap["OP_SCALAR_QRDMULH_LNQ"] = OpScalarQRDMulHiLaneQ;
340 OpMap["OP_SCALAR_GET_LN"] = OpScalarGetLane;
341 OpMap["OP_SCALAR_SET_LN"] = OpScalarSetLane;
343 Record *SI = R.getClass("SInst");
344 Record *II = R.getClass("IInst");
345 Record *WI = R.getClass("WInst");
346 Record *SOpI = R.getClass("SOpInst");
347 Record *IOpI = R.getClass("IOpInst");
348 Record *WOpI = R.getClass("WOpInst");
349 Record *LOpI = R.getClass("LOpInst");
350 Record *NoTestOpI = R.getClass("NoTestOpInst");
352 ClassMap[SI] = ClassS;
353 ClassMap[II] = ClassI;
354 ClassMap[WI] = ClassW;
355 ClassMap[SOpI] = ClassS;
356 ClassMap[IOpI] = ClassI;
357 ClassMap[WOpI] = ClassW;
358 ClassMap[LOpI] = ClassL;
359 ClassMap[NoTestOpI] = ClassNoTest;
362 // run - Emit arm_neon.h.inc
363 void run(raw_ostream &o);
365 // runHeader - Emit all the __builtin prototypes used in arm_neon.h
366 void runHeader(raw_ostream &o);
368 // runTests - Emit tests for all the Neon intrinsics.
369 void runTests(raw_ostream &o);
372 void emitIntrinsic(raw_ostream &OS, Record *R,
373 StringMap<ClassKind> &EmittedMap);
374 void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap,
375 bool isA64GenBuiltinDef);
376 void genOverloadTypeCheckCode(raw_ostream &OS,
377 StringMap<ClassKind> &A64IntrinsicMap,
378 bool isA64TypeCheck);
379 void genIntrinsicRangeCheckCode(raw_ostream &OS,
380 StringMap<ClassKind> &A64IntrinsicMap,
381 bool isA64RangeCheck);
382 void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
385 } // end anonymous namespace
387 /// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
388 /// which each StringRef representing a single type declared in the string.
389 /// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
390 /// 2xfloat and 4xfloat respectively.
391 static void ParseTypes(Record *r, std::string &s,
392 SmallVectorImpl<StringRef> &TV) {
393 const char *data = s.data();
396 for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
397 if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U'
398 || data[len] == 'H' || data[len] == 'S')
411 PrintFatalError(r->getLoc(),
412 "Unexpected letter: " + std::string(data + len, 1));
414 TV.push_back(StringRef(data, len + 1));
420 /// Widen - Convert a type code into the next wider type. char -> short,
421 /// short -> int, etc.
422 static char Widen(const char t) {
435 PrintFatalError("unhandled type in widen!");
439 /// Narrow - Convert a type code into the next smaller type. short -> char,
440 /// float -> half float, etc.
441 static char Narrow(const char t) {
454 PrintFatalError("unhandled type in narrow!");
458 static std::string GetNarrowTypestr(StringRef ty)
461 for (size_t i = 0, end = ty.size(); i < end; i++) {
481 /// For a particular StringRef, return the base type code, and whether it has
482 /// the quad-vector, polynomial, or unsigned modifiers set.
483 static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
486 if (ty[off] == 'S') {
490 if (ty[off] == 'Q' || ty[off] == 'H') {
496 if (ty[off] == 'P') {
501 // remember unsigned.
502 if (ty[off] == 'U') {
507 // base type to get the type string for.
511 /// ModType - Transform a type code and its modifiers based on a mod code. The
512 /// mod code definitions may be found at the top of arm_neon.td.
513 static char ModType(const char mod, char type, bool &quad, bool &poly,
514 bool &usgn, bool &scal, bool &cnst, bool &pntr) {
/// IsMultiVecProto - Return true if the prototype modifier character denotes
/// a multi-vector struct type (a bundle of 2, 3 or 4 vectors): '2'..'4' for
/// the 64-bit forms and 'B'..'D' for the 128-bit forms.
static bool IsMultiVecProto(const char p) {
  switch (p) {
  case '2':
  case '3':
  case '4':
  case 'B':
  case 'C':
  case 'D':
    return true;
  default:
    return false;
  }
}
632 /// TypeString - for a modifier and type, generate the name of the typedef for
633 /// that type. QUc -> uint8x8_t.
634 static std::string TypeString(const char mod, StringRef typestr) {
647 // base type to get the type string for.
648 char type = ClassifyType(typestr, quad, poly, usgn);
650 // Based on the modifying character, change the type and width if necessary.
651 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
660 s += poly ? "poly8" : "int8";
663 s += quad ? "x16" : "x8";
666 s += poly ? "poly16" : "int16";
669 s += quad ? "x8" : "x4";
675 s += quad ? "x4" : "x2";
678 s += (poly && !usgn)? "poly64" : "int64";
681 s += quad ? "x2" : "x1";
687 s += quad ? "x8" : "x4";
693 s += quad ? "x4" : "x2";
699 s += quad ? "x2" : "x1";
703 PrintFatalError("unhandled type!");
706 if (mod == '2' || mod == 'B')
708 if (mod == '3' || mod == 'C')
710 if (mod == '4' || mod == 'D')
713 // Append _t, finishing the type string typedef type.
725 /// BuiltinTypeString - for a modifier and type, generate the clang
726 /// BuiltinsARM.def prototype code for the function. See the top of clang's
727 /// Builtins.def for a description of the type strings.
728 static std::string BuiltinTypeString(const char mod, StringRef typestr,
729 ClassKind ck, bool ret) {
742 // base type to get the type string for.
743 char type = ClassifyType(typestr, quad, poly, usgn);
745 // Based on the modifying character, change the type and width if necessary.
746 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
748 // All pointers are void* pointers. Change type to 'v' now.
754 // Treat half-float ('h') types as unsigned short ('s') types.
759 usgn = usgn | poly | ((ck == ClassI || ck == ClassW) &&
760 scal && type != 'f' && type != 'd');
767 else if (type == 'c')
768 s.push_back('S'); // make chars explicitly signed
770 if (type == 'l') // 64-bit long
782 // Since the return value must be one type, return a vector type of the
783 // appropriate width which we will bitcast. An exception is made for
784 // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
785 // fashion, storing them to a pointer arg.
787 if (IsMultiVecProto(mod))
788 return "vv*"; // void result with void* first argument
789 if (mod == 'f' || (ck != ClassB && type == 'f'))
790 return quad ? "V4f" : "V2f";
791 if (mod == 'F' || (ck != ClassB && type == 'd'))
792 return quad ? "V2d" : "V1d";
793 if (ck != ClassB && type == 's')
794 return quad ? "V8s" : "V4s";
795 if (ck != ClassB && type == 'i')
796 return quad ? "V4i" : "V2i";
797 if (ck != ClassB && type == 'l')
798 return quad ? "V2LLi" : "V1LLi";
800 return quad ? "V16Sc" : "V8Sc";
803 // Non-return array types are passed as individual vectors.
804 if (mod == '2' || mod == 'B')
805 return quad ? "V16ScV16Sc" : "V8ScV8Sc";
806 if (mod == '3' || mod == 'C')
807 return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
808 if (mod == '4' || mod == 'D')
809 return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";
811 if (mod == 'f' || (ck != ClassB && type == 'f'))
812 return quad ? "V4f" : "V2f";
813 if (mod == 'F' || (ck != ClassB && type == 'd'))
814 return quad ? "V2d" : "V1d";
815 if (ck != ClassB && type == 's')
816 return quad ? "V8s" : "V4s";
817 if (ck != ClassB && type == 'i')
818 return quad ? "V4i" : "V2i";
819 if (ck != ClassB && type == 'l')
820 return quad ? "V2LLi" : "V1LLi";
822 return quad ? "V16Sc" : "V8Sc";
825 /// InstructionTypeCode - Computes the ARM argument character code and
826 /// quad status for a specific type string and ClassKind.
827 static void InstructionTypeCode(const StringRef &typeStr,
830 std::string &typeCode) {
833 char type = ClassifyType(typeStr, quad, poly, usgn);
838 case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break;
839 case ClassI: typeCode = "i8"; break;
840 case ClassW: typeCode = "8"; break;
846 case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break;
847 case ClassI: typeCode = "i16"; break;
848 case ClassW: typeCode = "16"; break;
854 case ClassS: typeCode = usgn ? "u32" : "s32"; break;
855 case ClassI: typeCode = "i32"; break;
856 case ClassW: typeCode = "32"; break;
862 case ClassS: typeCode = poly ? "p64" : usgn ? "u64" : "s64"; break;
863 case ClassI: typeCode = "i64"; break;
864 case ClassW: typeCode = "64"; break;
871 case ClassI: typeCode = "f16"; break;
872 case ClassW: typeCode = "16"; break;
879 case ClassI: typeCode = "f32"; break;
880 case ClassW: typeCode = "32"; break;
891 PrintFatalError("unhandled type!");
897 PrintFatalError("unhandled type!");
901 static char Insert_BHSD_Suffix(StringRef typestr){
903 if(typestr[off++] == 'S'){
904 while(typestr[off] == 'Q' || typestr[off] == 'H'||
905 typestr[off] == 'P' || typestr[off] == 'U')
907 switch (typestr[off]){
909 case 'c' : return 'b';
910 case 's' : return 'h';
912 case 'f' : return 's';
914 case 'd' : return 'd';
/// endsWith_xN - Return true if the intrinsic name carries an "_x2", "_x3"
/// or "_x4" multi-vector suffix. Names of length 3 or less cannot carry one.
static bool endsWith_xN(std::string const &name) {
  if (name.length() <= 3)
    return false;
  const std::string Suffix = name.substr(name.length() - 3);
  return Suffix == "_x2" || Suffix == "_x3" || Suffix == "_x4";
}
930 /// MangleName - Append a type or width suffix to a base neon function name,
931 /// and insert a 'q' in the appropriate location if type string starts with 'Q'.
932 /// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
933 /// Insert proper 'b' 'h' 's' 'd' if prefix 'S' is used.
934 static std::string MangleName(const std::string &name, StringRef typestr,
936 if (name == "vcvt_f32_f16" || name == "vcvt_f32_f64" ||
937 name == "vcvt_f64_f32")
941 std::string typeCode = "";
943 InstructionTypeCode(typestr, ck, quad, typeCode);
945 std::string s = name;
947 if (typeCode.size() > 0) {
948 // If the name is end with _xN (N = 2,3,4), insert the typeCode before _xN.
950 s.insert(s.length() - 3, "_" + typeCode);
958 // Insert a 'q' before the first '_' character so that it ends up before
959 // _lane or _n on vector-scalar operations.
960 if (typestr.find("Q") != StringRef::npos) {
961 size_t pos = s.find('_');
962 s = s.insert(pos, "q");
964 char ins = Insert_BHSD_Suffix(typestr);
966 size_t pos = s.find('_');
967 s = s.insert(pos, &ins, 1);
973 static void PreprocessInstruction(const StringRef &Name,
974 const std::string &InstName,
977 bool &HasLanePostfix,
981 // All of our instruction name fields from arm_neon.td are of the form
982 // <instructionname>_...
983 // Thus we grab our instruction name via computation of said Prefix.
984 const size_t PrefixEnd = Name.find_first_of('_');
985 // If InstName is passed in, we use that instead of our name Prefix.
986 Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName;
988 const StringRef Postfix = Name.slice(PrefixEnd, Name.size());
990 HasNPostfix = Postfix.count("_n");
991 HasLanePostfix = Postfix.count("_lane");
992 HasDupPostfix = Postfix.count("_dup");
993 IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt");
995 if (InstName.compare("vtbl") == 0 ||
996 InstName.compare("vtbx") == 0) {
997 // If we have a vtblN/vtbxN instruction, use the instruction's ASCII
998 // encoding to get its true value.
999 TBNumber = Name[Name.size()-1] - 48;
1003 /// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have
1004 /// extracted, generate a FileCheck pattern for a Load Or Store
1006 GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef,
1007 const std::string& OutTypeCode,
1009 const bool &HasDupPostfix,
1010 const bool &HasLanePostfix,
1012 std::string &RegisterSuffix) {
1013 const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1");
1014 // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang
1015 // will output a series of v{ld,st}1s, so we have to handle it specially.
1016 if ((Count == 3 || Count == 4) && IsQuad) {
1017 RegisterSuffix += "{";
1018 for (size_t i = 0; i < Count; i++) {
1019 RegisterSuffix += "d{{[0-9]+}}";
1020 if (HasDupPostfix) {
1021 RegisterSuffix += "[]";
1023 if (HasLanePostfix) {
1024 RegisterSuffix += "[{{[0-9]+}}]";
1027 RegisterSuffix += ", ";
1030 RegisterSuffix += "}";
1033 // Handle normal loads and stores.
1034 RegisterSuffix += "{";
1035 for (size_t i = 0; i < Count; i++) {
1036 RegisterSuffix += "d{{[0-9]+}}";
1037 if (HasDupPostfix) {
1038 RegisterSuffix += "[]";
1040 if (HasLanePostfix) {
1041 RegisterSuffix += "[{{[0-9]+}}]";
1043 if (IsQuad && !HasLanePostfix) {
1044 RegisterSuffix += ", d{{[0-9]+}}";
1045 if (HasDupPostfix) {
1046 RegisterSuffix += "[]";
1050 RegisterSuffix += ", ";
1053 RegisterSuffix += "}, [r{{[0-9]+}}";
1055 // We only include the alignment hint if we have a vld1.*64 or
1056 // a dup/lane instruction.
1058 if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") {
1059 RegisterSuffix += ":" + OutTypeCode;
1063 RegisterSuffix += "]";
1067 static bool HasNPostfixAndScalarArgs(const StringRef &NameRef,
1068 const bool &HasNPostfix) {
1069 return (NameRef.count("vmla") ||
1070 NameRef.count("vmlal") ||
1071 NameRef.count("vmlsl") ||
1072 NameRef.count("vmull") ||
1073 NameRef.count("vqdmlal") ||
1074 NameRef.count("vqdmlsl") ||
1075 NameRef.count("vqdmulh") ||
1076 NameRef.count("vqdmull") ||
1077 NameRef.count("vqrdmulh")) && HasNPostfix;
1080 static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef,
1081 const bool &HasLanePostfix) {
1082 return (NameRef.count("vmla") ||
1083 NameRef.count("vmls") ||
1084 NameRef.count("vmlal") ||
1085 NameRef.count("vmlsl") ||
1086 (NameRef.count("vmul") && NameRef.size() == 3)||
1087 NameRef.count("vqdmlal") ||
1088 NameRef.count("vqdmlsl") ||
1089 NameRef.count("vqdmulh") ||
1090 NameRef.count("vqrdmulh")) && HasLanePostfix;
1093 static bool IsSpecialLaneMultiply(const StringRef &NameRef,
1094 const bool &HasLanePostfix,
1095 const bool &IsQuad) {
1096 const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh"))
1098 const bool IsVMull = NameRef.count("mull") && !IsQuad;
1099 return (IsVMulOrMulh || IsVMull) && HasLanePostfix;
1102 static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
1103 const std::string &Proto,
1104 const bool &HasNPostfix,
1106 const bool &HasLanePostfix,
1107 const bool &HasDupPostfix,
1108 std::string &NormedProto) {
1109 // Handle generic case.
1110 const StringRef NameRef(Name);
1111 for (size_t i = 0, end = Proto.size(); i < end; i++) {
1121 NormedProto += IsQuad? 'q' : 'd';
1134 NormedProto += HasLanePostfix? 'a' : 'i';
1137 if (HasLanePostfix) {
1139 } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) {
1140 NormedProto += IsQuad? 'q' : 'd';
1148 // Handle Special Cases.
1149 const bool IsNotVExt = !NameRef.count("vext");
1150 const bool IsVPADAL = NameRef.count("vpadal");
1151 const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef,
1153 const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix,
1156 if (IsSpecialLaneMul) {
1158 NormedProto[2] = NormedProto[3];
1159 NormedProto.erase(3);
1160 } else if (NormedProto.size() == 4 &&
1161 NormedProto[0] == NormedProto[1] &&
1163 // If NormedProto.size() == 4 and the first two proto characters are the
1164 // same, ignore the first.
1165 NormedProto = NormedProto.substr(1, 3);
1166 } else if (Is5OpLaneAccum) {
1167 // If we have a 5 op lane accumulator operation, we take characters 1,2,4
1168 std::string tmp = NormedProto.substr(1,2);
1169 tmp += NormedProto[4];
1171 } else if (IsVPADAL) {
1172 // If we have VPADAL, ignore the first character.
1173 NormedProto = NormedProto.substr(0, 2);
1174 } else if (NameRef.count("vdup") && NormedProto.size() > 2) {
1175 // If our instruction is a dup instruction, keep only the first and
1177 std::string tmp = "";
1178 tmp += NormedProto[0];
1179 tmp += NormedProto[NormedProto.size()-1];
1184 /// GenerateRegisterCheckPatterns - Given a bunch of data we have
1185 /// extracted, generate a FileCheck pattern to check that an
1186 /// instruction's arguments are correct.
1187 static void GenerateRegisterCheckPattern(const std::string &Name,
1188 const std::string &Proto,
1189 const std::string &OutTypeCode,
1190 const bool &HasNPostfix,
1192 const bool &HasLanePostfix,
1193 const bool &HasDupPostfix,
1194 const size_t &TBNumber,
1195 std::string &RegisterSuffix) {
1197 RegisterSuffix = "";
1199 const StringRef NameRef(Name);
1200 const StringRef ProtoRef(Proto);
1202 if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) {
1206 const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst");
1207 const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx");
1210 // Grab N value from v{ld,st}N using its ascii representation.
1211 const size_t Count = NameRef[3] - 48;
1213 GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad,
1214 HasDupPostfix, HasLanePostfix,
1215 Count, RegisterSuffix);
1216 } else if (IsTBXOrTBL) {
1217 RegisterSuffix += "d{{[0-9]+}}, {";
1218 for (size_t i = 0; i < TBNumber-1; i++) {
1219 RegisterSuffix += "d{{[0-9]+}}, ";
1221 RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}";
1223 // Handle a normal instruction.
1224 if (NameRef.count("vget") || NameRef.count("vset"))
1227 // We first normalize our proto, since we only need to emit 4
1228 // different types of checks, yet have more than 4 proto types
1229 // that map onto those 4 patterns.
1230 std::string NormalizedProto("");
1231 NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad,
1232 HasLanePostfix, HasDupPostfix,
1235 for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) {
1236 const char &c = NormalizedProto[i];
1239 RegisterSuffix += "q{{[0-9]+}}, ";
1243 RegisterSuffix += "d{{[0-9]+}}, ";
1247 RegisterSuffix += "#{{[0-9]+}}, ";
1251 RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], ";
1256 // Remove extra ", ".
1257 RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2);
1261 /// GenerateChecksForIntrinsic - Given a specific instruction name +
1262 /// typestr + class kind, generate the proper set of FileCheck
1263 /// Patterns to check for. We could just return a string, but instead
1264 /// use a vector since it provides us with the extra flexibility of
1265 /// emitting multiple checks, which comes in handy for certain cases
1266 /// like mla where we want to check for 2 different instructions.
1267 static void GenerateChecksForIntrinsic(const std::string &Name,
1268 const std::string &Proto,
1269 StringRef &OutTypeStr,
1270 StringRef &InTypeStr,
1272 const std::string &InstName,
1274 std::vector<std::string>& Result) {
1276 // If Ck is a ClassNoTest instruction, just return so no test is
1278 if(Ck == ClassNoTest)
1281 if (Name == "vcvt_f32_f16") {
1282 Result.push_back("vcvt.f32.f16");
1287 // Now we preprocess our instruction given the data we have to get the
1288 // data that we need.
1289 // Create a StringRef for String Manipulation of our Name.
1290 const StringRef NameRef(Name);
1291 // Instruction Prefix.
1293 // The type code for our out type string.
1294 std::string OutTypeCode;
1295 // To handle our different cases, we need to check for different postfixes.
1296 // Is our instruction a quad instruction.
1297 bool IsQuad = false;
1298 // Our instruction is of the form <instructionname>_n.
1299 bool HasNPostfix = false;
1300 // Our instruction is of the form <instructionname>_lane.
1301 bool HasLanePostfix = false;
1302 // Our instruction is of the form <instructionname>_dup.
1303 bool HasDupPostfix = false;
1304 // Our instruction is a vcvt instruction which requires special handling.
1305 bool IsSpecialVCvt = false;
1306 // If we have a vtbxN or vtblN instruction, this is set to N.
1307 size_t TBNumber = -1;
1309 std::string RegisterSuffix;
1311 PreprocessInstruction(NameRef, InstName, Prefix,
1312 HasNPostfix, HasLanePostfix, HasDupPostfix,
1313 IsSpecialVCvt, TBNumber);
1315 InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode);
1316 GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad,
1317 HasLanePostfix, HasDupPostfix, TBNumber,
1320 // In the following section, we handle a bunch of special cases. You can tell
1321 // a special case by the fact we are returning early.
1323 // If our instruction is a logical instruction without postfix or a
1324 // hidden LOp just return the current Prefix.
1325 if (Ck == ClassL || IsHiddenLOp) {
1326 Result.push_back(Prefix + " " + RegisterSuffix);
1330 // If we have a vmov, due to the many different cases, some of which
1331 // vary within the different intrinsics generated for a single
1332 // instruction type, just output a vmov. (e.g. given an instruction
1333 // A, A.u32 might be vmov and A.u8 might be vmov.8).
1335 // FIXME: Maybe something can be done about this. The two cases that we care
1336 // about are vmov as an LType and vmov as a WType.
1337 if (Prefix == "vmov") {
1338 Result.push_back(Prefix + " " + RegisterSuffix);
1342 // In the following section, we handle special cases.
1344 if (OutTypeCode == "64") {
1345 // If we have a 64 bit vdup/vext and are handling an uint64x1_t
1346 // type, the intrinsic will be optimized away, so just return
1347 // nothing. On the other hand if we are handling an uint64x2_t
1348 // (i.e. quad instruction), vdup/vmov instructions should be
1350 if (Prefix == "vdup" || Prefix == "vext") {
1352 Result.push_back("{{vmov|vdup}}");
1357 // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with
1358 // multiple register operands.
1359 bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3"
1360 || Prefix == "vld4";
1361 bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3"
1362 || Prefix == "vst4";
1363 if (MultiLoadPrefix || MultiStorePrefix) {
1364 Result.push_back(NameRef.slice(0, 3).str() + "1.64");
1368 // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of
1369 // emitting said instructions. So return a check for
1370 // vldr/vstr/vmov/str instead.
1371 if (HasLanePostfix || HasDupPostfix) {
1372 if (Prefix == "vst1") {
1373 Result.push_back("{{str|vstr|vmov}}");
1375 } else if (Prefix == "vld1") {
1376 Result.push_back("{{ldr|vldr|vmov}}");
1382 // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are
1383 // sometimes disassembled as vtrn.32. We use a regex to handle both
1385 if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") {
1386 Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix);
1390 // Currently on most ARM processors, we do not use vmla/vmls for
1391 // quad floating point operations. Instead we output vmul + vadd. So
1392 // check if we have one of those instructions and just output a
1394 if (OutTypeCode == "f32") {
1395 if (Prefix == "vmls") {
1396 Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
1397 Result.push_back("vsub." + OutTypeCode);
1399 } else if (Prefix == "vmla") {
1400 Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
1401 Result.push_back("vadd." + OutTypeCode);
1406 // If we have vcvt, get the input type from the instruction name
1407 // (which should be of the form instname_inputtype) and append it
1408 // before the output type.
1409 if (Prefix == "vcvt") {
1410 const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1);
1411 Prefix += "." + inTypeCode;
1414 // Append output type code to get our final mangled instruction.
1415 Prefix += "." + OutTypeCode;
1417 Result.push_back(Prefix + " " + RegisterSuffix);
/// UseMacro - Decide whether an intrinsic must be emitted as a preprocessor
/// macro rather than an inline function, by scanning its prototype string.
static bool UseMacro(const std::string &proto) {
  // 'i': an immediate argument -- a macro keeps the literal visible so that
  //      SemaChecking can range check the value passed by the user.
  // 'p'/'c': a (const) pointer argument -- a macro avoids hiding aligned
  //      attributes carried by the caller's pointer type.
  return proto.find_first_of("ipc") != std::string::npos;
}
/// MacroArgUsedDirectly - Return true if argument i of a macro-defined
/// intrinsic should be used directly instead of first being assigned to a
/// local temporary: constant ints ('i'), pointers ('p') and const
/// pointers ('c').
static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
  switch (proto[i]) {
  case 'i':
  case 'p':
  case 'c':
    return true;
  default:
    return false;
  }
}
1446 // Generate the string "(argtype a, argtype b, ...)"
1447 static std::string GenArgs(const std::string &proto, StringRef typestr,
1448 const std::string &name) {
1449 bool define = UseMacro(proto);
1455 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1457 // Some macro arguments are used directly instead of being assigned
1458 // to local temporaries; prepend an underscore prefix to make their
1459 // names consistent with the local temporaries.
1460 if (MacroArgUsedDirectly(proto, i))
1463 s += TypeString(proto[i], typestr) + " __";
1466 //To avoid argument being multiple defined, add extra number for renaming.
1467 if (name == "vcopy_lane" || name == "vcopy_laneq")
1477 // Macro arguments are not type-checked like inline function arguments, so
1478 // assign them to local temporaries to get the right type checking.
// Emits "type __xN = __x;" declarations for each macro argument that is not
// used directly, restoring the type checking a function parameter would give.
1479 static std::string GenMacroLocals(const std::string &proto, StringRef typestr,
1480 const std::string &name ) {
1483 bool generatedLocal = false;
1485 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1486 // Do not create a temporary for an immediate argument.
1487 // That would defeat the whole point of using a macro!
1488 if (MacroArgUsedDirectly(proto, i))
1490 generatedLocal = true;
// vcopy_lane/vcopy_laneq need a numbered suffix on the temporary to match the
// renaming done in GenArgs and avoid duplicate definitions.
1491 bool extranumber = false;
1492 if (name == "vcopy_lane" || name == "vcopy_laneq")
1495 s += TypeString(proto[i], typestr) + " __";
1511 // Use the vmovl builtin to sign-extend or zero-extend a vector.
// Wraps 'a' in a call to vmovl (or vmovl_high when h is true); signedness is
// carried by 'typestr'. 'h' selects the "_high" variant for AArch64.
1512 static std::string Extend(StringRef typestr, const std::string &a, bool h=0) {
1513 std::string s, high;
1514 high = h ? "_high" : "";
1515 s = MangleName("vmovl" + high, typestr, ClassS);
1520 // Get the high 64-bit part of a vector
// Returns the string "vget_high_<type>(a)"; used when lowering "_high"
// intrinsics in terms of their 64-bit counterparts.
1521 static std::string GetHigh(const std::string &a, StringRef typestr) {
1523 s = MangleName("vget_high", typestr, ClassS);
1528 // Gen operation with two operands and get high 64-bit for both of two operands.
// Emits "op_<type>(vget_high(a), vget_high(b));" - the standard expansion for
// binary "_high" intrinsics.
1529 static std::string Gen2OpWith2High(StringRef typestr,
1530 const std::string &op,
1531 const std::string &a,
1532 const std::string &b) {
1534 std::string Op1 = GetHigh(a, typestr);
1535 std::string Op2 = GetHigh(b, typestr);
1536 s = MangleName(op, typestr, ClassS);
1537 s += "(" + Op1 + ", " + Op2 + ");";
1541 // Gen operation with three operands and get high 64-bit of the latter
// Emits "op_<type>(a, vget_high(b), vget_high(c));" - the expansion for
// ternary (accumulating) "_high" intrinsics such as vmlal_high.
1543 static std::string Gen3OpWith2High(StringRef typestr,
1544 const std::string &op,
1545 const std::string &a,
1546 const std::string &b,
1547 const std::string &c) {
1549 std::string Op1 = GetHigh(b, typestr);
1550 std::string Op2 = GetHigh(c, typestr);
1551 s = MangleName(op, typestr, ClassS);
1552 s += "(" + a + ", " + Op1 + ", " + Op2 + ");";
1556 // Gen combine operation by putting a on low 64-bit, and b on high 64-bit.
// Returns "vcombine_<type>(a, b)" (note: no trailing semicolon, unlike the
// Gen*With2High helpers, so callers can embed it in larger expressions).
1557 static std::string GenCombine(std::string typestr,
1558 const std::string &a,
1559 const std::string &b) {
1561 s = MangleName("vcombine", typestr, ClassS);
1562 s += "(" + a + ", " + b + ")";
// Duplicate - build a vector-literal string "(type){ a, a, ..., a }" that
// splats scalar 'a' across all nElts lanes of the vector type.
1566 static std::string Duplicate(unsigned nElts, StringRef typestr,
1567 const std::string &a) {
// 'd' selects the full vector type string for this typestr.
1570 s = "(" + TypeString('d', typestr) + "){ ";
1571 for (unsigned i = 0; i != nElts; ++i) {
// Comma between elements but not after the last one.
1573 if ((i + 1) < nElts)
// SplatLane - build a __builtin_shufflevector call that broadcasts lane
// 'lane' of 'vec' across all nElts lanes of the result.
1581 static std::string SplatLane(unsigned nElts, const std::string &vec,
1582 const std::string &lane) {
1583 std::string s = "__builtin_shufflevector(" + vec + ", " + vec;
// Each result lane takes its value from the same source lane index.
1584 for (unsigned i = 0; i < nElts; ++i)
// RemoveHigh - strip "_high" from an intrinsic name, e.g. "vshll_high_n"
// becomes "vshll_n". Fatal error if the name has no "_high_" infix.
1590 static std::string RemoveHigh(const std::string &name) {
1591 std::string s = name;
1592 std::size_t found = s.find("_high_");
1593 if (found == std::string::npos)
1594 PrintFatalError("name should contain \"_high_\" for high intrinsics");
// Erase 5 chars ("_high"), deliberately keeping one of the two underscores
// so the remaining suffix stays attached, e.g. "vshll" + "_n".
1595 s.replace(found, 5, "");
// GetNumElements - number of lanes of a 64-bit NEON vector of the given base
// type; doubled via 'quad' (set as an out-parameter) for 128-bit Q vectors.
// NOTE(review): listing is elided - the 'd' (float64) case and the switch
// header (original lines 1600-1614) are not fully visible here.
1599 static unsigned GetNumElements(StringRef typestr, bool &quad) {
1602 char type = ClassifyType(typestr, quad, dummy, dummy);
1605 case 'c': nElts = 8; break;
1606 case 's': nElts = 4; break;
1607 case 'i': nElts = 2; break;
1608 case 'l': nElts = 1; break;
// Half floats are 16-bit, so 4 fit in 64 bits; floats are 32-bit, so 2 fit.
1609 case 'h': nElts = 4; break;
1610 case 'f': nElts = 2; break;
1615 PrintFatalError("unhandled type!");
// Q (quad/128-bit) vectors have twice as many lanes.
1617 if (quad) nElts <<= 1;
1621 // Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
// Giant switch over OpKind: each case appends the C expansion string for one
// pseudo-op to 's'. Arguments are named __a, __b, __c, __d; "\\\n" sequences
// produce line continuations inside macro bodies.
// NOTE(review): this listing is elided - most 'case'/'break'/'return' lines
// are not visible in this chunk; only the payload lines remain.
1622 static std::string GenOpString(const std::string &name, OpKind op,
1623 const std::string &proto, StringRef typestr) {
1625 unsigned nElts = GetNumElements(typestr, quad);
1626 bool define = UseMacro(proto);
// ts = return type string (proto[0] is the result modifier).
1628 std::string ts = TypeString(proto[0], typestr);
// --- Widening adds: vaddl / vaddl_high / vaddw / vaddw_high ---
1639 s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
1642 s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";";
1645 s += "__a + " + Extend(typestr, "__b") + ";";
1648 s += "__a + " + Extend(typestr, "__b", 1) + ";";
// --- Widening subtracts: vsubl / vsubl_high / vsubw / vsubw_high ---
1654 s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
1657 s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";";
1660 s += "__a - " + Extend(typestr, "__b") + ";";
1663 s += "__a - " + Extend(typestr, "__b", 1) + ";";
// --- Multiply by scalar / by lane ---
1666 s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
1669 s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
1672 s += MangleName("vmulx", typestr, ClassS) + "(__a, " +
1673 SplatLane(nElts, "__b", "__c") + ");";
// --- Fused multiply-add/subtract with scalar ---
1679 s += MangleName("vfma", typestr, ClassS);
1680 s += "(__a, __b, " + Duplicate(nElts,typestr, "__c") + ");";
1683 s += MangleName("vfms", typestr, ClassS);
1684 s += "(__a, __b, " + Duplicate(nElts,typestr, "__c") + ");";
// --- Widening multiply by lane, plus the "_high" form ---
1687 s += MangleName("vmull", typestr, ClassS) + "(__a, " +
1688 SplatLane(nElts, "__b", "__c") + ");";
1691 s += MangleName("vmull", typestr, ClassS) + "(" +
1692 GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
// --- Multiply-accumulate (vmla) variants ---
1695 s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
1698 s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
1701 s += "__a + (__b * __c);";
// --- Widening multiply-accumulate (vmlal) variants ---
1704 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1705 Duplicate(nElts, typestr, "__c") + ");";
1708 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1709 SplatLane(nElts, "__c", "__d") + ");";
1712 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(" +
1713 GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1716 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
1719 s += Gen2OpWith2High(typestr, "vmull", "__a", "__b");
1722 s += MangleName("vmull_n", typestr, ClassS);
1723 s += "(" + GetHigh("__a", typestr) + ", __b);";
1726 s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c");
1729 s += MangleName("vmlal_n", typestr, ClassS);
1730 s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
// --- Multiply-subtract (vmls) variants ---
1733 s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
1736 s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
// vfms_lane is expanded as vfma_lane with a negated multiplicand; locals are
// introduced so macro arguments are evaluated exactly once.
1739 s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
1740 s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n ";
1741 s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n ";
1742 s += MangleName("vfma_lane", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
1745 s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
1746 s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n ";
1747 s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n ";
1748 s += MangleName("vfma_laneq", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
1751 s += "__a - (__b * __c);";
// --- Widening multiply-subtract (vmlsl) variants ---
1754 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1755 Duplicate(nElts, typestr, "__c") + ");";
1758 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1759 SplatLane(nElts, "__c", "__d") + ");";
1762 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(" +
1763 GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1766 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
1769 s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c");
1772 s += MangleName("vmlsl_n", typestr, ClassS);
1773 s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
// --- Saturating doubling multiply (vqdmull/vqdmlal/vqdmlsl) by lane ---
1776 s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
1777 SplatLane(nElts, "__b", "__c") + ");";
1779 case OpQDMullHiLane:
1780 s += MangleName("vqdmull", typestr, ClassS) + "(" +
1781 GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
1784 s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
1785 SplatLane(nElts, "__c", "__d") + ");";
1787 case OpQDMlalHiLane:
1788 s += MangleName("vqdmlal", typestr, ClassS) + "(__a, " +
1789 GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1792 s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
1793 SplatLane(nElts, "__c", "__d") + ");";
1795 case OpQDMlslHiLane:
1796 s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, " +
1797 GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1800 s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
1801 SplatLane(nElts, "__b", "__c") + ");";
1804 s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
1805 SplatLane(nElts, "__b", "__c") + ");";
// --- Comparisons: result is cast to the (unsigned) result vector type ---
1808 s += "(" + ts + ")(__a == __b);";
1811 s += "(" + ts + ")(__a >= __b);";
1814 s += "(" + ts + ")(__a <= __b);";
1817 s += "(" + ts + ")(__a > __b);";
1820 s += "(" + ts + ")(__a < __b);";
// --- Casts / combines / half extraction ---
1844 s += "(" + ts + ")__a;";
// vcombine via a 2-lane shuffle of the operands viewed as int64x1_t.
1847 s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
1848 s += ", (int64x1_t)__b, 0, 1);";
1851 // nElts is for the result vector, so the source is twice that number.
1852 s += "__builtin_shufflevector(__a, __a";
1853 for (unsigned i = nElts; i < nElts * 2; ++i)
1854 s += ", " + utostr(i);
1858 s += "__builtin_shufflevector(__a, __a";
1859 for (unsigned i = 0; i < nElts; ++i)
1860 s += ", " + utostr(i);
// --- Splats ---
1864 s += Duplicate(nElts, typestr, "__a") + ";";
1867 s += SplatLane(nElts, "__a", "__b") + ";";
// --- Bitwise select (vbsl): (mask & b) | (~mask & c) ---
1870 // ((0 & 1) | (~0 & 2))
1871 s += "(" + ts + ")";
1872 ts = TypeString(proto[1], typestr);
1873 s += "((__a & (" + ts + ")__b) | ";
1874 s += "(~__a & (" + ts + ")__c));";
// --- Reverses (vrev16/vrev32/vrev64): swap lanes within sub-words ---
1877 s += "__builtin_shufflevector(__a, __a";
1878 for (unsigned i = 2; i <= nElts; i += 2)
1879 for (unsigned j = 0; j != 2; ++j)
1880 s += ", " + utostr(i - j - 1);
1884 unsigned WordElts = nElts >> (1 + (int)quad);
1885 s += "__builtin_shufflevector(__a, __a";
1886 for (unsigned i = WordElts; i <= nElts; i += WordElts)
1887 for (unsigned j = 0; j != WordElts; ++j)
1888 s += ", " + utostr(i - j - 1);
1893 unsigned DblWordElts = nElts >> (int)quad;
1894 s += "__builtin_shufflevector(__a, __a";
1895 for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
1896 for (unsigned j = 0; j != DblWordElts; ++j)
1897 s += ", " + utostr(i - j - 1);
// --- Narrowing "_high" ops: narrow __b, then concatenate with __a ---
1902 s = TypeString(proto[1], typestr) + " __a1 = " +
1903 MangleName("vmovn", typestr, ClassS) + "(__b);\n " +
1904 "return __builtin_shufflevector(__a, __a1";
1905 for (unsigned i = 0; i < nElts * 4; ++i)
1906 s += ", " + utostr(i);
1911 s = TypeString(proto[1], typestr) + " __a1 = " +
1912 MangleName("vqmovun", typestr, ClassS) + "(__b);\n " +
1913 "return __builtin_shufflevector(__a, __a1";
1914 for (unsigned i = 0; i < nElts * 4; ++i)
1915 s += ", " + utostr(i);
1920 s = TypeString(proto[1], typestr) + " __a1 = " +
1921 MangleName("vqmovn", typestr, ClassS) + "(__b);\n " +
1922 "return __builtin_shufflevector(__a, __a1";
1923 for (unsigned i = 0; i < nElts * 4; ++i)
1924 s += ", " + utostr(i);
// --- Float conversion "_high" variants ---
1929 std::string FName = (nElts == 1) ? "vcvt_f32" : "vcvt_f16";
1930 s = TypeString(proto[1], typestr) + " __a1 = " +
1931 MangleName(FName, typestr, ClassS) + "(__b);\n " +
1932 "return __builtin_shufflevector(__a, __a1";
1933 for (unsigned i = 0; i < nElts * 4; ++i)
1934 s += ", " + utostr(i);
1939 std::string FName = (nElts == 2) ? "vcvt_f64" : "vcvt_f32";
1940 s = TypeString('d', typestr) + " __a1 = " + GetHigh("__a", typestr) +
1941 ";\n return " + MangleName(FName, typestr, ClassS) + "(__a1);";
1945 s = TypeString(proto[1], typestr) + " __a1 = " +
1946 MangleName("vcvtx_f32", typestr, ClassS) + "(__b);\n " +
1947 "return __builtin_shufflevector(__a, __a1";
1948 for (unsigned i = 0; i < nElts * 4; ++i)
1949 s += ", " + utostr(i);
// --- Permutes: vuzp (even/odd), vtrn (transpose), vzip (interleave) ---
1954 s += "__builtin_shufflevector(__a, __b";
1955 for (unsigned i = 0; i < nElts; i++)
1956 s += ", " + utostr(2*i);
1960 s += "__builtin_shufflevector(__a, __b";
1961 for (unsigned i = 0; i < nElts; i++)
1962 s += ", " + utostr(2*i+1);
1966 s += "__builtin_shufflevector(__a, __b";
1967 for (unsigned i = 0; i < (nElts/2); i++)
1968 s += ", " + utostr(i) + ", " + utostr(i+nElts);
1972 s += "__builtin_shufflevector(__a, __b";
1973 for (unsigned i = nElts/2; i < nElts; i++)
1974 s += ", " + utostr(i) + ", " + utostr(i+nElts);
1978 s += "__builtin_shufflevector(__a, __b";
1979 for (unsigned i = 0; i < (nElts/2); i++)
1980 s += ", " + utostr(2*i) + ", " + utostr(2*i+nElts);
1984 s += "__builtin_shufflevector(__a, __b";
1985 for (unsigned i = 0; i < (nElts/2); i++)
1986 s += ", " + utostr(2*i+1) + ", " + utostr(2*i+1+nElts);
// --- Absolute-difference-based ops ---
1990 std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
1991 if (typestr[0] != 'U') {
1992 // vabd results are always unsigned and must be zero-extended.
1993 std::string utype = "U" + typestr.str();
1994 s += "(" + TypeString(proto[0], typestr) + ")";
1995 abd = "(" + TypeString('d', utype) + ")" + abd;
1996 s += Extend(utype, abd) + ";";
1998 s += Extend(typestr, abd) + ";";
2003 s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b");
// --- Narrowing add/sub "_high": narrow (b op c), combine onto __a ---
2006 std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)";
2007 s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn);
2012 std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)";
2013 s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn);
2018 std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)";
2019 s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn);
2024 std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)";
2025 s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn);
2030 s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
2033 s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);";
2036 s += Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c");
// --- Saturating doubling multiply "_high" variants ---
2039 s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b");
2042 s += MangleName("vqdmull_n", typestr, ClassS);
2043 s += "(" + GetHigh("__a", typestr) + ", __b);";
2046 s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c");
2049 s += MangleName("vqdmlal_n", typestr, ClassS);
2050 s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
2053 s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c");
2056 s += MangleName("vqdmlsl_n", typestr, ClassS);
2057 s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
// --- Widening-shift "_high" variants ---
2063 s = TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
2064 MangleName("vget_high", typestr, ClassS) + "(__a);\n " + s;
2065 s += "(" + ts + ")" + MangleName("vshll_n", typestr, ClassS);
2070 // Another local variable __a1 is needed for calling a Macro,
2071 // or using __a directly would cause a naming conflict during macro expansion.
2072 s += TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
2073 MangleName("vget_high", typestr, ClassS) + "(__a); \\\n";
2074 s += " (" + ts + ")" + MangleName(RemoveHigh(name), typestr, ClassS) +
2079 s += "(" + ts + ")" + MangleName("vcombine", typestr, ClassS) + "(__a, " +
2080 MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));";
// --- Copy-lane ops: read a lane from one vector, insert into another ---
2084 s += TypeString('s', typestr) + " __c2 = " +
2085 MangleName("vget_lane", typestr, ClassS) + "(__c1, __d1); \\\n " +
2086 MangleName("vset_lane", typestr, ClassS) + "(__c2, __a1, __b1);";
2090 std::string typeCode = "";
2091 InstructionTypeCode(typestr, ClassS, quad, typeCode);
2092 s += TypeString('s', typestr) + " __c2 = vget_lane_" + typeCode +
2093 "(__c1, __d1); \\\n vsetq_lane_" + typeCode + "(__c2, __a1, __b1);";
2097 std::string typeCode = "";
2098 InstructionTypeCode(typestr, ClassS, quad, typeCode);
2099 s += TypeString('s', typestr) + " __c2 = vgetq_lane_" + typeCode +
2100 "(__c1, __d1); \\\n vset_lane_" + typeCode + "(__c2, __a1, __b1);";
// --- Scalar-by-lane multiplies ---
2103 case OpScalarMulLane: {
2104 std::string typeCode = "";
2105 InstructionTypeCode(typestr, ClassS, quad, typeCode);
2106 s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
2107 "(__b, __c);\\\n __a * __d1;";
2110 case OpScalarMulLaneQ: {
2111 std::string typeCode = "";
2112 InstructionTypeCode(typestr, ClassS, quad, typeCode);
2113 s += TypeString('s', typestr) + " __d1 = vgetq_lane_" + typeCode +
2114 "(__b, __c);\\\n __a * __d1;";
2117 case OpScalarMulXLane: {
// For float types the vmulx suffix letter is 's' (scalar single-precision).
2119 char type = ClassifyType(typestr, dummy, dummy, dummy);
2120 if (type == 'f') type = 's';
2121 std::string typeCode = "";
2122 InstructionTypeCode(typestr, ClassS, quad, typeCode);
2123 s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
2124 "(__b, __c);\\\n vmulx" + type + "_" +
2125 typeCode + "(__a, __d1);";
2128 case OpScalarMulXLaneQ: {
2130 char type = ClassifyType(typestr, dummy, dummy, dummy);
2131 if (type == 'f') type = 's';
2132 std::string typeCode = "";
2133 InstructionTypeCode(typestr, ClassS, quad, typeCode);
2134 s += TypeString('s', typestr) + " __d1 = vgetq_lane_" +
2135 typeCode + "(__b, __c);\\\n vmulx" + type +
2136 "_" + typeCode + "(__a, __d1);";
2140 case OpScalarVMulXLane: {
// Vector form: extract lane 0 of __a and lane __c of __b, vmulx them as
// scalars, then insert the result into lane __c of an (uninitialized) vector.
// NOTE(review): __g1 is used uninitialized in the generated code except for
// the written lane - presumably intentional, but worth confirming.
2142 char type = ClassifyType(typestr, dummy, dummy, dummy);
2143 if (type == 'f') type = 's';
2144 std::string typeCode = "";
2145 InstructionTypeCode(typestr, ClassS, quad, typeCode);
2146 s += TypeString('s', typestr) + " __d1 = vget_lane_" +
2147 typeCode + "(__a, 0);\\\n" +
2148 " " + TypeString('s', typestr) + " __e1 = vget_lane_" +
2149 typeCode + "(__b, __c);\\\n" +
2150 " " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" +
2151 typeCode + "(__d1, __e1);\\\n" +
2152 " " + TypeString('d', typestr) + " __g1;\\\n" +
2153 " vset_lane_" + typeCode + "(__f1, __g1, __c);";
2157 case OpScalarVMulXLaneQ: {
2159 char type = ClassifyType(typestr, dummy, dummy, dummy);
2160 if (type == 'f') type = 's';
2161 std::string typeCode = "";
2162 InstructionTypeCode(typestr, ClassS, quad, typeCode);
2163 s += TypeString('s', typestr) + " __d1 = vget_lane_" +
2164 typeCode + "(__a, 0);\\\n" +
2165 " " + TypeString('s', typestr) + " __e1 = vgetq_lane_" +
2166 typeCode + "(__b, __c);\\\n" +
2167 " " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" +
2168 typeCode + "(__d1, __e1);\\\n" +
2169 " " + TypeString('d', typestr) + " __g1;\\\n" +
2170 " vset_lane_" + typeCode + "(__f1, __g1, 0);";
2173 case OpScalarQDMullLane: {
2174 std::string typeCode = "";
2175 InstructionTypeCode(typestr, ClassS, quad, typeCode);
// BUG(review): the generated string references bare "b", but macro arguments
// are named "__b" (see GenArgs) - this almost certainly should be "(__b, __c)"
// as in the OpScalarQDMulHiLane cases below. Confirm against upstream fix.
2176 s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
2177 "vget_lane_" + typeCode + "(b, __c));";
2180 case OpScalarQDMullLaneQ: {
2181 std::string typeCode = "";
2182 InstructionTypeCode(typestr, ClassS, quad, typeCode);
// BUG(review): same bare "b" vs "__b" issue as OpScalarQDMullLane above.
2183 s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
2184 "vgetq_lane_" + typeCode + "(b, __c));";
2187 case OpScalarQDMulHiLane: {
2188 std::string typeCode = "";
2189 InstructionTypeCode(typestr, ClassS, quad, typeCode);
2190 s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
2191 "vget_lane_" + typeCode + "(__b, __c));";
2194 case OpScalarQDMulHiLaneQ: {
2195 std::string typeCode = "";
2196 InstructionTypeCode(typestr, ClassS, quad, typeCode);
2197 s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
2198 "vgetq_lane_" + typeCode + "(__b, __c));";
2201 case OpScalarQRDMulHiLane: {
2202 std::string typeCode = "";
2203 InstructionTypeCode(typestr, ClassS, quad, typeCode);
2204 s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
2205 "vget_lane_" + typeCode + "(__b, __c));";
2208 case OpScalarQRDMulHiLaneQ: {
2209 std::string typeCode = "";
2210 InstructionTypeCode(typestr, ClassS, quad, typeCode);
2211 s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
2212 "vgetq_lane_" + typeCode + "(__b, __c));";
// --- fp16 scalar get/set lane: go via an int16 reinterpret since there is
// no direct __fp16 lane access ---
2215 case OpScalarGetLane:{
2216 std::string typeCode = "";
2217 InstructionTypeCode(typestr, ClassS, quad, typeCode);
2219 s += "int16x8_t __a1 = vreinterpretq_s16_f16(__a);\\\n";
2220 s += " vgetq_lane_s16(__a1, __b);";
2222 s += "int16x4_t __a1 = vreinterpret_s16_f16(__a);\\\n";
2223 s += " vget_lane_s16(__a1, __b);";
2227 case OpScalarSetLane:{
2228 std::string typeCode = "";
2229 InstructionTypeCode(typestr, ClassS, quad, typeCode);
2230 s += "int16_t __a1 = (int16_t)__a;\\\n";
// BUG(review): "vreinterpretq_s16_f16(b)" references bare "b" - macro args
// are named "__b"; should likely be "(__b)". Same in the non-quad branch.
2232 s += " int16x8_t __b1 = vreinterpretq_s16_f16(b);\\\n";
2233 s += " int16x8_t __b2 = vsetq_lane_s16(__a1, __b1, __c);\\\n";
2234 s += " vreinterpretq_f16_s16(__b2);";
2236 s += " int16x4_t __b1 = vreinterpret_s16_f16(b);\\\n";
2237 s += " int16x4_t __b2 = vset_lane_s16(__a1, __b1, __c);\\\n";
2238 s += " vreinterpret_f16_s16(__b2);";
2244 PrintFatalError("unknown OpKind!");
// GetNeonEnum - compute the NeonTypeFlags bitmask (element type, signedness,
// quad-ness) that is passed as the trailing constant argument to ClassB
// builtins so Sema/CodeGen know the real element type.
// NOTE(review): listing is elided - the switch headers and break statements
// between the ET assignments are not visible here.
2249 static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
2250 unsigned mod = proto[0];
// For void / float-returning modifiers, fall back to the first argument's
// modifier to classify the type.
2252 if (mod == 'v' || mod == 'f' || mod == 'F')
2262 // Base type to get the type string for.
2263 char type = ClassifyType(typestr, quad, poly, usgn);
2265 // Based on the modifying character, change the type and width if necessary.
2266 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
2268 NeonTypeFlags::EltType ET;
2271 ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8;
2274 ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16;
2277 ET = NeonTypeFlags::Int32;
2280 ET = poly ? NeonTypeFlags::Poly64 : NeonTypeFlags::Int64;
2283 ET = NeonTypeFlags::Float16;
2286 ET = NeonTypeFlags::Float32;
2289 ET = NeonTypeFlags::Float64;
2292 PrintFatalError("unhandled type!");
// 'g' forces a non-quad (64-bit) first argument even for Q types.
2294 NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g');
2295 return Flags.getFlags();
2298 // We don't check 'a' in this function, because for builtin function the
2299 // argument matching to 'a' uses a vector type splatted from a scalar type.
// Returns true if any prototype modifier denotes a scalar operand; such
// builtins keep their real signature instead of the bitcast-everything
// ClassB form. NOTE(review): 'proto' is passed by value - const& would avoid
// a copy, but changing it here is out of scope for a comment-only pass.
2300 static bool ProtoHasScalar(const std::string proto)
2302 return (proto.find('s') != std::string::npos
2303 || proto.find('z') != std::string::npos
2304 || proto.find('r') != std::string::npos
2305 || proto.find('b') != std::string::npos
2306 || proto.find('$') != std::string::npos
2307 || proto.find('y') != std::string::npos
2308 || proto.find('o') != std::string::npos);
2311 // Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
// Emits the body of an intrinsic that lowers to a __builtin_neon_* call:
// casts the arguments to the builtin's expected (bitcast) types, handles
// sret-style multi-vector returns, splat ('a') operands, and appends the
// NeonTypeFlags constant for ClassB builtins.
// NOTE(review): listing is elided - several statements (e.g. the sret
// temporary setup around original lines 2330-2360) are not visible here.
2312 static std::string GenBuiltin(const std::string &name, const std::string &proto,
2313 StringRef typestr, ClassKind ck) {
2316 // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
2317 // sret-like argument.
2318 bool sret = IsMultiVecProto(proto[0]);
2320 bool define = UseMacro(proto);
2322 // Check if the prototype has a scalar operand with the type of the vector
2323 // elements. If not, bitcasting the args will take care of arg checking.
2324 // The actual signedness etc. will be taken care of with special enums.
2325 if (!ProtoHasScalar(proto))
2328 if (proto[0] != 'v') {
2329 std::string ts = TypeString(proto[0], typestr);
// Macro bodies just cast; inline functions need an explicit return.
2335 s += "(" + ts + ")";
2339 s += "return (" + ts + ")";
// 'a' marks an operand that is a scalar splatted to a vector.
2343 bool splat = proto.find('a') != std::string::npos;
2345 s += "__builtin_neon_";
2347 // Call the non-splat builtin: chop off the "_n" suffix from the name.
2348 std::string vname(name, 0, name.size()-2);
2349 s += MangleName(vname, typestr, ck);
2351 s += MangleName(name, typestr, ck);
2355 // Pass the address of the return variable as the first argument to sret-like
2361 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
2362 std::string args = std::string(&arg, 1);
2364 // Use the local temporaries instead of the macro arguments.
2367 bool argQuad = false;
2368 bool argPoly = false;
2369 bool argUsgn = false;
2370 bool argScalar = false;
2372 char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
2373 argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
2376 // Handle multiple-vector values specially, emitting each subvector as an
2377 // argument to the __builtin.
// '2'-'4' are value multi-vectors; 'B'-'D' are their by-reference forms.
2378 unsigned NumOfVec = 0;
2379 if (proto[i] >= '2' && proto[i] <= '4') {
2380 NumOfVec = proto[i] - '0';
2381 } else if (proto[i] >= 'B' && proto[i] <= 'D') {
2382 NumOfVec = proto[i] - 'A' + 1;
2386 // Check if an explicit cast is needed.
2387 if (argType != 'c' || argPoly || argUsgn)
2388 args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;
2390 for (unsigned vi = 0, ve = NumOfVec; vi != ve; ++vi) {
2391 s += args + ".val[" + utostr(vi) + "]";
// The last argument of a splat builtin is duplicated across all lanes.
2401 if (splat && (i + 1) == e)
2402 args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);
2404 // Check if an explicit cast is needed.
2405 if ((splat || !argScalar) &&
2406 ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
2407 std::string argTypeStr = "c";
2409 argTypeStr = argType;
2411 argTypeStr = "Q" + argTypeStr;
2412 args = "(" + TypeString('d', argTypeStr) + ")" + args;
2420 // Extra constant integer to hold type class enum for this function, e.g. s8
2422 s += ", " + utostr(GetNeonEnum(proto, typestr));
// Non-void sret builtins must load and return the temporary afterwards.
2426 if (proto[0] != 'v' && sret) {
// GenBuiltinDef - emit the Builtins.def line, e.g.
// "BUILTIN(__builtin_neon_..., "...", "n")", describing one builtin's
// signature to clang's builtin tables.
2435 static std::string GenBuiltinDef(const std::string &name,
2436 const std::string &proto,
2437 StringRef typestr, ClassKind ck) {
2438 std::string s("BUILTIN(__builtin_neon_");
2440 // If all types are the same size, bitcasting the args will take care
2441 // of arg checking. The actual signedness etc. will be taken care of with
2443 if (!ProtoHasScalar(proto))
2446 s += MangleName(name, typestr, ck);
// proto[0] (i == 0) is the return type; the rest are parameters.
2449 for (unsigned i = 0, e = proto.size(); i != e; ++i)
2450 s += BuiltinTypeString(proto[i], typestr, ck, i == 0);
2452 // Extra constant integer to hold type class enum for this function, e.g. s8
// GenIntrinsic - emit the complete arm_neon.h definition for one intrinsic
// instance: declaration (inline fn or macro), then the body produced by
// GenOpString (for pseudo-ops) or GenBuiltin (for real builtins).
// outTypeStr/inTypeStr differ only for vreinterpret-style casts.
2460 static std::string GenIntrinsic(const std::string &name,
2461 const std::string &proto,
2462 StringRef outTypeStr, StringRef inTypeStr,
2463 OpKind kind, ClassKind classKind) {
2464 assert(!proto.empty() && "");
// Unavailable intrinsics are declared (for diagnostics) but never as macros.
2465 bool define = UseMacro(proto) && kind != OpUnavailable;
2468 // static always inline + return type
2472 s += "__ai " + TypeString(proto[0], outTypeStr) + " ";
2474 // Function name with type suffix
2475 std::string mangledName = MangleName(name, outTypeStr, ClassS);
2476 if (outTypeStr != inTypeStr) {
2477 // If the input type is different (e.g., for vreinterpret), append a suffix
2478 // for the input type. String off a "Q" (quad) prefix so that MangleName
2479 // does not insert another "q" in the name.
2480 unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
2481 StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
2482 mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
2486 // Function arguments
2487 s += GenArgs(proto, inTypeStr, name);
// Macro bodies are wrapped in a GNU statement expression so they can declare
// the type-checking local temporaries.
2491 s += " __extension__ ({ \\\n ";
2492 s += GenMacroLocals(proto, inTypeStr, name);
2493 } else if (kind == OpUnavailable) {
2494 s += " __attribute__((unavailable));\n";
// OpNone means "lower to a __builtin"; anything else is a pseudo-op expanded
// by GenOpString.
2500 s += GenOpString(name, kind, proto, outTypeStr);
2502 s += GenBuiltin(name, proto, outTypeStr, classKind);
2511 /// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h
2512 /// is comprised of type definitions and function declarations.
// NOTE(review): listing is elided - the license-banner string has gaps and
// several statements (e.g. the 'isA64' initialization before the typedef
// loops) are not visible in this chunk.
2513 void NeonEmitter::run(raw_ostream &OS) {
2515 "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
2518 " * Permission is hereby granted, free of charge, to any person obtaining "
2520 " * of this software and associated documentation files (the \"Software\"),"
2522 " * in the Software without restriction, including without limitation the "
2524 " * to use, copy, modify, merge, publish, distribute, sublicense, "
2526 " * copies of the Software, and to permit persons to whom the Software is\n"
2527 " * furnished to do so, subject to the following conditions:\n"
2529 " * The above copyright notice and this permission notice shall be "
2531 " * all copies or substantial portions of the Software.\n"
2533 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
2535 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
2536 "MERCHANTABILITY,\n"
2537 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
2539 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
2541 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
2543 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
2545 " * THE SOFTWARE.\n"
2547 " *===--------------------------------------------------------------------"
// Header guard and feature-test error.
2551 OS << "#ifndef __ARM_NEON_H\n";
2552 OS << "#define __ARM_NEON_H\n\n";
2554 OS << "#if !defined(__ARM_NEON__) && !defined(__ARM_NEON)\n";
2555 OS << "#error \"NEON support not enabled\"\n";
2558 OS << "#include <stdint.h>\n\n";
2560 // Emit NEON-specific scalar typedefs.
2561 OS << "typedef float float32_t;\n";
2562 OS << "typedef __fp16 float16_t;\n";
// float64_t only exists on AArch64.
2564 OS << "#ifdef __aarch64__\n";
2565 OS << "typedef double float64_t;\n";
2568 // For now, signedness of polynomial types depends on target
2569 OS << "#ifdef __aarch64__\n";
2570 OS << "typedef uint8_t poly8_t;\n";
2571 OS << "typedef uint16_t poly16_t;\n";
2572 OS << "typedef uint64_t poly64_t;\n";
2574 OS << "typedef int8_t poly8_t;\n";
2575 OS << "typedef int16_t poly16_t;\n";
2578 // Emit Neon vector typedefs.
2579 std::string TypedefTypes(
2580 "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl");
2581 SmallVector<StringRef, 24> TDTypeVec;
2582 ParseTypes(0, TypedefTypes, TDTypeVec);
2584 // Emit vector typedefs.
2588 for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
2589 bool dummy, quad = false, poly = false;
2590 char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
// float64 and poly64 vectors are AArch64-only: open/close an #ifdef
// __aarch64__ region around their typedefs.
2594 if (type == 'd' || (type == 'l' && poly)) {
2595 preinsert = isA64? false: true;
2598 postinsert = isA64? true: false;
2604 OS << "#ifdef __aarch64__\n";
2607 OS << "typedef __attribute__((neon_polyvector_type(";
2609 OS << "typedef __attribute__((neon_vector_type(";
2611 unsigned nElts = GetNumElements(TDTypeVec[i], quad);
2612 OS << utostr(nElts) << "))) ";
2616 OS << TypeString('s', TDTypeVec[i]);
2617 OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";
2620 postinsert = isA64? true: false;
2625 // Emit struct typedefs.
// vi = 2..4: the NxM multi-vector struct types (e.g. int8x8x2_t).
2627 for (unsigned vi = 2; vi != 5; ++vi) {
2628 for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
2629 bool dummy, quad = false, poly = false;
2630 char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
2634 if (type == 'd' || (type == 'l' && poly)) {
2635 preinsert = isA64? false: true;
2638 postinsert = isA64? true: false;
2644 OS << "#ifdef __aarch64__\n";
2646 std::string ts = TypeString('d', TDTypeVec[i]);
2647 std::string vs = TypeString('0' + vi, TDTypeVec[i]);
2648 OS << "typedef struct " << vs << " {\n";
2649 OS << " " << ts << " val";
2650 OS << "[" << utostr(vi) << "]";
2656 postinsert = isA64? true: false;
2661 OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";
2663 std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
2665 StringMap<ClassKind> EmittedMap;
2667 // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
2668 // intrinsics. (Some of the saturating multiply instructions are also
2669 // used to implement the corresponding "_lane" variants, but tablegen
2670 // sorts the records into alphabetical order so that the "_lane" variants
2671 // come after the intrinsics they use.)
2672 emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap);
2673 emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap);
2674 emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap);
2675 emitIntrinsic(OS, Records.getDef("VABDL"), EmittedMap);
2677 // ARM intrinsics must be emitted before AArch64 intrinsics to ensure
2678 // common intrinsics appear only once in the output stream.
2679 // The check for uniqueness is done in emitIntrinsic.
2680 // Emit ARM intrinsics.
2681 for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2684 // Skip AArch64 intrinsics; they will be emitted at the end.
2685 bool isA64 = R->getValueAsBit("isA64");
// VMOVL/VMULL/VABD were already emitted above (VABDL is A64-only, so it is
// not re-checked here).
2689 if (R->getName() != "VMOVL" && R->getName() != "VMULL" &&
2690 R->getName() != "VABD")
2691 emitIntrinsic(OS, R, EmittedMap);
2694 // Emit AArch64-specific intrinsics.
2695 OS << "#ifdef __aarch64__\n";
2697 emitIntrinsic(OS, Records.getDef("VMOVL_HIGH"), EmittedMap);
2698 emitIntrinsic(OS, Records.getDef("VMULL_HIGH"), EmittedMap);
2699 emitIntrinsic(OS, Records.getDef("VABDL_HIGH"), EmittedMap);
2701 for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2704 // Skip ARM intrinsics already included above.
2705 bool isA64 = R->getValueAsBit("isA64");
2709 // Defer crypto intrinsics; they are all emitted together at the end.
2710 bool isCrypto = R->getValueAsBit("isCrypto");
2714 emitIntrinsic(OS, R, EmittedMap);
// Crypto intrinsics are guarded by their own feature macro.
2717 OS << "#ifdef __ARM_FEATURE_CRYPTO\n";
2719 for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2722 // Only emit the crypto intrinsics in this final pass.
2723 bool isCrypto = R->getValueAsBit("isCrypto");
2727 emitIntrinsic(OS, R, EmittedMap);
2734 OS << "#undef __ai\n\n";
2735 OS << "#endif /* __ARM_NEON_H */\n";
2738 /// emitIntrinsic - Write out the arm_neon.h header file definitions for the
2739 /// intrinsics specified by record R checking for intrinsic uniqueness.
// Deduplication: the generated definition string itself is the key into
// EmittedMap, so an identical definition is emitted at most once.
2740 void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R,
2741 StringMap<ClassKind> &EmittedMap) {
2742 std::string name = R->getValueAsString("Name");
2743 std::string Proto = R->getValueAsString("Prototype");
2744 std::string Types = R->getValueAsString("Types");
2746 SmallVector<StringRef, 16> TypeVec;
2747 ParseTypes(R, Types, TypeVec);
2749 OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
2751 ClassKind classKind = ClassNone;
// Superclass [1] (beyond "Inst") determines the instruction class.
2752 if (R->getSuperClasses().size() >= 2)
2753 classKind = ClassMap[R->getSuperClasses()[1]];
2754 if (classKind == ClassNone && kind == OpNone)
2755 PrintFatalError(R->getLoc(), "Builtin has no class kind");
2757 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
// vreinterpret: emit one definition per (out, in) type pair of equal width.
2758 if (kind == OpReinterpret) {
2759 bool outQuad = false;
2761 (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
2762 for (unsigned srcti = 0, srcte = TypeVec.size();
2763 srcti != srcte; ++srcti) {
2764 bool inQuad = false;
2765 (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
// Skip identity casts and mismatched (64- vs 128-bit) widths.
2766 if (srcti == ti || inQuad != outQuad)
2768 std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
2770 if (EmittedMap.count(s))
2772 EmittedMap[s] = ClassS;
// Ordinary intrinsics: out and in types are the same.
2777 GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind);
2778 if (EmittedMap.count(s))
2780 EmittedMap[s] = classKind;
/// RangeFromType - Return the maximum valid lane index (lane count - 1) for
/// the vector type described by \p typestr after applying modifier \p mod.
/// A quad (128-bit) vector has twice the lanes of a 64-bit one, hence the
/// (n << quad) - 1 computations below. The switch-case labels that select
/// among the return statements are elided from this view; presumably they
/// dispatch on the element width of 'type' -- TODO confirm against the
/// upstream file.
2787 static unsigned RangeFromType(const char mod, StringRef typestr) {
2788 // base type to get the type string for.
2789 bool quad = false, dummy = false;
2790 char type = ClassifyType(typestr, quad, dummy, dummy);
2791 type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);
// 8 lanes per 64 bits: 8-bit elements.
2795 return (8 << (int)quad) - 1;
// 4 lanes per 64 bits: 16-bit elements.
2798 return (4 << (int)quad) - 1;
// 2 lanes per 64 bits: 32-bit elements.
2801 return (2 << (int)quad) - 1;
// 1 lane per 64 bits: 64-bit elements.
2804 return (1 << (int)quad) - 1;
2806 PrintFatalError("unhandled type!");
/// RangeScalarShiftImm - Return the upper bound for a scalar shift immediate
/// of the element type described by \p typestr after applying modifier
/// \p mod. The per-type switch that produces the bound is elided from this
/// view; presumably it returns a value tied to the element bit-width --
/// TODO confirm against the upstream file.
2810 static unsigned RangeScalarShiftImm(const char mod, StringRef typestr) {
2811 // base type to get the type string for.
2813 char type = ClassifyType(typestr, dummy, dummy, dummy);
2814 type = ModType(mod, type, dummy, dummy, dummy, dummy, dummy, dummy);
// Reached only when no case above handled 'type'.
2829 PrintFatalError("unhandled type!");
2833 /// Generate the ARM and AArch64 intrinsic range checking code for
2834 /// shift/lane immediates, checking for unique declarations.
///
/// Emits "case <target>::BI__builtin_neon_<name>: i = <idx>; l = ...; u = ...;"
/// switch cases, bracketed by GET_NEON_IMMEDIATE_CHECK or
/// GET_NEON_AARCH64_IMMEDIATE_CHECK guards, for inclusion in SemaChecking.
2836 NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
2837 StringMap<ClassKind> &A64IntrinsicMap,
2838 bool isA64RangeCheck) {
2839 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
// Used below to make sure each mangled case label is emitted only once.
2840 StringMap<OpKind> EmittedMap;
2842 // Generate the intrinsic range checking code for shift/lane immediates.
2843 if (isA64RangeCheck)
2844 OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n";
2846 OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
2848 for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2851 OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
2855 std::string name = R->getValueAsString("Name");
2856 std::string Proto = R->getValueAsString("Prototype");
2857 std::string Types = R->getValueAsString("Types");
// "name@proto" uniquely identifies an intrinsic signature across targets.
2858 std::string Rename = name + "@" + Proto;
2860 // Functions with 'a' (the splat code) in the type prototype should not get
2861 // their own builtin as they use the non-splat variant.
2862 if (Proto.find('a') != std::string::npos)
2865 // Functions which do not have an immediate do not need to have range
2866 // checking code emitted.
2867 size_t immPos = Proto.find('i')
2868 if (immPos == std::string::npos)
2871 SmallVector<StringRef, 16> TypeVec;
2872 ParseTypes(R, Types, TypeVec);
2874 if (R->getSuperClasses().size() < 2)
2875 PrintFatalError(R->getLoc(), "Builtin has no class kind");
2877 ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2878 if (!ProtoHasScalar(Proto))
2881 // Do not include AArch64 range checks if not generating code for AArch64.
2882 bool isA64 = R->getValueAsBit("isA64");
2883 if (!isA64RangeCheck && isA64)
2886 // Include ARM range checks in AArch64 but only if ARM intrinsics are not
2887 // redefined by AArch64 to handle new types.
2888 if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
2889 ClassKind &A64CK = A64IntrinsicMap[Rename];
2890 if (A64CK == ck && ck != ClassNone)
2894 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
// rangestr ends up as "l = <low>; u = <high>" where the checked range
// is [l, l + u] (see the "upper bound = l + u" note below).
2895 std::string namestr, shiftstr, rangestr;
2897 if (R->getValueAsBit("isVCVT_N")) {
2898 // VCVT between floating- and fixed-point values takes an immediate
2899 // in the range [1, 32] for f32, or [1, 64] for f64.
2901 if (name.find("32") != std::string::npos)
2902 rangestr = "l = 1; u = 31"; // upper bound = l + u
2903 else if (name.find("64") != std::string::npos)
2904 rangestr = "l = 1; u = 63";
// NOTE(review): message grammar -- should read "should contain".
2906 PrintFatalError(R->getLoc(),
2907 "Fixed point convert name should contains \"32\" or \"64\"");
2909 } else if (R->getValueAsBit("isScalarShift")) {
2910 // Right shifts have an 'r' in the name, left shifts do not. Convert
2911 // instructions have the same bounds and right shifts.
2912 if (name.find('r') != std::string::npos ||
2913 name.find("cvt") != std::string::npos)
2914 rangestr = "l = 1; ";
// Upper bound is derived from the element type of the operand that
// precedes the immediate in the prototype.
2916 unsigned upBound = RangeScalarShiftImm(Proto[immPos - 1], TypeVec[ti]);
2917 // Narrow shift has half the upper bound
2918 if (R->getValueAsBit("isScalarNarrowShift"))
2921 rangestr += "u = " + utostr(upBound);
2922 } else if (R->getValueAsBit("isShift")) {
2923 // Builtins which are overloaded by type will need to have their upper
2924 // bound computed at Sema time based on the type constant.
2925 shiftstr = ", true";
2927 // Right shifts have an 'r' in the name, left shifts do not.
2928 if (name.find('r') != std::string::npos)
2929 rangestr = "l = 1; ";
// RFT(TV[, true]) is expanded on the Sema side to compute the bound
// from the type constant at compile time.
2931 rangestr += "u = RFT(TV" + shiftstr + ")";
2933 // The immediate generally refers to a lane in the preceding argument.
2934 assert(immPos > 0 && "unexpected immediate operand");
2936 "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti]));
2938 // Make sure cases appear only once by uniquing them in a string map.
2939 namestr = MangleName(name, TypeVec[ti], ck);
2940 if (EmittedMap.count(namestr))
2942 EmittedMap[namestr] = OpNone;
2944 // Calculate the index of the immediate that should be range checked.
2945 unsigned immidx = 0;
2947 // Builtins that return a struct of multiple vectors have an extra
2948 // leading arg for the struct return.
2949 if (IsMultiVecProto(Proto[0]))
2952 // Add one to the index for each argument until we reach the immediate
2953 // to be checked. Structs of vectors are passed as multiple arguments.
2954 for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
2955 switch (Proto[ii]) {
// Finally emit the switch case for this builtin, prefixed for the
// requested target namespace.
2976 if (isA64RangeCheck)
2977 OS << "case AArch64::BI__builtin_neon_";
2979 OS << "case ARM::BI__builtin_neon_";
2980 OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; "
2981 << rangestr << "; break;\n";
2987 /// Generate the ARM and AArch64 overloaded type checking code for
2988 /// SemaChecking.cpp, checking for unique builtin declarations.
///
/// For each overloaded builtin this emits a "case <target>::BI__builtin_neon_
/// <name>: mask = 0x...ULL" switch case (optionally with PtrArgNum and
/// HasConstPtr), guarded by GET_NEON_OVERLOAD_CHECK or
/// GET_NEON_AARCH64_OVERLOAD_CHECK.
2990 NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
2991 StringMap<ClassKind> &A64IntrinsicMap,
2992 bool isA64TypeCheck) {
2993 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2994 StringMap<OpKind> EmittedMap;
2996 // Generate the overloaded type checking code for SemaChecking.cpp
2998 OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n";
3000 OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
3002 for (unsigned i = 0, e = RV.size(); i != e; ++i) {
3004 OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
3008 std::string Proto = R->getValueAsString("Prototype");
3009 std::string Types = R->getValueAsString("Types");
3010 std::string name = R->getValueAsString("Name");
// "name@proto" uniquely identifies an intrinsic signature across targets.
3011 std::string Rename = name + "@" + Proto;
3013 // Functions with 'a' (the splat code) in the type prototype should not get
3014 // their own builtin as they use the non-splat variant.
3015 if (Proto.find('a') != std::string::npos)
3018 // Functions which have a scalar argument cannot be overloaded, no need to
3019 // check them if we are emitting the type checking code.
3020 if (ProtoHasScalar(Proto))
3023 SmallVector<StringRef, 16> TypeVec;
3024 ParseTypes(R, Types, TypeVec);
3026 if (R->getSuperClasses().size() < 2)
3027 PrintFatalError(R->getLoc(), "Builtin has no class kind");
3029 // Do not include AArch64 type checks if not generating code for AArch64.
3030 bool isA64 = R->getValueAsBit("isA64");
3031 if (!isA64TypeCheck && isA64)
3034 // Include ARM type check in AArch64 but only if ARM intrinsics
3035 // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
3036 // redefined in AArch64 to handle an additional 2 x f64 type.
3037 ClassKind ck = ClassMap[R->getSuperClasses()[1]];
3038 if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
3039 ClassKind &A64CK = A64IntrinsicMap[Rename];
3040 if (A64CK == ck && ck != ClassNone)
// Build one bitmask of valid NEON type enums for the 64-bit ("d")
// variants (mask/si) and one for the 128-bit "q" variants (qmask/qi).
// si/qi remember a representative type index for the mangled case label;
// the assignments to them are elided from this view.
3044 int si = -1, qi = -1;
3045 uint64_t mask = 0, qmask = 0;
3046 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
3047 // Generate the switch case(s) for this builtin for the type validation.
3048 bool quad = false, poly = false, usgn = false;
3049 (void) ClassifyType(TypeVec[ti], quad, poly, usgn);
3053 qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
3056 mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
3060 // Check if the builtin function has a pointer or const pointer argument.
3062 bool HasConstPtr = false;
3063 for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
3064 char ArgType = Proto[arg];
// 'c' = const pointer argument, 'p' = (non-const) pointer argument;
// PtrArgNum is the zero-based builtin-call argument index.
3065 if (ArgType == 'c') {
3067 PtrArgNum = arg - 1;
3070 if (ArgType == 'p') {
3071 PtrArgNum = arg - 1;
3075 // For sret builtins, adjust the pointer argument index.
3076 if (PtrArgNum >= 0 && IsMultiVecProto(Proto[0]))
3079 // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
3080 // and vst1_lane intrinsics. Using a pointer to the vector element
3081 // type with one of those operations causes codegen to select an aligned
3082 // load/store instruction. If you want an unaligned operation,
3083 // the pointer argument needs to have less alignment than element type,
3084 // so just accept any pointer type.
3085 if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
3087 HasConstPtr = false;
// Emit the case for the 64-bit variants, if any were seen (si >= 0).
3092 OS << "case AArch64::BI__builtin_neon_";
3094 OS << "case ARM::BI__builtin_neon_";
3095 OS << MangleName(name, TypeVec[si], ClassB) << ": mask = "
3096 << "0x" << utohexstr(mask) << "ULL";
3098 OS << "; PtrArgNum = " << PtrArgNum;
3100 OS << "; HasConstPtr = true";
// And the corresponding case for the 128-bit "q" variants.
3105 OS << "case AArch64::BI__builtin_neon_";
3107 OS << "case ARM::BI__builtin_neon_";
3108 OS << MangleName(name, TypeVec[qi], ClassB) << ": mask = "
3109 << "0x" << utohexstr(qmask) << "ULL";
3111 OS << "; PtrArgNum = " << PtrArgNum;
3113 OS << "; HasConstPtr = true";
3120 /// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def
3121 /// declaration of builtins, checking for unique builtin declarations.
///
/// Output is guarded by GET_NEON_BUILTINS (ARM) or GET_NEON_AARCH64_BUILTINS
/// (AArch64), selected by \p isA64GenBuiltinDef.
3122 void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
3123 StringMap<ClassKind> &A64IntrinsicMap,
3124 bool isA64GenBuiltinDef) {
3125 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
// Used below to drop duplicate BUILTIN() lines.
3126 StringMap<OpKind> EmittedMap;
3128 // Generate BuiltinsARM.def and BuiltinsAArch64.def
3129 if (isA64GenBuiltinDef)
3130 OS << "#ifdef GET_NEON_AARCH64_BUILTINS\n";
3132 OS << "#ifdef GET_NEON_BUILTINS\n";
3134 for (unsigned i = 0, e = RV.size(); i != e; ++i) {
3136 OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
3140 std::string Proto = R->getValueAsString("Prototype");
3141 std::string name = R->getValueAsString("Name");
// "name@proto" uniquely identifies an intrinsic signature across targets.
3142 std::string Rename = name + "@" + Proto;
3144 // Functions with 'a' (the splat code) in the type prototype should not get
3145 // their own builtin as they use the non-splat variant.
3146 if (Proto.find('a') != std::string::npos)
3149 std::string Types = R->getValueAsString("Types");
3150 SmallVector<StringRef, 16> TypeVec;
3151 ParseTypes(R, Types, TypeVec);
3153 if (R->getSuperClasses().size() < 2)
3154 PrintFatalError(R->getLoc(), "Builtin has no class kind");
3156 ClassKind ck = ClassMap[R->getSuperClasses()[1]];
3158 // Do not include AArch64 BUILTIN() macros if not generating
3160 bool isA64 = R->getValueAsBit("isA64");
3161 if (!isA64GenBuiltinDef && isA64)
3164 // Include ARM BUILTIN() macros in AArch64 but only if ARM intrinsics
3165 // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
3166 // redefined in AArch64 to handle an additional 2 x f64 type.
3167 if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(Rename)) {
3168 ClassKind &A64CK = A64IntrinsicMap[Rename];
3169 if (A64CK == ck && ck != ClassNone)
3173 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
3174 // Generate the declaration for this builtin, ensuring
3175 // that each unique BUILTIN() macro appears only once in the output
3177 std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
3178 if (EmittedMap.count(bd))
3181 EmittedMap[bd] = OpNone;
3188 /// runHeader - Emit a file with sections defining:
3189 /// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
3190 /// 2. the SemaChecking code for the type overload checking.
3191 /// 3. the SemaChecking code for validation of intrinsic immediate arguments.
3192 void NeonEmitter::runHeader(raw_ostream &OS) {
3193 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
// Build a map of AArch64 intrinsics ("name@proto" -> class kind) to be used
// in uniqueness checks by the three generators below.
3196 StringMap<ClassKind> A64IntrinsicMap;
3197 for (unsigned i = 0, e = RV.size(); i != e; ++i) {
3200 bool isA64 = R->getValueAsBit("isA64");
3204 ClassKind CK = ClassNone;
3205 if (R->getSuperClasses().size() >= 2)
3206 CK = ClassMap[R->getSuperClasses()[1]];
3208 std::string Name = R->getValueAsString("Name");
3209 std::string Proto = R->getValueAsString("Prototype");
3210 std::string Rename = Name + "@" + Proto;
// Only the first record for a given signature is recorded.
3211 if (A64IntrinsicMap.count(Rename))
3213 A64IntrinsicMap[Rename] = CK;
3216 // Generate BuiltinsARM.def for ARM
3217 genBuiltinsDef(OS, A64IntrinsicMap, false);
3219 // Generate BuiltinsAArch64.def for AArch64
3220 genBuiltinsDef(OS, A64IntrinsicMap, true);
3222 // Generate ARM overloaded type checking code for SemaChecking.cpp
3223 genOverloadTypeCheckCode(OS, A64IntrinsicMap, false);
3225 // Generate AArch64 overloaded type checking code for SemaChecking.cpp
3226 genOverloadTypeCheckCode(OS, A64IntrinsicMap, true);
3228 // Generate ARM range checking code for shift/lane immediates.
3229 genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, false);
3231 // Generate the AArch64 range checking code for shift/lane immediates.
3232 genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, true);
3235 /// GenTest - Write out a test for the intrinsic specified by the name and
3236 /// type strings, including the embedded patterns for FileCheck to match.
///
/// Returns the full test-function body in 's'; the function prototype is
/// additionally returned through the \p testFuncProto out-parameter so the
/// caller can use it as a uniqueness key.
3237 static std::string GenTest(const std::string &name,
3238 const std::string &proto,
3239 StringRef outTypeStr, StringRef inTypeStr,
3240 bool isShift, bool isHiddenLOp,
3241 ClassKind ck, const std::string &InstName,
3243 std::string & testFuncProto) {
// NOTE(review): empty assert message -- consider a descriptive string.
3244 assert(!proto.empty() && "");
3247 // Function name with type suffix
3248 std::string mangledName = MangleName(name, outTypeStr, ClassS);
3249 if (outTypeStr != inTypeStr) {
3250 // If the input type is different (e.g., for vreinterpret), append a suffix
3251 // for the input type. String off a "Q" (quad) prefix so that MangleName
3252 // does not insert another "q" in the name.
3253 unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
3254 StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
3255 mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
3258 // todo: GenerateChecksForIntrinsic does not generate CHECK
3259 // for aarch64 instructions yet
3260 std::vector<std::string> FileCheckPatterns;
3262 GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
3263 isHiddenLOp, FileCheckPatterns);
// Label lines so FileCheck can anchor on the test function name.
3264 s+= "// CHECK_ARM: test_" + mangledName + "\n";
3266 s += "// CHECK_AARCH64: test_" + mangledName + "\n";
3268 // Emit the FileCheck patterns.
3269 // If for any reason we do not want to emit a check, mangledInst
3270 // will be the empty string.
3271 if (FileCheckPatterns.size()) {
3272 for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(),
3273 e = FileCheckPatterns.end();
3276 s += "// CHECK_ARM: " + *i + "\n";
3280 // Emit the start of the test function.
3282 testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
// Build the parameter list: one named argument per prototype slot, skipping
// immediate ('i') slots since those must be literal constants at the call.
3285 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
3286 // Do not create arguments for values that must be immediate constants.
3287 if (proto[i] == 'i')
3289 testFuncProto += comma + TypeString(proto[i], inTypeStr) + " ";
3290 testFuncProto.push_back(arg);
3293 testFuncProto += ")";
// Non-void intrinsics are returned from the test so the result is live.
3298 if (proto[0] != 'v')
3300 s += mangledName + "(";
// Build the call arguments, substituting a concrete literal for each
// immediate slot.
3302 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
3303 if (proto[i] == 'i') {
3304 // For immediate operands, test the maximum value.
3308 // The immediate generally refers to a lane in the preceding argument.
3309 s += utostr(RangeFromType(proto[i-1], inTypeStr));
3320 /// Write out all intrinsic tests for the specified target, checking
3321 /// for intrinsic test uniqueness.
///
/// \p EmittedMap is shared across the ARM and AArch64 passes so that a test
/// emitted for ARM is not emitted again for AArch64 (keyed by the test
/// function prototype returned from GenTest).
3322 void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
3323 bool isA64GenTest) {
// AArch64-only tests are bracketed in an __aarch64__ guard.
3325 OS << "#ifdef __aarch64__\n";
3327 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
3328 for (unsigned i = 0, e = RV.size(); i != e; ++i) {
3330 std::string name = R->getValueAsString("Name");
3331 std::string Proto = R->getValueAsString("Prototype");
3332 std::string Types = R->getValueAsString("Types");
3333 bool isShift = R->getValueAsBit("isShift");
3334 std::string InstName = R->getValueAsString("InstName");
3335 bool isHiddenLOp = R->getValueAsBit("isHiddenLInst");
3336 bool isA64 = R->getValueAsBit("isA64");
3338 // do not include AArch64 intrinsic test if not generating
3340 if (!isA64GenTest && isA64)
3343 SmallVector<StringRef, 16> TypeVec;
3344 ParseTypes(R, Types, TypeVec);
3346 ClassKind ck = ClassMap[R->getSuperClasses()[1]];
3347 OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
// Records marked unavailable produce no test at all.
3348 if (kind == OpUnavailable)
3350 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
3351 if (kind == OpReinterpret) {
// vreinterpret: one test per (output type, other same-width input type)
// pair, mirroring the pairing logic used by emitIntrinsic.
3352 bool outQuad = false;
3354 (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
3355 for (unsigned srcti = 0, srcte = TypeVec.size();
3356 srcti != srcte; ++srcti) {
3357 bool inQuad = false;
3358 (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
3359 if (srcti == ti || inQuad != outQuad)
3361 std::string testFuncProto;
3362 std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
3363 isShift, isHiddenLOp, ck, InstName, isA64,
// Dedup on the test function prototype produced by GenTest.
3365 if (EmittedMap.count(testFuncProto))
3367 EmittedMap[testFuncProto] = kind;
// Common case: input type equals output type.
3371 std::string testFuncProto;
3372 std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift,
3373 isHiddenLOp, ck, InstName, isA64, testFuncProto);
3374 if (EmittedMap.count(testFuncProto))
3376 EmittedMap[testFuncProto] = kind;
3385 /// runTests - Write out a complete set of tests for all of the Neon
/// intrinsics: the RUN/REQUIRES header, the arm_neon.h include, then the ARM
/// tests followed by the AArch64-only tests.
3387 void NeonEmitter::runTests(raw_ostream &OS) {
3388 OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi "
3390 "// RUN: -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
3391 "// RUN: | FileCheck %s -check-prefix=CHECK_ARM\n"
3393 "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n"
// NOTE(review): "// RUN " below is missing the ':' after RUN, so lit will
// not treat this continuation as part of the RUN line -- confirm and fix
// the literal to "// RUN: -target-feature ...".
3394 "// RUN -target-feature +neon -ffreestanding -S -o - %s \\\n"
3395 "// RUN: | FileCheck %s -check-prefix=CHECK_AARCH64\n"
3397 "// REQUIRES: long_tests\n"
3399 "#include <arm_neon.h>\n"
3402 // ARM tests must be emitted before AArch64 tests to ensure
3403 // tests for intrinsics that are common to ARM and AArch64
3404 // appear only once in the output stream.
3405 // The check for uniqueness is done in genTargetTest.
3406 StringMap<OpKind> EmittedMap;
// First pass: ARM (and common) tests.
3408 genTargetTest(OS, EmittedMap, false);
// Second pass: AArch64-only tests, inside an __aarch64__ guard.
3410 genTargetTest(OS, EmittedMap, true);
/// EmitNeon - TableGen entry point: generate the arm_neon.h header.
3414 void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
3415 NeonEmitter(Records).run(OS);
/// EmitNeonSema - TableGen entry point: generate the Builtins*.def sections
/// and the SemaChecking overload/immediate validation code (see runHeader).
3417 void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
3418 NeonEmitter(Records).runHeader(OS);
/// EmitNeonTest - TableGen entry point: generate the Neon intrinsic tests.
3420 void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
3421 NeonEmitter(Records).runTests(OS);
3423 } // End namespace clang