1 //===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This tablegen backend is responsible for emitting arm_neon.h, which includes
11 // a declaration and definition of each function specified by the ARM NEON
12 // compiler interface. See ARM document DUI0348B.
14 // Each NEON instruction is implemented in terms of 1 or more functions which
15 // are suffixed with the element type of the input vectors. Functions may be
16 // implemented in terms of generic vector operations such as +, *, -, etc. or
// by calling a __builtin_-prefixed function which will be handled by clang's
// CodeGen library.
20 // Additional validation code can be generated by this file when runHeader() is
21 // called, rather than the normal run() entry point. A complete set of tests
22 // for Neon intrinsics can be generated by calling the runTests() entry point.
24 //===----------------------------------------------------------------------===//
26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/SmallString.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/StringExtras.h"
30 #include "llvm/ADT/StringMap.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/TableGen/Error.h"
33 #include "llvm/TableGen/Record.h"
34 #include "llvm/TableGen/TableGenBackend.h"
98 ClassI, // generic integer instruction, e.g., "i8" suffix
99 ClassS, // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
100 ClassW, // width-specific instruction, e.g., "8" suffix
101 ClassB, // bitcast arguments with enum argument to specify type
102 ClassL, // Logical instructions which are op instructions
103 // but we need to not emit any suffix for in our
105 ClassNoTest // Instructions which we do not test since they are
106 // not TRUE instructions.
109 /// NeonTypeFlags - Flags to identify the types for overloaded Neon
110 /// builtins. These must be kept in sync with the flags in
111 /// include/clang/Basic/TargetBuiltins.h.
113 class NeonTypeFlags {
133 NeonTypeFlags(unsigned F) : Flags(F) {}
134 NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad) : Flags(ET) {
136 Flags |= UnsignedFlag;
141 uint32_t getFlags() const { return Flags; }
143 } // end anonymous namespace
147 RecordKeeper &Records;
148 StringMap<OpKind> OpMap;
149 DenseMap<Record*, ClassKind> ClassMap;
152 NeonEmitter(RecordKeeper &R) : Records(R) {
153 OpMap["OP_NONE"] = OpNone;
154 OpMap["OP_UNAVAILABLE"] = OpUnavailable;
155 OpMap["OP_ADD"] = OpAdd;
156 OpMap["OP_ADDL"] = OpAddl;
157 OpMap["OP_ADDW"] = OpAddw;
158 OpMap["OP_SUB"] = OpSub;
159 OpMap["OP_SUBL"] = OpSubl;
160 OpMap["OP_SUBW"] = OpSubw;
161 OpMap["OP_MUL"] = OpMul;
162 OpMap["OP_MLA"] = OpMla;
163 OpMap["OP_MLAL"] = OpMlal;
164 OpMap["OP_MLS"] = OpMls;
165 OpMap["OP_MLSL"] = OpMlsl;
166 OpMap["OP_MUL_N"] = OpMulN;
167 OpMap["OP_MLA_N"] = OpMlaN;
168 OpMap["OP_MLS_N"] = OpMlsN;
169 OpMap["OP_MLAL_N"] = OpMlalN;
170 OpMap["OP_MLSL_N"] = OpMlslN;
171 OpMap["OP_MUL_LN"]= OpMulLane;
172 OpMap["OP_MULL_LN"] = OpMullLane;
173 OpMap["OP_MLA_LN"]= OpMlaLane;
174 OpMap["OP_MLS_LN"]= OpMlsLane;
175 OpMap["OP_MLAL_LN"] = OpMlalLane;
176 OpMap["OP_MLSL_LN"] = OpMlslLane;
177 OpMap["OP_QDMULL_LN"] = OpQDMullLane;
178 OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
179 OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
180 OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
181 OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
182 OpMap["OP_EQ"] = OpEq;
183 OpMap["OP_GE"] = OpGe;
184 OpMap["OP_LE"] = OpLe;
185 OpMap["OP_GT"] = OpGt;
186 OpMap["OP_LT"] = OpLt;
187 OpMap["OP_NEG"] = OpNeg;
188 OpMap["OP_NOT"] = OpNot;
189 OpMap["OP_AND"] = OpAnd;
190 OpMap["OP_OR"] = OpOr;
191 OpMap["OP_XOR"] = OpXor;
192 OpMap["OP_ANDN"] = OpAndNot;
193 OpMap["OP_ORN"] = OpOrNot;
194 OpMap["OP_CAST"] = OpCast;
195 OpMap["OP_CONC"] = OpConcat;
196 OpMap["OP_HI"] = OpHi;
197 OpMap["OP_LO"] = OpLo;
198 OpMap["OP_DUP"] = OpDup;
199 OpMap["OP_DUP_LN"] = OpDupLane;
200 OpMap["OP_SEL"] = OpSelect;
201 OpMap["OP_REV16"] = OpRev16;
202 OpMap["OP_REV32"] = OpRev32;
203 OpMap["OP_REV64"] = OpRev64;
204 OpMap["OP_REINT"] = OpReinterpret;
205 OpMap["OP_ABDL"] = OpAbdl;
206 OpMap["OP_ABA"] = OpAba;
207 OpMap["OP_ABAL"] = OpAbal;
209 Record *SI = R.getClass("SInst");
210 Record *II = R.getClass("IInst");
211 Record *WI = R.getClass("WInst");
212 Record *SOpI = R.getClass("SOpInst");
213 Record *IOpI = R.getClass("IOpInst");
214 Record *WOpI = R.getClass("WOpInst");
215 Record *LOpI = R.getClass("LOpInst");
216 Record *NoTestOpI = R.getClass("NoTestOpInst");
218 ClassMap[SI] = ClassS;
219 ClassMap[II] = ClassI;
220 ClassMap[WI] = ClassW;
221 ClassMap[SOpI] = ClassS;
222 ClassMap[IOpI] = ClassI;
223 ClassMap[WOpI] = ClassW;
224 ClassMap[LOpI] = ClassL;
225 ClassMap[NoTestOpI] = ClassNoTest;
228 // run - Emit arm_neon.h.inc
229 void run(raw_ostream &o);
231 // runHeader - Emit all the __builtin prototypes used in arm_neon.h
232 void runHeader(raw_ostream &o);
234 // runTests - Emit tests for all the Neon intrinsics.
235 void runTests(raw_ostream &o);
238 void emitIntrinsic(raw_ostream &OS, Record *R);
240 } // end anonymous namespace
242 /// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
/// with each StringRef representing a single type declared in the string.
/// For "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
245 /// 2xfloat and 4xfloat respectively.
246 static void ParseTypes(Record *r, std::string &s,
247 SmallVectorImpl<StringRef> &TV) {
248 const char *data = s.data();
251 for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
252 if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U')
264 PrintFatalError(r->getLoc(),
265 "Unexpected letter: " + std::string(data + len, 1));
267 TV.push_back(StringRef(data, len + 1));
273 /// Widen - Convert a type code into the next wider type. char -> short,
274 /// short -> int, etc.
275 static char Widen(const char t) {
286 PrintFatalError("unhandled type in widen!");
290 /// Narrow - Convert a type code into the next smaller type. short -> char,
291 /// float -> half float, etc.
292 static char Narrow(const char t) {
303 PrintFatalError("unhandled type in narrow!");
307 /// For a particular StringRef, return the base type code, and whether it has
308 /// the quad-vector, polynomial, or unsigned modifiers set.
309 static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
313 if (ty[off] == 'Q') {
319 if (ty[off] == 'P') {
324 // remember unsigned.
325 if (ty[off] == 'U') {
330 // base type to get the type string for.
334 /// ModType - Transform a type code and its modifiers based on a mod code. The
335 /// mod code definitions may be found at the top of arm_neon.td.
336 static char ModType(const char mod, char type, bool &quad, bool &poly,
337 bool &usgn, bool &scal, bool &cnst, bool &pntr) {
410 /// TypeString - for a modifier and type, generate the name of the typedef for
/// that type. QUc -> uint8x16_t.
412 static std::string TypeString(const char mod, StringRef typestr) {
425 // base type to get the type string for.
426 char type = ClassifyType(typestr, quad, poly, usgn);
428 // Based on the modifying character, change the type and width if necessary.
429 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
438 s += poly ? "poly8" : "int8";
441 s += quad ? "x16" : "x8";
444 s += poly ? "poly16" : "int16";
447 s += quad ? "x8" : "x4";
453 s += quad ? "x4" : "x2";
459 s += quad ? "x2" : "x1";
465 s += quad ? "x8" : "x4";
471 s += quad ? "x4" : "x2";
474 PrintFatalError("unhandled type!");
484 // Append _t, finishing the type string typedef type.
496 /// BuiltinTypeString - for a modifier and type, generate the clang
497 /// BuiltinsARM.def prototype code for the function. See the top of clang's
498 /// Builtins.def for a description of the type strings.
499 static std::string BuiltinTypeString(const char mod, StringRef typestr,
500 ClassKind ck, bool ret) {
513 // base type to get the type string for.
514 char type = ClassifyType(typestr, quad, poly, usgn);
516 // Based on the modifying character, change the type and width if necessary.
517 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
519 // All pointers are void* pointers. Change type to 'v' now.
525 // Treat half-float ('h') types as unsigned short ('s') types.
530 usgn = usgn | poly | ((ck == ClassI || ck == ClassW) && scal && type != 'f');
537 else if (type == 'c')
538 s.push_back('S'); // make chars explicitly signed
540 if (type == 'l') // 64-bit long
552 // Since the return value must be one type, return a vector type of the
553 // appropriate width which we will bitcast. An exception is made for
554 // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
555 // fashion, storing them to a pointer arg.
557 if (mod >= '2' && mod <= '4')
558 return "vv*"; // void result with void* first argument
559 if (mod == 'f' || (ck != ClassB && type == 'f'))
560 return quad ? "V4f" : "V2f";
561 if (ck != ClassB && type == 's')
562 return quad ? "V8s" : "V4s";
563 if (ck != ClassB && type == 'i')
564 return quad ? "V4i" : "V2i";
565 if (ck != ClassB && type == 'l')
566 return quad ? "V2LLi" : "V1LLi";
568 return quad ? "V16Sc" : "V8Sc";
571 // Non-return array types are passed as individual vectors.
573 return quad ? "V16ScV16Sc" : "V8ScV8Sc";
575 return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
577 return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";
579 if (mod == 'f' || (ck != ClassB && type == 'f'))
580 return quad ? "V4f" : "V2f";
581 if (ck != ClassB && type == 's')
582 return quad ? "V8s" : "V4s";
583 if (ck != ClassB && type == 'i')
584 return quad ? "V4i" : "V2i";
585 if (ck != ClassB && type == 'l')
586 return quad ? "V2LLi" : "V1LLi";
588 return quad ? "V16Sc" : "V8Sc";
591 /// InstructionTypeCode - Computes the ARM argument character code and
592 /// quad status for a specific type string and ClassKind.
593 static void InstructionTypeCode(const StringRef &typeStr,
596 std::string &typeCode) {
599 char type = ClassifyType(typeStr, quad, poly, usgn);
604 case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break;
605 case ClassI: typeCode = "i8"; break;
606 case ClassW: typeCode = "8"; break;
612 case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break;
613 case ClassI: typeCode = "i16"; break;
614 case ClassW: typeCode = "16"; break;
620 case ClassS: typeCode = usgn ? "u32" : "s32"; break;
621 case ClassI: typeCode = "i32"; break;
622 case ClassW: typeCode = "32"; break;
628 case ClassS: typeCode = usgn ? "u64" : "s64"; break;
629 case ClassI: typeCode = "i64"; break;
630 case ClassW: typeCode = "64"; break;
637 case ClassI: typeCode = "f16"; break;
638 case ClassW: typeCode = "16"; break;
645 case ClassI: typeCode = "f32"; break;
646 case ClassW: typeCode = "32"; break;
651 PrintFatalError("unhandled type!");
655 /// MangleName - Append a type or width suffix to a base neon function name,
656 /// and insert a 'q' in the appropriate location if the operation works on
657 /// 128b rather than 64b. E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
658 static std::string MangleName(const std::string &name, StringRef typestr,
660 if (name == "vcvt_f32_f16")
664 std::string typeCode = "";
666 InstructionTypeCode(typestr, ck, quad, typeCode);
668 std::string s = name;
670 if (typeCode.size() > 0) {
677 // Insert a 'q' before the first '_' character so that it ends up before
678 // _lane or _n on vector-scalar operations.
680 size_t pos = s.find('_');
681 s = s.insert(pos, "q");
687 static void PreprocessInstruction(const StringRef &Name,
688 const std::string &InstName,
691 bool &HasLanePostfix,
695 // All of our instruction name fields from arm_neon.td are of the form
696 // <instructionname>_...
697 // Thus we grab our instruction name via computation of said Prefix.
698 const size_t PrefixEnd = Name.find_first_of('_');
699 // If InstName is passed in, we use that instead of our name Prefix.
700 Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName;
702 const StringRef Postfix = Name.slice(PrefixEnd, Name.size());
704 HasNPostfix = Postfix.count("_n");
705 HasLanePostfix = Postfix.count("_lane");
706 HasDupPostfix = Postfix.count("_dup");
707 IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt");
709 if (InstName.compare("vtbl") == 0 ||
710 InstName.compare("vtbx") == 0) {
711 // If we have a vtblN/vtbxN instruction, use the instruction's ASCII
712 // encoding to get its true value.
713 TBNumber = Name[Name.size()-1] - 48;
/// GenerateRegisterCheckPatternForLoadStores - Given a bunch of data we have
/// extracted, generate a FileCheck pattern for a Load Or Store.
720 GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef,
721 const std::string& OutTypeCode,
723 const bool &HasDupPostfix,
724 const bool &HasLanePostfix,
726 std::string &RegisterSuffix) {
727 const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1");
728 // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang
729 // will output a series of v{ld,st}1s, so we have to handle it specially.
730 if ((Count == 3 || Count == 4) && IsQuad) {
731 RegisterSuffix += "{";
732 for (size_t i = 0; i < Count; i++) {
733 RegisterSuffix += "d{{[0-9]+}}";
735 RegisterSuffix += "[]";
737 if (HasLanePostfix) {
738 RegisterSuffix += "[{{[0-9]+}}]";
741 RegisterSuffix += ", ";
744 RegisterSuffix += "}";
747 // Handle normal loads and stores.
748 RegisterSuffix += "{";
749 for (size_t i = 0; i < Count; i++) {
750 RegisterSuffix += "d{{[0-9]+}}";
752 RegisterSuffix += "[]";
754 if (HasLanePostfix) {
755 RegisterSuffix += "[{{[0-9]+}}]";
757 if (IsQuad && !HasLanePostfix) {
758 RegisterSuffix += ", d{{[0-9]+}}";
760 RegisterSuffix += "[]";
764 RegisterSuffix += ", ";
767 RegisterSuffix += "}, [r{{[0-9]+}}";
769 // We only include the alignment hint if we have a vld1.*64 or
770 // a dup/lane instruction.
772 if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") {
773 RegisterSuffix += ", :" + OutTypeCode;
774 } else if (OutTypeCode == "64") {
775 RegisterSuffix += ", :64";
779 RegisterSuffix += "]";
783 static bool HasNPostfixAndScalarArgs(const StringRef &NameRef,
784 const bool &HasNPostfix) {
785 return (NameRef.count("vmla") ||
786 NameRef.count("vmlal") ||
787 NameRef.count("vmlsl") ||
788 NameRef.count("vmull") ||
789 NameRef.count("vqdmlal") ||
790 NameRef.count("vqdmlsl") ||
791 NameRef.count("vqdmulh") ||
792 NameRef.count("vqdmull") ||
793 NameRef.count("vqrdmulh")) && HasNPostfix;
796 static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef,
797 const bool &HasLanePostfix) {
798 return (NameRef.count("vmla") ||
799 NameRef.count("vmls") ||
800 NameRef.count("vmlal") ||
801 NameRef.count("vmlsl") ||
802 (NameRef.count("vmul") && NameRef.size() == 3)||
803 NameRef.count("vqdmlal") ||
804 NameRef.count("vqdmlsl") ||
805 NameRef.count("vqdmulh") ||
806 NameRef.count("vqrdmulh")) && HasLanePostfix;
809 static bool IsSpecialLaneMultiply(const StringRef &NameRef,
810 const bool &HasLanePostfix,
811 const bool &IsQuad) {
812 const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh"))
814 const bool IsVMull = NameRef.count("mull") && !IsQuad;
815 return (IsVMulOrMulh || IsVMull) && HasLanePostfix;
818 static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
819 const std::string &Proto,
820 const bool &HasNPostfix,
822 const bool &HasLanePostfix,
823 const bool &HasDupPostfix,
824 std::string &NormedProto) {
825 // Handle generic case.
826 const StringRef NameRef(Name);
827 for (size_t i = 0, end = Proto.size(); i < end; i++) {
836 NormedProto += IsQuad? 'q' : 'd';
848 NormedProto += HasLanePostfix? 'a' : 'i';
851 if (HasLanePostfix) {
853 } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) {
854 NormedProto += IsQuad? 'q' : 'd';
862 // Handle Special Cases.
863 const bool IsNotVExt = !NameRef.count("vext");
864 const bool IsVPADAL = NameRef.count("vpadal");
865 const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef,
867 const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix,
870 if (IsSpecialLaneMul) {
872 NormedProto[2] = NormedProto[3];
873 NormedProto.erase(3);
874 } else if (NormedProto.size() == 4 &&
875 NormedProto[0] == NormedProto[1] &&
877 // If NormedProto.size() == 4 and the first two proto characters are the
878 // same, ignore the first.
879 NormedProto = NormedProto.substr(1, 3);
880 } else if (Is5OpLaneAccum) {
881 // If we have a 5 op lane accumulator operation, we take characters 1,2,4
882 std::string tmp = NormedProto.substr(1,2);
883 tmp += NormedProto[4];
885 } else if (IsVPADAL) {
886 // If we have VPADAL, ignore the first character.
887 NormedProto = NormedProto.substr(0, 2);
888 } else if (NameRef.count("vdup") && NormedProto.size() > 2) {
889 // If our instruction is a dup instruction, keep only the first and
891 std::string tmp = "";
892 tmp += NormedProto[0];
893 tmp += NormedProto[NormedProto.size()-1];
/// GenerateRegisterCheckPattern - Given a bunch of data we have
899 /// extracted, generate a FileCheck pattern to check that an
900 /// instruction's arguments are correct.
901 static void GenerateRegisterCheckPattern(const std::string &Name,
902 const std::string &Proto,
903 const std::string &OutTypeCode,
904 const bool &HasNPostfix,
906 const bool &HasLanePostfix,
907 const bool &HasDupPostfix,
908 const size_t &TBNumber,
909 std::string &RegisterSuffix) {
913 const StringRef NameRef(Name);
914 const StringRef ProtoRef(Proto);
916 if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) {
920 const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst");
921 const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx");
924 // Grab N value from v{ld,st}N using its ascii representation.
925 const size_t Count = NameRef[3] - 48;
927 GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad,
928 HasDupPostfix, HasLanePostfix,
929 Count, RegisterSuffix);
930 } else if (IsTBXOrTBL) {
931 RegisterSuffix += "d{{[0-9]+}}, {";
932 for (size_t i = 0; i < TBNumber-1; i++) {
933 RegisterSuffix += "d{{[0-9]+}}, ";
935 RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}";
937 // Handle a normal instruction.
938 if (NameRef.count("vget") || NameRef.count("vset"))
941 // We first normalize our proto, since we only need to emit 4
942 // different types of checks, yet have more than 4 proto types
943 // that map onto those 4 patterns.
944 std::string NormalizedProto("");
945 NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad,
946 HasLanePostfix, HasDupPostfix,
949 for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) {
950 const char &c = NormalizedProto[i];
953 RegisterSuffix += "q{{[0-9]+}}, ";
957 RegisterSuffix += "d{{[0-9]+}}, ";
961 RegisterSuffix += "#{{[0-9]+}}, ";
965 RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], ";
970 // Remove extra ", ".
971 RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2);
975 /// GenerateChecksForIntrinsic - Given a specific instruction name +
976 /// typestr + class kind, generate the proper set of FileCheck
977 /// Patterns to check for. We could just return a string, but instead
978 /// use a vector since it provides us with the extra flexibility of
979 /// emitting multiple checks, which comes in handy for certain cases
980 /// like mla where we want to check for 2 different instructions.
981 static void GenerateChecksForIntrinsic(const std::string &Name,
982 const std::string &Proto,
983 StringRef &OutTypeStr,
984 StringRef &InTypeStr,
986 const std::string &InstName,
988 std::vector<std::string>& Result) {
990 // If Ck is a ClassNoTest instruction, just return so no test is
992 if(Ck == ClassNoTest)
995 if (Name == "vcvt_f32_f16") {
996 Result.push_back("vcvt.f32.f16");
1001 // Now we preprocess our instruction given the data we have to get the
1002 // data that we need.
1003 // Create a StringRef for String Manipulation of our Name.
1004 const StringRef NameRef(Name);
1005 // Instruction Prefix.
1007 // The type code for our out type string.
1008 std::string OutTypeCode;
1009 // To handle our different cases, we need to check for different postfixes.
1010 // Is our instruction a quad instruction.
1011 bool IsQuad = false;
1012 // Our instruction is of the form <instructionname>_n.
1013 bool HasNPostfix = false;
1014 // Our instruction is of the form <instructionname>_lane.
1015 bool HasLanePostfix = false;
1016 // Our instruction is of the form <instructionname>_dup.
1017 bool HasDupPostfix = false;
1018 // Our instruction is a vcvt instruction which requires special handling.
1019 bool IsSpecialVCvt = false;
1020 // If we have a vtbxN or vtblN instruction, this is set to N.
1021 size_t TBNumber = -1;
1023 std::string RegisterSuffix;
1025 PreprocessInstruction(NameRef, InstName, Prefix,
1026 HasNPostfix, HasLanePostfix, HasDupPostfix,
1027 IsSpecialVCvt, TBNumber);
1029 InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode);
1030 GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad,
1031 HasLanePostfix, HasDupPostfix, TBNumber,
1034 // In the following section, we handle a bunch of special cases. You can tell
1035 // a special case by the fact we are returning early.
1037 // If our instruction is a logical instruction without postfix or a
1038 // hidden LOp just return the current Prefix.
1039 if (Ck == ClassL || IsHiddenLOp) {
1040 Result.push_back(Prefix + " " + RegisterSuffix);
1044 // If we have a vmov, due to the many different cases, some of which
1045 // vary within the different intrinsics generated for a single
1046 // instruction type, just output a vmov. (e.g. given an instruction
1047 // A, A.u32 might be vmov and A.u8 might be vmov.8).
1049 // FIXME: Maybe something can be done about this. The two cases that we care
1050 // about are vmov as an LType and vmov as a WType.
1051 if (Prefix == "vmov") {
1052 Result.push_back(Prefix + " " + RegisterSuffix);
1056 // In the following section, we handle special cases.
1058 if (OutTypeCode == "64") {
1059 // If we have a 64 bit vdup/vext and are handling an uint64x1_t
1060 // type, the intrinsic will be optimized away, so just return
1061 // nothing. On the other hand if we are handling an uint64x2_t
1062 // (i.e. quad instruction), vdup/vmov instructions should be
1064 if (Prefix == "vdup" || Prefix == "vext") {
1066 Result.push_back("{{vmov|vdup}}");
1071 // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with
1072 // multiple register operands.
1073 bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3"
1074 || Prefix == "vld4";
1075 bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3"
1076 || Prefix == "vst4";
1077 if (MultiLoadPrefix || MultiStorePrefix) {
1078 Result.push_back(NameRef.slice(0, 3).str() + "1.64");
1082 // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of
1083 // emitting said instructions. So return a check for
1084 // vldr/vstr/vmov/str instead.
1085 if (HasLanePostfix || HasDupPostfix) {
1086 if (Prefix == "vst1") {
1087 Result.push_back("{{str|vstr|vmov}}");
1089 } else if (Prefix == "vld1") {
1090 Result.push_back("{{ldr|vldr|vmov}}");
1096 // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are
1097 // sometimes disassembled as vtrn.32. We use a regex to handle both
1099 if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") {
1100 Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix);
1104 // Currently on most ARM processors, we do not use vmla/vmls for
1105 // quad floating point operations. Instead we output vmul + vadd. So
1106 // check if we have one of those instructions and just output a
1108 if (OutTypeCode == "f32") {
1109 if (Prefix == "vmls") {
1110 Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
1111 Result.push_back("vsub." + OutTypeCode);
1113 } else if (Prefix == "vmla") {
1114 Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
1115 Result.push_back("vadd." + OutTypeCode);
1120 // If we have vcvt, get the input type from the instruction name
1121 // (which should be of the form instname_inputtype) and append it
1122 // before the output type.
1123 if (Prefix == "vcvt") {
1124 const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1);
1125 Prefix += "." + inTypeCode;
1128 // Append output type code to get our final mangled instruction.
1129 Prefix += "." + OutTypeCode;
1131 Result.push_back(Prefix + " " + RegisterSuffix);
/// UseMacro - Decide from the prototype string whether the intrinsic has to
/// be emitted as a preprocessor macro rather than as an inline function.
static bool UseMacro(const std::string &proto) {
  // Any one of these prototype codes forces a macro definition:
  //   'i'       - an immediate argument; a #define lets SemaChecking range
  //               check the immediate passed by the user.
  //   'p', 'c'  - pointer arguments; a macro avoids hiding aligned
  //               attributes from the pointer type.
  static const char MacroCodes[] = "ipc";
  return proto.find_first_of(MacroCodes) != std::string::npos;
}
/// MacroArgUsedDirectly - Report whether argument i of an intrinsic that is
/// defined as a macro is referenced as-is in the macro body, instead of first
/// being copied into a local temporary.
static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
  switch (proto[i]) {
  case 'i': // constant int
  case 'p': // pointer
  case 'c': // const pointer
    return true;
  default:
    return false;
  }
}
1160 // Generate the string "(argtype a, argtype b, ...)"
1161 static std::string GenArgs(const std::string &proto, StringRef typestr) {
1162 bool define = UseMacro(proto);
1168 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1170 // Some macro arguments are used directly instead of being assigned
1171 // to local temporaries; prepend an underscore prefix to make their
1172 // names consistent with the local temporaries.
1173 if (MacroArgUsedDirectly(proto, i))
1176 s += TypeString(proto[i], typestr) + " __";
1187 // Macro arguments are not type-checked like inline function arguments, so
1188 // assign them to local temporaries to get the right type checking.
1189 static std::string GenMacroLocals(const std::string &proto, StringRef typestr) {
1192 bool generatedLocal = false;
1194 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1195 // Do not create a temporary for an immediate argument.
1196 // That would defeat the whole point of using a macro!
1197 if (MacroArgUsedDirectly(proto, i))
1199 generatedLocal = true;
1201 s += TypeString(proto[i], typestr) + " __";
1213 // Use the vmovl builtin to sign-extend or zero-extend a vector.
1214 static std::string Extend(StringRef typestr, const std::string &a) {
1216 s = MangleName("vmovl", typestr, ClassS);
1221 static std::string Duplicate(unsigned nElts, StringRef typestr,
1222 const std::string &a) {
1225 s = "(" + TypeString('d', typestr) + "){ ";
1226 for (unsigned i = 0; i != nElts; ++i) {
1228 if ((i + 1) < nElts)
1236 static std::string SplatLane(unsigned nElts, const std::string &vec,
1237 const std::string &lane) {
1238 std::string s = "__builtin_shufflevector(" + vec + ", " + vec;
1239 for (unsigned i = 0; i < nElts; ++i)
1245 static unsigned GetNumElements(StringRef typestr, bool &quad) {
1248 char type = ClassifyType(typestr, quad, dummy, dummy);
1251 case 'c': nElts = 8; break;
1252 case 's': nElts = 4; break;
1253 case 'i': nElts = 2; break;
1254 case 'l': nElts = 1; break;
1255 case 'h': nElts = 4; break;
1256 case 'f': nElts = 2; break;
1258 PrintFatalError("unhandled type!");
1260 if (quad) nElts <<= 1;
1264 // Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
1265 static std::string GenOpString(OpKind op, const std::string &proto,
1266 StringRef typestr) {
1268 unsigned nElts = GetNumElements(typestr, quad);
1269 bool define = UseMacro(proto);
1271 std::string ts = TypeString(proto[0], typestr);
1282 s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
1285 s += "__a + " + Extend(typestr, "__b") + ";";
1291 s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
1294 s += "__a - " + Extend(typestr, "__b") + ";";
1297 s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
1300 s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
1306 s += MangleName("vmull", typestr, ClassS) + "(__a, " +
1307 SplatLane(nElts, "__b", "__c") + ");";
1310 s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
1313 s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
1316 s += "__a + (__b * __c);";
1319 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1320 Duplicate(nElts, typestr, "__c") + ");";
1323 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1324 SplatLane(nElts, "__c", "__d") + ");";
1327 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
1330 s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
1333 s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
1336 s += "__a - (__b * __c);";
1339 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1340 Duplicate(nElts, typestr, "__c") + ");";
1343 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1344 SplatLane(nElts, "__c", "__d") + ");";
1347 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
1350 s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
1351 SplatLane(nElts, "__b", "__c") + ");";
1354 s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
1355 SplatLane(nElts, "__c", "__d") + ");";
1358 s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
1359 SplatLane(nElts, "__c", "__d") + ");";
1362 s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
1363 SplatLane(nElts, "__b", "__c") + ");";
1366 s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
1367 SplatLane(nElts, "__b", "__c") + ");";
1370 s += "(" + ts + ")(__a == __b);";
1373 s += "(" + ts + ")(__a >= __b);";
1376 s += "(" + ts + ")(__a <= __b);";
1379 s += "(" + ts + ")(__a > __b);";
1382 s += "(" + ts + ")(__a < __b);";
1406 s += "(" + ts + ")__a;";
1409 s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
1410 s += ", (int64x1_t)__b, 0, 1);";
1414 ")__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1);";
1418 ")__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0);";
1421 s += Duplicate(nElts, typestr, "__a") + ";";
1424 s += SplatLane(nElts, "__a", "__b") + ";";
1427 // ((0 & 1) | (~0 & 2))
1428 s += "(" + ts + ")";
1429 ts = TypeString(proto[1], typestr);
1430 s += "((__a & (" + ts + ")__b) | ";
1431 s += "(~__a & (" + ts + ")__c));";
1434 s += "__builtin_shufflevector(__a, __a";
1435 for (unsigned i = 2; i <= nElts; i += 2)
1436 for (unsigned j = 0; j != 2; ++j)
1437 s += ", " + utostr(i - j - 1);
1441 unsigned WordElts = nElts >> (1 + (int)quad);
1442 s += "__builtin_shufflevector(__a, __a";
1443 for (unsigned i = WordElts; i <= nElts; i += WordElts)
1444 for (unsigned j = 0; j != WordElts; ++j)
1445 s += ", " + utostr(i - j - 1);
1450 unsigned DblWordElts = nElts >> (int)quad;
1451 s += "__builtin_shufflevector(__a, __a";
1452 for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
1453 for (unsigned j = 0; j != DblWordElts; ++j)
1454 s += ", " + utostr(i - j - 1);
1459 std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
1460 if (typestr[0] != 'U') {
1461 // vabd results are always unsigned and must be zero-extended.
1462 std::string utype = "U" + typestr.str();
1463 s += "(" + TypeString(proto[0], typestr) + ")";
1464 abd = "(" + TypeString('d', utype) + ")" + abd;
1465 s += Extend(utype, abd) + ";";
1467 s += Extend(typestr, abd) + ";";
1472 s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
1476 std::string abd = MangleName("vabd", typestr, ClassS) + "(__b, __c)";
1477 if (typestr[0] != 'U') {
1478 // vabd results are always unsigned and must be zero-extended.
1479 std::string utype = "U" + typestr.str();
1480 s += "(" + TypeString(proto[0], typestr) + ")";
1481 abd = "(" + TypeString('d', utype) + ")" + abd;
1482 s += Extend(utype, abd) + ";";
1484 s += Extend(typestr, abd) + ";";
1489 PrintFatalError("unknown OpKind!");
/// GetNeonEnum - Compute the NeonTypeFlags value for the given prototype and
/// type string. The base type is classified from typestr, adjusted by ModType
/// for the selected modifier character, mapped to a NeonTypeFlags::EltType, and
/// the packed result of NeonTypeFlags::getFlags() is returned.
1494 static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
// Start from the return-type modifier.
1495 unsigned mod = proto[0];
// The 'v' and 'f' return modifiers are special-cased.
1497 if (mod == 'v' || mod == 'f')
1507 // Base type to get the type string for.
1508 char type = ClassifyType(typestr, quad, poly, usgn);
1510 // Based on the modifying character, change the type and width if necessary.
1511 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
// Select the element type; for the two narrowest integer widths a polynomial
// type selects Poly8/Poly16 instead of Int8/Int16.
1513 NeonTypeFlags::EltType ET;
1516 ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8;
1519 ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16;
1522 ET = NeonTypeFlags::Int32;
1525 ET = NeonTypeFlags::Int64;
1528 ET = NeonTypeFlags::Float16;
1531 ET = NeonTypeFlags::Float32;
1534 PrintFatalError("unhandled type!");
// The third constructor argument is the quad flag, forced to false when the
// first argument modifier (proto[1]) is 'g'.
1536 NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g');
1537 return Flags.getFlags();
1540 // Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
// name is the unmangled intrinsic name, proto is the prototype string (the
// return-type modifier followed by one modifier per argument), typestr is the
// base type string and ck is the class kind passed to MangleName when forming
// the builtin name.
1541 static std::string GenBuiltin(const std::string &name, const std::string &proto,
1542 StringRef typestr, ClassKind ck) {
1545 // If this builtin returns a struct of 2, 3, or 4 vectors, pass it as an
1546 // implicit sret-like argument.
1547 bool sret = (proto[0] >= '2' && proto[0] <= '4');
1549 bool define = UseMacro(proto);
1551 // Check if the prototype has a scalar operand with the type of the vector
1552 // elements. If not, bitcasting the args will take care of arg checking.
1553 // The actual signedness etc. will be taken care of with special enums.
1554 if (proto.find('s') == std::string::npos)
1557 if (proto[0] != 'v') {
1558 std::string ts = TypeString(proto[0], typestr);
1564 s += "(" + ts + ")";
1568 s += "return (" + ts + ")";
// An 'a' modifier anywhere in the prototype marks a splat variant.
1572 bool splat = proto.find('a') != std::string::npos;
1574 s += "__builtin_neon_";
1576 // Call the non-splat builtin: chop off the "_n" suffix from the name.
1577 std::string vname(name, 0, name.size()-2);
1578 s += MangleName(vname, typestr, ck);
1580 s += MangleName(name, typestr, ck);
1584 // Pass the address of the return variable as the first argument to sret-like
1590 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1591 std::string args = std::string(&arg, 1);
1593 // Use the local temporaries instead of the macro arguments.
// Classify this argument independently of the return type.
1596 bool argQuad = false;
1597 bool argPoly = false;
1598 bool argUsgn = false;
1599 bool argScalar = false;
1601 char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
1602 argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
1605 // Handle multiple-vector values specially, emitting each subvector as an
1606 // argument to the __builtin.
1607 if (proto[i] >= '2' && proto[i] <= '4') {
1608 // Check if an explicit cast is needed.
1609 if (argType != 'c' || argPoly || argUsgn)
1610 args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;
1612 for (unsigned vi = 0, ve = proto[i] - '0'; vi != ve; ++vi) {
1613 s += args + ".val[" + utostr(vi) + "]";
// For splat variants the last argument is expanded with Duplicate().
1623 if (splat && (i + 1) == e)
1624 args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);
1626 // Check if an explicit cast is needed.
1627 if ((splat || !argScalar) &&
1628 ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
1629 std::string argTypeStr = "c";
1631 argTypeStr = argType;
1633 argTypeStr = "Q" + argTypeStr;
1634 args = "(" + TypeString('d', argTypeStr) + ")" + args;
1642 // Extra constant integer to hold type class enum for this function, e.g. s8
1644 s += ", " + utostr(GetNeonEnum(proto, typestr));
1648 if (proto[0] != 'v' && sret) {
/// GenBuiltinDef - Build the BUILTIN(...) entry for one builtin: the
/// "__builtin_neon_" prefix, the mangled name, and a type string assembled from
/// BuiltinTypeString() for every character of the prototype.
1657 static std::string GenBuiltinDef(const std::string &name,
1658 const std::string &proto,
1659 StringRef typestr, ClassKind ck) {
1660 std::string s("BUILTIN(__builtin_neon_");
1662 // If all types are the same size, bitcasting the args will take care
1663 // of arg checking. The actual signedness etc. will be taken care of with
1665 if (proto.find('s') == std::string::npos)
1668 s += MangleName(name, typestr, ck);
// Index 0 of the prototype is the return type; the last argument tells
// BuiltinTypeString whether it is handling that first position.
1671 for (unsigned i = 0, e = proto.size(); i != e; ++i)
1672 s += BuiltinTypeString(proto[i], typestr, ck, i == 0);
1674 // Extra constant integer to hold type class enum for this function, e.g. s8
/// GenIntrinsic - Build the arm_neon.h text for one intrinsic.
/// The mangled name always carries the suffix for outTypeStr; when inTypeStr
/// differs, a second suffix for the input type is appended. UseMacro(proto)
/// selects the macro form unless the operation is OpUnavailable, in which case
/// an __attribute__((unavailable)) declaration is produced. The body text comes
/// from GenOpString() or GenBuiltin().
1682 static std::string GenIntrinsic(const std::string &name,
1683 const std::string &proto,
1684 StringRef outTypeStr, StringRef inTypeStr,
1685 OpKind kind, ClassKind classKind) {
1686 assert(!proto.empty() && "");
1687 bool define = UseMacro(proto) && kind != OpUnavailable;
1690 // static always inline + return type
1694 s += "__ai " + TypeString(proto[0], outTypeStr) + " ";
1696 // Function name with type suffix
1697 std::string mangledName = MangleName(name, outTypeStr, ClassS);
1698 if (outTypeStr != inTypeStr) {
1699 // If the input type is different (e.g., for vreinterpret), append a suffix
1700 // for the input type. Strip off a "Q" (quad) prefix so that MangleName
1701 // does not insert another "q" in the name.
1702 unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
1703 StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
1704 mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
1708 // Function arguments
1709 s += GenArgs(proto, inTypeStr);
1713 s += " __extension__ ({ \\\n ";
1714 s += GenMacroLocals(proto, inTypeStr);
1715 } else if (kind == OpUnavailable) {
1716 s += " __attribute__((unavailable));\n";
1722 s += GenOpString(kind, proto, outTypeStr);
1724 s += GenBuiltin(name, proto, outTypeStr, classKind);
1733 /// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h
1734 /// consists of type definitions and function declarations.
1735 void NeonEmitter::run(raw_ostream &OS) {
1737 "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
1740 " * Permission is hereby granted, free of charge, to any person obtaining "
1742 " * of this software and associated documentation files (the \"Software\"),"
1744 " * in the Software without restriction, including without limitation the "
1746 " * to use, copy, modify, merge, publish, distribute, sublicense, "
1748 " * copies of the Software, and to permit persons to whom the Software is\n"
1749 " * furnished to do so, subject to the following conditions:\n"
1751 " * The above copyright notice and this permission notice shall be "
1753 " * all copies or substantial portions of the Software.\n"
1755 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
1757 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
1758 "MERCHANTABILITY,\n"
1759 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
1761 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
1763 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
1765 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
1767 " * THE SOFTWARE.\n"
1769 " *===--------------------------------------------------------------------"
// Include guard for the generated header.
1773 OS << "#ifndef __ARM_NEON_H\n";
1774 OS << "#define __ARM_NEON_H\n\n";
// Make the generated header a hard error when __ARM_NEON__ is not defined.
1776 OS << "#ifndef __ARM_NEON__\n";
1777 OS << "#error \"NEON support not enabled\"\n";
1780 OS << "#include <stdint.h>\n\n";
1782 // Emit NEON-specific scalar typedefs.
1783 OS << "typedef float float32_t;\n";
1784 OS << "typedef int8_t poly8_t;\n";
1785 OS << "typedef int16_t poly16_t;\n";
1786 OS << "typedef uint16_t float16_t;\n";
1788 // Emit Neon vector typedefs.
// TypedefTypes concatenates the type codes of every vector type to declare;
// ParseTypes splits it into one StringRef per type.
1789 std::string TypedefTypes("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfPcQPcPsQPs");
1790 SmallVector<StringRef, 24> TDTypeVec;
1791 ParseTypes(0, TypedefTypes, TDTypeVec);
1793 // Emit vector typedefs.
1794 for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
1795 bool dummy, quad = false, poly = false;
1796 (void) ClassifyType(TDTypeVec[i], quad, poly, dummy);
1798 OS << "typedef __attribute__((neon_polyvector_type(";
1800 OS << "typedef __attribute__((neon_vector_type(";
1802 unsigned nElts = GetNumElements(TDTypeVec[i], quad);
1803 OS << utostr(nElts) << "))) ";
1807 OS << TypeString('s', TDTypeVec[i]);
1808 OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";
1812 // Emit struct typedefs.
// vi is the number of vectors held in the struct's val[] array (2, 3 and 4).
1813 for (unsigned vi = 2; vi != 5; ++vi) {
1814 for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
1815 std::string ts = TypeString('d', TDTypeVec[i]);
1816 std::string vs = TypeString('0' + vi, TDTypeVec[i]);
1817 OS << "typedef struct " << vs << " {\n";
1818 OS << " " << ts << " val";
1819 OS << "[" << utostr(vi) << "]";
1821 OS << vs << ";\n\n";
// __ai is the prefix used for the generated functions; it is #undef'd again
// at the end of the header.
1825 OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";
1827 std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
1829 // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
1830 // intrinsics. (Some of the saturating multiply instructions are also
1831 // used to implement the corresponding "_lane" variants, but tablegen
1832 // sorts the records into alphabetical order so that the "_lane" variants
1833 // come after the intrinsics they use.)
1834 emitIntrinsic(OS, Records.getDef("VMOVL"));
1835 emitIntrinsic(OS, Records.getDef("VMULL"));
1836 emitIntrinsic(OS, Records.getDef("VABD"));
// Emit every remaining record, leaving out the three handled above.
1838 for (unsigned i = 0, e = RV.size(); i != e; ++i) {
1840 if (R->getName() != "VMOVL" &&
1841 R->getName() != "VMULL" &&
1842 R->getName() != "VABD")
1843 emitIntrinsic(OS, R);
1846 OS << "#undef __ai\n\n";
1847 OS << "#endif /* __ARM_NEON_H */\n";
1850 /// emitIntrinsic - Write out the arm_neon.h header file definitions for the
1851 /// intrinsics specified by record R.
/// One definition is generated per entry of the record's "Types" list; for
/// OpReinterpret, definitions are generated for pairs of types instead.
1852 void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R) {
1853 std::string name = R->getValueAsString("Name");
1854 std::string Proto = R->getValueAsString("Prototype");
1855 std::string Types = R->getValueAsString("Types");
1857 SmallVector<StringRef, 16> TypeVec;
1858 ParseTypes(R, Types, TypeVec);
1860 OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
// The class kind is looked up from the record's second superclass when there
// is one. A record with neither a class kind nor an operation is rejected.
1862 ClassKind classKind = ClassNone;
1863 if (R->getSuperClasses().size() >= 2)
1864 classKind = ClassMap[R->getSuperClasses()[1]];
1865 if (classKind == ClassNone && kind == OpNone)
1866 PrintFatalError(R->getLoc(), "Builtin has no class kind");
1868 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
1869 if (kind == OpReinterpret) {
// TypeVec[ti] is the output type; every entry of TypeVec is then considered
// as an input type.
1870 bool outQuad = false;
1872 (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
1873 for (unsigned srcti = 0, srcte = TypeVec.size();
1874 srcti != srcte; ++srcti) {
1875 bool inQuad = false;
1876 (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
// Identical types and quad/non-quad mismatches are singled out here.
1877 if (srcti == ti || inQuad != outQuad)
1879 OS << GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
1883 OS << GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti],
/// RangeFromType - Return the largest value allowed for an immediate that is
/// range-checked against the type obtained by applying modifier mod to
/// typestr. Every result has the form (N << quad) - 1, so it doubles (plus
/// one) when the quad flag is set.
1890 static unsigned RangeFromType(const char mod, StringRef typestr) {
1891 // base type to get the type string for.
1892 bool quad = false, dummy = false;
1893 char type = ClassifyType(typestr, quad, dummy, dummy);
1894 type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);
1898 return (8 << (int)quad) - 1;
1901 return (4 << (int)quad) - 1;
1904 return (2 << (int)quad) - 1;
1906 return (1 << (int)quad) - 1;
1908 PrintFatalError("unhandled type!");
1912 /// runHeader - Emit a file with sections defining:
1913 /// 1. the NEON section of BuiltinsARM.def.
1914 /// 2. the SemaChecking code for the type overload checking.
1915 /// 3. the SemaChecking code for validation of intrinsic immediate arguments.
/// The sections are opened with #ifdef GET_NEON_BUILTINS,
/// GET_NEON_OVERLOAD_CHECK and GET_NEON_IMMEDIATE_CHECK respectively.
1916 void NeonEmitter::runHeader(raw_ostream &OS) {
1917 std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
// Strings that have already been written, so that each BUILTIN() entry and
// each immediate-check case is emitted once. The mapped value is a
// placeholder (OpNone).
1919 StringMap<OpKind> EmittedMap;
1921 // Generate BuiltinsARM.def for NEON
1922 OS << "#ifdef GET_NEON_BUILTINS\n";
1923 for (unsigned i = 0, e = RV.size(); i != e; ++i) {
1925 OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
1929 std::string Proto = R->getValueAsString("Prototype");
1931 // Functions with 'a' (the splat code) in the type prototype should not get
1932 // their own builtin as they use the non-splat variant.
1933 if (Proto.find('a') != std::string::npos)
1936 std::string Types = R->getValueAsString("Types");
1937 SmallVector<StringRef, 16> TypeVec;
1938 ParseTypes(R, Types, TypeVec);
1940 if (R->getSuperClasses().size() < 2)
1941 PrintFatalError(R->getLoc(), "Builtin has no class kind");
1943 std::string name = R->getValueAsString("Name");
1944 ClassKind ck = ClassMap[R->getSuperClasses()[1]];
1946 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
1947 // Generate the BuiltinsARM.def declaration for this builtin, ensuring
1948 // that each unique BUILTIN() macro appears only once in the output
1950 std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
1951 if (EmittedMap.count(bd))
1954 EmittedMap[bd] = OpNone;
1960 // Generate the overloaded type checking code for SemaChecking.cpp
1961 OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
1962 for (unsigned i = 0, e = RV.size(); i != e; ++i) {
1964 OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
1968 std::string Proto = R->getValueAsString("Prototype");
1969 std::string Types = R->getValueAsString("Types");
1970 std::string name = R->getValueAsString("Name");
1972 // Functions with 'a' (the splat code) in the type prototype should not get
1973 // their own builtin as they use the non-splat variant.
1974 if (Proto.find('a') != std::string::npos)
1977 // Functions which have a scalar argument cannot be overloaded, no need to
1978 // check them if we are emitting the type checking code.
1979 if (Proto.find('s') != std::string::npos)
1982 SmallVector<StringRef, 16> TypeVec;
1983 ParseTypes(R, Types, TypeVec);
1985 if (R->getSuperClasses().size() < 2)
1986 PrintFatalError(R->getLoc(), "Builtin has no class kind");
// Two masks are accumulated, one bit per GetNeonEnum() value: mask (named
// after TypeVec[si]) and qmask (named after TypeVec[qi]). They are emitted
// as separate cases below.
1988 int si = -1, qi = -1;
1989 uint64_t mask = 0, qmask = 0;
1990 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
1991 // Generate the switch case(s) for this builtin for the type validation.
1992 bool quad = false, poly = false, usgn = false;
1993 (void) ClassifyType(TypeVec[ti], quad, poly, usgn);
1997 qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
2000 mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
2004 // Check if the builtin function has a pointer or const pointer argument.
2006 bool HasConstPtr = false;
// Prototype index 0 is the return type, so argument numbers are arg - 1.
2007 for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
2008 char ArgType = Proto[arg];
2009 if (ArgType == 'c') {
2011 PtrArgNum = arg - 1;
2014 if (ArgType == 'p') {
2015 PtrArgNum = arg - 1;
2019 // For sret builtins, adjust the pointer argument index.
2020 if (PtrArgNum >= 0 && (Proto[0] >= '2' && Proto[0] <= '4'))
2023 // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
2024 // and vst1_lane intrinsics. Using a pointer to the vector element
2025 // type with one of those operations causes codegen to select an aligned
2026 // load/store instruction. If you want an unaligned operation,
2027 // the pointer argument needs to have less alignment than element type,
2028 // so just accept any pointer type.
2029 if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
2031 HasConstPtr = false;
2035 OS << "case ARM::BI__builtin_neon_"
2036 << MangleName(name, TypeVec[si], ClassB)
2037 << ": mask = " << "0x" << utohexstr(mask) << "ULL";
2039 OS << "; PtrArgNum = " << PtrArgNum;
2041 OS << "; HasConstPtr = true";
2045 OS << "case ARM::BI__builtin_neon_"
2046 << MangleName(name, TypeVec[qi], ClassB)
2047 << ": mask = " << "0x" << utohexstr(qmask) << "ULL";
2049 OS << "; PtrArgNum = " << PtrArgNum;
2051 OS << "; HasConstPtr = true";
2057 // Generate the intrinsic range checking code for shift/lane immediates.
2058 OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
2059 for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2062 OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
2066 std::string name = R->getValueAsString("Name");
2067 std::string Proto = R->getValueAsString("Prototype");
2068 std::string Types = R->getValueAsString("Types");
2070 // Functions with 'a' (the splat code) in the type prototype should not get
2071 // their own builtin as they use the non-splat variant.
2072 if (Proto.find('a') != std::string::npos)
2075 // Functions which do not have an immediate do not need to have range
2076 // checking code emitted.
2077 size_t immPos = Proto.find('i');
2078 if (immPos == std::string::npos)
2081 SmallVector<StringRef, 16> TypeVec;
2082 ParseTypes(R, Types, TypeVec);
2084 if (R->getSuperClasses().size() < 2)
2085 PrintFatalError(R->getLoc(), "Builtin has no class kind");
2087 ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2089 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
// rangestr holds the "l = ...; u = ..." text written into the case below.
2090 std::string namestr, shiftstr, rangestr;
2092 if (R->getValueAsBit("isVCVT_N")) {
2093 // VCVT between floating- and fixed-point values takes an immediate
2094 // in the range 1 to 32.
2096 rangestr = "l = 1; u = 31"; // upper bound = l + u
2097 } else if (Proto.find('s') == std::string::npos) {
2098 // Builtins which are overloaded by type will need to have their upper
2099 // bound computed at Sema time based on the type constant.
2101 if (R->getValueAsBit("isShift")) {
2102 shiftstr = ", true";
2104 // Right shifts have an 'r' in the name, left shifts do not.
2105 if (name.find('r') != std::string::npos)
2106 rangestr = "l = 1; ";
2108 rangestr += "u = RFT(TV" + shiftstr + ")";
2110 // The immediate generally refers to a lane in the preceding argument.
2111 assert(immPos > 0 && "unexpected immediate operand");
2112 rangestr = "u = " + utostr(RangeFromType(Proto[immPos-1], TypeVec[ti]));
2114 // Make sure cases appear only once by uniquing them in a string map.
2115 namestr = MangleName(name, TypeVec[ti], ck);
2116 if (EmittedMap.count(namestr))
2118 EmittedMap[namestr] = OpNone;
2120 // Calculate the index of the immediate that should be range checked.
2121 unsigned immidx = 0;
2123 // Builtins that return a struct of multiple vectors have an extra
2124 // leading arg for the struct return.
2125 if (Proto[0] >= '2' && Proto[0] <= '4')
2128 // Add one to the index for each argument until we reach the immediate
2129 // to be checked. Structs of vectors are passed as multiple arguments.
2130 for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
2131 switch (Proto[ii]) {
2132 default: immidx += 1; break;
2133 case '2': immidx += 2; break;
2134 case '3': immidx += 3; break;
2135 case '4': immidx += 4; break;
// Reaching the immediate ends the loop: the end bound is moved to just past
// ii, and the immediate itself is not counted.
2136 case 'i': ie = ii + 1; break;
2139 OS << "case ARM::BI__builtin_neon_" << MangleName(name, TypeVec[ti], ck)
2140 << ": i = " << immidx << "; " << rangestr << "; break;\n";
2146 /// GenTest - Write out a test for the intrinsic specified by the name and
2147 /// type strings, including the embedded patterns for FileCheck to match.
/// The generated function is named "test_" followed by the mangled intrinsic
/// name. Its parameters mirror the prototype, except that immediate ('i')
/// operands are not parameters and are passed as constants in the call.
2148 static std::string GenTest(const std::string &name,
2149 const std::string &proto,
2150 StringRef outTypeStr, StringRef inTypeStr,
2151 bool isShift, bool isHiddenLOp,
2152 ClassKind ck, const std::string &InstName) {
2153 assert(!proto.empty() && "");
2156 // Function name with type suffix
2157 std::string mangledName = MangleName(name, outTypeStr, ClassS);
2158 if (outTypeStr != inTypeStr) {
2159 // If the input type is different (e.g., for vreinterpret), append a suffix
2160 // for the input type. Strip off a "Q" (quad) prefix so that MangleName
2161 // does not insert another "q" in the name.
2162 unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
2163 StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
2164 mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
2167 std::vector<std::string> FileCheckPatterns;
2168 GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
2169 isHiddenLOp, FileCheckPatterns);
2171 // Emit the FileCheck patterns.
2172 s += "// CHECK: test_" + mangledName + "\n";
2173 // Pattern lines are only emitted when GenerateChecksForIntrinsic produced
2174 // some; an empty FileCheckPatterns list adds no further CHECK lines.
2175 if (FileCheckPatterns.size()) {
2176 for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(),
2177 e = FileCheckPatterns.end();
2180 s += "// CHECK: " + *i + "\n";
2184 // Emit the start of the test function.
2185 s += TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
2188 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
2189 // Do not create arguments for values that must be immediate constants.
2190 if (proto[i] == 'i')
2192 s += comma + TypeString(proto[i], inTypeStr) + " ";
2198 if (proto[0] != 'v')
2200 s += mangledName + "(";
2202 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
2203 if (proto[i] == 'i') {
2204 // For immediate operands, test the maximum value.
2208 // The immediate generally refers to a lane in the preceding argument.
2209 s += utostr(RangeFromType(proto[i-1], inTypeStr));
2220 /// runTests - Write out a complete set of tests for all of the Neon
2222 void NeonEmitter::runTests(raw_ostream &OS) {
2224 "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi apcs-gnu\\\n"
2225 "// RUN: -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
2226 "// RUN: | FileCheck %s\n"
2228 "#include <arm_neon.h>\n"
2231 std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
2232 for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2234 std::string name = R->getValueAsString("Name");
2235 std::string Proto = R->getValueAsString("Prototype");
2236 std::string Types = R->getValueAsString("Types");
2237 bool isShift = R->getValueAsBit("isShift");
2238 std::string InstName = R->getValueAsString("InstName");
2239 bool isHiddenLOp = R->getValueAsBit("isHiddenLInst");
2241 SmallVector<StringRef, 16> TypeVec;
2242 ParseTypes(R, Types, TypeVec);
// NOTE(review): unlike runHeader(), no getSuperClasses().size() check is
// visible before indexing [1] here - confirm that every "Inst" record reaching
// this point has a class kind superclass.
2244 ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2245 OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
// Records whose operation is OpUnavailable are special-cased before any test
// is generated.
2246 if (kind == OpUnavailable)
2248 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2249 if (kind == OpReinterpret) {
// TypeVec[ti] is the output type; every entry of TypeVec is then considered
// as an input type.
2250 bool outQuad = false;
2252 (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
2253 for (unsigned srcti = 0, srcte = TypeVec.size();
2254 srcti != srcte; ++srcti) {
2255 bool inQuad = false;
2256 (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
// Identical types and quad/non-quad mismatches are singled out here.
2257 if (srcti == ti || inQuad != outQuad)
2259 OS << GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
2260 isShift, isHiddenLOp, ck, InstName);
2263 OS << GenTest(name, Proto, TypeVec[ti], TypeVec[ti],
2264 isShift, isHiddenLOp, ck, InstName);
/// EmitNeon - Entry point that writes arm_neon.h to OS by running
/// NeonEmitter::run() over the given records.
2272 void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
2273 NeonEmitter(Records).run(OS);
/// EmitNeonSema - Entry point that writes the builtin definitions and the
/// Sema overload/immediate checking sections by running
/// NeonEmitter::runHeader().
2275 void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
2276 NeonEmitter(Records).runHeader(OS);
/// EmitNeonTest - Entry point that writes the generated intrinsic tests by
/// running NeonEmitter::runTests().
2278 void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
2279 NeonEmitter(Records).runTests(OS);
2281 } // End namespace clang