contrib/llvm/tools/clang/utils/TableGen/NeonEmitter.cpp

   1 //===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This tablegen backend is responsible for emitting arm_neon.h, which includes
  11 // a declaration and definition of each function specified by the ARM NEON
  12 // compiler interface.  See ARM document DUI0348B.
  13 //
  14 // Each NEON instruction is implemented in terms of 1 or more functions which
  15 // are suffixed with the element type of the input vectors.  Functions may be
  16 // implemented in terms of generic vector operations such as +, *, -, etc. or
  17 // by calling a __builtin_-prefixed function which will be handled by clang's
  18 // CodeGen library.
  19 //
  20 // Additional validation code can be generated by this file when runHeader() is
  21 // called, rather than the normal run() entry point.  A complete set of tests
  22 // for Neon intrinsics can be generated by calling the runTests() entry point.
  23 //
  24 //===----------------------------------------------------------------------===//
  25
  26 #include "llvm/ADT/DenseMap.h"
  27 #include "llvm/ADT/SmallString.h"
  28 #include "llvm/ADT/SmallVector.h"
  29 #include "llvm/ADT/StringExtras.h"
  30 #include "llvm/ADT/StringMap.h"
  31 #include "llvm/Support/ErrorHandling.h"
  32 #include "llvm/TableGen/Error.h"
  33 #include "llvm/TableGen/Record.h"
  34 #include "llvm/TableGen/TableGenBackend.h"
  35 #include <string>
  36 using namespace llvm;
  37
  38 enum OpKind {
  39   OpNone,
  40   OpUnavailable,
  41   OpAdd,
  42   OpAddl,
  43   OpAddlHi,
  44   OpAddw,
  45   OpAddwHi,
  46   OpSub,
  47   OpSubl,
  48   OpSublHi,
  49   OpSubw,
  50   OpSubwHi,
  51   OpMul,
  52   OpMla,
  53   OpMlal,
  54   OpMullHi,
  55   OpMullHiN,
  56   OpMlalHi,
  57   OpMlalHiN,
  58   OpMls,
  59   OpMlsl,
  60   OpMlslHi,
  61   OpMlslHiN,
  62   OpMulN,
  63   OpMlaN,
  64   OpMlsN,
  65   OpFMlaN,
  66   OpFMlsN,
  67   OpMlalN,
  68   OpMlslN,
  69   OpMulLane,
  70   OpMulXLane,
  71   OpMullLane,
  72   OpMullHiLane,
  73   OpMlaLane,
  74   OpMlsLane,
  75   OpMlalLane,
  76   OpMlalHiLane,
  77   OpMlslLane,
  78   OpMlslHiLane,
  79   OpQDMullLane,
  80   OpQDMullHiLane,
  81   OpQDMlalLane,
  82   OpQDMlalHiLane,
  83   OpQDMlslLane,
  84   OpQDMlslHiLane,
  85   OpQDMulhLane,
  86   OpQRDMulhLane,
  87   OpFMSLane,
  88   OpFMSLaneQ,
  89   OpTrn1,
  90   OpZip1,
  91   OpUzp1,
  92   OpTrn2,
  93   OpZip2,
  94   OpUzp2,
  95   OpEq,
  96   OpGe,
  97   OpLe,
  98   OpGt,
  99   OpLt,
 100   OpNeg,
 101   OpNot,
 102   OpAnd,
 103   OpOr,
 104   OpXor,
 105   OpAndNot,
 106   OpOrNot,
 107   OpCast,
 108   OpConcat,
 109   OpDup,
 110   OpDupLane,
 111   OpHi,
 112   OpLo,
 113   OpSelect,
 114   OpRev16,
 115   OpRev32,
 116   OpRev64,
 117   OpXtnHi,
 118   OpSqxtunHi,
 119   OpQxtnHi,
 120   OpFcvtnHi,
 121   OpFcvtlHi,
 122   OpFcvtxnHi,
 123   OpReinterpret,
 124   OpAddhnHi,
 125   OpRAddhnHi,
 126   OpSubhnHi,
 127   OpRSubhnHi,
 128   OpAbdl,
 129   OpAbdlHi,
 130   OpAba,
 131   OpAbal,
 132   OpAbalHi,
 133   OpQDMullHi,
 134   OpQDMullHiN,
 135   OpQDMlalHi,
 136   OpQDMlalHiN,
 137   OpQDMlslHi,
 138   OpQDMlslHiN,
 139   OpDiv,
 140   OpLongHi,
 141   OpNarrowHi,
 142   OpMovlHi,
 143   OpCopyLane,
 144   OpCopyQLane,
 145   OpCopyLaneQ,
 146   OpScalarMulLane,
 147   OpScalarMulLaneQ,
 148   OpScalarMulXLane,
 149   OpScalarMulXLaneQ,
 150   OpScalarVMulXLane,
 151   OpScalarVMulXLaneQ,
 152   OpScalarQDMullLane,
 153   OpScalarQDMullLaneQ,
 154   OpScalarQDMulHiLane,
 155   OpScalarQDMulHiLaneQ,
 156   OpScalarQRDMulHiLane,
 157   OpScalarQRDMulHiLaneQ,
 158   OpScalarGetLane,
 159   OpScalarSetLane
 160 };
 161
 162 enum ClassKind {
 163   ClassNone,
 164   ClassI,           // generic integer instruction, e.g., "i8" suffix
 165   ClassS,           // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
 166   ClassW,           // width-specific instruction, e.g., "8" suffix
 167   ClassB,           // bitcast arguments with enum argument to specify type
 168   ClassL,           // Logical instructions which are op instructions
 169                     // but we need to not emit any suffix for in our
 170                     // tests.
 171   ClassNoTest       // Instructions which we do not test since they are
 172                     // not TRUE instructions.
 173 };
 174
 175 /// NeonTypeFlags - Flags to identify the types for overloaded Neon
 176 /// builtins.  These must be kept in sync with the flags in
 177 /// include/clang/Basic/TargetBuiltins.h.
 178 namespace {
 179 class NeonTypeFlags {
 180   enum {
 181     EltTypeMask = 0xf,
 182     UnsignedFlag = 0x10,
 183     QuadFlag = 0x20
 184   };
 185   uint32_t Flags;
 186
 187 public:
 188   enum EltType {
 189     Int8,
 190     Int16,
 191     Int32,
 192     Int64,
 193     Poly8,
 194     Poly16,
 195     Poly64,
 196     Float16,
 197     Float32,
 198     Float64
 199   };
 200
 201   NeonTypeFlags(unsigned F) : Flags(F) {}
 202   NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad) : Flags(ET) {
 203     if (IsUnsigned)
 204       Flags |= UnsignedFlag;
 205     if (IsQuad)
 206       Flags |= QuadFlag;
 207   }
 208
 209   uint32_t getFlags() const { return Flags; }
 210 };
 211 } // end anonymous namespace
 212
 213 namespace {
 214 class NeonEmitter {
 215   RecordKeeper &Records;
 216   StringMap<OpKind> OpMap;
 217   DenseMap<Record*, ClassKind> ClassMap;
 218
 219 public:
 220   NeonEmitter(RecordKeeper &R) : Records(R) {
 221     OpMap["OP_NONE"]  = OpNone;
 222     OpMap["OP_UNAVAILABLE"] = OpUnavailable;
 223     OpMap["OP_ADD"]   = OpAdd;
 224     OpMap["OP_ADDL"]  = OpAddl;
 225     OpMap["OP_ADDLHi"] = OpAddlHi;
 226     OpMap["OP_ADDW"]  = OpAddw;
 227     OpMap["OP_ADDWHi"] = OpAddwHi;
 228     OpMap["OP_SUB"]   = OpSub;
 229     OpMap["OP_SUBL"]  = OpSubl;
 230     OpMap["OP_SUBLHi"] = OpSublHi;
 231     OpMap["OP_SUBW"]  = OpSubw;
 232     OpMap["OP_SUBWHi"] = OpSubwHi;
 233     OpMap["OP_MUL"]   = OpMul;
 234     OpMap["OP_MLA"]   = OpMla;
 235     OpMap["OP_MLAL"]  = OpMlal;
 236     OpMap["OP_MULLHi"]  = OpMullHi;
 237     OpMap["OP_MULLHi_N"]  = OpMullHiN;
 238     OpMap["OP_MLALHi"]  = OpMlalHi;
 239     OpMap["OP_MLALHi_N"]  = OpMlalHiN;
 240     OpMap["OP_MLS"]   = OpMls;
 241     OpMap["OP_MLSL"]  = OpMlsl;
 242     OpMap["OP_MLSLHi"] = OpMlslHi;
 243     OpMap["OP_MLSLHi_N"] = OpMlslHiN;
 244     OpMap["OP_MUL_N"] = OpMulN;
 245     OpMap["OP_MLA_N"] = OpMlaN;
 246     OpMap["OP_MLS_N"] = OpMlsN;
 247     OpMap["OP_FMLA_N"] = OpFMlaN;
 248     OpMap["OP_FMLS_N"] = OpFMlsN;
 249     OpMap["OP_MLAL_N"] = OpMlalN;
 250     OpMap["OP_MLSL_N"] = OpMlslN;
 251     OpMap["OP_MUL_LN"]= OpMulLane;
 252     OpMap["OP_MULX_LN"]= OpMulXLane;
 253     OpMap["OP_MULL_LN"] = OpMullLane;
 254     OpMap["OP_MULLHi_LN"] = OpMullHiLane;
 255     OpMap["OP_MLA_LN"]= OpMlaLane;
 256     OpMap["OP_MLS_LN"]= OpMlsLane;
 257     OpMap["OP_MLAL_LN"] = OpMlalLane;
 258     OpMap["OP_MLALHi_LN"] = OpMlalHiLane;
 259     OpMap["OP_MLSL_LN"] = OpMlslLane;
 260     OpMap["OP_MLSLHi_LN"] = OpMlslHiLane;
 261     OpMap["OP_QDMULL_LN"] = OpQDMullLane;
 262     OpMap["OP_QDMULLHi_LN"] = OpQDMullHiLane;
 263     OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
 264     OpMap["OP_QDMLALHi_LN"] = OpQDMlalHiLane;
 265     OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
 266     OpMap["OP_QDMLSLHi_LN"] = OpQDMlslHiLane;
 267     OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
 268     OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
 269     OpMap["OP_FMS_LN"] = OpFMSLane;
 270     OpMap["OP_FMS_LNQ"] = OpFMSLaneQ;
 271     OpMap["OP_TRN1"]  = OpTrn1;
 272     OpMap["OP_ZIP1"]  = OpZip1;
 273     OpMap["OP_UZP1"]  = OpUzp1;
 274     OpMap["OP_TRN2"]  = OpTrn2;
 275     OpMap["OP_ZIP2"]  = OpZip2;
 276     OpMap["OP_UZP2"]  = OpUzp2;
 277     OpMap["OP_EQ"]    = OpEq;
 278     OpMap["OP_GE"]    = OpGe;
 279     OpMap["OP_LE"]    = OpLe;
 280     OpMap["OP_GT"]    = OpGt;
 281     OpMap["OP_LT"]    = OpLt;
 282     OpMap["OP_NEG"]   = OpNeg;
 283     OpMap["OP_NOT"]   = OpNot;
 284     OpMap["OP_AND"]   = OpAnd;
 285     OpMap["OP_OR"]    = OpOr;
 286     OpMap["OP_XOR"]   = OpXor;
 287     OpMap["OP_ANDN"]  = OpAndNot;
 288     OpMap["OP_ORN"]   = OpOrNot;
 289     OpMap["OP_CAST"]  = OpCast;
 290     OpMap["OP_CONC"]  = OpConcat;
 291     OpMap["OP_HI"]    = OpHi;
 292     OpMap["OP_LO"]    = OpLo;
 293     OpMap["OP_DUP"]   = OpDup;
 294     OpMap["OP_DUP_LN"] = OpDupLane;
 295     OpMap["OP_SEL"]   = OpSelect;
 296     OpMap["OP_REV16"] = OpRev16;
 297     OpMap["OP_REV32"] = OpRev32;
 298     OpMap["OP_REV64"] = OpRev64;
 299     OpMap["OP_XTN"] = OpXtnHi;
 300     OpMap["OP_SQXTUN"] = OpSqxtunHi;
 301     OpMap["OP_QXTN"] = OpQxtnHi;
 302     OpMap["OP_VCVT_NA_HI"] = OpFcvtnHi;
 303     OpMap["OP_VCVT_EX_HI"] = OpFcvtlHi;
 304     OpMap["OP_VCVTX_HI"] = OpFcvtxnHi;
 305     OpMap["OP_REINT"] = OpReinterpret;
 306     OpMap["OP_ADDHNHi"] = OpAddhnHi;
 307     OpMap["OP_RADDHNHi"] = OpRAddhnHi;
 308     OpMap["OP_SUBHNHi"] = OpSubhnHi;
 309     OpMap["OP_RSUBHNHi"] = OpRSubhnHi;
 310     OpMap["OP_ABDL"]  = OpAbdl;
 311     OpMap["OP_ABDLHi"] = OpAbdlHi;
 312     OpMap["OP_ABA"]   = OpAba;
 313     OpMap["OP_ABAL"]  = OpAbal;
 314     OpMap["OP_ABALHi"] = OpAbalHi;
 315     OpMap["OP_QDMULLHi"] = OpQDMullHi;
 316     OpMap["OP_QDMULLHi_N"] = OpQDMullHiN;
 317     OpMap["OP_QDMLALHi"] = OpQDMlalHi;
 318     OpMap["OP_QDMLALHi_N"] = OpQDMlalHiN;
 319     OpMap["OP_QDMLSLHi"] = OpQDMlslHi;
 320     OpMap["OP_QDMLSLHi_N"] = OpQDMlslHiN;
 321     OpMap["OP_DIV"] = OpDiv;
 322     OpMap["OP_LONG_HI"] = OpLongHi;
 323     OpMap["OP_NARROW_HI"] = OpNarrowHi;
 324     OpMap["OP_MOVL_HI"] = OpMovlHi;
 325     OpMap["OP_COPY_LN"] = OpCopyLane;
 326     OpMap["OP_COPYQ_LN"] = OpCopyQLane;
 327     OpMap["OP_COPY_LNQ"] = OpCopyLaneQ;
 328     OpMap["OP_SCALAR_MUL_LN"]= OpScalarMulLane;
 329     OpMap["OP_SCALAR_MUL_LNQ"]= OpScalarMulLaneQ;
 330     OpMap["OP_SCALAR_MULX_LN"]= OpScalarMulXLane;
 331     OpMap["OP_SCALAR_MULX_LNQ"]= OpScalarMulXLaneQ;
 332     OpMap["OP_SCALAR_VMULX_LN"]= OpScalarVMulXLane;
 333     OpMap["OP_SCALAR_VMULX_LNQ"]= OpScalarVMulXLaneQ;
 334     OpMap["OP_SCALAR_QDMULL_LN"] = OpScalarQDMullLane;
 335     OpMap["OP_SCALAR_QDMULL_LNQ"] = OpScalarQDMullLaneQ;
 336     OpMap["OP_SCALAR_QDMULH_LN"] = OpScalarQDMulHiLane;
 337     OpMap["OP_SCALAR_QDMULH_LNQ"] = OpScalarQDMulHiLaneQ;
 338     OpMap["OP_SCALAR_QRDMULH_LN"] = OpScalarQRDMulHiLane;
 339     OpMap["OP_SCALAR_QRDMULH_LNQ"] = OpScalarQRDMulHiLaneQ;
 340     OpMap["OP_SCALAR_GET_LN"] = OpScalarGetLane;
 341     OpMap["OP_SCALAR_SET_LN"] = OpScalarSetLane;
 342
 343     Record *SI = R.getClass("SInst");
 344     Record *II = R.getClass("IInst");
 345     Record *WI = R.getClass("WInst");
 346     Record *SOpI = R.getClass("SOpInst");
 347     Record *IOpI = R.getClass("IOpInst");
 348     Record *WOpI = R.getClass("WOpInst");
 349     Record *LOpI = R.getClass("LOpInst");
 350     Record *NoTestOpI = R.getClass("NoTestOpInst");
 351
 352     ClassMap[SI] = ClassS;
 353     ClassMap[II] = ClassI;
 354     ClassMap[WI] = ClassW;
 355     ClassMap[SOpI] = ClassS;
 356     ClassMap[IOpI] = ClassI;
 357     ClassMap[WOpI] = ClassW;
 358     ClassMap[LOpI] = ClassL;
 359     ClassMap[NoTestOpI] = ClassNoTest;
 360   }
 361
 362   // run - Emit arm_neon.h.inc
 363   void run(raw_ostream &o);
 364
 365   // runHeader - Emit all the __builtin prototypes used in arm_neon.h
 366   void runHeader(raw_ostream &o);
 367
 368   // runTests - Emit tests for all the Neon intrinsics.
 369   void runTests(raw_ostream &o);
 370
 371 private:
 372   void emitIntrinsic(raw_ostream &OS, Record *R,
 373                      StringMap<ClassKind> &EmittedMap);
 374   void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap,
 375                       bool isA64GenBuiltinDef);
 376   void genOverloadTypeCheckCode(raw_ostream &OS,
 377                                 StringMap<ClassKind> &A64IntrinsicMap,
 378                                 bool isA64TypeCheck);
 379   void genIntrinsicRangeCheckCode(raw_ostream &OS,
 380                                   StringMap<ClassKind> &A64IntrinsicMap,
 381                                   bool isA64RangeCheck);
 382   void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
 383                      bool isA64TestGen);
 384 };
 385 } // end anonymous namespace
 386
 387 /// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
 388 /// which each StringRef representing a single type declared in the string.
 389 /// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
 390 /// 2xfloat and 4xfloat respectively.
 391 static void ParseTypes(Record *r, std::string &s,
 392                        SmallVectorImpl<StringRef> &TV) {
 393   const char *data = s.data();
 394   int len = 0;
 395
 396   for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
 397     if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U'
 398                          || data[len] == 'H' || data[len] == 'S')
 399       continue;
 400
 401     switch (data[len]) {
 402       case 'c':
 403       case 's':
 404       case 'i':
 405       case 'l':
 406       case 'h':
 407       case 'f':
 408       case 'd':
 409         break;
 410       default:
 411         PrintFatalError(r->getLoc(),
 412                       "Unexpected letter: " + std::string(data + len, 1));
 413     }
 414     TV.push_back(StringRef(data, len + 1));
 415     data += len + 1;
 416     len = -1;
 417   }
 418 }
 419
 420 /// Widen - Convert a type code into the next wider type.  char -> short,
 421 /// short -> int, etc.
 422 static char Widen(const char t) {
 423   switch (t) {
 424     case 'c':
 425       return 's';
 426     case 's':
 427       return 'i';
 428     case 'i':
 429       return 'l';
 430     case 'h':
 431       return 'f';
 432     case 'f':
 433       return 'd';
 434     default:
 435       PrintFatalError("unhandled type in widen!");
 436   }
 437 }
 438
 439 /// Narrow - Convert a type code into the next smaller type.  short -> char,
 440 /// float -> half float, etc.
 441 static char Narrow(const char t) {
 442   switch (t) {
 443     case 's':
 444       return 'c';
 445     case 'i':
 446       return 's';
 447     case 'l':
 448       return 'i';
 449     case 'f':
 450       return 'h';
 451     case 'd':
 452       return 'f';
 453     default:
 454       PrintFatalError("unhandled type in narrow!");
 455   }
 456 }
 457
 458 static std::string GetNarrowTypestr(StringRef ty)
 459 {
 460   std::string s;
 461   for (size_t i = 0, end = ty.size(); i < end; i++) {
 462     switch (ty[i]) {
 463       case 's':
 464         s += 'c';
 465         break;
 466       case 'i':
 467         s += 's';
 468         break;
 469       case 'l':
 470         s += 'i';
 471         break;
 472       default:
 473         s += ty[i];
 474         break;
 475     }
 476   }
 477
 478   return s;
 479 }
 480
 481 /// For a particular StringRef, return the base type code, and whether it has
 482 /// the quad-vector, polynomial, or unsigned modifiers set.
 483 static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
 484   unsigned off = 0;
 485   // ignore scalar.
 486   if (ty[off] == 'S') {
 487     ++off;
 488   }
 489   // remember quad.
 490   if (ty[off] == 'Q' || ty[off] == 'H') {
 491     quad = true;
 492     ++off;
 493   }
 494
 495   // remember poly.
 496   if (ty[off] == 'P') {
 497     poly = true;
 498     ++off;
 499   }
 500
 501   // remember unsigned.
 502   if (ty[off] == 'U') {
 503     usgn = true;
 504     ++off;
 505   }
 506
 507   // base type to get the type string for.
 508   return ty[off];
 509 }
 510
 511 /// ModType - Transform a type code and its modifiers based on a mod code. The
 512 /// mod code definitions may be found at the top of arm_neon.td.
 513 static char ModType(const char mod, char type, bool &quad, bool &poly,
 514                     bool &usgn, bool &scal, bool &cnst, bool &pntr) {
 515   switch (mod) {
 516     case 't':
 517       if (poly) {
 518         poly = false;
 519         usgn = true;
 520       }
 521       break;
 522     case 'b':
 523       scal = true;
 524     case 'u':
 525       usgn = true;
 526       poly = false;
 527       if (type == 'f')
 528         type = 'i';
 529       if (type == 'd')
 530         type = 'l';
 531       break;
 532     case '$':
 533       scal = true;
 534     case 'x':
 535       usgn = false;
 536       poly = false;
 537       if (type == 'f')
 538         type = 'i';
 539       if (type == 'd')
 540         type = 'l';
 541       break;
 542     case 'o':
 543       scal = true;
 544       type = 'd';
 545       usgn = false;
 546       break;
 547     case 'y':
 548       scal = true;
 549     case 'f':
 550       if (type == 'h')
 551         quad = true;
 552       type = 'f';
 553       usgn = false;
 554       break;
 555     case 'F':
 556       type = 'd';
 557       usgn = false;
 558       break;
 559     case 'g':
 560       quad = false;
 561       break;
 562     case 'B':
 563     case 'C':
 564     case 'D':
 565     case 'j':
 566       quad = true;
 567       break;
 568     case 'w':
 569       type = Widen(type);
 570       quad = true;
 571       break;
 572     case 'n':
 573       type = Widen(type);
 574       break;
 575     case 'i':
 576       type = 'i';
 577       scal = true;
 578       break;
 579     case 'l':
 580       type = 'l';
 581       scal = true;
 582       usgn = true;
 583       break;
 584     case 'z':
 585       type = Narrow(type);
 586       scal = true;
 587       break;
 588     case 'r':
 589       type = Widen(type);
 590       scal = true;
 591       break;
 592     case 's':
 593     case 'a':
 594       scal = true;
 595       break;
 596     case 'k':
 597       quad = true;
 598       break;
 599     case 'c':
 600       cnst = true;
 601     case 'p':
 602       pntr = true;
 603       scal = true;
 604       break;
 605     case 'h':
 606       type = Narrow(type);
 607       if (type == 'h')
 608         quad = false;
 609       break;
 610     case 'q':
 611       type = Narrow(type);
 612       quad = true;
 613       break;
 614     case 'e':
 615       type = Narrow(type);
 616       usgn = true;
 617       break;
 618     case 'm':
 619       type = Narrow(type);
 620       quad = false;
 621       break;
 622     default:
 623       break;
 624   }
 625   return type;
 626 }
 627
 628 static bool IsMultiVecProto(const char p) {
 629   return ((p >= '2' && p <= '4') || (p >= 'B' && p <= 'D'));
 630 }
 631
 632 /// TypeString - for a modifier and type, generate the name of the typedef for
 633 /// that type.  QUc -> uint8x8_t.
 634 static std::string TypeString(const char mod, StringRef typestr) {
 635   bool quad = false;
 636   bool poly = false;
 637   bool usgn = false;
 638   bool scal = false;
 639   bool cnst = false;
 640   bool pntr = false;
 641
 642   if (mod == 'v')
 643     return "void";
 644   if (mod == 'i')
 645     return "int";
 646
 647   // base type to get the type string for.
 648   char type = ClassifyType(typestr, quad, poly, usgn);
 649
 650   // Based on the modifying character, change the type and width if necessary.
 651   type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
 652
 653   SmallString<128> s;
 654
 655   if (usgn)
 656     s.push_back('u');
 657
 658   switch (type) {
 659     case 'c':
 660       s += poly ? "poly8" : "int8";
 661       if (scal)
 662         break;
 663       s += quad ? "x16" : "x8";
 664       break;
 665     case 's':
 666       s += poly ? "poly16" : "int16";
 667       if (scal)
 668         break;
 669       s += quad ? "x8" : "x4";
 670       break;
 671     case 'i':
 672       s += "int32";
 673       if (scal)
 674         break;
 675       s += quad ? "x4" : "x2";
 676       break;
 677     case 'l':
 678       s += (poly && !usgn)? "poly64" : "int64";
 679       if (scal)
 680         break;
 681       s += quad ? "x2" : "x1";
 682       break;
 683     case 'h':
 684       s += "float16";
 685       if (scal)
 686         break;
 687       s += quad ? "x8" : "x4";
 688       break;
 689     case 'f':
 690       s += "float32";
 691       if (scal)
 692         break;
 693       s += quad ? "x4" : "x2";
 694       break;
 695     case 'd':
 696       s += "float64";
 697       if (scal)
 698         break;
 699       s += quad ? "x2" : "x1";
 700       break;
 701
 702     default:
 703       PrintFatalError("unhandled type!");
 704   }
 705
 706   if (mod == '2' || mod == 'B')
 707     s += "x2";
 708   if (mod == '3' || mod == 'C')
 709     s += "x3";
 710   if (mod == '4' || mod == 'D')
 711     s += "x4";
 712
 713   // Append _t, finishing the type string typedef type.
 714   s += "_t";
 715
 716   if (cnst)
 717     s += " const";
 718
 719   if (pntr)
 720     s += " *";
 721
 722   return s.str();
 723 }
 724
 725 /// BuiltinTypeString - for a modifier and type, generate the clang
 726 /// BuiltinsARM.def prototype code for the function.  See the top of clang's
 727 /// Builtins.def for a description of the type strings.
 728 static std::string BuiltinTypeString(const char mod, StringRef typestr,
 729                                      ClassKind ck, bool ret) {
 730   bool quad = false;
 731   bool poly = false;
 732   bool usgn = false;
 733   bool scal = false;
 734   bool cnst = false;
 735   bool pntr = false;
 736
 737   if (mod == 'v')
 738     return "v"; // void
 739   if (mod == 'i')
 740     return "i"; // int
 741
 742   // base type to get the type string for.
 743   char type = ClassifyType(typestr, quad, poly, usgn);
 744
 745   // Based on the modifying character, change the type and width if necessary.
 746   type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
 747
 748   // All pointers are void* pointers.  Change type to 'v' now.
 749   if (pntr) {
 750     usgn = false;
 751     poly = false;
 752     type = 'v';
 753   }
 754   // Treat half-float ('h') types as unsigned short ('s') types.
 755   if (type == 'h') {
 756     type = 's';
 757     usgn = true;
 758   }
 759   usgn = usgn | poly | ((ck == ClassI || ck == ClassW) &&
 760                          scal && type != 'f' && type != 'd');
 761
 762   if (scal) {
 763     SmallString<128> s;
 764
 765     if (usgn)
 766       s.push_back('U');
 767     else if (type == 'c')
 768       s.push_back('S'); // make chars explicitly signed
 769
 770     if (type == 'l') // 64-bit long
 771       s += "LLi";
 772     else
 773       s.push_back(type);
 774
 775     if (cnst)
 776       s.push_back('C');
 777     if (pntr)
 778       s.push_back('*');
 779     return s.str();
 780   }
 781
 782   // Since the return value must be one type, return a vector type of the
 783   // appropriate width which we will bitcast.  An exception is made for
 784   // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
 785   // fashion, storing them to a pointer arg.
 786   if (ret) {
 787     if (IsMultiVecProto(mod))
 788       return "vv*"; // void result with void* first argument
 789     if (mod == 'f' || (ck != ClassB && type == 'f'))
 790       return quad ? "V4f" : "V2f";
 791     if (mod == 'F' || (ck != ClassB && type == 'd'))
 792       return quad ? "V2d" : "V1d";
 793     if (ck != ClassB && type == 's')
 794       return quad ? "V8s" : "V4s";
 795     if (ck != ClassB && type == 'i')
 796       return quad ? "V4i" : "V2i";
 797     if (ck != ClassB && type == 'l')
 798       return quad ? "V2LLi" : "V1LLi";
 799
 800     return quad ? "V16Sc" : "V8Sc";
 801   }
 802
 803   // Non-return array types are passed as individual vectors.
 804   if (mod == '2' || mod == 'B')
 805     return quad ? "V16ScV16Sc" : "V8ScV8Sc";
 806   if (mod == '3' || mod == 'C')
 807     return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
 808   if (mod == '4' || mod == 'D')
 809     return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";
 810
 811   if (mod == 'f' || (ck != ClassB && type == 'f'))
 812     return quad ? "V4f" : "V2f";
 813   if (mod == 'F' || (ck != ClassB && type == 'd'))
 814     return quad ? "V2d" : "V1d";
 815   if (ck != ClassB && type == 's')
 816     return quad ? "V8s" : "V4s";
 817   if (ck != ClassB && type == 'i')
 818     return quad ? "V4i" : "V2i";
 819   if (ck != ClassB && type == 'l')
 820     return quad ? "V2LLi" : "V1LLi";
 821
 822   return quad ? "V16Sc" : "V8Sc";
 823 }
 824
 825 /// InstructionTypeCode - Computes the ARM argument character code and
 826 /// quad status for a specific type string and ClassKind.
 827 static void InstructionTypeCode(const StringRef &typeStr,
 828                                 const ClassKind ck,
 829                                 bool &quad,
 830                                 std::string &typeCode) {
 831   bool poly = false;
 832   bool usgn = false;
 833   char type = ClassifyType(typeStr, quad, poly, usgn);
 834
 835   switch (type) {
 836   case 'c':
 837     switch (ck) {
 838     case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break;
 839     case ClassI: typeCode = "i8"; break;
 840     case ClassW: typeCode = "8"; break;
 841     default: break;
 842     }
 843     break;
 844   case 's':
 845     switch (ck) {
 846     case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break;
 847     case ClassI: typeCode = "i16"; break;
 848     case ClassW: typeCode = "16"; break;
 849     default: break;
 850     }
 851     break;
 852   case 'i':
 853     switch (ck) {
 854     case ClassS: typeCode = usgn ? "u32" : "s32"; break;
 855     case ClassI: typeCode = "i32"; break;
 856     case ClassW: typeCode = "32"; break;
 857     default: break;
 858     }
 859     break;
 860   case 'l':
 861     switch (ck) {
 862     case ClassS: typeCode = poly ? "p64" : usgn ? "u64" : "s64"; break;
 863     case ClassI: typeCode = "i64"; break;
 864     case ClassW: typeCode = "64"; break;
 865     default: break;
 866     }
 867     break;
 868   case 'h':
 869     switch (ck) {
 870     case ClassS:
 871     case ClassI: typeCode = "f16"; break;
 872     case ClassW: typeCode = "16"; break;
 873     default: break;
 874     }
 875     break;
 876   case 'f':
 877     switch (ck) {
 878     case ClassS:
 879     case ClassI: typeCode = "f32"; break;
 880     case ClassW: typeCode = "32"; break;
 881     default: break;
 882     }
 883     break;
 884   case 'd':
 885     switch (ck) {
 886     case ClassS:
 887     case ClassI:
 888       typeCode += "f64";
 889       break;
 890     case ClassW:
 891       PrintFatalError("unhandled type!");
 892     default:
 893       break;
 894     }
 895     break;
 896   default:
 897     PrintFatalError("unhandled type!");
 898   }
 899 }
 900
 901 static char Insert_BHSD_Suffix(StringRef typestr){
 902   unsigned off = 0;
 903   if(typestr[off++] == 'S'){
 904     while(typestr[off] == 'Q' || typestr[off] == 'H'||
 905           typestr[off] == 'P' || typestr[off] == 'U')
 906       ++off;
 907     switch (typestr[off]){
 908     default  : break;
 909     case 'c' : return 'b';
 910     case 's' : return 'h';
 911     case 'i' :
 912     case 'f' : return 's';
 913     case 'l' :
 914     case 'd' : return 'd';
 915     }
 916   }
 917   return 0;
 918 }
 919
 920 static bool endsWith_xN(std::string const &name) {
 921   if (name.length() > 3) {
 922     if (name.compare(name.length() - 3, 3, "_x2") == 0 ||
 923         name.compare(name.length() - 3, 3, "_x3") == 0 ||
 924         name.compare(name.length() - 3, 3, "_x4") == 0)
 925       return true;
 926   }
 927   return false;
 928 }
 929
 930 /// MangleName - Append a type or width suffix to a base neon function name,
 931 /// and insert a 'q' in the appropriate location if type string starts with 'Q'.
 932 /// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
 933 /// Insert proper 'b' 'h' 's' 'd' if prefix 'S' is used.
 934 static std::string MangleName(const std::string &name, StringRef typestr,
 935                               ClassKind ck) {
 936   if (name == "vcvt_f32_f16" || name == "vcvt_f32_f64" ||
 937       name == "vcvt_f64_f32")
 938     return name;
 939
 940   bool quad = false;
 941   std::string typeCode = "";
 942
 943   InstructionTypeCode(typestr, ck, quad, typeCode);
 944
 945   std::string s = name;
 946
 947   if (typeCode.size() > 0) {
 948     // If the name is end with _xN (N = 2,3,4), insert the typeCode before _xN.
 949     if (endsWith_xN(s))
 950       s.insert(s.length() - 3, "_" + typeCode);
 951     else
 952       s += "_" + typeCode;
 953   }
 954
 955   if (ck == ClassB)
 956     s += "_v";
 957
 958   // Insert a 'q' before the first '_' character so that it ends up before
 959   // _lane or _n on vector-scalar operations.
 960   if (typestr.find("Q") != StringRef::npos) {
 961       size_t pos = s.find('_');
 962       s = s.insert(pos, "q");
 963   }
 964   char ins = Insert_BHSD_Suffix(typestr);
 965   if(ins){
 966     size_t pos = s.find('_');
 967     s = s.insert(pos, &ins, 1);
 968   }
 969
 970   return s;
 971 }
 972
 973 static void PreprocessInstruction(const StringRef &Name,
 974                                   const std::string &InstName,
 975                                   std::string &Prefix,
 976                                   bool &HasNPostfix,
 977                                   bool &HasLanePostfix,
 978                                   bool &HasDupPostfix,
 979                                   bool &IsSpecialVCvt,
 980                                   size_t &TBNumber) {
 981   // All of our instruction name fields from arm_neon.td are of the form
 982   //   <instructionname>_...
 983   // Thus we grab our instruction name via computation of said Prefix.
 984   const size_t PrefixEnd = Name.find_first_of('_');
 985   // If InstName is passed in, we use that instead of our name Prefix.
 986   Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName;
 987
 988   const StringRef Postfix = Name.slice(PrefixEnd, Name.size());
 989
 990   HasNPostfix = Postfix.count("_n");
 991   HasLanePostfix = Postfix.count("_lane");
 992   HasDupPostfix = Postfix.count("_dup");
 993   IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt");
 994
 995   if (InstName.compare("vtbl") == 0 ||
 996       InstName.compare("vtbx") == 0) {
 997     // If we have a vtblN/vtbxN instruction, use the instruction's ASCII
 998     // encoding to get its true value.
 999     TBNumber = Name[Name.size()-1] - 48;
1000   }
1001 }
1002
1003 /// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have
1004 /// extracted, generate a FileCheck pattern for a Load Or Store
1005 static void
1006 GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef,
1007                                           const std::string& OutTypeCode,
1008                                           const bool &IsQuad,
1009                                           const bool &HasDupPostfix,
1010                                           const bool &HasLanePostfix,
1011                                           const size_t Count,
1012                                           std::string &RegisterSuffix) {
1013   const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1");
1014   // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang
1015   // will output a series of v{ld,st}1s, so we have to handle it specially.
1016   if ((Count == 3 || Count == 4) && IsQuad) {
1017     RegisterSuffix += "{";
1018     for (size_t i = 0; i < Count; i++) {
1019       RegisterSuffix += "d{{[0-9]+}}";
1020       if (HasDupPostfix) {
1021         RegisterSuffix += "[]";
1022       }
1023       if (HasLanePostfix) {
1024         RegisterSuffix += "[{{[0-9]+}}]";
1025       }
1026       if (i < Count-1) {
1027         RegisterSuffix += ", ";
1028       }
1029     }
1030     RegisterSuffix += "}";
1031   } else {
1032
1033     // Handle normal loads and stores.
1034     RegisterSuffix += "{";
1035     for (size_t i = 0; i < Count; i++) {
1036       RegisterSuffix += "d{{[0-9]+}}";
1037       if (HasDupPostfix) {
1038         RegisterSuffix += "[]";
1039       }
1040       if (HasLanePostfix) {
1041         RegisterSuffix += "[{{[0-9]+}}]";
1042       }
1043       if (IsQuad && !HasLanePostfix) {
1044         RegisterSuffix += ", d{{[0-9]+}}";
1045         if (HasDupPostfix) {
1046           RegisterSuffix += "[]";
1047         }
1048       }
1049       if (i < Count-1) {
1050         RegisterSuffix += ", ";
1051       }
1052     }
1053     RegisterSuffix += "}, [r{{[0-9]+}}";
1054
1055     // We only include the alignment hint if we have a vld1.*64 or
1056     // a dup/lane instruction.
1057     if (IsLDSTOne) {
1058       if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") {
1059         RegisterSuffix += ":" + OutTypeCode;
1060       }
1061     }
1062
1063     RegisterSuffix += "]";
1064   }
1065 }
1066
1067 static bool HasNPostfixAndScalarArgs(const StringRef &NameRef,
1068                                      const bool &HasNPostfix) {
1069   return (NameRef.count("vmla") ||
1070           NameRef.count("vmlal") ||
1071           NameRef.count("vmlsl") ||
1072           NameRef.count("vmull") ||
1073           NameRef.count("vqdmlal") ||
1074           NameRef.count("vqdmlsl") ||
1075           NameRef.count("vqdmulh") ||
1076           NameRef.count("vqdmull") ||
1077           NameRef.count("vqrdmulh")) && HasNPostfix;
1078 }
1079
1080 static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef,
1081                                          const bool &HasLanePostfix) {
1082   return (NameRef.count("vmla") ||
1083           NameRef.count("vmls") ||
1084           NameRef.count("vmlal") ||
1085           NameRef.count("vmlsl") ||
1086           (NameRef.count("vmul") && NameRef.size() == 3)||
1087           NameRef.count("vqdmlal") ||
1088           NameRef.count("vqdmlsl") ||
1089           NameRef.count("vqdmulh") ||
1090           NameRef.count("vqrdmulh")) && HasLanePostfix;
1091 }
1092
1093 static bool IsSpecialLaneMultiply(const StringRef &NameRef,
1094                                   const bool &HasLanePostfix,
1095                                   const bool &IsQuad) {
1096   const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh"))
1097                                && IsQuad;
1098   const bool IsVMull = NameRef.count("mull") && !IsQuad;
1099   return (IsVMulOrMulh || IsVMull) && HasLanePostfix;
1100 }
1101
1102 static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
1103                                                      const std::string &Proto,
1104                                                      const bool &HasNPostfix,
1105                                                      const bool &IsQuad,
1106                                                      const bool &HasLanePostfix,
1107                                                      const bool &HasDupPostfix,
1108                                                      std::string &NormedProto) {
1109   // Handle generic case.
1110   const StringRef NameRef(Name);
1111   for (size_t i = 0, end = Proto.size(); i < end; i++) {
1112     switch (Proto[i]) {
1113     case 'u':
1114     case 'f':
1115     case 'F':
1116     case 'd':
1117     case 's':
1118     case 'x':
1119     case 't':
1120     case 'n':
1121       NormedProto += IsQuad? 'q' : 'd';
1122       break;
1123     case 'w':
1124     case 'k':
1125       NormedProto += 'q';
1126       break;
1127     case 'g':
1128     case 'j':
1129     case 'h':
1130     case 'e':
1131       NormedProto += 'd';
1132       break;
1133     case 'i':
1134       NormedProto += HasLanePostfix? 'a' : 'i';
1135       break;
1136     case 'a':
1137       if (HasLanePostfix) {
1138         NormedProto += 'a';
1139       } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) {
1140         NormedProto += IsQuad? 'q' : 'd';
1141       } else {
1142         NormedProto += 'i';
1143       }
1144       break;
1145     }
1146   }
1147
1148   // Handle Special Cases.
1149   const bool IsNotVExt = !NameRef.count("vext");
1150   const bool IsVPADAL = NameRef.count("vpadal");
1151   const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef,
1152                                                            HasLanePostfix);
1153   const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix,
1154                                                       IsQuad);
1155
1156   if (IsSpecialLaneMul) {
1157     // If
1158     NormedProto[2] = NormedProto[3];
1159     NormedProto.erase(3);
1160   } else if (NormedProto.size() == 4 &&
1161              NormedProto[0] == NormedProto[1] &&
1162              IsNotVExt) {
1163     // If NormedProto.size() == 4 and the first two proto characters are the
1164     // same, ignore the first.
1165     NormedProto = NormedProto.substr(1, 3);
1166   } else if (Is5OpLaneAccum) {
1167     // If we have a 5 op lane accumulator operation, we take characters 1,2,4
1168     std::string tmp = NormedProto.substr(1,2);
1169     tmp += NormedProto[4];
1170     NormedProto = tmp;
1171   } else if (IsVPADAL) {
1172     // If we have VPADAL, ignore the first character.
1173     NormedProto = NormedProto.substr(0, 2);
1174   } else if (NameRef.count("vdup") && NormedProto.size() > 2) {
1175     // If our instruction is a dup instruction, keep only the first and
1176     // last characters.
1177     std::string tmp = "";
1178     tmp += NormedProto[0];
1179     tmp += NormedProto[NormedProto.size()-1];
1180     NormedProto = tmp;
1181   }
1182 }
1183
1184 /// GenerateRegisterCheckPatterns - Given a bunch of data we have
1185 /// extracted, generate a FileCheck pattern to check that an
1186 /// instruction's arguments are correct.
1187 static void GenerateRegisterCheckPattern(const std::string &Name,
1188                                          const std::string &Proto,
1189                                          const std::string &OutTypeCode,
1190                                          const bool &HasNPostfix,
1191                                          const bool &IsQuad,
1192                                          const bool &HasLanePostfix,
1193                                          const bool &HasDupPostfix,
1194                                          const size_t &TBNumber,
1195                                          std::string &RegisterSuffix) {
1196
1197   RegisterSuffix = "";
1198
1199   const StringRef NameRef(Name);
1200   const StringRef ProtoRef(Proto);
1201
1202   if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) {
1203     return;
1204   }
1205
1206   const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst");
1207   const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx");
1208
1209   if (IsLoadStore) {
1210     // Grab N value from  v{ld,st}N using its ascii representation.
1211     const size_t Count = NameRef[3] - 48;
1212
1213     GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad,
1214                                               HasDupPostfix, HasLanePostfix,
1215                                               Count, RegisterSuffix);
1216   } else if (IsTBXOrTBL) {
1217     RegisterSuffix += "d{{[0-9]+}}, {";
1218     for (size_t i = 0; i < TBNumber-1; i++) {
1219       RegisterSuffix += "d{{[0-9]+}}, ";
1220     }
1221     RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}";
1222   } else {
1223     // Handle a normal instruction.
1224     if (NameRef.count("vget") || NameRef.count("vset"))
1225       return;
1226
1227     // We first normalize our proto, since we only need to emit 4
1228     // different types of checks, yet have more than 4 proto types
1229     // that map onto those 4 patterns.
1230     std::string NormalizedProto("");
1231     NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad,
1232                                              HasLanePostfix, HasDupPostfix,
1233                                              NormalizedProto);
1234
1235     for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) {
1236       const char &c = NormalizedProto[i];
1237       switch (c) {
1238       case 'q':
1239         RegisterSuffix += "q{{[0-9]+}}, ";
1240         break;
1241
1242       case 'd':
1243         RegisterSuffix += "d{{[0-9]+}}, ";
1244         break;
1245
1246       case 'i':
1247         RegisterSuffix += "#{{[0-9]+}}, ";
1248         break;
1249
1250       case 'a':
1251         RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], ";
1252         break;
1253       }
1254     }
1255
1256     // Remove extra ", ".
1257     RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2);
1258   }
1259 }
1260
1261 /// GenerateChecksForIntrinsic - Given a specific instruction name +
1262 /// typestr + class kind, generate the proper set of FileCheck
1263 /// Patterns to check for. We could just return a string, but instead
1264 /// use a vector since it provides us with the extra flexibility of
1265 /// emitting multiple checks, which comes in handy for certain cases
1266 /// like mla where we want to check for 2 different instructions.
1267 static void GenerateChecksForIntrinsic(const std::string &Name,
1268                                        const std::string &Proto,
1269                                        StringRef &OutTypeStr,
1270                                        StringRef &InTypeStr,
1271                                        ClassKind Ck,
1272                                        const std::string &InstName,
1273                                        bool IsHiddenLOp,
1274                                        std::vector<std::string>& Result) {
1275
1276   // If Ck is a ClassNoTest instruction, just return so no test is
1277   // emitted.
1278   if(Ck == ClassNoTest)
1279     return;
1280
1281   if (Name == "vcvt_f32_f16") {
1282     Result.push_back("vcvt.f32.f16");
1283     return;
1284   }
1285
1286
1287   // Now we preprocess our instruction given the data we have to get the
1288   // data that we need.
1289   // Create a StringRef for String Manipulation of our Name.
1290   const StringRef NameRef(Name);
1291   // Instruction Prefix.
1292   std::string Prefix;
1293   // The type code for our out type string.
1294   std::string OutTypeCode;
1295   // To handle our different cases, we need to check for different postfixes.
1296   // Is our instruction a quad instruction.
1297   bool IsQuad = false;
1298   // Our instruction is of the form <instructionname>_n.
1299   bool HasNPostfix = false;
1300   // Our instruction is of the form <instructionname>_lane.
1301   bool HasLanePostfix = false;
1302   // Our instruction is of the form <instructionname>_dup.
1303   bool HasDupPostfix  = false;
1304   // Our instruction is a vcvt instruction which requires special handling.
1305   bool IsSpecialVCvt = false;
1306   // If we have a vtbxN or vtblN instruction, this is set to N.
1307   size_t TBNumber = -1;
1308   // Register Suffix
1309   std::string RegisterSuffix;
1310
1311   PreprocessInstruction(NameRef, InstName, Prefix,
1312                         HasNPostfix, HasLanePostfix, HasDupPostfix,
1313                         IsSpecialVCvt, TBNumber);
1314
1315   InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode);
1316   GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad,
1317                                HasLanePostfix, HasDupPostfix, TBNumber,
1318                                RegisterSuffix);
1319
1320   // In the following section, we handle a bunch of special cases. You can tell
1321   // a special case by the fact we are returning early.
1322
1323   // If our instruction is a logical instruction without postfix or a
1324   // hidden LOp just return the current Prefix.
1325   if (Ck == ClassL || IsHiddenLOp) {
1326     Result.push_back(Prefix + " " + RegisterSuffix);
1327     return;
1328   }
1329
1330   // If we have a vmov, due to the many different cases, some of which
1331   // vary within the different intrinsics generated for a single
1332   // instruction type, just output a vmov. (e.g. given an instruction
1333   // A, A.u32 might be vmov and A.u8 might be vmov.8).
1334   //
1335   // FIXME: Maybe something can be done about this. The two cases that we care
1336   // about are vmov as an LType and vmov as a WType.
1337   if (Prefix == "vmov") {
1338     Result.push_back(Prefix + " " + RegisterSuffix);
1339     return;
1340   }
1341
1342   // In the following section, we handle special cases.
1343
1344   if (OutTypeCode == "64") {
1345     // If we have a 64 bit vdup/vext and are handling an uint64x1_t
1346     // type, the intrinsic will be optimized away, so just return
1347     // nothing.  On the other hand if we are handling an uint64x2_t
1348     // (i.e. quad instruction), vdup/vmov instructions should be
1349     // emitted.
1350     if (Prefix == "vdup" || Prefix == "vext") {
1351       if (IsQuad) {
1352         Result.push_back("{{vmov|vdup}}");
1353       }
1354       return;
1355     }
1356
1357     // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with
1358     // multiple register operands.
1359     bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3"
1360                             || Prefix == "vld4";
1361     bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3"
1362                             || Prefix == "vst4";
1363     if (MultiLoadPrefix || MultiStorePrefix) {
1364       Result.push_back(NameRef.slice(0, 3).str() + "1.64");
1365       return;
1366     }
1367
1368     // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of
1369     // emitting said instructions. So return a check for
1370     // vldr/vstr/vmov/str instead.
1371     if (HasLanePostfix || HasDupPostfix) {
1372       if (Prefix == "vst1") {
1373         Result.push_back("{{str|vstr|vmov}}");
1374         return;
1375       } else if (Prefix == "vld1") {
1376         Result.push_back("{{ldr|vldr|vmov}}");
1377         return;
1378       }
1379     }
1380   }
1381
1382   // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are
1383   // sometimes disassembled as vtrn.32. We use a regex to handle both
1384   // cases.
1385   if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") {
1386     Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix);
1387     return;
1388   }
1389
1390   // Currently on most ARM processors, we do not use vmla/vmls for
1391   // quad floating point operations. Instead we output vmul + vadd. So
1392   // check if we have one of those instructions and just output a
1393   // check for vmul.
1394   if (OutTypeCode == "f32") {
1395     if (Prefix == "vmls") {
1396       Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
1397       Result.push_back("vsub." + OutTypeCode);
1398       return;
1399     } else if (Prefix == "vmla") {
1400       Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
1401       Result.push_back("vadd." + OutTypeCode);
1402       return;
1403     }
1404   }
1405
1406   // If we have vcvt, get the input type from the instruction name
1407   // (which should be of the form instname_inputtype) and append it
1408   // before the output type.
1409   if (Prefix == "vcvt") {
1410     const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1);
1411     Prefix += "." + inTypeCode;
1412   }
1413
1414   // Append output type code to get our final mangled instruction.
1415   Prefix += "." + OutTypeCode;
1416
1417   Result.push_back(Prefix + " " + RegisterSuffix);
1418 }
1419
1420 /// UseMacro - Examine the prototype string to determine if the intrinsic
1421 /// should be defined as a preprocessor macro instead of an inline function.
1422 static bool UseMacro(const std::string &proto) {
1423   // If this builtin takes an immediate argument, we need to #define it rather
1424   // than use a standard declaration, so that SemaChecking can range check
1425   // the immediate passed by the user.
1426   if (proto.find('i') != std::string::npos)
1427     return true;
1428
1429   // Pointer arguments need to use macros to avoid hiding aligned attributes
1430   // from the pointer type.
1431   if (proto.find('p') != std::string::npos ||
1432       proto.find('c') != std::string::npos)
1433     return true;
1434
1435   return false;
1436 }
1437
1438 /// MacroArgUsedDirectly - Return true if argument i for an intrinsic that is
1439 /// defined as a macro should be accessed directly instead of being first
1440 /// assigned to a local temporary.
1441 static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
1442   // True for constant ints (i), pointers (p) and const pointers (c).
1443   return (proto[i] == 'i' || proto[i] == 'p' || proto[i] == 'c');
1444 }
1445
1446 // Generate the string "(argtype a, argtype b, ...)"
1447 static std::string GenArgs(const std::string &proto, StringRef typestr,
1448                            const std::string &name) {
1449   bool define = UseMacro(proto);
1450   char arg = 'a';
1451
1452   std::string s;
1453   s += "(";
1454
1455   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1456     if (define) {
1457       // Some macro arguments are used directly instead of being assigned
1458       // to local temporaries; prepend an underscore prefix to make their
1459       // names consistent with the local temporaries.
1460       if (MacroArgUsedDirectly(proto, i))
1461         s += "__";
1462     } else {
1463       s += TypeString(proto[i], typestr) + " __";
1464     }
1465     s.push_back(arg);
1466     //To avoid argument being multiple defined, add extra number for renaming.
1467     if (name == "vcopy_lane" || name == "vcopy_laneq")
1468       s.push_back('1');
1469     if ((i + 1) < e)
1470       s += ", ";
1471   }
1472
1473   s += ")";
1474   return s;
1475 }
1476
1477 // Macro arguments are not type-checked like inline function arguments, so
1478 // assign them to local temporaries to get the right type checking.
1479 static std::string GenMacroLocals(const std::string &proto, StringRef typestr,
1480                                   const std::string &name ) {
1481   char arg = 'a';
1482   std::string s;
1483   bool generatedLocal = false;
1484
1485   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1486     // Do not create a temporary for an immediate argument.
1487     // That would defeat the whole point of using a macro!
1488     if (MacroArgUsedDirectly(proto, i))
1489       continue;
1490     generatedLocal = true;
1491     bool extranumber = false;
1492     if (name == "vcopy_lane" || name == "vcopy_laneq")
1493       extranumber = true;
1494
1495     s += TypeString(proto[i], typestr) + " __";
1496     s.push_back(arg);
1497     if(extranumber)
1498       s.push_back('1');
1499     s += " = (";
1500     s.push_back(arg);
1501     if(extranumber)
1502       s.push_back('1');
1503     s += "); ";
1504   }
1505
1506   if (generatedLocal)
1507     s += "\\\n  ";
1508   return s;
1509 }
1510
1511 // Use the vmovl builtin to sign-extend or zero-extend a vector.
1512 static std::string Extend(StringRef typestr, const std::string &a, bool h=0) {
1513   std::string s, high;
1514   high = h ? "_high" : "";
1515   s = MangleName("vmovl" + high, typestr, ClassS);
1516   s += "(" + a + ")";
1517   return s;
1518 }
1519
1520 // Get the high 64-bit part of a vector
1521 static std::string GetHigh(const std::string &a, StringRef typestr) {
1522   std::string s;
1523   s = MangleName("vget_high", typestr, ClassS);
1524   s += "(" + a + ")";
1525   return s;
1526 }
1527
1528 // Gen operation with two operands and get high 64-bit for both of two operands.
1529 static std::string Gen2OpWith2High(StringRef typestr,
1530                                    const std::string &op,
1531                                    const std::string &a,
1532                                    const std::string &b) {
1533   std::string s;
1534   std::string Op1 = GetHigh(a, typestr);
1535   std::string Op2 = GetHigh(b, typestr);
1536   s = MangleName(op, typestr, ClassS);
1537   s += "(" + Op1 + ", " + Op2 + ");";
1538   return s;
1539 }
1540
1541 // Gen operation with three operands and get high 64-bit of the latter
1542 // two operands.
1543 static std::string Gen3OpWith2High(StringRef typestr,
1544                                    const std::string &op,
1545                                    const std::string &a,
1546                                    const std::string &b,
1547                                    const std::string &c) {
1548   std::string s;
1549   std::string Op1 = GetHigh(b, typestr);
1550   std::string Op2 = GetHigh(c, typestr);
1551   s = MangleName(op, typestr, ClassS);
1552   s += "(" + a + ", " + Op1 + ", " + Op2 + ");";
1553   return s;
1554 }
1555
1556 // Gen combine operation by putting a on low 64-bit, and b on high 64-bit.
1557 static std::string GenCombine(std::string typestr,
1558                               const std::string &a,
1559                               const std::string &b) {
1560   std::string s;
1561   s = MangleName("vcombine", typestr, ClassS);
1562   s += "(" + a + ", " + b + ")";
1563   return s;
1564 }
1565
1566 static std::string Duplicate(unsigned nElts, StringRef typestr,
1567                              const std::string &a) {
1568   std::string s;
1569
1570   s = "(" + TypeString('d', typestr) + "){ ";
1571   for (unsigned i = 0; i != nElts; ++i) {
1572     s += a;
1573     if ((i + 1) < nElts)
1574       s += ", ";
1575   }
1576   s += " }";
1577
1578   return s;
1579 }
1580
1581 static std::string SplatLane(unsigned nElts, const std::string &vec,
1582                              const std::string &lane) {
1583   std::string s = "__builtin_shufflevector(" + vec + ", " + vec;
1584   for (unsigned i = 0; i < nElts; ++i)
1585     s += ", " + lane;
1586   s += ")";
1587   return s;
1588 }
1589
1590 static std::string RemoveHigh(const std::string &name) {
1591   std::string s = name;
1592   std::size_t found = s.find("_high_");
1593   if (found == std::string::npos)
1594     PrintFatalError("name should contain \"_high_\" for high intrinsics");
1595   s.replace(found, 5, "");
1596   return s;
1597 }
1598
1599 static unsigned GetNumElements(StringRef typestr, bool &quad) {
1600   quad = false;
1601   bool dummy = false;
1602   char type = ClassifyType(typestr, quad, dummy, dummy);
1603   unsigned nElts = 0;
1604   switch (type) {
1605   case 'c': nElts = 8; break;
1606   case 's': nElts = 4; break;
1607   case 'i': nElts = 2; break;
1608   case 'l': nElts = 1; break;
1609   case 'h': nElts = 4; break;
1610   case 'f': nElts = 2; break;
1611   case 'd':
1612     nElts = 1;
1613     break;
1614   default:
1615     PrintFatalError("unhandled type!");
1616   }
1617   if (quad) nElts <<= 1;
1618   return nElts;
1619 }
1620
1621 // Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
1622 static std::string GenOpString(const std::string &name, OpKind op,
1623                                const std::string &proto, StringRef typestr) {
1624   bool quad;
1625   unsigned nElts = GetNumElements(typestr, quad);
1626   bool define = UseMacro(proto);
1627
1628   std::string ts = TypeString(proto[0], typestr);
1629   std::string s;
1630   if (!define) {
1631     s = "return ";
1632   }
1633
1634   switch(op) {
1635   case OpAdd:
1636     s += "__a + __b;";
1637     break;
1638   case OpAddl:
1639     s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
1640     break;
1641   case OpAddlHi:
1642     s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";";
1643     break;
1644   case OpAddw:
1645     s += "__a + " + Extend(typestr, "__b") + ";";
1646     break;
1647   case OpAddwHi:
1648     s += "__a + " + Extend(typestr, "__b", 1) + ";";
1649     break;
1650   case OpSub:
1651     s += "__a - __b;";
1652     break;
1653   case OpSubl:
1654     s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
1655     break;
1656   case OpSublHi:
1657     s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";";
1658     break;
1659   case OpSubw:
1660     s += "__a - " + Extend(typestr, "__b") + ";";
1661     break;
1662   case OpSubwHi:
1663     s += "__a - " + Extend(typestr, "__b", 1) + ";";
1664     break;
1665   case OpMulN:
1666     s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
1667     break;
1668   case OpMulLane:
1669     s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
1670     break;
1671   case OpMulXLane:
1672     s += MangleName("vmulx", typestr, ClassS) + "(__a, " +
1673       SplatLane(nElts, "__b", "__c") + ");";
1674     break;
1675   case OpMul:
1676     s += "__a * __b;";
1677     break;
1678   case OpFMlaN:
1679     s += MangleName("vfma", typestr, ClassS);
1680     s += "(__a, __b, " + Duplicate(nElts,typestr, "__c") + ");";
1681     break;
1682   case OpFMlsN:
1683     s += MangleName("vfms", typestr, ClassS);
1684     s += "(__a, __b, " + Duplicate(nElts,typestr, "__c") + ");";
1685     break;
1686   case OpMullLane:
1687     s += MangleName("vmull", typestr, ClassS) + "(__a, " +
1688       SplatLane(nElts, "__b", "__c") + ");";
1689     break;
1690   case OpMullHiLane:
1691     s += MangleName("vmull", typestr, ClassS) + "(" +
1692       GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
1693     break;
1694   case OpMlaN:
1695     s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
1696     break;
1697   case OpMlaLane:
1698     s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
1699     break;
1700   case OpMla:
1701     s += "__a + (__b * __c);";
1702     break;
1703   case OpMlalN:
1704     s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1705       Duplicate(nElts, typestr, "__c") + ");";
1706     break;
1707   case OpMlalLane:
1708     s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1709       SplatLane(nElts, "__c", "__d") + ");";
1710     break;
1711   case OpMlalHiLane:
1712     s += "__a + " + MangleName("vmull", typestr, ClassS) + "(" +
1713       GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1714     break;
1715   case OpMlal:
1716     s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
1717     break;
1718   case OpMullHi:
1719     s += Gen2OpWith2High(typestr, "vmull", "__a", "__b");
1720     break;
1721   case OpMullHiN:
1722     s += MangleName("vmull_n", typestr, ClassS);
1723     s += "(" + GetHigh("__a", typestr) + ", __b);";
1724     return s;
1725   case OpMlalHi:
1726     s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c");
1727     break;
1728   case OpMlalHiN:
1729     s += MangleName("vmlal_n", typestr, ClassS);
1730     s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
1731     return s;
1732   case OpMlsN:
1733     s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
1734     break;
1735   case OpMlsLane:
1736     s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
1737     break;
1738   case OpFMSLane:
1739     s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
1740     s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
1741     s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
1742     s += MangleName("vfma_lane", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
1743     break;
1744   case OpFMSLaneQ:
1745     s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
1746     s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
1747     s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
1748     s += MangleName("vfma_laneq", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
1749     break;
1750   case OpMls:
1751     s += "__a - (__b * __c);";
1752     break;
1753   case OpMlslN:
1754     s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1755       Duplicate(nElts, typestr, "__c") + ");";
1756     break;
1757   case OpMlslLane:
1758     s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1759       SplatLane(nElts, "__c", "__d") + ");";
1760     break;
1761   case OpMlslHiLane:
1762     s += "__a - " + MangleName("vmull", typestr, ClassS) + "(" +
1763       GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1764     break;
1765   case OpMlsl:
1766     s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
1767     break;
1768   case OpMlslHi:
1769     s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c");
1770     break;
1771   case OpMlslHiN:
1772     s += MangleName("vmlsl_n", typestr, ClassS);
1773     s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
1774     break;
1775   case OpQDMullLane:
1776     s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
1777       SplatLane(nElts, "__b", "__c") + ");";
1778     break;
1779   case OpQDMullHiLane:
1780     s += MangleName("vqdmull", typestr, ClassS) + "(" +
1781       GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
1782     break;
1783   case OpQDMlalLane:
1784     s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
1785       SplatLane(nElts, "__c", "__d") + ");";
1786     break;
1787   case OpQDMlalHiLane:
1788     s += MangleName("vqdmlal", typestr, ClassS) + "(__a, " +
1789       GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1790     break;
1791   case OpQDMlslLane:
1792     s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
1793       SplatLane(nElts, "__c", "__d") + ");";
1794     break;
1795   case OpQDMlslHiLane:
1796     s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, " +
1797       GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1798     break;
1799   case OpQDMulhLane:
1800     s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
1801       SplatLane(nElts, "__b", "__c") + ");";
1802     break;
1803   case OpQRDMulhLane:
1804     s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
1805       SplatLane(nElts, "__b", "__c") + ");";
1806     break;
1807   case OpEq:
1808     s += "(" + ts + ")(__a == __b);";
1809     break;
1810   case OpGe:
1811     s += "(" + ts + ")(__a >= __b);";
1812     break;
1813   case OpLe:
1814     s += "(" + ts + ")(__a <= __b);";
1815     break;
1816   case OpGt:
1817     s += "(" + ts + ")(__a > __b);";
1818     break;
1819   case OpLt:
1820     s += "(" + ts + ")(__a < __b);";
1821     break;
1822   case OpNeg:
1823     s += " -__a;";
1824     break;
1825   case OpNot:
1826     s += " ~__a;";
1827     break;
1828   case OpAnd:
1829     s += "__a & __b;";
1830     break;
1831   case OpOr:
1832     s += "__a | __b;";
1833     break;
1834   case OpXor:
1835     s += "__a ^ __b;";
1836     break;
1837   case OpAndNot:
1838     s += "__a & ~__b;";
1839     break;
1840   case OpOrNot:
1841     s += "__a | ~__b;";
1842     break;
1843   case OpCast:
1844     s += "(" + ts + ")__a;";
1845     break;
1846   case OpConcat:
1847     s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
1848     s += ", (int64x1_t)__b, 0, 1);";
1849     break;
1850   case OpHi:
1851     // nElts is for the result vector, so the source is twice that number.
1852     s += "__builtin_shufflevector(__a, __a";
1853     for (unsigned i = nElts; i < nElts * 2; ++i)
1854       s += ", " + utostr(i);
1855     s+= ");";
1856     break;
1857   case OpLo:
1858     s += "__builtin_shufflevector(__a, __a";
1859     for (unsigned i = 0; i < nElts; ++i)
1860       s += ", " + utostr(i);
1861     s+= ");";
1862     break;
1863   case OpDup:
1864     s += Duplicate(nElts, typestr, "__a") + ";";
1865     break;
1866   case OpDupLane:
1867     s += SplatLane(nElts, "__a", "__b") + ";";
1868     break;
1869   case OpSelect:
1870     // ((0 & 1) | (~0 & 2))
1871     s += "(" + ts + ")";
1872     ts = TypeString(proto[1], typestr);
1873     s += "((__a & (" + ts + ")__b) | ";
1874     s += "(~__a & (" + ts + ")__c));";
1875     break;
1876   case OpRev16:
1877     s += "__builtin_shufflevector(__a, __a";
1878     for (unsigned i = 2; i <= nElts; i += 2)
1879       for (unsigned j = 0; j != 2; ++j)
1880         s += ", " + utostr(i - j - 1);
1881     s += ");";
1882     break;
1883   case OpRev32: {
1884     unsigned WordElts = nElts >> (1 + (int)quad);
1885     s += "__builtin_shufflevector(__a, __a";
1886     for (unsigned i = WordElts; i <= nElts; i += WordElts)
1887       for (unsigned j = 0; j != WordElts; ++j)
1888         s += ", " + utostr(i - j - 1);
1889     s += ");";
1890     break;
1891   }
1892   case OpRev64: {
1893     unsigned DblWordElts = nElts >> (int)quad;
1894     s += "__builtin_shufflevector(__a, __a";
1895     for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
1896       for (unsigned j = 0; j != DblWordElts; ++j)
1897         s += ", " + utostr(i - j - 1);
1898     s += ");";
1899     break;
1900   }
1901   case OpXtnHi: {
1902     s = TypeString(proto[1], typestr) + " __a1 = " +
1903         MangleName("vmovn", typestr, ClassS) + "(__b);\n  " +
1904         "return __builtin_shufflevector(__a, __a1";
1905     for (unsigned i = 0; i < nElts * 4; ++i)
1906       s += ", " + utostr(i);
1907     s += ");";
1908     break;
1909   }
1910   case OpSqxtunHi: {
1911     s = TypeString(proto[1], typestr) + " __a1 = " +
1912         MangleName("vqmovun", typestr, ClassS) + "(__b);\n  " +
1913         "return __builtin_shufflevector(__a, __a1";
1914     for (unsigned i = 0; i < nElts * 4; ++i)
1915       s += ", " + utostr(i);
1916     s += ");";
1917     break;
1918   }
1919   case OpQxtnHi: {
1920     s = TypeString(proto[1], typestr) + " __a1 = " +
1921         MangleName("vqmovn", typestr, ClassS) + "(__b);\n  " +
1922         "return __builtin_shufflevector(__a, __a1";
1923     for (unsigned i = 0; i < nElts * 4; ++i)
1924       s += ", " + utostr(i);
1925     s += ");";
1926     break;
1927   }
1928   case OpFcvtnHi: {
1929     std::string FName = (nElts == 1) ? "vcvt_f32" : "vcvt_f16";
1930     s = TypeString(proto[1], typestr) + " __a1 = " +
1931         MangleName(FName, typestr, ClassS) + "(__b);\n  " +
1932         "return __builtin_shufflevector(__a, __a1";
1933     for (unsigned i = 0; i < nElts * 4; ++i)
1934       s += ", " + utostr(i);
1935     s += ");";
1936     break;
1937   }
1938   case OpFcvtlHi: {
1939     std::string FName = (nElts == 2) ? "vcvt_f64" : "vcvt_f32";
1940     s = TypeString('d', typestr) + " __a1 = " + GetHigh("__a", typestr) +
1941         ";\n  return " + MangleName(FName, typestr, ClassS) + "(__a1);";
1942     break;
1943   }
1944   case OpFcvtxnHi: {
1945     s = TypeString(proto[1], typestr) + " __a1 = " +
1946         MangleName("vcvtx_f32", typestr, ClassS) + "(__b);\n  " +
1947         "return __builtin_shufflevector(__a, __a1";
1948     for (unsigned i = 0; i < nElts * 4; ++i)
1949       s += ", " + utostr(i);
1950     s += ");";
1951     break;
1952   }
1953   case OpUzp1:
1954     s += "__builtin_shufflevector(__a, __b";
1955     for (unsigned i = 0; i < nElts; i++)
1956       s += ", " + utostr(2*i);
1957     s += ");";
1958     break;
1959   case OpUzp2:
1960     s += "__builtin_shufflevector(__a, __b";
1961     for (unsigned i = 0; i < nElts; i++)
1962       s += ", " + utostr(2*i+1);
1963     s += ");";
1964     break;
1965   case OpZip1:
1966     s += "__builtin_shufflevector(__a, __b";
1967     for (unsigned i = 0; i < (nElts/2); i++)
1968        s += ", " + utostr(i) + ", " + utostr(i+nElts);
1969     s += ");";
1970     break;
1971   case OpZip2:
1972     s += "__builtin_shufflevector(__a, __b";
1973     for (unsigned i = nElts/2; i < nElts; i++)
1974        s += ", " + utostr(i) + ", " + utostr(i+nElts);
1975     s += ");";
1976     break;
1977   case OpTrn1:
1978     s += "__builtin_shufflevector(__a, __b";
1979     for (unsigned i = 0; i < (nElts/2); i++)
1980        s += ", " + utostr(2*i) + ", " + utostr(2*i+nElts);
1981     s += ");";
1982     break;
1983   case OpTrn2:
1984     s += "__builtin_shufflevector(__a, __b";
1985     for (unsigned i = 0; i < (nElts/2); i++)
1986        s += ", " + utostr(2*i+1) + ", " + utostr(2*i+1+nElts);
1987     s += ");";
1988     break;
1989   case OpAbdl: {
1990     std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
1991     if (typestr[0] != 'U') {
1992       // vabd results are always unsigned and must be zero-extended.
1993       std::string utype = "U" + typestr.str();
1994       s += "(" + TypeString(proto[0], typestr) + ")";
1995       abd = "(" + TypeString('d', utype) + ")" + abd;
1996       s += Extend(utype, abd) + ";";
1997     } else {
1998       s += Extend(typestr, abd) + ";";
1999     }
2000     break;
2001   }
2002   case OpAbdlHi:
2003     s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b");
2004     break;
2005   case OpAddhnHi: {
2006     std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)";
2007     s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn);
2008     s += ";";
2009     break;
2010   }
2011   case OpRAddhnHi: {
2012     std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)";
2013     s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn);
2014     s += ";";
2015     break;
2016   }
2017   case OpSubhnHi: {
2018     std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)";
2019     s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn);
2020     s += ";";
2021     break;
2022   }
2023   case OpRSubhnHi: {
2024     std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)";
2025     s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn);
2026     s += ";";
2027     break;
2028   }
2029   case OpAba:
2030     s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
2031     break;
2032   case OpAbal:
2033     s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);";
2034     break;
2035   case OpAbalHi:
2036     s += Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c");
2037     break;
2038   case OpQDMullHi:
2039     s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b");
2040     break;
2041   case OpQDMullHiN:
2042     s += MangleName("vqdmull_n", typestr, ClassS);
2043     s += "(" + GetHigh("__a", typestr) + ", __b);";
2044     return s;
2045   case OpQDMlalHi:
2046     s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c");
2047     break;
2048   case OpQDMlalHiN:
2049     s += MangleName("vqdmlal_n", typestr, ClassS);
2050     s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
2051     return s;
2052   case OpQDMlslHi:
2053     s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c");
2054     break;
2055   case OpQDMlslHiN:
2056     s += MangleName("vqdmlsl_n", typestr, ClassS);
2057     s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
2058     return s;
2059   case OpDiv:
2060     s += "__a / __b;";
2061     break;
2062   case OpMovlHi: {
2063     s = TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
2064         MangleName("vget_high", typestr, ClassS) + "(__a);\n  " + s;
2065     s += "(" + ts + ")" + MangleName("vshll_n", typestr, ClassS);
2066     s += "(__a1, 0);";
2067     break;
2068   }
2069   case OpLongHi: {
2070     // Another local variable __a1 is needed for calling a Macro,
2071     // or using __a will have naming conflict when Macro expanding.
2072     s += TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
2073          MangleName("vget_high", typestr, ClassS) + "(__a); \\\n";
2074     s += "  (" + ts + ")" + MangleName(RemoveHigh(name), typestr, ClassS) +
2075          "(__a1, __b);";
2076     break;
2077   }
2078   case OpNarrowHi: {
2079     s += "(" + ts + ")" + MangleName("vcombine", typestr, ClassS) + "(__a, " +
2080          MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));";
2081     break;
2082   }
2083   case OpCopyLane: {
2084     s += TypeString('s', typestr) + " __c2 = " +
2085          MangleName("vget_lane", typestr, ClassS) + "(__c1, __d1); \\\n  " +
2086          MangleName("vset_lane", typestr, ClassS) + "(__c2, __a1, __b1);";
2087     break;
2088   }
2089   case OpCopyQLane: {
2090     std::string typeCode = "";
2091     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2092     s += TypeString('s', typestr) + " __c2 = vget_lane_" + typeCode +
2093          "(__c1, __d1); \\\n  vsetq_lane_" + typeCode + "(__c2, __a1, __b1);";
2094     break;
2095   }
2096   case OpCopyLaneQ: {
2097     std::string typeCode = "";
2098     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2099     s += TypeString('s', typestr) + " __c2 = vgetq_lane_" + typeCode +
2100          "(__c1, __d1); \\\n  vset_lane_" + typeCode + "(__c2, __a1, __b1);";
2101     break;
2102   }
2103   case OpScalarMulLane: {
2104     std::string typeCode = "";
2105     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2106     s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
2107       "(__b, __c);\\\n  __a * __d1;";
2108     break;
2109   }
2110   case OpScalarMulLaneQ: {
2111     std::string typeCode = "";
2112     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2113         s += TypeString('s', typestr) + " __d1 = vgetq_lane_" + typeCode +
2114           "(__b, __c);\\\n  __a * __d1;";
2115     break;
2116   }
2117   case OpScalarMulXLane: {
2118     bool dummy = false;
2119     char type = ClassifyType(typestr, dummy, dummy, dummy);
2120     if (type == 'f') type = 's';
2121     std::string typeCode = "";
2122     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2123     s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
2124       "(__b, __c);\\\n  vmulx" + type + "_" +
2125       typeCode +  "(__a, __d1);";
2126     break;
2127   }
2128   case OpScalarMulXLaneQ: {
2129     bool dummy = false;
2130     char type = ClassifyType(typestr, dummy, dummy, dummy);
2131     if (type == 'f') type = 's';
2132     std::string typeCode = "";
2133     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2134     s += TypeString('s', typestr) + " __d1 = vgetq_lane_" +
2135       typeCode + "(__b, __c);\\\n  vmulx" + type +
2136       "_" + typeCode +  "(__a, __d1);";
2137     break;
2138   }
2139
2140   case OpScalarVMulXLane: {
2141     bool dummy = false;
2142     char type = ClassifyType(typestr, dummy, dummy, dummy);
2143     if (type == 'f') type = 's';
2144     std::string typeCode = "";
2145     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2146     s += TypeString('s', typestr) + " __d1 = vget_lane_" +
2147       typeCode + "(__a, 0);\\\n" +
2148       "  " + TypeString('s', typestr) + " __e1 = vget_lane_" +
2149       typeCode + "(__b, __c);\\\n" +
2150       "  " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" +
2151       typeCode + "(__d1, __e1);\\\n" +
2152       "  " + TypeString('d', typestr) + " __g1;\\\n" +
2153       "  vset_lane_" + typeCode + "(__f1, __g1, __c);";
2154     break;
2155   }
2156
2157   case OpScalarVMulXLaneQ: {
2158     bool dummy = false;
2159     char type = ClassifyType(typestr, dummy, dummy, dummy);
2160     if (type == 'f') type = 's';
2161     std::string typeCode = "";
2162     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2163     s += TypeString('s', typestr) + " __d1 = vget_lane_" +
2164       typeCode + "(__a, 0);\\\n" +
2165       "  " + TypeString('s', typestr) + " __e1 = vgetq_lane_" +
2166       typeCode + "(__b, __c);\\\n" +
2167       "  " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" +
2168       typeCode + "(__d1, __e1);\\\n" +
2169       "  " + TypeString('d', typestr) + " __g1;\\\n" +
2170       "  vset_lane_" + typeCode + "(__f1, __g1, 0);";
2171     break;
2172   }
2173   case OpScalarQDMullLane: {
2174     std::string typeCode = "";
2175     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2176     s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
2177     "vget_lane_" + typeCode + "(b, __c));";
2178     break;
2179   }
2180   case OpScalarQDMullLaneQ: {
2181     std::string typeCode = "";
2182     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2183     s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
2184     "vgetq_lane_" + typeCode + "(b, __c));";
2185     break;
2186   }
2187   case OpScalarQDMulHiLane: {
2188     std::string typeCode = "";
2189     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2190     s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
2191     "vget_lane_" + typeCode + "(__b, __c));";
2192     break;
2193   }
2194   case OpScalarQDMulHiLaneQ: {
2195     std::string typeCode = "";
2196     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2197     s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
2198     "vgetq_lane_" + typeCode + "(__b, __c));";
2199     break;
2200   }
2201   case OpScalarQRDMulHiLane: {
2202     std::string typeCode = "";
2203     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2204     s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
2205     "vget_lane_" + typeCode + "(__b, __c));";
2206     break;
2207   }
2208   case OpScalarQRDMulHiLaneQ: {
2209     std::string typeCode = "";
2210     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2211     s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
2212     "vgetq_lane_" + typeCode + "(__b, __c));";
2213     break;
2214   }
2215   case OpScalarGetLane:{
2216     std::string typeCode = "";
2217     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2218     if (quad) {
2219      s += "int16x8_t __a1 = vreinterpretq_s16_f16(__a);\\\n";
2220      s += "  vgetq_lane_s16(__a1, __b);";
2221     } else {
2222      s += "int16x4_t __a1 = vreinterpret_s16_f16(__a);\\\n";
2223      s += "  vget_lane_s16(__a1, __b);";
2224     }
2225     break;
2226   }
2227   case OpScalarSetLane:{
2228     std::string typeCode = "";
2229     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2230     s += "int16_t __a1 = (int16_t)__a;\\\n";
2231     if (quad) {
2232      s += "  int16x8_t __b1 = vreinterpretq_s16_f16(b);\\\n";
2233      s += "  int16x8_t __b2 = vsetq_lane_s16(__a1, __b1, __c);\\\n";
2234      s += "  vreinterpretq_f16_s16(__b2);";
2235     } else {
2236      s += "  int16x4_t __b1 = vreinterpret_s16_f16(b);\\\n";
2237      s += "  int16x4_t __b2 = vset_lane_s16(__a1, __b1, __c);\\\n";
2238      s += "  vreinterpret_f16_s16(__b2);";
2239     }
2240     break;
2241   }
2242
2243   default:
2244     PrintFatalError("unknown OpKind!");
2245   }
2246   return s;
2247 }
2248
2249 static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
2250   unsigned mod = proto[0];
2251
2252   if (mod == 'v' || mod == 'f' || mod == 'F')
2253     mod = proto[1];
2254
2255   bool quad = false;
2256   bool poly = false;
2257   bool usgn = false;
2258   bool scal = false;
2259   bool cnst = false;
2260   bool pntr = false;
2261
2262   // Base type to get the type string for.
2263   char type = ClassifyType(typestr, quad, poly, usgn);
2264
2265   // Based on the modifying character, change the type and width if necessary.
2266   type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
2267
2268   NeonTypeFlags::EltType ET;
2269   switch (type) {
2270     case 'c':
2271       ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8;
2272       break;
2273     case 's':
2274       ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16;
2275       break;
2276     case 'i':
2277       ET = NeonTypeFlags::Int32;
2278       break;
2279     case 'l':
2280       ET = poly ? NeonTypeFlags::Poly64 : NeonTypeFlags::Int64;
2281       break;
2282     case 'h':
2283       ET = NeonTypeFlags::Float16;
2284       break;
2285     case 'f':
2286       ET = NeonTypeFlags::Float32;
2287       break;
2288     case 'd':
2289       ET = NeonTypeFlags::Float64;
2290       break;
2291     default:
2292       PrintFatalError("unhandled type!");
2293   }
2294   NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g');
2295   return Flags.getFlags();
2296 }
2297
2298 // We don't check 'a' in this function, because for builtin function the
2299 // argument matching to 'a' uses a vector type splatted from a scalar type.
2300 static bool ProtoHasScalar(const std::string proto)
2301 {
2302   return (proto.find('s') != std::string::npos
2303           || proto.find('z') != std::string::npos
2304           || proto.find('r') != std::string::npos
2305           || proto.find('b') != std::string::npos
2306           || proto.find('$') != std::string::npos
2307           || proto.find('y') != std::string::npos
2308           || proto.find('o') != std::string::npos);
2309 }
2310
2311 // Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
2312 static std::string GenBuiltin(const std::string &name, const std::string &proto,
2313                               StringRef typestr, ClassKind ck) {
2314   std::string s;
2315
2316   // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
2317   // sret-like argument.
2318   bool sret = IsMultiVecProto(proto[0]);
2319
2320   bool define = UseMacro(proto);
2321
2322   // Check if the prototype has a scalar operand with the type of the vector
2323   // elements.  If not, bitcasting the args will take care of arg checking.
2324   // The actual signedness etc. will be taken care of with special enums.
2325   if (!ProtoHasScalar(proto))
2326     ck = ClassB;
2327
2328   if (proto[0] != 'v') {
2329     std::string ts = TypeString(proto[0], typestr);
2330
2331     if (define) {
2332       if (sret)
2333         s += ts + " r; ";
2334       else
2335         s += "(" + ts + ")";
2336     } else if (sret) {
2337       s += ts + " r; ";
2338     } else {
2339       s += "return (" + ts + ")";
2340     }
2341   }
2342
2343   bool splat = proto.find('a') != std::string::npos;
2344
2345   s += "__builtin_neon_";
2346   if (splat) {
2347     // Call the non-splat builtin: chop off the "_n" suffix from the name.
2348     std::string vname(name, 0, name.size()-2);
2349     s += MangleName(vname, typestr, ck);
2350   } else {
2351     s += MangleName(name, typestr, ck);
2352   }
2353   s += "(";
2354
2355   // Pass the address of the return variable as the first argument to sret-like
2356   // builtins.
2357   if (sret)
2358     s += "&r, ";
2359
2360   char arg = 'a';
2361   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
2362     std::string args = std::string(&arg, 1);
2363
2364     // Use the local temporaries instead of the macro arguments.
2365     args = "__" + args;
2366
2367     bool argQuad = false;
2368     bool argPoly = false;
2369     bool argUsgn = false;
2370     bool argScalar = false;
2371     bool dummy = false;
2372     char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
2373     argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
2374                       dummy, dummy);
2375
2376     // Handle multiple-vector values specially, emitting each subvector as an
2377     // argument to the __builtin.
2378     unsigned NumOfVec = 0;
2379     if (proto[i] >= '2' && proto[i] <= '4') {
2380       NumOfVec = proto[i] - '0';
2381     } else if (proto[i] >= 'B' && proto[i] <= 'D') {
2382       NumOfVec = proto[i] - 'A' + 1;
2383     }
2384
2385     if (NumOfVec > 0) {
2386       // Check if an explicit cast is needed.
2387       if (argType != 'c' || argPoly || argUsgn)
2388         args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;
2389
2390       for (unsigned vi = 0, ve = NumOfVec; vi != ve; ++vi) {
2391         s += args + ".val[" + utostr(vi) + "]";
2392         if ((vi + 1) < ve)
2393           s += ", ";
2394       }
2395       if ((i + 1) < e)
2396         s += ", ";
2397
2398       continue;
2399     }
2400
2401     if (splat && (i + 1) == e)
2402       args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);
2403
2404     // Check if an explicit cast is needed.
2405     if ((splat || !argScalar) &&
2406         ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
2407       std::string argTypeStr = "c";
2408       if (ck != ClassB)
2409         argTypeStr = argType;
2410       if (argQuad)
2411         argTypeStr = "Q" + argTypeStr;
2412       args = "(" + TypeString('d', argTypeStr) + ")" + args;
2413     }
2414
2415     s += args;
2416     if ((i + 1) < e)
2417       s += ", ";
2418   }
2419
2420   // Extra constant integer to hold type class enum for this function, e.g. s8
2421   if (ck == ClassB)
2422     s += ", " + utostr(GetNeonEnum(proto, typestr));
2423
2424   s += ");";
2425
2426   if (proto[0] != 'v' && sret) {
2427     if (define)
2428       s += " r;";
2429     else
2430       s += " return r;";
2431   }
2432   return s;
2433 }
2434
2435 static std::string GenBuiltinDef(const std::string &name,
2436                                  const std::string &proto,
2437                                  StringRef typestr, ClassKind ck) {
2438   std::string s("BUILTIN(__builtin_neon_");
2439
2440   // If all types are the same size, bitcasting the args will take care
2441   // of arg checking.  The actual signedness etc. will be taken care of with
2442   // special enums.
2443   if (!ProtoHasScalar(proto))
2444     ck = ClassB;
2445
2446   s += MangleName(name, typestr, ck);
2447   s += ", \"";
2448
2449   for (unsigned i = 0, e = proto.size(); i != e; ++i)
2450     s += BuiltinTypeString(proto[i], typestr, ck, i == 0);
2451
2452   // Extra constant integer to hold type class enum for this function, e.g. s8
2453   if (ck == ClassB)
2454     s += "i";
2455
2456   s += "\", \"n\")";
2457   return s;
2458 }
2459
2460 static std::string GenIntrinsic(const std::string &name,
2461                                 const std::string &proto,
2462                                 StringRef outTypeStr, StringRef inTypeStr,
2463                                 OpKind kind, ClassKind classKind) {
2464   assert(!proto.empty() && "");
2465   bool define = UseMacro(proto) && kind != OpUnavailable;
2466   std::string s;
2467
2468   // static always inline + return type
2469   if (define)
2470     s += "#define ";
2471   else
2472     s += "__ai " + TypeString(proto[0], outTypeStr) + " ";
2473
2474   // Function name with type suffix
2475   std::string mangledName = MangleName(name, outTypeStr, ClassS);
2476   if (outTypeStr != inTypeStr) {
2477     // If the input type is different (e.g., for vreinterpret), append a suffix
2478     // for the input type.  String off a "Q" (quad) prefix so that MangleName
2479     // does not insert another "q" in the name.
2480     unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
2481     StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
2482     mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
2483   }
2484   s += mangledName;
2485
2486   // Function arguments
2487   s += GenArgs(proto, inTypeStr, name);
2488
2489   // Definition.
2490   if (define) {
2491     s += " __extension__ ({ \\\n  ";
2492     s += GenMacroLocals(proto, inTypeStr, name);
2493   } else if (kind == OpUnavailable) {
2494     s += " __attribute__((unavailable));\n";
2495     return s;
2496   } else
2497     s += " {\n  ";
2498
2499   if (kind != OpNone)
2500     s += GenOpString(name, kind, proto, outTypeStr);
2501   else
2502     s += GenBuiltin(name, proto, outTypeStr, classKind);
2503   if (define)
2504     s += " })";
2505   else
2506     s += " }";
2507   s += "\n";
2508   return s;
2509 }
2510
2511 /// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
2512 /// is comprised of type definitions and function declarations.
2513 void NeonEmitter::run(raw_ostream &OS) {
2514   OS <<
2515     "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
2516     "---===\n"
2517     " *\n"
2518     " * Permission is hereby granted, free of charge, to any person obtaining "
2519     "a copy\n"
2520     " * of this software and associated documentation files (the \"Software\"),"
2521     " to deal\n"
2522     " * in the Software without restriction, including without limitation the "
2523     "rights\n"
2524     " * to use, copy, modify, merge, publish, distribute, sublicense, "
2525     "and/or sell\n"
2526     " * copies of the Software, and to permit persons to whom the Software is\n"
2527     " * furnished to do so, subject to the following conditions:\n"
2528     " *\n"
2529     " * The above copyright notice and this permission notice shall be "
2530     "included in\n"
2531     " * all copies or substantial portions of the Software.\n"
2532     " *\n"
2533     " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
2534     "EXPRESS OR\n"
2535     " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
2536     "MERCHANTABILITY,\n"
2537     " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
2538     "SHALL THE\n"
2539     " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
2540     "OTHER\n"
2541     " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
2542     "ARISING FROM,\n"
2543     " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
2544     "DEALINGS IN\n"
2545     " * THE SOFTWARE.\n"
2546     " *\n"
2547     " *===--------------------------------------------------------------------"
2548     "---===\n"
2549     " */\n\n";
2550
2551   OS << "#ifndef __ARM_NEON_H\n";
2552   OS << "#define __ARM_NEON_H\n\n";
2553
2554   OS << "#if !defined(__ARM_NEON__) && !defined(__ARM_NEON)\n";
2555   OS << "#error \"NEON support not enabled\"\n";
2556   OS << "#endif\n\n";
2557
2558   OS << "#include <stdint.h>\n\n";
2559
2560   // Emit NEON-specific scalar typedefs.
2561   OS << "typedef float float32_t;\n";
2562   OS << "typedef __fp16 float16_t;\n";
2563
2564   OS << "#ifdef __aarch64__\n";
2565   OS << "typedef double float64_t;\n";
2566   OS << "#endif\n\n";
2567
2568   // For now, signedness of polynomial types depends on target
2569   OS << "#ifdef __aarch64__\n";
2570   OS << "typedef uint8_t poly8_t;\n";
2571   OS << "typedef uint16_t poly16_t;\n";
2572   OS << "typedef uint64_t poly64_t;\n";
2573   OS << "#else\n";
2574   OS << "typedef int8_t poly8_t;\n";
2575   OS << "typedef int16_t poly16_t;\n";
2576   OS << "#endif\n";
2577
2578   // Emit Neon vector typedefs.
2579   std::string TypedefTypes(
2580       "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl");
2581   SmallVector<StringRef, 24> TDTypeVec;
2582   ParseTypes(0, TypedefTypes, TDTypeVec);
2583
2584   // Emit vector typedefs.
2585   bool isA64 = false;
2586   bool preinsert;
2587   bool postinsert;
2588   for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
2589     bool dummy, quad = false, poly = false;
2590     char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
2591     preinsert = false;
2592     postinsert = false;
2593
2594     if (type == 'd' || (type == 'l' && poly)) {
2595       preinsert = isA64? false: true;
2596       isA64 = true;
2597     } else {
2598       postinsert = isA64? true: false;
2599       isA64 = false;
2600     }
2601     if (postinsert)
2602       OS << "#endif\n";
2603     if (preinsert)
2604       OS << "#ifdef __aarch64__\n";
2605
2606     if (poly)
2607       OS << "typedef __attribute__((neon_polyvector_type(";
2608     else
2609       OS << "typedef __attribute__((neon_vector_type(";
2610
2611     unsigned nElts = GetNumElements(TDTypeVec[i], quad);
2612     OS << utostr(nElts) << "))) ";
2613     if (nElts < 10)
2614       OS << " ";
2615
2616     OS << TypeString('s', TDTypeVec[i]);
2617     OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";
2618
2619   }
2620   postinsert = isA64? true: false;
2621   if (postinsert)
2622     OS << "#endif\n";
2623   OS << "\n";
2624
2625   // Emit struct typedefs.
2626   isA64 = false;
2627   for (unsigned vi = 2; vi != 5; ++vi) {
2628     for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
2629       bool dummy, quad = false, poly = false;
2630       char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
2631       preinsert = false;
2632       postinsert = false;
2633
2634       if (type == 'd' || (type == 'l' && poly)) {
2635         preinsert = isA64? false: true;
2636         isA64 = true;
2637       } else {
2638         postinsert = isA64? true: false;
2639         isA64 = false;
2640       }
2641       if (postinsert)
2642         OS << "#endif\n";
2643       if (preinsert)
2644         OS << "#ifdef __aarch64__\n";
2645
2646       std::string ts = TypeString('d', TDTypeVec[i]);
2647       std::string vs = TypeString('0' + vi, TDTypeVec[i]);
2648       OS << "typedef struct " << vs << " {\n";
2649       OS << "  " << ts << " val";
2650       OS << "[" << utostr(vi) << "]";
2651       OS << ";\n} ";
2652       OS << vs << ";\n";
2653       OS << "\n";
2654     }
2655   }
2656   postinsert = isA64? true: false;
2657   if (postinsert)
2658     OS << "#endif\n";
2659   OS << "\n";
2660
2661   OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";
2662
2663   std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
2664
2665   StringMap<ClassKind> EmittedMap;
2666
2667   // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
2668   // intrinsics.  (Some of the saturating multiply instructions are also
2669   // used to implement the corresponding "_lane" variants, but tablegen
2670   // sorts the records into alphabetical order so that the "_lane" variants
2671   // come after the intrinsics they use.)
2672   emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap);
2673   emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap);
2674   emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap);
2675   emitIntrinsic(OS, Records.getDef("VABDL"), EmittedMap);
2676
2677   // ARM intrinsics must be emitted before AArch64 intrinsics to ensure
2678   // common intrinsics appear only once in the output stream.
2679   // The check for uniquiness is done in emitIntrinsic.
2680   // Emit ARM intrinsics.
2681   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2682     Record *R = RV[i];
2683
2684     // Skip AArch64 intrinsics; they will be emitted at the end.
2685     bool isA64 = R->getValueAsBit("isA64");
2686     if (isA64)
2687       continue;
2688
2689     if (R->getName() != "VMOVL" && R->getName() != "VMULL" &&
2690         R->getName() != "VABD")
2691       emitIntrinsic(OS, R, EmittedMap);
2692   }
2693
2694   // Emit AArch64-specific intrinsics.
2695   OS << "#ifdef __aarch64__\n";
2696
2697   emitIntrinsic(OS, Records.getDef("VMOVL_HIGH"), EmittedMap);
2698   emitIntrinsic(OS, Records.getDef("VMULL_HIGH"), EmittedMap);
2699   emitIntrinsic(OS, Records.getDef("VABDL_HIGH"), EmittedMap);
2700
2701   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2702     Record *R = RV[i];
2703
2704     // Skip ARM intrinsics already included above.
2705     bool isA64 = R->getValueAsBit("isA64");
2706     if (!isA64)
2707       continue;
2708
2709     // Skip crypto temporarily, and will emit them all together at the end.
2710     bool isCrypto = R->getValueAsBit("isCrypto");
2711     if (isCrypto)
2712       continue;
2713
2714     emitIntrinsic(OS, R, EmittedMap);
2715   }
2716
2717   OS << "#ifdef __ARM_FEATURE_CRYPTO\n";
2718
2719   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2720     Record *R = RV[i];
2721
2722     // Skip crypto temporarily, and will emit them all together at the end.
2723     bool isCrypto = R->getValueAsBit("isCrypto");
2724     if (!isCrypto)
2725       continue;
2726
2727     emitIntrinsic(OS, R, EmittedMap);
2728   }
2729
2730   OS << "#endif\n\n";
2731
2732   OS << "#endif\n\n";
2733
2734   OS << "#undef __ai\n\n";
2735   OS << "#endif /* __ARM_NEON_H */\n";
2736 }
2737
2738 /// emitIntrinsic - Write out the arm_neon.h header file definitions for the
2739 /// intrinsics specified by record R checking for intrinsic uniqueness.
2740 void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R,
2741                                 StringMap<ClassKind> &EmittedMap) {
2742   std::string name = R->getValueAsString("Name");
2743   std::string Proto = R->getValueAsString("Prototype");
2744   std::string Types = R->getValueAsString("Types");
2745
2746   SmallVector<StringRef, 16> TypeVec;
2747   ParseTypes(R, Types, TypeVec);
2748
2749   OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
2750
2751   ClassKind classKind = ClassNone;
2752   if (R->getSuperClasses().size() >= 2)
2753     classKind = ClassMap[R->getSuperClasses()[1]];
2754   if (classKind == ClassNone && kind == OpNone)
2755     PrintFatalError(R->getLoc(), "Builtin has no class kind");
2756
2757   for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2758     if (kind == OpReinterpret) {
2759       bool outQuad = false;
2760       bool dummy = false;
2761       (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
2762       for (unsigned srcti = 0, srcte = TypeVec.size();
2763            srcti != srcte; ++srcti) {
2764         bool inQuad = false;
2765         (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
2766         if (srcti == ti || inQuad != outQuad)
2767           continue;
2768         std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
2769                                      OpCast, ClassS);
2770         if (EmittedMap.count(s))
2771           continue;
2772         EmittedMap[s] = ClassS;
2773         OS << s;
2774       }
2775     } else {
2776       std::string s =
2777           GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind);
2778       if (EmittedMap.count(s))
2779         continue;
2780       EmittedMap[s] = classKind;
2781       OS << s;
2782     }
2783   }
2784   OS << "\n";
2785 }
2786
2787 static unsigned RangeFromType(const char mod, StringRef typestr) {
2788   // base type to get the type string for.
2789   bool quad = false, dummy = false;
2790   char type = ClassifyType(typestr, quad, dummy, dummy);
2791   type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);
2792
2793   switch (type) {
2794     case 'c':
2795       return (8 << (int)quad) - 1;
2796     case 'h':
2797     case 's':
2798       return (4 << (int)quad) - 1;
2799     case 'f':
2800     case 'i':
2801       return (2 << (int)quad) - 1;
2802     case 'd':
2803     case 'l':
2804       return (1 << (int)quad) - 1;
2805     default:
2806       PrintFatalError("unhandled type!");
2807   }
2808 }
2809
2810 static unsigned RangeScalarShiftImm(const char mod, StringRef typestr) {
2811   // base type to get the type string for.
2812   bool dummy = false;
2813   char type = ClassifyType(typestr, dummy, dummy, dummy);
2814   type = ModType(mod, type, dummy, dummy, dummy, dummy, dummy, dummy);
2815
2816   switch (type) {
2817     case 'c':
2818       return 7;
2819     case 'h':
2820     case 's':
2821       return 15;
2822     case 'f':
2823     case 'i':
2824       return 31;
2825     case 'd':
2826     case 'l':
2827       return 63;
2828     default:
2829       PrintFatalError("unhandled type!");
2830   }
2831 }
2832
2833 /// Generate the ARM and AArch64 intrinsic range checking code for
2834 /// shift/lane immediates, checking for unique declarations.
2835 void
2836 NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
2837                                         StringMap<ClassKind> &A64IntrinsicMap,
2838                                         bool isA64RangeCheck) {
2839   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2840   StringMap<OpKind> EmittedMap;
2841
2842   // Generate the intrinsic range checking code for shift/lane immediates.
2843   if (isA64RangeCheck)
2844     OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n";
2845   else
2846     OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
2847
2848   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2849     Record *R = RV[i];
2850
2851     OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
2852     if (k != OpNone)
2853       continue;
2854
2855     std::string name = R->getValueAsString("Name");
2856     std::string Proto = R->getValueAsString("Prototype");
2857     std::string Types = R->getValueAsString("Types");
2858     std::string Rename = name + "@" + Proto;
2859
2860     // Functions with 'a' (the splat code) in the type prototype should not get
2861     // their own builtin as they use the non-splat variant.
2862     if (Proto.find('a') != std::string::npos)
2863       continue;
2864
2865     // Functions which do not have an immediate do not need to have range
2866     // checking code emitted.
2867     size_t immPos = Proto.find('i');
2868     if (immPos == std::string::npos)
2869       continue;
2870
2871     SmallVector<StringRef, 16> TypeVec;
2872     ParseTypes(R, Types, TypeVec);
2873
2874     if (R->getSuperClasses().size() < 2)
2875       PrintFatalError(R->getLoc(), "Builtin has no class kind");
2876
2877     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2878     if (!ProtoHasScalar(Proto))
2879       ck = ClassB;
2880
2881     // Do not include AArch64 range checks if not generating code for AArch64.
2882     bool isA64 = R->getValueAsBit("isA64");
2883     if (!isA64RangeCheck && isA64)
2884       continue;
2885
2886     // Include ARM range checks in AArch64 but only if ARM intrinsics are not
2887     // redefined by AArch64 to handle new types.
2888     if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
2889       ClassKind &A64CK = A64IntrinsicMap[Rename];
2890       if (A64CK == ck && ck != ClassNone)
2891         continue;
2892     }
2893
2894     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2895       std::string namestr, shiftstr, rangestr;
2896
2897       if (R->getValueAsBit("isVCVT_N")) {
2898         // VCVT between floating- and fixed-point values takes an immediate
2899         // in the range [1, 32] for f32, or [1, 64] for f64.
2900         ck = ClassB;
2901         if (name.find("32") != std::string::npos)
2902           rangestr = "l = 1; u = 31"; // upper bound = l + u
2903         else if (name.find("64") != std::string::npos)
2904           rangestr = "l = 1; u = 63";
2905         else
2906           PrintFatalError(R->getLoc(),
2907               "Fixed point convert name should contains \"32\" or \"64\"");
2908
2909       } else if (R->getValueAsBit("isScalarShift")) {
2910         // Right shifts have an 'r' in the name, left shifts do not.  Convert
2911         // instructions have the same bounds and right shifts.
2912         if (name.find('r') != std::string::npos ||
2913             name.find("cvt") != std::string::npos)
2914           rangestr = "l = 1; ";
2915
2916         unsigned upBound = RangeScalarShiftImm(Proto[immPos - 1], TypeVec[ti]);
2917         // Narrow shift has half the upper bound
2918         if (R->getValueAsBit("isScalarNarrowShift"))
2919           upBound /= 2;
2920
2921         rangestr += "u = " + utostr(upBound);
2922       } else if (R->getValueAsBit("isShift")) {
2923         // Builtins which are overloaded by type will need to have their upper
2924         // bound computed at Sema time based on the type constant.
2925         shiftstr = ", true";
2926
2927         // Right shifts have an 'r' in the name, left shifts do not.
2928         if (name.find('r') != std::string::npos)
2929           rangestr = "l = 1; ";
2930
2931         rangestr += "u = RFT(TV" + shiftstr + ")";
2932       } else {
2933         // The immediate generally refers to a lane in the preceding argument.
2934         assert(immPos > 0 && "unexpected immediate operand");
2935         rangestr =
2936             "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti]));
2937       }
2938       // Make sure cases appear only once by uniquing them in a string map.
2939       namestr = MangleName(name, TypeVec[ti], ck);
2940       if (EmittedMap.count(namestr))
2941         continue;
2942       EmittedMap[namestr] = OpNone;
2943
2944       // Calculate the index of the immediate that should be range checked.
2945       unsigned immidx = 0;
2946
2947       // Builtins that return a struct of multiple vectors have an extra
2948       // leading arg for the struct return.
2949       if (IsMultiVecProto(Proto[0]))
2950         ++immidx;
2951
2952       // Add one to the index for each argument until we reach the immediate
2953       // to be checked.  Structs of vectors are passed as multiple arguments.
2954       for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
2955         switch (Proto[ii]) {
2956         default:
2957           immidx += 1;
2958           break;
2959         case '2':
2960         case 'B':
2961           immidx += 2;
2962           break;
2963         case '3':
2964         case 'C':
2965           immidx += 3;
2966           break;
2967         case '4':
2968         case 'D':
2969           immidx += 4;
2970           break;
2971         case 'i':
2972           ie = ii + 1;
2973           break;
2974         }
2975       }
2976       if (isA64RangeCheck)
2977         OS << "case AArch64::BI__builtin_neon_";
2978       else
2979         OS << "case ARM::BI__builtin_neon_";
2980       OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; "
2981          << rangestr << "; break;\n";
2982     }
2983   }
2984   OS << "#endif\n\n";
2985 }
2986
2987 /// Generate the ARM and AArch64 overloaded type checking code for
2988 /// SemaChecking.cpp, checking for unique builtin declarations.
2989 void
2990 NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
2991                                       StringMap<ClassKind> &A64IntrinsicMap,
2992                                       bool isA64TypeCheck) {
2993   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2994   StringMap<OpKind> EmittedMap;
2995
2996   // Generate the overloaded type checking code for SemaChecking.cpp
2997   if (isA64TypeCheck)
2998     OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n";
2999   else
3000     OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
3001
3002   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
3003     Record *R = RV[i];
3004     OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
3005     if (k != OpNone)
3006       continue;
3007
3008     std::string Proto = R->getValueAsString("Prototype");
3009     std::string Types = R->getValueAsString("Types");
3010     std::string name = R->getValueAsString("Name");
3011     std::string Rename = name + "@" + Proto;
3012
3013     // Functions with 'a' (the splat code) in the type prototype should not get
3014     // their own builtin as they use the non-splat variant.
3015     if (Proto.find('a') != std::string::npos)
3016       continue;
3017
3018     // Functions which have a scalar argument cannot be overloaded, no need to
3019     // check them if we are emitting the type checking code.
3020     if (ProtoHasScalar(Proto))
3021       continue;
3022
3023     SmallVector<StringRef, 16> TypeVec;
3024     ParseTypes(R, Types, TypeVec);
3025
3026     if (R->getSuperClasses().size() < 2)
3027       PrintFatalError(R->getLoc(), "Builtin has no class kind");
3028
3029     // Do not include AArch64 type checks if not generating code for AArch64.
3030     bool isA64 = R->getValueAsBit("isA64");
3031     if (!isA64TypeCheck && isA64)
3032       continue;
3033
3034     // Include ARM  type check in AArch64 but only if ARM intrinsics
3035     // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
3036     // redefined in AArch64 to handle an additional 2 x f64 type.
3037     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
3038     if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
3039       ClassKind &A64CK = A64IntrinsicMap[Rename];
3040       if (A64CK == ck && ck != ClassNone)
3041         continue;
3042     }
3043
3044     int si = -1, qi = -1;
3045     uint64_t mask = 0, qmask = 0;
3046     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
3047       // Generate the switch case(s) for this builtin for the type validation.
3048       bool quad = false, poly = false, usgn = false;
3049       (void) ClassifyType(TypeVec[ti], quad, poly, usgn);
3050
3051       if (quad) {
3052         qi = ti;
3053         qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
3054       } else {
3055         si = ti;
3056         mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
3057       }
3058     }
3059
3060     // Check if the builtin function has a pointer or const pointer argument.
3061     int PtrArgNum = -1;
3062     bool HasConstPtr = false;
3063     for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
3064       char ArgType = Proto[arg];
3065       if (ArgType == 'c') {
3066         HasConstPtr = true;
3067         PtrArgNum = arg - 1;
3068         break;
3069       }
3070       if (ArgType == 'p') {
3071         PtrArgNum = arg - 1;
3072         break;
3073       }
3074     }
3075     // For sret builtins, adjust the pointer argument index.
3076     if (PtrArgNum >= 0 && IsMultiVecProto(Proto[0]))
3077       PtrArgNum += 1;
3078
3079     // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
3080     // and vst1_lane intrinsics.  Using a pointer to the vector element
3081     // type with one of those operations causes codegen to select an aligned
3082     // load/store instruction.  If you want an unaligned operation,
3083     // the pointer argument needs to have less alignment than element type,
3084     // so just accept any pointer type.
3085     if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
3086       PtrArgNum = -1;
3087       HasConstPtr = false;
3088     }
3089
3090     if (mask) {
3091       if (isA64TypeCheck)
3092         OS << "case AArch64::BI__builtin_neon_";
3093       else
3094         OS << "case ARM::BI__builtin_neon_";
3095       OS << MangleName(name, TypeVec[si], ClassB) << ": mask = "
3096          << "0x" << utohexstr(mask) << "ULL";
3097       if (PtrArgNum >= 0)
3098         OS << "; PtrArgNum = " << PtrArgNum;
3099       if (HasConstPtr)
3100         OS << "; HasConstPtr = true";
3101       OS << "; break;\n";
3102     }
3103     if (qmask) {
3104       if (isA64TypeCheck)
3105         OS << "case AArch64::BI__builtin_neon_";
3106       else
3107         OS << "case ARM::BI__builtin_neon_";
3108       OS << MangleName(name, TypeVec[qi], ClassB) << ": mask = "
3109          << "0x" << utohexstr(qmask) << "ULL";
3110       if (PtrArgNum >= 0)
3111         OS << "; PtrArgNum = " << PtrArgNum;
3112       if (HasConstPtr)
3113         OS << "; HasConstPtr = true";
3114       OS << "; break;\n";
3115     }
3116   }
3117   OS << "#endif\n\n";
3118 }
3119
3120 /// genBuiltinsDef: Generate the BuiltinsARM.def and  BuiltinsAArch64.def
3121 /// declaration of builtins, checking for unique builtin declarations.
3122 void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
3123                                  StringMap<ClassKind> &A64IntrinsicMap,
3124                                  bool isA64GenBuiltinDef) {
3125   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
3126   StringMap<OpKind> EmittedMap;
3127
3128   // Generate BuiltinsARM.def and BuiltinsAArch64.def
3129   if (isA64GenBuiltinDef)
3130     OS << "#ifdef GET_NEON_AARCH64_BUILTINS\n";
3131   else
3132     OS << "#ifdef GET_NEON_BUILTINS\n";
3133
3134   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
3135     Record *R = RV[i];
3136     OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
3137     if (k != OpNone)
3138       continue;
3139
3140     std::string Proto = R->getValueAsString("Prototype");
3141     std::string name = R->getValueAsString("Name");
3142     std::string Rename = name + "@" + Proto;
3143
3144     // Functions with 'a' (the splat code) in the type prototype should not get
3145     // their own builtin as they use the non-splat variant.
3146     if (Proto.find('a') != std::string::npos)
3147       continue;
3148
3149     std::string Types = R->getValueAsString("Types");
3150     SmallVector<StringRef, 16> TypeVec;
3151     ParseTypes(R, Types, TypeVec);
3152
3153     if (R->getSuperClasses().size() < 2)
3154       PrintFatalError(R->getLoc(), "Builtin has no class kind");
3155
3156     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
3157
3158     // Do not include AArch64 BUILTIN() macros if not generating
3159     // code for AArch64
3160     bool isA64 = R->getValueAsBit("isA64");
3161     if (!isA64GenBuiltinDef && isA64)
3162       continue;
3163
3164     // Include ARM  BUILTIN() macros  in AArch64 but only if ARM intrinsics
3165     // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
3166     // redefined in AArch64 to handle an additional 2 x f64 type.
3167     if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(Rename)) {
3168       ClassKind &A64CK = A64IntrinsicMap[Rename];
3169       if (A64CK == ck && ck != ClassNone)
3170         continue;
3171     }
3172
3173     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
3174       // Generate the declaration for this builtin, ensuring
3175       // that each unique BUILTIN() macro appears only once in the output
3176       // stream.
3177       std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
3178       if (EmittedMap.count(bd))
3179         continue;
3180
3181       EmittedMap[bd] = OpNone;
3182       OS << bd << "\n";
3183     }
3184   }
3185   OS << "#endif\n\n";
3186 }
3187
3188 /// runHeader - Emit a file with sections defining:
3189 /// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
3190 /// 2. the SemaChecking code for the type overload checking.
3191 /// 3. the SemaChecking code for validation of intrinsic immediate arguments.
3192 void NeonEmitter::runHeader(raw_ostream &OS) {
3193   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
3194
3195   // build a map of AArch64 intriniscs to be used in uniqueness checks.
3196   StringMap<ClassKind> A64IntrinsicMap;
3197   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
3198     Record *R = RV[i];
3199
3200     bool isA64 = R->getValueAsBit("isA64");
3201     if (!isA64)
3202       continue;
3203
3204     ClassKind CK = ClassNone;
3205     if (R->getSuperClasses().size() >= 2)
3206       CK = ClassMap[R->getSuperClasses()[1]];
3207
3208     std::string Name = R->getValueAsString("Name");
3209     std::string Proto = R->getValueAsString("Prototype");
3210     std::string Rename = Name + "@" + Proto;
3211     if (A64IntrinsicMap.count(Rename))
3212       continue;
3213     A64IntrinsicMap[Rename] = CK;
3214   }
3215
3216   // Generate BuiltinsARM.def for ARM
3217   genBuiltinsDef(OS, A64IntrinsicMap, false);
3218
3219   // Generate BuiltinsAArch64.def for AArch64
3220   genBuiltinsDef(OS, A64IntrinsicMap, true);
3221
3222   // Generate ARM overloaded type checking code for SemaChecking.cpp
3223   genOverloadTypeCheckCode(OS, A64IntrinsicMap, false);
3224
3225   // Generate AArch64 overloaded type checking code for SemaChecking.cpp
3226   genOverloadTypeCheckCode(OS, A64IntrinsicMap, true);
3227
3228   // Generate ARM range checking code for shift/lane immediates.
3229   genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, false);
3230
3231   // Generate the AArch64 range checking code for shift/lane immediates.
3232   genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, true);
3233 }
3234
3235 /// GenTest - Write out a test for the intrinsic specified by the name and
3236 /// type strings, including the embedded patterns for FileCheck to match.
3237 static std::string GenTest(const std::string &name,
3238                            const std::string &proto,
3239                            StringRef outTypeStr, StringRef inTypeStr,
3240                            bool isShift, bool isHiddenLOp,
3241                            ClassKind ck, const std::string &InstName,
3242                            bool isA64,
3243                            std::string & testFuncProto) {
3244   assert(!proto.empty() && "");
3245   std::string s;
3246
3247   // Function name with type suffix
3248   std::string mangledName = MangleName(name, outTypeStr, ClassS);
3249   if (outTypeStr != inTypeStr) {
3250     // If the input type is different (e.g., for vreinterpret), append a suffix
3251     // for the input type.  String off a "Q" (quad) prefix so that MangleName
3252     // does not insert another "q" in the name.
3253     unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
3254     StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
3255     mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
3256   }
3257
3258   // todo: GenerateChecksForIntrinsic does not generate CHECK
3259   // for aarch64 instructions yet
3260   std::vector<std::string> FileCheckPatterns;
3261   if (!isA64) {
3262         GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
3263                                                            isHiddenLOp, FileCheckPatterns);
3264         s+= "// CHECK_ARM: test_" + mangledName + "\n";
3265   }
3266   s += "// CHECK_AARCH64: test_" + mangledName + "\n";
3267
3268   // Emit the FileCheck patterns.
3269   // If for any reason we do not want to emit a check, mangledInst
3270   // will be the empty string.
3271   if (FileCheckPatterns.size()) {
3272     for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(),
3273                                                   e = FileCheckPatterns.end();
3274          i != e;
3275          ++i) {
3276       s += "// CHECK_ARM: " + *i + "\n";
3277     }
3278   }
3279
3280   // Emit the start of the test function.
3281
3282   testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
3283   char arg = 'a';
3284   std::string comma;
3285   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
3286     // Do not create arguments for values that must be immediate constants.
3287     if (proto[i] == 'i')
3288       continue;
3289     testFuncProto += comma + TypeString(proto[i], inTypeStr) + " ";
3290     testFuncProto.push_back(arg);
3291     comma = ", ";
3292   }
3293   testFuncProto += ")";
3294
3295   s+= testFuncProto;
3296   s+= " {\n  ";
3297
3298   if (proto[0] != 'v')
3299     s += "return ";
3300   s += mangledName + "(";
3301   arg = 'a';
3302   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
3303     if (proto[i] == 'i') {
3304       // For immediate operands, test the maximum value.
3305       if (isShift)
3306         s += "1"; // FIXME
3307       else
3308         // The immediate generally refers to a lane in the preceding argument.
3309         s += utostr(RangeFromType(proto[i-1], inTypeStr));
3310     } else {
3311       s.push_back(arg);
3312     }
3313     if ((i + 1) < e)
3314       s += ", ";
3315   }
3316   s += ");\n}\n\n";
3317   return s;
3318 }
3319
3320 /// Write out all intrinsic tests for the specified target, checking
3321 /// for intrinsic test uniqueness.
3322 void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
3323                                 bool isA64GenTest) {
3324   if (isA64GenTest)
3325         OS << "#ifdef __aarch64__\n";
3326
3327   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
3328   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
3329     Record *R = RV[i];
3330     std::string name = R->getValueAsString("Name");
3331     std::string Proto = R->getValueAsString("Prototype");
3332     std::string Types = R->getValueAsString("Types");
3333     bool isShift = R->getValueAsBit("isShift");
3334     std::string InstName = R->getValueAsString("InstName");
3335     bool isHiddenLOp = R->getValueAsBit("isHiddenLInst");
3336     bool isA64 = R->getValueAsBit("isA64");
3337
3338     // do not include AArch64 intrinsic test if not generating
3339     // code for AArch64
3340     if (!isA64GenTest && isA64)
3341       continue;
3342
3343     SmallVector<StringRef, 16> TypeVec;
3344     ParseTypes(R, Types, TypeVec);
3345
3346     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
3347     OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
3348     if (kind == OpUnavailable)
3349       continue;
3350     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
3351       if (kind == OpReinterpret) {
3352         bool outQuad = false;
3353         bool dummy = false;
3354         (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
3355         for (unsigned srcti = 0, srcte = TypeVec.size();
3356              srcti != srcte; ++srcti) {
3357           bool inQuad = false;
3358           (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
3359           if (srcti == ti || inQuad != outQuad)
3360             continue;
3361                   std::string testFuncProto;
3362           std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
3363                                   isShift, isHiddenLOp, ck, InstName, isA64,
3364                                                                   testFuncProto);
3365           if (EmittedMap.count(testFuncProto))
3366             continue;
3367           EmittedMap[testFuncProto] = kind;
3368           OS << s << "\n";
3369         }
3370       } else {
3371                 std::string testFuncProto;
3372         std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift,
3373                                 isHiddenLOp, ck, InstName, isA64, testFuncProto);
3374         if (EmittedMap.count(testFuncProto))
3375           continue;
3376         EmittedMap[testFuncProto] = kind;
3377         OS << s << "\n";
3378       }
3379     }
3380   }
3381
3382   if (isA64GenTest)
3383         OS << "#endif\n";
3384 }
3385 /// runTests - Write out a complete set of tests for all of the Neon
3386 /// intrinsics.
3387 void NeonEmitter::runTests(raw_ostream &OS) {
3388   OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi "
3389         "apcs-gnu\\\n"
3390         "// RUN:  -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
3391         "// RUN:  | FileCheck %s -check-prefix=CHECK_ARM\n"
3392                 "\n"
3393             "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n"
3394             "// RUN -target-feature +neon  -ffreestanding -S -o - %s \\\n"
3395             "// RUN:  | FileCheck %s -check-prefix=CHECK_AARCH64\n"
3396         "\n"
3397         "// REQUIRES: long_tests\n"
3398         "\n"
3399         "#include <arm_neon.h>\n"
3400         "\n";
3401
3402   // ARM tests must be emitted before AArch64 tests to ensure
3403   // tests for intrinsics that are common to ARM and AArch64
3404   // appear only once in the output stream.
3405   // The check for uniqueness is done in genTargetTest.
3406   StringMap<OpKind> EmittedMap;
3407
3408   genTargetTest(OS, EmittedMap, false);
3409
3410   genTargetTest(OS, EmittedMap, true);
3411 }
3412
3413 namespace clang {
3414 void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
3415   NeonEmitter(Records).run(OS);
3416 }
3417 void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
3418   NeonEmitter(Records).runHeader(OS);
3419 }
3420 void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
3421   NeonEmitter(Records).runTests(OS);
3422 }
3423 } // End namespace clang