Home | History | Annotate | Download | only in TableGen
      1 //===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This tablegen backend is responsible for emitting arm_neon.h, which includes
     11 // a declaration and definition of each function specified by the ARM NEON
     12 // compiler interface.  See ARM document DUI0348B.
     13 //
     14 // Each NEON instruction is implemented in terms of 1 or more functions which
     15 // are suffixed with the element type of the input vectors.  Functions may be
     16 // implemented in terms of generic vector operations such as +, *, -, etc. or
     17 // by calling a __builtin_-prefixed function which will be handled by clang's
     18 // CodeGen library.
     19 //
     20 // Additional validation code can be generated by this file when runHeader() is
     21 // called, rather than the normal run() entry point.  A complete set of tests
     22 // for Neon intrinsics can be generated by calling the runTests() entry point.
     23 //
     24 //===----------------------------------------------------------------------===//
     25 
     26 #include "llvm/ADT/DenseMap.h"
     27 #include "llvm/ADT/SmallString.h"
     28 #include "llvm/ADT/SmallVector.h"
     29 #include "llvm/ADT/StringExtras.h"
     30 #include "llvm/ADT/StringMap.h"
     31 #include "llvm/Support/ErrorHandling.h"
     32 #include "llvm/TableGen/Error.h"
     33 #include "llvm/TableGen/Record.h"
     34 #include "llvm/TableGen/TableGenBackend.h"
     35 #include <string>
     36 using namespace llvm;
     37 
     38 enum OpKind {
     39   OpNone,
     40   OpUnavailable,
     41   OpAdd,
     42   OpAddl,
     43   OpAddw,
     44   OpSub,
     45   OpSubl,
     46   OpSubw,
     47   OpMul,
     48   OpMla,
     49   OpMlal,
     50   OpMls,
     51   OpMlsl,
     52   OpMulN,
     53   OpMlaN,
     54   OpMlsN,
     55   OpMlalN,
     56   OpMlslN,
     57   OpMulLane,
     58   OpMullLane,
     59   OpMlaLane,
     60   OpMlsLane,
     61   OpMlalLane,
     62   OpMlslLane,
     63   OpQDMullLane,
     64   OpQDMlalLane,
     65   OpQDMlslLane,
     66   OpQDMulhLane,
     67   OpQRDMulhLane,
     68   OpEq,
     69   OpGe,
     70   OpLe,
     71   OpGt,
     72   OpLt,
     73   OpNeg,
     74   OpNot,
     75   OpAnd,
     76   OpOr,
     77   OpXor,
     78   OpAndNot,
     79   OpOrNot,
     80   OpCast,
     81   OpConcat,
     82   OpDup,
     83   OpDupLane,
     84   OpHi,
     85   OpLo,
     86   OpSelect,
     87   OpRev16,
     88   OpRev32,
     89   OpRev64,
     90   OpReinterpret,
     91   OpAbdl,
     92   OpAba,
     93   OpAbal
     94 };
     95 
     96 enum ClassKind {
     97   ClassNone,
     98   ClassI,           // generic integer instruction, e.g., "i8" suffix
     99   ClassS,           // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
    100   ClassW,           // width-specific instruction, e.g., "8" suffix
    101   ClassB            // bitcast arguments with enum argument to specify type
    102 };
    103 
    104 /// NeonTypeFlags - Flags to identify the types for overloaded Neon
    105 /// builtins.  These must be kept in sync with the flags in
    106 /// include/clang/Basic/TargetBuiltins.h.
    107 namespace {
    108 class NeonTypeFlags {
    109   enum {
    110     EltTypeMask = 0xf,
    111     UnsignedFlag = 0x10,
    112     QuadFlag = 0x20
    113   };
    114   uint32_t Flags;
    115 
    116 public:
    117   enum EltType {
    118     Int8,
    119     Int16,
    120     Int32,
    121     Int64,
    122     Poly8,
    123     Poly16,
    124     Float16,
    125     Float32
    126   };
    127 
    128   NeonTypeFlags(unsigned F) : Flags(F) {}
    129   NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad) : Flags(ET) {
    130     if (IsUnsigned)
    131       Flags |= UnsignedFlag;
    132     if (IsQuad)
    133       Flags |= QuadFlag;
    134   }
    135 
    136   uint32_t getFlags() const { return Flags; }
    137 };
    138 } // end anonymous namespace
    139 
    140 namespace {
    141 class NeonEmitter {
    142   RecordKeeper &Records;
    143   StringMap<OpKind> OpMap;
    144   DenseMap<Record*, ClassKind> ClassMap;
    145 
    146 public:
    147   NeonEmitter(RecordKeeper &R) : Records(R) {
    148     OpMap["OP_NONE"]  = OpNone;
    149     OpMap["OP_UNAVAILABLE"] = OpUnavailable;
    150     OpMap["OP_ADD"]   = OpAdd;
    151     OpMap["OP_ADDL"]  = OpAddl;
    152     OpMap["OP_ADDW"]  = OpAddw;
    153     OpMap["OP_SUB"]   = OpSub;
    154     OpMap["OP_SUBL"]  = OpSubl;
    155     OpMap["OP_SUBW"]  = OpSubw;
    156     OpMap["OP_MUL"]   = OpMul;
    157     OpMap["OP_MLA"]   = OpMla;
    158     OpMap["OP_MLAL"]  = OpMlal;
    159     OpMap["OP_MLS"]   = OpMls;
    160     OpMap["OP_MLSL"]  = OpMlsl;
    161     OpMap["OP_MUL_N"] = OpMulN;
    162     OpMap["OP_MLA_N"] = OpMlaN;
    163     OpMap["OP_MLS_N"] = OpMlsN;
    164     OpMap["OP_MLAL_N"] = OpMlalN;
    165     OpMap["OP_MLSL_N"] = OpMlslN;
    166     OpMap["OP_MUL_LN"]= OpMulLane;
    167     OpMap["OP_MULL_LN"] = OpMullLane;
    168     OpMap["OP_MLA_LN"]= OpMlaLane;
    169     OpMap["OP_MLS_LN"]= OpMlsLane;
    170     OpMap["OP_MLAL_LN"] = OpMlalLane;
    171     OpMap["OP_MLSL_LN"] = OpMlslLane;
    172     OpMap["OP_QDMULL_LN"] = OpQDMullLane;
    173     OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
    174     OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
    175     OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
    176     OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
    177     OpMap["OP_EQ"]    = OpEq;
    178     OpMap["OP_GE"]    = OpGe;
    179     OpMap["OP_LE"]    = OpLe;
    180     OpMap["OP_GT"]    = OpGt;
    181     OpMap["OP_LT"]    = OpLt;
    182     OpMap["OP_NEG"]   = OpNeg;
    183     OpMap["OP_NOT"]   = OpNot;
    184     OpMap["OP_AND"]   = OpAnd;
    185     OpMap["OP_OR"]    = OpOr;
    186     OpMap["OP_XOR"]   = OpXor;
    187     OpMap["OP_ANDN"]  = OpAndNot;
    188     OpMap["OP_ORN"]   = OpOrNot;
    189     OpMap["OP_CAST"]  = OpCast;
    190     OpMap["OP_CONC"]  = OpConcat;
    191     OpMap["OP_HI"]    = OpHi;
    192     OpMap["OP_LO"]    = OpLo;
    193     OpMap["OP_DUP"]   = OpDup;
    194     OpMap["OP_DUP_LN"] = OpDupLane;
    195     OpMap["OP_SEL"]   = OpSelect;
    196     OpMap["OP_REV16"] = OpRev16;
    197     OpMap["OP_REV32"] = OpRev32;
    198     OpMap["OP_REV64"] = OpRev64;
    199     OpMap["OP_REINT"] = OpReinterpret;
    200     OpMap["OP_ABDL"]  = OpAbdl;
    201     OpMap["OP_ABA"]   = OpAba;
    202     OpMap["OP_ABAL"]  = OpAbal;
    203 
    204     Record *SI = R.getClass("SInst");
    205     Record *II = R.getClass("IInst");
    206     Record *WI = R.getClass("WInst");
    207     ClassMap[SI] = ClassS;
    208     ClassMap[II] = ClassI;
    209     ClassMap[WI] = ClassW;
    210   }
    211 
    212   // run - Emit arm_neon.h.inc
    213   void run(raw_ostream &o);
    214 
    215   // runHeader - Emit all the __builtin prototypes used in arm_neon.h
    216   void runHeader(raw_ostream &o);
    217 
    218   // runTests - Emit tests for all the Neon intrinsics.
    219   void runTests(raw_ostream &o);
    220 
    221 private:
    222   void emitIntrinsic(raw_ostream &OS, Record *R);
    223 };
    224 } // end anonymous namespace
    225 
    226 /// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
    227 /// which each StringRef representing a single type declared in the string.
    228 /// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
    229 /// 2xfloat and 4xfloat respectively.
    230 static void ParseTypes(Record *r, std::string &s,
    231                        SmallVectorImpl<StringRef> &TV) {
    232   const char *data = s.data();
    233   int len = 0;
    234 
    235   for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
    236     if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U')
    237       continue;
    238 
    239     switch (data[len]) {
    240       case 'c':
    241       case 's':
    242       case 'i':
    243       case 'l':
    244       case 'h':
    245       case 'f':
    246         break;
    247       default:
    248         throw TGError(r->getLoc(),
    249                       "Unexpected letter: " + std::string(data + len, 1));
    250     }
    251     TV.push_back(StringRef(data, len + 1));
    252     data += len + 1;
    253     len = -1;
    254   }
    255 }
    256 
    257 /// Widen - Convert a type code into the next wider type.  char -> short,
    258 /// short -> int, etc.
    259 static char Widen(const char t) {
    260   switch (t) {
    261     case 'c':
    262       return 's';
    263     case 's':
    264       return 'i';
    265     case 'i':
    266       return 'l';
    267     case 'h':
    268       return 'f';
    269     default: throw "unhandled type in widen!";
    270   }
    271 }
    272 
    273 /// Narrow - Convert a type code into the next smaller type.  short -> char,
    274 /// float -> half float, etc.
    275 static char Narrow(const char t) {
    276   switch (t) {
    277     case 's':
    278       return 'c';
    279     case 'i':
    280       return 's';
    281     case 'l':
    282       return 'i';
    283     case 'f':
    284       return 'h';
    285     default: throw "unhandled type in narrow!";
    286   }
    287 }
    288 
    289 /// For a particular StringRef, return the base type code, and whether it has
    290 /// the quad-vector, polynomial, or unsigned modifiers set.
    291 static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
    292   unsigned off = 0;
    293 
    294   // remember quad.
    295   if (ty[off] == 'Q') {
    296     quad = true;
    297     ++off;
    298   }
    299 
    300   // remember poly.
    301   if (ty[off] == 'P') {
    302     poly = true;
    303     ++off;
    304   }
    305 
    306   // remember unsigned.
    307   if (ty[off] == 'U') {
    308     usgn = true;
    309     ++off;
    310   }
    311 
    312   // base type to get the type string for.
    313   return ty[off];
    314 }
    315 
    316 /// ModType - Transform a type code and its modifiers based on a mod code. The
    317 /// mod code definitions may be found at the top of arm_neon.td.
    318 static char ModType(const char mod, char type, bool &quad, bool &poly,
    319                     bool &usgn, bool &scal, bool &cnst, bool &pntr) {
    320   switch (mod) {
    321     case 't':
    322       if (poly) {
    323         poly = false;
    324         usgn = true;
    325       }
    326       break;
    327     case 'u':
    328       usgn = true;
    329       poly = false;
    330       if (type == 'f')
    331         type = 'i';
    332       break;
    333     case 'x':
    334       usgn = false;
    335       poly = false;
    336       if (type == 'f')
    337         type = 'i';
    338       break;
    339     case 'f':
    340       if (type == 'h')
    341         quad = true;
    342       type = 'f';
    343       usgn = false;
    344       break;
    345     case 'g':
    346       quad = false;
    347       break;
    348     case 'w':
    349       type = Widen(type);
    350       quad = true;
    351       break;
    352     case 'n':
    353       type = Widen(type);
    354       break;
    355     case 'i':
    356       type = 'i';
    357       scal = true;
    358       break;
    359     case 'l':
    360       type = 'l';
    361       scal = true;
    362       usgn = true;
    363       break;
    364     case 's':
    365     case 'a':
    366       scal = true;
    367       break;
    368     case 'k':
    369       quad = true;
    370       break;
    371     case 'c':
    372       cnst = true;
    373     case 'p':
    374       pntr = true;
    375       scal = true;
    376       break;
    377     case 'h':
    378       type = Narrow(type);
    379       if (type == 'h')
    380         quad = false;
    381       break;
    382     case 'e':
    383       type = Narrow(type);
    384       usgn = true;
    385       break;
    386     default:
    387       break;
    388   }
    389   return type;
    390 }
    391 
    392 /// TypeString - for a modifier and type, generate the name of the typedef for
    393 /// that type.  QUc -> uint8x8_t.
    394 static std::string TypeString(const char mod, StringRef typestr) {
    395   bool quad = false;
    396   bool poly = false;
    397   bool usgn = false;
    398   bool scal = false;
    399   bool cnst = false;
    400   bool pntr = false;
    401 
    402   if (mod == 'v')
    403     return "void";
    404   if (mod == 'i')
    405     return "int";
    406 
    407   // base type to get the type string for.
    408   char type = ClassifyType(typestr, quad, poly, usgn);
    409 
    410   // Based on the modifying character, change the type and width if necessary.
    411   type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
    412 
    413   SmallString<128> s;
    414 
    415   if (usgn)
    416     s.push_back('u');
    417 
    418   switch (type) {
    419     case 'c':
    420       s += poly ? "poly8" : "int8";
    421       if (scal)
    422         break;
    423       s += quad ? "x16" : "x8";
    424       break;
    425     case 's':
    426       s += poly ? "poly16" : "int16";
    427       if (scal)
    428         break;
    429       s += quad ? "x8" : "x4";
    430       break;
    431     case 'i':
    432       s += "int32";
    433       if (scal)
    434         break;
    435       s += quad ? "x4" : "x2";
    436       break;
    437     case 'l':
    438       s += "int64";
    439       if (scal)
    440         break;
    441       s += quad ? "x2" : "x1";
    442       break;
    443     case 'h':
    444       s += "float16";
    445       if (scal)
    446         break;
    447       s += quad ? "x8" : "x4";
    448       break;
    449     case 'f':
    450       s += "float32";
    451       if (scal)
    452         break;
    453       s += quad ? "x4" : "x2";
    454       break;
    455     default:
    456       throw "unhandled type!";
    457   }
    458 
    459   if (mod == '2')
    460     s += "x2";
    461   if (mod == '3')
    462     s += "x3";
    463   if (mod == '4')
    464     s += "x4";
    465 
    466   // Append _t, finishing the type string typedef type.
    467   s += "_t";
    468 
    469   if (cnst)
    470     s += " const";
    471 
    472   if (pntr)
    473     s += " *";
    474 
    475   return s.str();
    476 }
    477 
    478 /// BuiltinTypeString - for a modifier and type, generate the clang
    479 /// BuiltinsARM.def prototype code for the function.  See the top of clang's
    480 /// Builtins.def for a description of the type strings.
    481 static std::string BuiltinTypeString(const char mod, StringRef typestr,
    482                                      ClassKind ck, bool ret) {
    483   bool quad = false;
    484   bool poly = false;
    485   bool usgn = false;
    486   bool scal = false;
    487   bool cnst = false;
    488   bool pntr = false;
    489 
    490   if (mod == 'v')
    491     return "v"; // void
    492   if (mod == 'i')
    493     return "i"; // int
    494 
    495   // base type to get the type string for.
    496   char type = ClassifyType(typestr, quad, poly, usgn);
    497 
    498   // Based on the modifying character, change the type and width if necessary.
    499   type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
    500 
    501   // All pointers are void* pointers.  Change type to 'v' now.
    502   if (pntr) {
    503     usgn = false;
    504     poly = false;
    505     type = 'v';
    506   }
    507   // Treat half-float ('h') types as unsigned short ('s') types.
    508   if (type == 'h') {
    509     type = 's';
    510     usgn = true;
    511   }
    512   usgn = usgn | poly | ((ck == ClassI || ck == ClassW) && scal && type != 'f');
    513 
    514   if (scal) {
    515     SmallString<128> s;
    516 
    517     if (usgn)
    518       s.push_back('U');
    519     else if (type == 'c')
    520       s.push_back('S'); // make chars explicitly signed
    521 
    522     if (type == 'l') // 64-bit long
    523       s += "LLi";
    524     else
    525       s.push_back(type);
    526 
    527     if (cnst)
    528       s.push_back('C');
    529     if (pntr)
    530       s.push_back('*');
    531     return s.str();
    532   }
    533 
    534   // Since the return value must be one type, return a vector type of the
    535   // appropriate width which we will bitcast.  An exception is made for
    536   // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
    537   // fashion, storing them to a pointer arg.
    538   if (ret) {
    539     if (mod >= '2' && mod <= '4')
    540       return "vv*"; // void result with void* first argument
    541     if (mod == 'f' || (ck != ClassB && type == 'f'))
    542       return quad ? "V4f" : "V2f";
    543     if (ck != ClassB && type == 's')
    544       return quad ? "V8s" : "V4s";
    545     if (ck != ClassB && type == 'i')
    546       return quad ? "V4i" : "V2i";
    547     if (ck != ClassB && type == 'l')
    548       return quad ? "V2LLi" : "V1LLi";
    549 
    550     return quad ? "V16Sc" : "V8Sc";
    551   }
    552 
    553   // Non-return array types are passed as individual vectors.
    554   if (mod == '2')
    555     return quad ? "V16ScV16Sc" : "V8ScV8Sc";
    556   if (mod == '3')
    557     return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
    558   if (mod == '4')
    559     return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";
    560 
    561   if (mod == 'f' || (ck != ClassB && type == 'f'))
    562     return quad ? "V4f" : "V2f";
    563   if (ck != ClassB && type == 's')
    564     return quad ? "V8s" : "V4s";
    565   if (ck != ClassB && type == 'i')
    566     return quad ? "V4i" : "V2i";
    567   if (ck != ClassB && type == 'l')
    568     return quad ? "V2LLi" : "V1LLi";
    569 
    570   return quad ? "V16Sc" : "V8Sc";
    571 }
    572 
    573 /// MangleName - Append a type or width suffix to a base neon function name,
    574 /// and insert a 'q' in the appropriate location if the operation works on
    575 /// 128b rather than 64b.   E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
    576 static std::string MangleName(const std::string &name, StringRef typestr,
    577                               ClassKind ck) {
    578   if (name == "vcvt_f32_f16")
    579     return name;
    580 
    581   bool quad = false;
    582   bool poly = false;
    583   bool usgn = false;
    584   char type = ClassifyType(typestr, quad, poly, usgn);
    585 
    586   std::string s = name;
    587 
    588   switch (type) {
    589   case 'c':
    590     switch (ck) {
    591     case ClassS: s += poly ? "_p8" : usgn ? "_u8" : "_s8"; break;
    592     case ClassI: s += "_i8"; break;
    593     case ClassW: s += "_8"; break;
    594     default: break;
    595     }
    596     break;
    597   case 's':
    598     switch (ck) {
    599     case ClassS: s += poly ? "_p16" : usgn ? "_u16" : "_s16"; break;
    600     case ClassI: s += "_i16"; break;
    601     case ClassW: s += "_16"; break;
    602     default: break;
    603     }
    604     break;
    605   case 'i':
    606     switch (ck) {
    607     case ClassS: s += usgn ? "_u32" : "_s32"; break;
    608     case ClassI: s += "_i32"; break;
    609     case ClassW: s += "_32"; break;
    610     default: break;
    611     }
    612     break;
    613   case 'l':
    614     switch (ck) {
    615     case ClassS: s += usgn ? "_u64" : "_s64"; break;
    616     case ClassI: s += "_i64"; break;
    617     case ClassW: s += "_64"; break;
    618     default: break;
    619     }
    620     break;
    621   case 'h':
    622     switch (ck) {
    623     case ClassS:
    624     case ClassI: s += "_f16"; break;
    625     case ClassW: s += "_16"; break;
    626     default: break;
    627     }
    628     break;
    629   case 'f':
    630     switch (ck) {
    631     case ClassS:
    632     case ClassI: s += "_f32"; break;
    633     case ClassW: s += "_32"; break;
    634     default: break;
    635     }
    636     break;
    637   default:
    638     throw "unhandled type!";
    639   }
    640   if (ck == ClassB)
    641     s += "_v";
    642 
    643   // Insert a 'q' before the first '_' character so that it ends up before
    644   // _lane or _n on vector-scalar operations.
    645   if (quad) {
    646     size_t pos = s.find('_');
    647     s = s.insert(pos, "q");
    648   }
    649   return s;
    650 }
    651 
    652 /// UseMacro - Examine the prototype string to determine if the intrinsic
    653 /// should be defined as a preprocessor macro instead of an inline function.
    654 static bool UseMacro(const std::string &proto) {
    655   // If this builtin takes an immediate argument, we need to #define it rather
    656   // than use a standard declaration, so that SemaChecking can range check
    657   // the immediate passed by the user.
    658   if (proto.find('i') != std::string::npos)
    659     return true;
    660 
    661   // Pointer arguments need to use macros to avoid hiding aligned attributes
    662   // from the pointer type.
    663   if (proto.find('p') != std::string::npos ||
    664       proto.find('c') != std::string::npos)
    665     return true;
    666 
    667   return false;
    668 }
    669 
    670 /// MacroArgUsedDirectly - Return true if argument i for an intrinsic that is
    671 /// defined as a macro should be accessed directly instead of being first
    672 /// assigned to a local temporary.
    673 static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
    674   // True for constant ints (i), pointers (p) and const pointers (c).
    675   return (proto[i] == 'i' || proto[i] == 'p' || proto[i] == 'c');
    676 }
    677 
    678 // Generate the string "(argtype a, argtype b, ...)"
    679 static std::string GenArgs(const std::string &proto, StringRef typestr) {
    680   bool define = UseMacro(proto);
    681   char arg = 'a';
    682 
    683   std::string s;
    684   s += "(";
    685 
    686   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
    687     if (define) {
    688       // Some macro arguments are used directly instead of being assigned
    689       // to local temporaries; prepend an underscore prefix to make their
    690       // names consistent with the local temporaries.
    691       if (MacroArgUsedDirectly(proto, i))
    692         s += "__";
    693     } else {
    694       s += TypeString(proto[i], typestr) + " __";
    695     }
    696     s.push_back(arg);
    697     if ((i + 1) < e)
    698       s += ", ";
    699   }
    700 
    701   s += ")";
    702   return s;
    703 }
    704 
    705 // Macro arguments are not type-checked like inline function arguments, so
    706 // assign them to local temporaries to get the right type checking.
    707 static std::string GenMacroLocals(const std::string &proto, StringRef typestr) {
    708   char arg = 'a';
    709   std::string s;
    710   bool generatedLocal = false;
    711 
    712   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
    713     // Do not create a temporary for an immediate argument.
    714     // That would defeat the whole point of using a macro!
    715     if (MacroArgUsedDirectly(proto, i))
    716       continue;
    717     generatedLocal = true;
    718 
    719     s += TypeString(proto[i], typestr) + " __";
    720     s.push_back(arg);
    721     s += " = (";
    722     s.push_back(arg);
    723     s += "); ";
    724   }
    725 
    726   if (generatedLocal)
    727     s += "\\\n  ";
    728   return s;
    729 }
    730 
    731 // Use the vmovl builtin to sign-extend or zero-extend a vector.
    732 static std::string Extend(StringRef typestr, const std::string &a) {
    733   std::string s;
    734   s = MangleName("vmovl", typestr, ClassS);
    735   s += "(" + a + ")";
    736   return s;
    737 }
    738 
    739 static std::string Duplicate(unsigned nElts, StringRef typestr,
    740                              const std::string &a) {
    741   std::string s;
    742 
    743   s = "(" + TypeString('d', typestr) + "){ ";
    744   for (unsigned i = 0; i != nElts; ++i) {
    745     s += a;
    746     if ((i + 1) < nElts)
    747       s += ", ";
    748   }
    749   s += " }";
    750 
    751   return s;
    752 }
    753 
    754 static std::string SplatLane(unsigned nElts, const std::string &vec,
    755                              const std::string &lane) {
    756   std::string s = "__builtin_shufflevector(" + vec + ", " + vec;
    757   for (unsigned i = 0; i < nElts; ++i)
    758     s += ", " + lane;
    759   s += ")";
    760   return s;
    761 }
    762 
    763 static unsigned GetNumElements(StringRef typestr, bool &quad) {
    764   quad = false;
    765   bool dummy = false;
    766   char type = ClassifyType(typestr, quad, dummy, dummy);
    767   unsigned nElts = 0;
    768   switch (type) {
    769   case 'c': nElts = 8; break;
    770   case 's': nElts = 4; break;
    771   case 'i': nElts = 2; break;
    772   case 'l': nElts = 1; break;
    773   case 'h': nElts = 4; break;
    774   case 'f': nElts = 2; break;
    775   default:
    776     throw "unhandled type!";
    777   }
    778   if (quad) nElts <<= 1;
    779   return nElts;
    780 }
    781 
    782 // Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
    783 static std::string GenOpString(OpKind op, const std::string &proto,
    784                                StringRef typestr) {
    785   bool quad;
    786   unsigned nElts = GetNumElements(typestr, quad);
    787   bool define = UseMacro(proto);
    788 
    789   std::string ts = TypeString(proto[0], typestr);
    790   std::string s;
    791   if (!define) {
    792     s = "return ";
    793   }
    794 
    795   switch(op) {
    796   case OpAdd:
    797     s += "__a + __b;";
    798     break;
    799   case OpAddl:
    800     s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
    801     break;
    802   case OpAddw:
    803     s += "__a + " + Extend(typestr, "__b") + ";";
    804     break;
    805   case OpSub:
    806     s += "__a - __b;";
    807     break;
    808   case OpSubl:
    809     s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
    810     break;
    811   case OpSubw:
    812     s += "__a - " + Extend(typestr, "__b") + ";";
    813     break;
    814   case OpMulN:
    815     s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
    816     break;
    817   case OpMulLane:
    818     s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
    819     break;
    820   case OpMul:
    821     s += "__a * __b;";
    822     break;
    823   case OpMullLane:
    824     s += MangleName("vmull", typestr, ClassS) + "(__a, " +
    825       SplatLane(nElts, "__b", "__c") + ");";
    826     break;
    827   case OpMlaN:
    828     s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    829     break;
    830   case OpMlaLane:
    831     s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    832     break;
    833   case OpMla:
    834     s += "__a + (__b * __c);";
    835     break;
    836   case OpMlalN:
    837     s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
    838       Duplicate(nElts, typestr, "__c") + ");";
    839     break;
    840   case OpMlalLane:
    841     s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
    842       SplatLane(nElts, "__c", "__d") + ");";
    843     break;
    844   case OpMlal:
    845     s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    846     break;
    847   case OpMlsN:
    848     s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    849     break;
    850   case OpMlsLane:
    851     s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    852     break;
    853   case OpMls:
    854     s += "__a - (__b * __c);";
    855     break;
    856   case OpMlslN:
    857     s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
    858       Duplicate(nElts, typestr, "__c") + ");";
    859     break;
    860   case OpMlslLane:
    861     s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
    862       SplatLane(nElts, "__c", "__d") + ");";
    863     break;
    864   case OpMlsl:
    865     s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    866     break;
    867   case OpQDMullLane:
    868     s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
    869       SplatLane(nElts, "__b", "__c") + ");";
    870     break;
    871   case OpQDMlalLane:
    872     s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
    873       SplatLane(nElts, "__c", "__d") + ");";
    874     break;
    875   case OpQDMlslLane:
    876     s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
    877       SplatLane(nElts, "__c", "__d") + ");";
    878     break;
    879   case OpQDMulhLane:
    880     s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
    881       SplatLane(nElts, "__b", "__c") + ");";
    882     break;
    883   case OpQRDMulhLane:
    884     s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
    885       SplatLane(nElts, "__b", "__c") + ");";
    886     break;
    887   case OpEq:
    888     s += "(" + ts + ")(__a == __b);";
    889     break;
    890   case OpGe:
    891     s += "(" + ts + ")(__a >= __b);";
    892     break;
    893   case OpLe:
    894     s += "(" + ts + ")(__a <= __b);";
    895     break;
    896   case OpGt:
    897     s += "(" + ts + ")(__a > __b);";
    898     break;
    899   case OpLt:
    900     s += "(" + ts + ")(__a < __b);";
    901     break;
    902   case OpNeg:
    903     s += " -__a;";
    904     break;
    905   case OpNot:
    906     s += " ~__a;";
    907     break;
    908   case OpAnd:
    909     s += "__a & __b;";
    910     break;
    911   case OpOr:
    912     s += "__a | __b;";
    913     break;
    914   case OpXor:
    915     s += "__a ^ __b;";
    916     break;
    917   case OpAndNot:
    918     s += "__a & ~__b;";
    919     break;
    920   case OpOrNot:
    921     s += "__a | ~__b;";
    922     break;
    923   case OpCast:
    924     s += "(" + ts + ")__a;";
    925     break;
    926   case OpConcat:
    927     s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
    928     s += ", (int64x1_t)__b, 0, 1);";
    929     break;
    930   case OpHi:
    931     s += "(" + ts +
    932       ")__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1);";
    933     break;
    934   case OpLo:
    935     s += "(" + ts +
    936       ")__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0);";
    937     break;
    938   case OpDup:
    939     s += Duplicate(nElts, typestr, "__a") + ";";
    940     break;
    941   case OpDupLane:
    942     s += SplatLane(nElts, "__a", "__b") + ";";
    943     break;
    944   case OpSelect:
    945     // ((0 & 1) | (~0 & 2))
    946     s += "(" + ts + ")";
    947     ts = TypeString(proto[1], typestr);
    948     s += "((__a & (" + ts + ")__b) | ";
    949     s += "(~__a & (" + ts + ")__c));";
    950     break;
    951   case OpRev16:
    952     s += "__builtin_shufflevector(__a, __a";
    953     for (unsigned i = 2; i <= nElts; i += 2)
    954       for (unsigned j = 0; j != 2; ++j)
    955         s += ", " + utostr(i - j - 1);
    956     s += ");";
    957     break;
    958   case OpRev32: {
    959     unsigned WordElts = nElts >> (1 + (int)quad);
    960     s += "__builtin_shufflevector(__a, __a";
    961     for (unsigned i = WordElts; i <= nElts; i += WordElts)
    962       for (unsigned j = 0; j != WordElts; ++j)
    963         s += ", " + utostr(i - j - 1);
    964     s += ");";
    965     break;
    966   }
    967   case OpRev64: {
    968     unsigned DblWordElts = nElts >> (int)quad;
    969     s += "__builtin_shufflevector(__a, __a";
    970     for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
    971       for (unsigned j = 0; j != DblWordElts; ++j)
    972         s += ", " + utostr(i - j - 1);
    973     s += ");";
    974     break;
    975   }
    976   case OpAbdl: {
    977     std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
    978     if (typestr[0] != 'U') {
    979       // vabd results are always unsigned and must be zero-extended.
    980       std::string utype = "U" + typestr.str();
    981       s += "(" + TypeString(proto[0], typestr) + ")";
    982       abd = "(" + TypeString('d', utype) + ")" + abd;
    983       s += Extend(utype, abd) + ";";
    984     } else {
    985       s += Extend(typestr, abd) + ";";
    986     }
    987     break;
    988   }
    989   case OpAba:
    990     s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
    991     break;
    992   case OpAbal: {
    993     s += "__a + ";
    994     std::string abd = MangleName("vabd", typestr, ClassS) + "(__b, __c)";
    995     if (typestr[0] != 'U') {
    996       // vabd results are always unsigned and must be zero-extended.
    997       std::string utype = "U" + typestr.str();
    998       s += "(" + TypeString(proto[0], typestr) + ")";
    999       abd = "(" + TypeString('d', utype) + ")" + abd;
   1000       s += Extend(utype, abd) + ";";
   1001     } else {
   1002       s += Extend(typestr, abd) + ";";
   1003     }
   1004     break;
   1005   }
   1006   default:
   1007     throw "unknown OpKind!";
   1008   }
   1009   return s;
   1010 }
   1011 
   1012 static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
   1013   unsigned mod = proto[0];
   1014 
   1015   if (mod == 'v' || mod == 'f')
   1016     mod = proto[1];
   1017 
   1018   bool quad = false;
   1019   bool poly = false;
   1020   bool usgn = false;
   1021   bool scal = false;
   1022   bool cnst = false;
   1023   bool pntr = false;
   1024 
   1025   // Base type to get the type string for.
   1026   char type = ClassifyType(typestr, quad, poly, usgn);
   1027 
   1028   // Based on the modifying character, change the type and width if necessary.
   1029   type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
   1030 
   1031   NeonTypeFlags::EltType ET;
   1032   switch (type) {
   1033     case 'c':
   1034       ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8;
   1035       break;
   1036     case 's':
   1037       ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16;
   1038       break;
   1039     case 'i':
   1040       ET = NeonTypeFlags::Int32;
   1041       break;
   1042     case 'l':
   1043       ET = NeonTypeFlags::Int64;
   1044       break;
   1045     case 'h':
   1046       ET = NeonTypeFlags::Float16;
   1047       break;
   1048     case 'f':
   1049       ET = NeonTypeFlags::Float32;
   1050       break;
   1051     default:
   1052       throw "unhandled type!";
   1053   }
   1054   NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g');
   1055   return Flags.getFlags();
   1056 }
   1057 
   1058 // Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
   1059 static std::string GenBuiltin(const std::string &name, const std::string &proto,
   1060                               StringRef typestr, ClassKind ck) {
   1061   std::string s;
   1062 
   1063   // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
   1064   // sret-like argument.
   1065   bool sret = (proto[0] >= '2' && proto[0] <= '4');
   1066 
   1067   bool define = UseMacro(proto);
   1068 
   1069   // Check if the prototype has a scalar operand with the type of the vector
   1070   // elements.  If not, bitcasting the args will take care of arg checking.
   1071   // The actual signedness etc. will be taken care of with special enums.
   1072   if (proto.find('s') == std::string::npos)
   1073     ck = ClassB;
   1074 
   1075   if (proto[0] != 'v') {
   1076     std::string ts = TypeString(proto[0], typestr);
   1077 
   1078     if (define) {
   1079       if (sret)
   1080         s += ts + " r; ";
   1081       else
   1082         s += "(" + ts + ")";
   1083     } else if (sret) {
   1084       s += ts + " r; ";
   1085     } else {
   1086       s += "return (" + ts + ")";
   1087     }
   1088   }
   1089 
   1090   bool splat = proto.find('a') != std::string::npos;
   1091 
   1092   s += "__builtin_neon_";
   1093   if (splat) {
   1094     // Call the non-splat builtin: chop off the "_n" suffix from the name.
   1095     std::string vname(name, 0, name.size()-2);
   1096     s += MangleName(vname, typestr, ck);
   1097   } else {
   1098     s += MangleName(name, typestr, ck);
   1099   }
   1100   s += "(";
   1101 
   1102   // Pass the address of the return variable as the first argument to sret-like
   1103   // builtins.
   1104   if (sret)
   1105     s += "&r, ";
   1106 
   1107   char arg = 'a';
   1108   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
   1109     std::string args = std::string(&arg, 1);
   1110 
   1111     // Use the local temporaries instead of the macro arguments.
   1112     args = "__" + args;
   1113 
   1114     bool argQuad = false;
   1115     bool argPoly = false;
   1116     bool argUsgn = false;
   1117     bool argScalar = false;
   1118     bool dummy = false;
   1119     char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
   1120     argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
   1121                       dummy, dummy);
   1122 
   1123     // Handle multiple-vector values specially, emitting each subvector as an
   1124     // argument to the __builtin.
   1125     if (proto[i] >= '2' && proto[i] <= '4') {
   1126       // Check if an explicit cast is needed.
   1127       if (argType != 'c' || argPoly || argUsgn)
   1128         args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;
   1129 
   1130       for (unsigned vi = 0, ve = proto[i] - '0'; vi != ve; ++vi) {
   1131         s += args + ".val[" + utostr(vi) + "]";
   1132         if ((vi + 1) < ve)
   1133           s += ", ";
   1134       }
   1135       if ((i + 1) < e)
   1136         s += ", ";
   1137 
   1138       continue;
   1139     }
   1140 
   1141     if (splat && (i + 1) == e)
   1142       args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);
   1143 
   1144     // Check if an explicit cast is needed.
   1145     if ((splat || !argScalar) &&
   1146         ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
   1147       std::string argTypeStr = "c";
   1148       if (ck != ClassB)
   1149         argTypeStr = argType;
   1150       if (argQuad)
   1151         argTypeStr = "Q" + argTypeStr;
   1152       args = "(" + TypeString('d', argTypeStr) + ")" + args;
   1153     }
   1154 
   1155     s += args;
   1156     if ((i + 1) < e)
   1157       s += ", ";
   1158   }
   1159 
   1160   // Extra constant integer to hold type class enum for this function, e.g. s8
   1161   if (ck == ClassB)
   1162     s += ", " + utostr(GetNeonEnum(proto, typestr));
   1163 
   1164   s += ");";
   1165 
   1166   if (proto[0] != 'v' && sret) {
   1167     if (define)
   1168       s += " r;";
   1169     else
   1170       s += " return r;";
   1171   }
   1172   return s;
   1173 }
   1174 
   1175 static std::string GenBuiltinDef(const std::string &name,
   1176                                  const std::string &proto,
   1177                                  StringRef typestr, ClassKind ck) {
   1178   std::string s("BUILTIN(__builtin_neon_");
   1179 
   1180   // If all types are the same size, bitcasting the args will take care
   1181   // of arg checking.  The actual signedness etc. will be taken care of with
   1182   // special enums.
   1183   if (proto.find('s') == std::string::npos)
   1184     ck = ClassB;
   1185 
   1186   s += MangleName(name, typestr, ck);
   1187   s += ", \"";
   1188 
   1189   for (unsigned i = 0, e = proto.size(); i != e; ++i)
   1190     s += BuiltinTypeString(proto[i], typestr, ck, i == 0);
   1191 
   1192   // Extra constant integer to hold type class enum for this function, e.g. s8
   1193   if (ck == ClassB)
   1194     s += "i";
   1195 
   1196   s += "\", \"n\")";
   1197   return s;
   1198 }
   1199 
   1200 static std::string GenIntrinsic(const std::string &name,
   1201                                 const std::string &proto,
   1202                                 StringRef outTypeStr, StringRef inTypeStr,
   1203                                 OpKind kind, ClassKind classKind) {
   1204   assert(!proto.empty() && "");
   1205   bool define = UseMacro(proto) && kind != OpUnavailable;
   1206   std::string s;
   1207 
   1208   // static always inline + return type
   1209   if (define)
   1210     s += "#define ";
   1211   else
   1212     s += "__ai " + TypeString(proto[0], outTypeStr) + " ";
   1213 
   1214   // Function name with type suffix
   1215   std::string mangledName = MangleName(name, outTypeStr, ClassS);
   1216   if (outTypeStr != inTypeStr) {
   1217     // If the input type is different (e.g., for vreinterpret), append a suffix
   1218     // for the input type.  String off a "Q" (quad) prefix so that MangleName
   1219     // does not insert another "q" in the name.
   1220     unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
   1221     StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
   1222     mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
   1223   }
   1224   s += mangledName;
   1225 
   1226   // Function arguments
   1227   s += GenArgs(proto, inTypeStr);
   1228 
   1229   // Definition.
   1230   if (define) {
   1231     s += " __extension__ ({ \\\n  ";
   1232     s += GenMacroLocals(proto, inTypeStr);
   1233   } else if (kind == OpUnavailable) {
   1234     s += " __attribute__((unavailable));\n";
   1235     return s;
   1236   } else
   1237     s += " {\n  ";
   1238 
   1239   if (kind != OpNone)
   1240     s += GenOpString(kind, proto, outTypeStr);
   1241   else
   1242     s += GenBuiltin(name, proto, outTypeStr, classKind);
   1243   if (define)
   1244     s += " })";
   1245   else
   1246     s += " }";
   1247   s += "\n";
   1248   return s;
   1249 }
   1250 
   1251 /// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
   1252 /// is comprised of type definitions and function declarations.
   1253 void NeonEmitter::run(raw_ostream &OS) {
   1254   OS <<
   1255     "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
   1256     "---===\n"
   1257     " *\n"
   1258     " * Permission is hereby granted, free of charge, to any person obtaining "
   1259     "a copy\n"
   1260     " * of this software and associated documentation files (the \"Software\"),"
   1261     " to deal\n"
   1262     " * in the Software without restriction, including without limitation the "
   1263     "rights\n"
   1264     " * to use, copy, modify, merge, publish, distribute, sublicense, "
   1265     "and/or sell\n"
   1266     " * copies of the Software, and to permit persons to whom the Software is\n"
   1267     " * furnished to do so, subject to the following conditions:\n"
   1268     " *\n"
   1269     " * The above copyright notice and this permission notice shall be "
   1270     "included in\n"
   1271     " * all copies or substantial portions of the Software.\n"
   1272     " *\n"
   1273     " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
   1274     "EXPRESS OR\n"
   1275     " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
   1276     "MERCHANTABILITY,\n"
   1277     " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
   1278     "SHALL THE\n"
   1279     " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
   1280     "OTHER\n"
   1281     " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
   1282     "ARISING FROM,\n"
   1283     " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
   1284     "DEALINGS IN\n"
   1285     " * THE SOFTWARE.\n"
   1286     " *\n"
   1287     " *===--------------------------------------------------------------------"
   1288     "---===\n"
   1289     " */\n\n";
   1290 
   1291   OS << "#ifndef __ARM_NEON_H\n";
   1292   OS << "#define __ARM_NEON_H\n\n";
   1293 
   1294   OS << "#ifndef __ARM_NEON__\n";
   1295   OS << "#error \"NEON support not enabled\"\n";
   1296   OS << "#endif\n\n";
   1297 
   1298   OS << "#include <stdint.h>\n\n";
   1299 
   1300   // Emit NEON-specific scalar typedefs.
   1301   OS << "typedef float float32_t;\n";
   1302   OS << "typedef int8_t poly8_t;\n";
   1303   OS << "typedef int16_t poly16_t;\n";
   1304   OS << "typedef uint16_t float16_t;\n";
   1305 
   1306   // Emit Neon vector typedefs.
   1307   std::string TypedefTypes("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfPcQPcPsQPs");
   1308   SmallVector<StringRef, 24> TDTypeVec;
   1309   ParseTypes(0, TypedefTypes, TDTypeVec);
   1310 
   1311   // Emit vector typedefs.
   1312   for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
   1313     bool dummy, quad = false, poly = false;
   1314     (void) ClassifyType(TDTypeVec[i], quad, poly, dummy);
   1315     if (poly)
   1316       OS << "typedef __attribute__((neon_polyvector_type(";
   1317     else
   1318       OS << "typedef __attribute__((neon_vector_type(";
   1319 
   1320     unsigned nElts = GetNumElements(TDTypeVec[i], quad);
   1321     OS << utostr(nElts) << "))) ";
   1322     if (nElts < 10)
   1323       OS << " ";
   1324 
   1325     OS << TypeString('s', TDTypeVec[i]);
   1326     OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";
   1327   }
   1328   OS << "\n";
   1329 
   1330   // Emit struct typedefs.
   1331   for (unsigned vi = 2; vi != 5; ++vi) {
   1332     for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
   1333       std::string ts = TypeString('d', TDTypeVec[i]);
   1334       std::string vs = TypeString('0' + vi, TDTypeVec[i]);
   1335       OS << "typedef struct " << vs << " {\n";
   1336       OS << "  " << ts << " val";
   1337       OS << "[" << utostr(vi) << "]";
   1338       OS << ";\n} ";
   1339       OS << vs << ";\n\n";
   1340     }
   1341   }
   1342 
   1343   OS<<"#define __ai static __attribute__((__always_inline__, __nodebug__))\n\n";
   1344 
   1345   std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
   1346 
   1347   // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
   1348   // intrinsics.  (Some of the saturating multiply instructions are also
   1349   // used to implement the corresponding "_lane" variants, but tablegen
   1350   // sorts the records into alphabetical order so that the "_lane" variants
   1351   // come after the intrinsics they use.)
   1352   emitIntrinsic(OS, Records.getDef("VMOVL"));
   1353   emitIntrinsic(OS, Records.getDef("VMULL"));
   1354   emitIntrinsic(OS, Records.getDef("VABD"));
   1355 
   1356   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
   1357     Record *R = RV[i];
   1358     if (R->getName() != "VMOVL" &&
   1359         R->getName() != "VMULL" &&
   1360         R->getName() != "VABD")
   1361       emitIntrinsic(OS, R);
   1362   }
   1363 
   1364   OS << "#undef __ai\n\n";
   1365   OS << "#endif /* __ARM_NEON_H */\n";
   1366 }
   1367 
   1368 /// emitIntrinsic - Write out the arm_neon.h header file definitions for the
   1369 /// intrinsics specified by record R.
   1370 void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R) {
   1371   std::string name = R->getValueAsString("Name");
   1372   std::string Proto = R->getValueAsString("Prototype");
   1373   std::string Types = R->getValueAsString("Types");
   1374 
   1375   SmallVector<StringRef, 16> TypeVec;
   1376   ParseTypes(R, Types, TypeVec);
   1377 
   1378   OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
   1379 
   1380   ClassKind classKind = ClassNone;
   1381   if (R->getSuperClasses().size() >= 2)
   1382     classKind = ClassMap[R->getSuperClasses()[1]];
   1383   if (classKind == ClassNone && kind == OpNone)
   1384     throw TGError(R->getLoc(), "Builtin has no class kind");
   1385 
   1386   for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
   1387     if (kind == OpReinterpret) {
   1388       bool outQuad = false;
   1389       bool dummy = false;
   1390       (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
   1391       for (unsigned srcti = 0, srcte = TypeVec.size();
   1392            srcti != srcte; ++srcti) {
   1393         bool inQuad = false;
   1394         (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
   1395         if (srcti == ti || inQuad != outQuad)
   1396           continue;
   1397         OS << GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
   1398                            OpCast, ClassS);
   1399       }
   1400     } else {
   1401       OS << GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti],
   1402                          kind, classKind);
   1403     }
   1404   }
   1405   OS << "\n";
   1406 }
   1407 
   1408 static unsigned RangeFromType(const char mod, StringRef typestr) {
   1409   // base type to get the type string for.
   1410   bool quad = false, dummy = false;
   1411   char type = ClassifyType(typestr, quad, dummy, dummy);
   1412   type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);
   1413 
   1414   switch (type) {
   1415     case 'c':
   1416       return (8 << (int)quad) - 1;
   1417     case 'h':
   1418     case 's':
   1419       return (4 << (int)quad) - 1;
   1420     case 'f':
   1421     case 'i':
   1422       return (2 << (int)quad) - 1;
   1423     case 'l':
   1424       return (1 << (int)quad) - 1;
   1425     default:
   1426       throw "unhandled type!";
   1427   }
   1428 }
   1429 
   1430 /// runHeader - Emit a file with sections defining:
   1431 /// 1. the NEON section of BuiltinsARM.def.
   1432 /// 2. the SemaChecking code for the type overload checking.
   1433 /// 3. the SemaChecking code for validation of intrinsic immediate arguments.
   1434 void NeonEmitter::runHeader(raw_ostream &OS) {
   1435   std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
   1436 
   1437   StringMap<OpKind> EmittedMap;
   1438 
   1439   // Generate BuiltinsARM.def for NEON
   1440   OS << "#ifdef GET_NEON_BUILTINS\n";
   1441   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
   1442     Record *R = RV[i];
   1443     OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
   1444     if (k != OpNone)
   1445       continue;
   1446 
   1447     std::string Proto = R->getValueAsString("Prototype");
   1448 
   1449     // Functions with 'a' (the splat code) in the type prototype should not get
   1450     // their own builtin as they use the non-splat variant.
   1451     if (Proto.find('a') != std::string::npos)
   1452       continue;
   1453 
   1454     std::string Types = R->getValueAsString("Types");
   1455     SmallVector<StringRef, 16> TypeVec;
   1456     ParseTypes(R, Types, TypeVec);
   1457 
   1458     if (R->getSuperClasses().size() < 2)
   1459       throw TGError(R->getLoc(), "Builtin has no class kind");
   1460 
   1461     std::string name = R->getValueAsString("Name");
   1462     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
   1463 
   1464     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
   1465       // Generate the BuiltinsARM.def declaration for this builtin, ensuring
   1466       // that each unique BUILTIN() macro appears only once in the output
   1467       // stream.
   1468       std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
   1469       if (EmittedMap.count(bd))
   1470         continue;
   1471 
   1472       EmittedMap[bd] = OpNone;
   1473       OS << bd << "\n";
   1474     }
   1475   }
   1476   OS << "#endif\n\n";
   1477 
   1478   // Generate the overloaded type checking code for SemaChecking.cpp
   1479   OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
   1480   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
   1481     Record *R = RV[i];
   1482     OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
   1483     if (k != OpNone)
   1484       continue;
   1485 
   1486     std::string Proto = R->getValueAsString("Prototype");
   1487     std::string Types = R->getValueAsString("Types");
   1488     std::string name = R->getValueAsString("Name");
   1489 
   1490     // Functions with 'a' (the splat code) in the type prototype should not get
   1491     // their own builtin as they use the non-splat variant.
   1492     if (Proto.find('a') != std::string::npos)
   1493       continue;
   1494 
   1495     // Functions which have a scalar argument cannot be overloaded, no need to
   1496     // check them if we are emitting the type checking code.
   1497     if (Proto.find('s') != std::string::npos)
   1498       continue;
   1499 
   1500     SmallVector<StringRef, 16> TypeVec;
   1501     ParseTypes(R, Types, TypeVec);
   1502 
   1503     if (R->getSuperClasses().size() < 2)
   1504       throw TGError(R->getLoc(), "Builtin has no class kind");
   1505 
   1506     int si = -1, qi = -1;
   1507     uint64_t mask = 0, qmask = 0;
   1508     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
   1509       // Generate the switch case(s) for this builtin for the type validation.
   1510       bool quad = false, poly = false, usgn = false;
   1511       (void) ClassifyType(TypeVec[ti], quad, poly, usgn);
   1512 
   1513       if (quad) {
   1514         qi = ti;
   1515         qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
   1516       } else {
   1517         si = ti;
   1518         mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
   1519       }
   1520     }
   1521 
   1522     // Check if the builtin function has a pointer or const pointer argument.
   1523     int PtrArgNum = -1;
   1524     bool HasConstPtr = false;
   1525     for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
   1526       char ArgType = Proto[arg];
   1527       if (ArgType == 'c') {
   1528         HasConstPtr = true;
   1529         PtrArgNum = arg - 1;
   1530         break;
   1531       }
   1532       if (ArgType == 'p') {
   1533         PtrArgNum = arg - 1;
   1534         break;
   1535       }
   1536     }
   1537     // For sret builtins, adjust the pointer argument index.
   1538     if (PtrArgNum >= 0 && (Proto[0] >= '2' && Proto[0] <= '4'))
   1539       PtrArgNum += 1;
   1540 
   1541     // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
   1542     // and vst1_lane intrinsics.  Using a pointer to the vector element
   1543     // type with one of those operations causes codegen to select an aligned
   1544     // load/store instruction.  If you want an unaligned operation,
   1545     // the pointer argument needs to have less alignment than element type,
   1546     // so just accept any pointer type.
   1547     if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
   1548       PtrArgNum = -1;
   1549       HasConstPtr = false;
   1550     }
   1551 
   1552     if (mask) {
   1553       OS << "case ARM::BI__builtin_neon_"
   1554          << MangleName(name, TypeVec[si], ClassB)
   1555          << ": mask = " << "0x" << utohexstr(mask) << "ULL";
   1556       if (PtrArgNum >= 0)
   1557         OS << "; PtrArgNum = " << PtrArgNum;
   1558       if (HasConstPtr)
   1559         OS << "; HasConstPtr = true";
   1560       OS << "; break;\n";
   1561     }
   1562     if (qmask) {
   1563       OS << "case ARM::BI__builtin_neon_"
   1564          << MangleName(name, TypeVec[qi], ClassB)
   1565          << ": mask = " << "0x" << utohexstr(qmask) << "ULL";
   1566       if (PtrArgNum >= 0)
   1567         OS << "; PtrArgNum = " << PtrArgNum;
   1568       if (HasConstPtr)
   1569         OS << "; HasConstPtr = true";
   1570       OS << "; break;\n";
   1571     }
   1572   }
   1573   OS << "#endif\n\n";
   1574 
   1575   // Generate the intrinsic range checking code for shift/lane immediates.
   1576   OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
   1577   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
   1578     Record *R = RV[i];
   1579 
   1580     OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
   1581     if (k != OpNone)
   1582       continue;
   1583 
   1584     std::string name = R->getValueAsString("Name");
   1585     std::string Proto = R->getValueAsString("Prototype");
   1586     std::string Types = R->getValueAsString("Types");
   1587 
   1588     // Functions with 'a' (the splat code) in the type prototype should not get
   1589     // their own builtin as they use the non-splat variant.
   1590     if (Proto.find('a') != std::string::npos)
   1591       continue;
   1592 
   1593     // Functions which do not have an immediate do not need to have range
   1594     // checking code emitted.
   1595     size_t immPos = Proto.find('i');
   1596     if (immPos == std::string::npos)
   1597       continue;
   1598 
   1599     SmallVector<StringRef, 16> TypeVec;
   1600     ParseTypes(R, Types, TypeVec);
   1601 
   1602     if (R->getSuperClasses().size() < 2)
   1603       throw TGError(R->getLoc(), "Builtin has no class kind");
   1604 
   1605     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
   1606 
   1607     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
   1608       std::string namestr, shiftstr, rangestr;
   1609 
   1610       if (R->getValueAsBit("isVCVT_N")) {
   1611         // VCVT between floating- and fixed-point values takes an immediate
   1612         // in the range 1 to 32.
   1613         ck = ClassB;
   1614         rangestr = "l = 1; u = 31"; // upper bound = l + u
   1615       } else if (Proto.find('s') == std::string::npos) {
   1616         // Builtins which are overloaded by type will need to have their upper
   1617         // bound computed at Sema time based on the type constant.
   1618         ck = ClassB;
   1619         if (R->getValueAsBit("isShift")) {
   1620           shiftstr = ", true";
   1621 
   1622           // Right shifts have an 'r' in the name, left shifts do not.
   1623           if (name.find('r') != std::string::npos)
   1624             rangestr = "l = 1; ";
   1625         }
   1626         rangestr += "u = RFT(TV" + shiftstr + ")";
   1627       } else {
   1628         // The immediate generally refers to a lane in the preceding argument.
   1629         assert(immPos > 0 && "unexpected immediate operand");
   1630         rangestr = "u = " + utostr(RangeFromType(Proto[immPos-1], TypeVec[ti]));
   1631       }
   1632       // Make sure cases appear only once by uniquing them in a string map.
   1633       namestr = MangleName(name, TypeVec[ti], ck);
   1634       if (EmittedMap.count(namestr))
   1635         continue;
   1636       EmittedMap[namestr] = OpNone;
   1637 
   1638       // Calculate the index of the immediate that should be range checked.
   1639       unsigned immidx = 0;
   1640 
   1641       // Builtins that return a struct of multiple vectors have an extra
   1642       // leading arg for the struct return.
   1643       if (Proto[0] >= '2' && Proto[0] <= '4')
   1644         ++immidx;
   1645 
   1646       // Add one to the index for each argument until we reach the immediate
   1647       // to be checked.  Structs of vectors are passed as multiple arguments.
   1648       for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
   1649         switch (Proto[ii]) {
   1650           default:  immidx += 1; break;
   1651           case '2': immidx += 2; break;
   1652           case '3': immidx += 3; break;
   1653           case '4': immidx += 4; break;
   1654           case 'i': ie = ii + 1; break;
   1655         }
   1656       }
   1657       OS << "case ARM::BI__builtin_neon_" << MangleName(name, TypeVec[ti], ck)
   1658          << ": i = " << immidx << "; " << rangestr << "; break;\n";
   1659     }
   1660   }
   1661   OS << "#endif\n\n";
   1662 }
   1663 
   1664 /// GenTest - Write out a test for the intrinsic specified by the name and
   1665 /// type strings, including the embedded patterns for FileCheck to match.
   1666 static std::string GenTest(const std::string &name,
   1667                            const std::string &proto,
   1668                            StringRef outTypeStr, StringRef inTypeStr,
   1669                            bool isShift) {
   1670   assert(!proto.empty() && "");
   1671   std::string s;
   1672 
   1673   // Function name with type suffix
   1674   std::string mangledName = MangleName(name, outTypeStr, ClassS);
   1675   if (outTypeStr != inTypeStr) {
   1676     // If the input type is different (e.g., for vreinterpret), append a suffix
   1677     // for the input type.  String off a "Q" (quad) prefix so that MangleName
   1678     // does not insert another "q" in the name.
   1679     unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
   1680     StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
   1681     mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
   1682   }
   1683 
   1684   // Emit the FileCheck patterns.
   1685   s += "// CHECK: test_" + mangledName + "\n";
   1686   // s += "// CHECK: \n"; // FIXME: + expected instruction opcode.
   1687 
   1688   // Emit the start of the test function.
   1689   s += TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
   1690   char arg = 'a';
   1691   std::string comma;
   1692   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
   1693     // Do not create arguments for values that must be immediate constants.
   1694     if (proto[i] == 'i')
   1695       continue;
   1696     s += comma + TypeString(proto[i], inTypeStr) + " ";
   1697     s.push_back(arg);
   1698     comma = ", ";
   1699   }
   1700   s += ") {\n  ";
   1701 
   1702   if (proto[0] != 'v')
   1703     s += "return ";
   1704   s += mangledName + "(";
   1705   arg = 'a';
   1706   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
   1707     if (proto[i] == 'i') {
   1708       // For immediate operands, test the maximum value.
   1709       if (isShift)
   1710         s += "1"; // FIXME
   1711       else
   1712         // The immediate generally refers to a lane in the preceding argument.
   1713         s += utostr(RangeFromType(proto[i-1], inTypeStr));
   1714     } else {
   1715       s.push_back(arg);
   1716     }
   1717     if ((i + 1) < e)
   1718       s += ", ";
   1719   }
   1720   s += ");\n}\n\n";
   1721   return s;
   1722 }
   1723 
   1724 /// runTests - Write out a complete set of tests for all of the Neon
   1725 /// intrinsics.
   1726 void NeonEmitter::runTests(raw_ostream &OS) {
   1727   OS <<
   1728     "// RUN: %clang_cc1 -triple thumbv7-apple-darwin \\\n"
   1729     "// RUN:  -target-cpu cortex-a9 -ffreestanding -S -o - %s | FileCheck %s\n"
   1730     "\n"
   1731     "#include <arm_neon.h>\n"
   1732     "\n";
   1733 
   1734   std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
   1735   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
   1736     Record *R = RV[i];
   1737     std::string name = R->getValueAsString("Name");
   1738     std::string Proto = R->getValueAsString("Prototype");
   1739     std::string Types = R->getValueAsString("Types");
   1740     bool isShift = R->getValueAsBit("isShift");
   1741 
   1742     SmallVector<StringRef, 16> TypeVec;
   1743     ParseTypes(R, Types, TypeVec);
   1744 
   1745     OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
   1746     if (kind == OpUnavailable)
   1747       continue;
   1748     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
   1749       if (kind == OpReinterpret) {
   1750         bool outQuad = false;
   1751         bool dummy = false;
   1752         (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
   1753         for (unsigned srcti = 0, srcte = TypeVec.size();
   1754              srcti != srcte; ++srcti) {
   1755           bool inQuad = false;
   1756           (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
   1757           if (srcti == ti || inQuad != outQuad)
   1758             continue;
   1759           OS << GenTest(name, Proto, TypeVec[ti], TypeVec[srcti], isShift);
   1760         }
   1761       } else {
   1762         OS << GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift);
   1763       }
   1764     }
   1765     OS << "\n";
   1766   }
   1767 }
   1768 
   1769 namespace clang {
   1770 void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
   1771   NeonEmitter(Records).run(OS);
   1772 }
   1773 void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
   1774   NeonEmitter(Records).runHeader(OS);
   1775 }
   1776 void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
   1777   NeonEmitter(Records).runTests(OS);
   1778 }
   1779 } // End namespace clang
   1780