Home | History | Annotate | Download | only in TableGen
      1 //===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This tablegen backend is responsible for emitting arm_neon.h, which includes
     11 // a declaration and definition of each function specified by the ARM NEON
     12 // compiler interface.  See ARM document DUI0348B.
     13 //
     14 // Each NEON instruction is implemented in terms of 1 or more functions which
     15 // are suffixed with the element type of the input vectors.  Functions may be
     16 // implemented in terms of generic vector operations such as +, *, -, etc. or
     17 // by calling a __builtin_-prefixed function which will be handled by clang's
     18 // CodeGen library.
     19 //
     20 // Additional validation code can be generated by this file when runHeader() is
     21 // called, rather than the normal run() entry point.  A complete set of tests
     22 // for Neon intrinsics can be generated by calling the runTests() entry point.
     23 //
     24 //===----------------------------------------------------------------------===//
     25 
     26 #include "llvm/ADT/DenseMap.h"
     27 #include "llvm/ADT/SmallString.h"
     28 #include "llvm/ADT/SmallVector.h"
     29 #include "llvm/ADT/StringExtras.h"
     30 #include "llvm/ADT/StringMap.h"
     31 #include "llvm/Support/ErrorHandling.h"
     32 #include "llvm/TableGen/Error.h"
     33 #include "llvm/TableGen/Record.h"
     34 #include "llvm/TableGen/TableGenBackend.h"
     35 #include <string>
     36 using namespace llvm;
     37 
     38 enum OpKind {
     39   OpNone,
     40   OpUnavailable,
     41   OpAdd,
     42   OpAddl,
     43   OpAddw,
     44   OpSub,
     45   OpSubl,
     46   OpSubw,
     47   OpMul,
     48   OpMla,
     49   OpMlal,
     50   OpMls,
     51   OpMlsl,
     52   OpMulN,
     53   OpMlaN,
     54   OpMlsN,
     55   OpMlalN,
     56   OpMlslN,
     57   OpMulLane,
     58   OpMullLane,
     59   OpMlaLane,
     60   OpMlsLane,
     61   OpMlalLane,
     62   OpMlslLane,
     63   OpQDMullLane,
     64   OpQDMlalLane,
     65   OpQDMlslLane,
     66   OpQDMulhLane,
     67   OpQRDMulhLane,
     68   OpEq,
     69   OpGe,
     70   OpLe,
     71   OpGt,
     72   OpLt,
     73   OpNeg,
     74   OpNot,
     75   OpAnd,
     76   OpOr,
     77   OpXor,
     78   OpAndNot,
     79   OpOrNot,
     80   OpCast,
     81   OpConcat,
     82   OpDup,
     83   OpDupLane,
     84   OpHi,
     85   OpLo,
     86   OpSelect,
     87   OpRev16,
     88   OpRev32,
     89   OpRev64,
     90   OpReinterpret,
     91   OpAbdl,
     92   OpAba,
     93   OpAbal
     94 };
     95 
     96 enum ClassKind {
     97   ClassNone,
     98   ClassI,           // generic integer instruction, e.g., "i8" suffix
     99   ClassS,           // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
    100   ClassW,           // width-specific instruction, e.g., "8" suffix
    101   ClassB            // bitcast arguments with enum argument to specify type
    102 };
    103 
    104 /// NeonTypeFlags - Flags to identify the types for overloaded Neon
    105 /// builtins.  These must be kept in sync with the flags in
    106 /// include/clang/Basic/TargetBuiltins.h.
    107 namespace {
    108 class NeonTypeFlags {
    109   enum {
    110     EltTypeMask = 0xf,
    111     UnsignedFlag = 0x10,
    112     QuadFlag = 0x20
    113   };
    114   uint32_t Flags;
    115 
    116 public:
    117   enum EltType {
    118     Int8,
    119     Int16,
    120     Int32,
    121     Int64,
    122     Poly8,
    123     Poly16,
    124     Float16,
    125     Float32
    126   };
    127 
    128   NeonTypeFlags(unsigned F) : Flags(F) {}
    129   NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad) : Flags(ET) {
    130     if (IsUnsigned)
    131       Flags |= UnsignedFlag;
    132     if (IsQuad)
    133       Flags |= QuadFlag;
    134   }
    135 
    136   uint32_t getFlags() const { return Flags; }
    137 };
    138 } // end anonymous namespace
    139 
    140 namespace {
    141 class NeonEmitter {
    142   RecordKeeper &Records;
    143   StringMap<OpKind> OpMap;
    144   DenseMap<Record*, ClassKind> ClassMap;
    145 
    146 public:
    147   NeonEmitter(RecordKeeper &R) : Records(R) {
    148     OpMap["OP_NONE"]  = OpNone;
    149     OpMap["OP_UNAVAILABLE"] = OpUnavailable;
    150     OpMap["OP_ADD"]   = OpAdd;
    151     OpMap["OP_ADDL"]  = OpAddl;
    152     OpMap["OP_ADDW"]  = OpAddw;
    153     OpMap["OP_SUB"]   = OpSub;
    154     OpMap["OP_SUBL"]  = OpSubl;
    155     OpMap["OP_SUBW"]  = OpSubw;
    156     OpMap["OP_MUL"]   = OpMul;
    157     OpMap["OP_MLA"]   = OpMla;
    158     OpMap["OP_MLAL"]  = OpMlal;
    159     OpMap["OP_MLS"]   = OpMls;
    160     OpMap["OP_MLSL"]  = OpMlsl;
    161     OpMap["OP_MUL_N"] = OpMulN;
    162     OpMap["OP_MLA_N"] = OpMlaN;
    163     OpMap["OP_MLS_N"] = OpMlsN;
    164     OpMap["OP_MLAL_N"] = OpMlalN;
    165     OpMap["OP_MLSL_N"] = OpMlslN;
    166     OpMap["OP_MUL_LN"]= OpMulLane;
    167     OpMap["OP_MULL_LN"] = OpMullLane;
    168     OpMap["OP_MLA_LN"]= OpMlaLane;
    169     OpMap["OP_MLS_LN"]= OpMlsLane;
    170     OpMap["OP_MLAL_LN"] = OpMlalLane;
    171     OpMap["OP_MLSL_LN"] = OpMlslLane;
    172     OpMap["OP_QDMULL_LN"] = OpQDMullLane;
    173     OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
    174     OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
    175     OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
    176     OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
    177     OpMap["OP_EQ"]    = OpEq;
    178     OpMap["OP_GE"]    = OpGe;
    179     OpMap["OP_LE"]    = OpLe;
    180     OpMap["OP_GT"]    = OpGt;
    181     OpMap["OP_LT"]    = OpLt;
    182     OpMap["OP_NEG"]   = OpNeg;
    183     OpMap["OP_NOT"]   = OpNot;
    184     OpMap["OP_AND"]   = OpAnd;
    185     OpMap["OP_OR"]    = OpOr;
    186     OpMap["OP_XOR"]   = OpXor;
    187     OpMap["OP_ANDN"]  = OpAndNot;
    188     OpMap["OP_ORN"]   = OpOrNot;
    189     OpMap["OP_CAST"]  = OpCast;
    190     OpMap["OP_CONC"]  = OpConcat;
    191     OpMap["OP_HI"]    = OpHi;
    192     OpMap["OP_LO"]    = OpLo;
    193     OpMap["OP_DUP"]   = OpDup;
    194     OpMap["OP_DUP_LN"] = OpDupLane;
    195     OpMap["OP_SEL"]   = OpSelect;
    196     OpMap["OP_REV16"] = OpRev16;
    197     OpMap["OP_REV32"] = OpRev32;
    198     OpMap["OP_REV64"] = OpRev64;
    199     OpMap["OP_REINT"] = OpReinterpret;
    200     OpMap["OP_ABDL"]  = OpAbdl;
    201     OpMap["OP_ABA"]   = OpAba;
    202     OpMap["OP_ABAL"]  = OpAbal;
    203 
    204     Record *SI = R.getClass("SInst");
    205     Record *II = R.getClass("IInst");
    206     Record *WI = R.getClass("WInst");
    207     ClassMap[SI] = ClassS;
    208     ClassMap[II] = ClassI;
    209     ClassMap[WI] = ClassW;
    210   }
    211 
    212   // run - Emit arm_neon.h.inc
    213   void run(raw_ostream &o);
    214 
    215   // runHeader - Emit all the __builtin prototypes used in arm_neon.h
    216   void runHeader(raw_ostream &o);
    217 
    218   // runTests - Emit tests for all the Neon intrinsics.
    219   void runTests(raw_ostream &o);
    220 
    221 private:
    222   void emitIntrinsic(raw_ostream &OS, Record *R);
    223 };
    224 } // end anonymous namespace
    225 
    226 /// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
    227 /// which each StringRef representing a single type declared in the string.
    228 /// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
    229 /// 2xfloat and 4xfloat respectively.
    230 static void ParseTypes(Record *r, std::string &s,
    231                        SmallVectorImpl<StringRef> &TV) {
    232   const char *data = s.data();
    233   int len = 0;
    234 
    235   for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
    236     if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U')
    237       continue;
    238 
    239     switch (data[len]) {
    240       case 'c':
    241       case 's':
    242       case 'i':
    243       case 'l':
    244       case 'h':
    245       case 'f':
    246         break;
    247       default:
    248         PrintFatalError(r->getLoc(),
    249                       "Unexpected letter: " + std::string(data + len, 1));
    250     }
    251     TV.push_back(StringRef(data, len + 1));
    252     data += len + 1;
    253     len = -1;
    254   }
    255 }
    256 
    257 /// Widen - Convert a type code into the next wider type.  char -> short,
    258 /// short -> int, etc.
    259 static char Widen(const char t) {
    260   switch (t) {
    261     case 'c':
    262       return 's';
    263     case 's':
    264       return 'i';
    265     case 'i':
    266       return 'l';
    267     case 'h':
    268       return 'f';
    269     default:
    270       PrintFatalError("unhandled type in widen!");
    271   }
    272 }
    273 
    274 /// Narrow - Convert a type code into the next smaller type.  short -> char,
    275 /// float -> half float, etc.
    276 static char Narrow(const char t) {
    277   switch (t) {
    278     case 's':
    279       return 'c';
    280     case 'i':
    281       return 's';
    282     case 'l':
    283       return 'i';
    284     case 'f':
    285       return 'h';
    286     default:
    287       PrintFatalError("unhandled type in narrow!");
    288   }
    289 }
    290 
    291 /// For a particular StringRef, return the base type code, and whether it has
    292 /// the quad-vector, polynomial, or unsigned modifiers set.
    293 static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
    294   unsigned off = 0;
    295 
    296   // remember quad.
    297   if (ty[off] == 'Q') {
    298     quad = true;
    299     ++off;
    300   }
    301 
    302   // remember poly.
    303   if (ty[off] == 'P') {
    304     poly = true;
    305     ++off;
    306   }
    307 
    308   // remember unsigned.
    309   if (ty[off] == 'U') {
    310     usgn = true;
    311     ++off;
    312   }
    313 
    314   // base type to get the type string for.
    315   return ty[off];
    316 }
    317 
    318 /// ModType - Transform a type code and its modifiers based on a mod code. The
    319 /// mod code definitions may be found at the top of arm_neon.td.
    320 static char ModType(const char mod, char type, bool &quad, bool &poly,
    321                     bool &usgn, bool &scal, bool &cnst, bool &pntr) {
    322   switch (mod) {
    323     case 't':
    324       if (poly) {
    325         poly = false;
    326         usgn = true;
    327       }
    328       break;
    329     case 'u':
    330       usgn = true;
    331       poly = false;
    332       if (type == 'f')
    333         type = 'i';
    334       break;
    335     case 'x':
    336       usgn = false;
    337       poly = false;
    338       if (type == 'f')
    339         type = 'i';
    340       break;
    341     case 'f':
    342       if (type == 'h')
    343         quad = true;
    344       type = 'f';
    345       usgn = false;
    346       break;
    347     case 'g':
    348       quad = false;
    349       break;
    350     case 'w':
    351       type = Widen(type);
    352       quad = true;
    353       break;
    354     case 'n':
    355       type = Widen(type);
    356       break;
    357     case 'i':
    358       type = 'i';
    359       scal = true;
    360       break;
    361     case 'l':
    362       type = 'l';
    363       scal = true;
    364       usgn = true;
    365       break;
    366     case 's':
    367     case 'a':
    368       scal = true;
    369       break;
    370     case 'k':
    371       quad = true;
    372       break;
    373     case 'c':
    374       cnst = true;
    375     case 'p':
    376       pntr = true;
    377       scal = true;
    378       break;
    379     case 'h':
    380       type = Narrow(type);
    381       if (type == 'h')
    382         quad = false;
    383       break;
    384     case 'e':
    385       type = Narrow(type);
    386       usgn = true;
    387       break;
    388     default:
    389       break;
    390   }
    391   return type;
    392 }
    393 
    394 /// TypeString - for a modifier and type, generate the name of the typedef for
    395 /// that type.  QUc -> uint8x8_t.
    396 static std::string TypeString(const char mod, StringRef typestr) {
    397   bool quad = false;
    398   bool poly = false;
    399   bool usgn = false;
    400   bool scal = false;
    401   bool cnst = false;
    402   bool pntr = false;
    403 
    404   if (mod == 'v')
    405     return "void";
    406   if (mod == 'i')
    407     return "int";
    408 
    409   // base type to get the type string for.
    410   char type = ClassifyType(typestr, quad, poly, usgn);
    411 
    412   // Based on the modifying character, change the type and width if necessary.
    413   type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
    414 
    415   SmallString<128> s;
    416 
    417   if (usgn)
    418     s.push_back('u');
    419 
    420   switch (type) {
    421     case 'c':
    422       s += poly ? "poly8" : "int8";
    423       if (scal)
    424         break;
    425       s += quad ? "x16" : "x8";
    426       break;
    427     case 's':
    428       s += poly ? "poly16" : "int16";
    429       if (scal)
    430         break;
    431       s += quad ? "x8" : "x4";
    432       break;
    433     case 'i':
    434       s += "int32";
    435       if (scal)
    436         break;
    437       s += quad ? "x4" : "x2";
    438       break;
    439     case 'l':
    440       s += "int64";
    441       if (scal)
    442         break;
    443       s += quad ? "x2" : "x1";
    444       break;
    445     case 'h':
    446       s += "float16";
    447       if (scal)
    448         break;
    449       s += quad ? "x8" : "x4";
    450       break;
    451     case 'f':
    452       s += "float32";
    453       if (scal)
    454         break;
    455       s += quad ? "x4" : "x2";
    456       break;
    457     default:
    458       PrintFatalError("unhandled type!");
    459   }
    460 
    461   if (mod == '2')
    462     s += "x2";
    463   if (mod == '3')
    464     s += "x3";
    465   if (mod == '4')
    466     s += "x4";
    467 
    468   // Append _t, finishing the type string typedef type.
    469   s += "_t";
    470 
    471   if (cnst)
    472     s += " const";
    473 
    474   if (pntr)
    475     s += " *";
    476 
    477   return s.str();
    478 }
    479 
    480 /// BuiltinTypeString - for a modifier and type, generate the clang
    481 /// BuiltinsARM.def prototype code for the function.  See the top of clang's
    482 /// Builtins.def for a description of the type strings.
    483 static std::string BuiltinTypeString(const char mod, StringRef typestr,
    484                                      ClassKind ck, bool ret) {
    485   bool quad = false;
    486   bool poly = false;
    487   bool usgn = false;
    488   bool scal = false;
    489   bool cnst = false;
    490   bool pntr = false;
    491 
    492   if (mod == 'v')
    493     return "v"; // void
    494   if (mod == 'i')
    495     return "i"; // int
    496 
    497   // base type to get the type string for.
    498   char type = ClassifyType(typestr, quad, poly, usgn);
    499 
    500   // Based on the modifying character, change the type and width if necessary.
    501   type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
    502 
    503   // All pointers are void* pointers.  Change type to 'v' now.
    504   if (pntr) {
    505     usgn = false;
    506     poly = false;
    507     type = 'v';
    508   }
    509   // Treat half-float ('h') types as unsigned short ('s') types.
    510   if (type == 'h') {
    511     type = 's';
    512     usgn = true;
    513   }
    514   usgn = usgn | poly | ((ck == ClassI || ck == ClassW) && scal && type != 'f');
    515 
    516   if (scal) {
    517     SmallString<128> s;
    518 
    519     if (usgn)
    520       s.push_back('U');
    521     else if (type == 'c')
    522       s.push_back('S'); // make chars explicitly signed
    523 
    524     if (type == 'l') // 64-bit long
    525       s += "LLi";
    526     else
    527       s.push_back(type);
    528 
    529     if (cnst)
    530       s.push_back('C');
    531     if (pntr)
    532       s.push_back('*');
    533     return s.str();
    534   }
    535 
    536   // Since the return value must be one type, return a vector type of the
    537   // appropriate width which we will bitcast.  An exception is made for
    538   // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
    539   // fashion, storing them to a pointer arg.
    540   if (ret) {
    541     if (mod >= '2' && mod <= '4')
    542       return "vv*"; // void result with void* first argument
    543     if (mod == 'f' || (ck != ClassB && type == 'f'))
    544       return quad ? "V4f" : "V2f";
    545     if (ck != ClassB && type == 's')
    546       return quad ? "V8s" : "V4s";
    547     if (ck != ClassB && type == 'i')
    548       return quad ? "V4i" : "V2i";
    549     if (ck != ClassB && type == 'l')
    550       return quad ? "V2LLi" : "V1LLi";
    551 
    552     return quad ? "V16Sc" : "V8Sc";
    553   }
    554 
    555   // Non-return array types are passed as individual vectors.
    556   if (mod == '2')
    557     return quad ? "V16ScV16Sc" : "V8ScV8Sc";
    558   if (mod == '3')
    559     return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
    560   if (mod == '4')
    561     return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";
    562 
    563   if (mod == 'f' || (ck != ClassB && type == 'f'))
    564     return quad ? "V4f" : "V2f";
    565   if (ck != ClassB && type == 's')
    566     return quad ? "V8s" : "V4s";
    567   if (ck != ClassB && type == 'i')
    568     return quad ? "V4i" : "V2i";
    569   if (ck != ClassB && type == 'l')
    570     return quad ? "V2LLi" : "V1LLi";
    571 
    572   return quad ? "V16Sc" : "V8Sc";
    573 }
    574 
    575 /// MangleName - Append a type or width suffix to a base neon function name,
    576 /// and insert a 'q' in the appropriate location if the operation works on
    577 /// 128b rather than 64b.   E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
    578 static std::string MangleName(const std::string &name, StringRef typestr,
    579                               ClassKind ck) {
    580   if (name == "vcvt_f32_f16")
    581     return name;
    582 
    583   bool quad = false;
    584   bool poly = false;
    585   bool usgn = false;
    586   char type = ClassifyType(typestr, quad, poly, usgn);
    587 
    588   std::string s = name;
    589 
    590   switch (type) {
    591   case 'c':
    592     switch (ck) {
    593     case ClassS: s += poly ? "_p8" : usgn ? "_u8" : "_s8"; break;
    594     case ClassI: s += "_i8"; break;
    595     case ClassW: s += "_8"; break;
    596     default: break;
    597     }
    598     break;
    599   case 's':
    600     switch (ck) {
    601     case ClassS: s += poly ? "_p16" : usgn ? "_u16" : "_s16"; break;
    602     case ClassI: s += "_i16"; break;
    603     case ClassW: s += "_16"; break;
    604     default: break;
    605     }
    606     break;
    607   case 'i':
    608     switch (ck) {
    609     case ClassS: s += usgn ? "_u32" : "_s32"; break;
    610     case ClassI: s += "_i32"; break;
    611     case ClassW: s += "_32"; break;
    612     default: break;
    613     }
    614     break;
    615   case 'l':
    616     switch (ck) {
    617     case ClassS: s += usgn ? "_u64" : "_s64"; break;
    618     case ClassI: s += "_i64"; break;
    619     case ClassW: s += "_64"; break;
    620     default: break;
    621     }
    622     break;
    623   case 'h':
    624     switch (ck) {
    625     case ClassS:
    626     case ClassI: s += "_f16"; break;
    627     case ClassW: s += "_16"; break;
    628     default: break;
    629     }
    630     break;
    631   case 'f':
    632     switch (ck) {
    633     case ClassS:
    634     case ClassI: s += "_f32"; break;
    635     case ClassW: s += "_32"; break;
    636     default: break;
    637     }
    638     break;
    639   default:
    640     PrintFatalError("unhandled type!");
    641   }
    642   if (ck == ClassB)
    643     s += "_v";
    644 
    645   // Insert a 'q' before the first '_' character so that it ends up before
    646   // _lane or _n on vector-scalar operations.
    647   if (quad) {
    648     size_t pos = s.find('_');
    649     s = s.insert(pos, "q");
    650   }
    651   return s;
    652 }
    653 
    654 /// UseMacro - Examine the prototype string to determine if the intrinsic
    655 /// should be defined as a preprocessor macro instead of an inline function.
    656 static bool UseMacro(const std::string &proto) {
    657   // If this builtin takes an immediate argument, we need to #define it rather
    658   // than use a standard declaration, so that SemaChecking can range check
    659   // the immediate passed by the user.
    660   if (proto.find('i') != std::string::npos)
    661     return true;
    662 
    663   // Pointer arguments need to use macros to avoid hiding aligned attributes
    664   // from the pointer type.
    665   if (proto.find('p') != std::string::npos ||
    666       proto.find('c') != std::string::npos)
    667     return true;
    668 
    669   return false;
    670 }
    671 
    672 /// MacroArgUsedDirectly - Return true if argument i for an intrinsic that is
    673 /// defined as a macro should be accessed directly instead of being first
    674 /// assigned to a local temporary.
    675 static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
    676   // True for constant ints (i), pointers (p) and const pointers (c).
    677   return (proto[i] == 'i' || proto[i] == 'p' || proto[i] == 'c');
    678 }
    679 
    680 // Generate the string "(argtype a, argtype b, ...)"
    681 static std::string GenArgs(const std::string &proto, StringRef typestr) {
    682   bool define = UseMacro(proto);
    683   char arg = 'a';
    684 
    685   std::string s;
    686   s += "(";
    687 
    688   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
    689     if (define) {
    690       // Some macro arguments are used directly instead of being assigned
    691       // to local temporaries; prepend an underscore prefix to make their
    692       // names consistent with the local temporaries.
    693       if (MacroArgUsedDirectly(proto, i))
    694         s += "__";
    695     } else {
    696       s += TypeString(proto[i], typestr) + " __";
    697     }
    698     s.push_back(arg);
    699     if ((i + 1) < e)
    700       s += ", ";
    701   }
    702 
    703   s += ")";
    704   return s;
    705 }
    706 
    707 // Macro arguments are not type-checked like inline function arguments, so
    708 // assign them to local temporaries to get the right type checking.
    709 static std::string GenMacroLocals(const std::string &proto, StringRef typestr) {
    710   char arg = 'a';
    711   std::string s;
    712   bool generatedLocal = false;
    713 
    714   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
    715     // Do not create a temporary for an immediate argument.
    716     // That would defeat the whole point of using a macro!
    717     if (MacroArgUsedDirectly(proto, i))
    718       continue;
    719     generatedLocal = true;
    720 
    721     s += TypeString(proto[i], typestr) + " __";
    722     s.push_back(arg);
    723     s += " = (";
    724     s.push_back(arg);
    725     s += "); ";
    726   }
    727 
    728   if (generatedLocal)
    729     s += "\\\n  ";
    730   return s;
    731 }
    732 
    733 // Use the vmovl builtin to sign-extend or zero-extend a vector.
    734 static std::string Extend(StringRef typestr, const std::string &a) {
    735   std::string s;
    736   s = MangleName("vmovl", typestr, ClassS);
    737   s += "(" + a + ")";
    738   return s;
    739 }
    740 
    741 static std::string Duplicate(unsigned nElts, StringRef typestr,
    742                              const std::string &a) {
    743   std::string s;
    744 
    745   s = "(" + TypeString('d', typestr) + "){ ";
    746   for (unsigned i = 0; i != nElts; ++i) {
    747     s += a;
    748     if ((i + 1) < nElts)
    749       s += ", ";
    750   }
    751   s += " }";
    752 
    753   return s;
    754 }
    755 
    756 static std::string SplatLane(unsigned nElts, const std::string &vec,
    757                              const std::string &lane) {
    758   std::string s = "__builtin_shufflevector(" + vec + ", " + vec;
    759   for (unsigned i = 0; i < nElts; ++i)
    760     s += ", " + lane;
    761   s += ")";
    762   return s;
    763 }
    764 
    765 static unsigned GetNumElements(StringRef typestr, bool &quad) {
    766   quad = false;
    767   bool dummy = false;
    768   char type = ClassifyType(typestr, quad, dummy, dummy);
    769   unsigned nElts = 0;
    770   switch (type) {
    771   case 'c': nElts = 8; break;
    772   case 's': nElts = 4; break;
    773   case 'i': nElts = 2; break;
    774   case 'l': nElts = 1; break;
    775   case 'h': nElts = 4; break;
    776   case 'f': nElts = 2; break;
    777   default:
    778     PrintFatalError("unhandled type!");
    779   }
    780   if (quad) nElts <<= 1;
    781   return nElts;
    782 }
    783 
    784 // Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
    785 static std::string GenOpString(OpKind op, const std::string &proto,
    786                                StringRef typestr) {
    787   bool quad;
    788   unsigned nElts = GetNumElements(typestr, quad);
    789   bool define = UseMacro(proto);
    790 
    791   std::string ts = TypeString(proto[0], typestr);
    792   std::string s;
    793   if (!define) {
    794     s = "return ";
    795   }
    796 
    797   switch(op) {
    798   case OpAdd:
    799     s += "__a + __b;";
    800     break;
    801   case OpAddl:
    802     s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
    803     break;
    804   case OpAddw:
    805     s += "__a + " + Extend(typestr, "__b") + ";";
    806     break;
    807   case OpSub:
    808     s += "__a - __b;";
    809     break;
    810   case OpSubl:
    811     s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
    812     break;
    813   case OpSubw:
    814     s += "__a - " + Extend(typestr, "__b") + ";";
    815     break;
    816   case OpMulN:
    817     s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
    818     break;
    819   case OpMulLane:
    820     s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
    821     break;
    822   case OpMul:
    823     s += "__a * __b;";
    824     break;
    825   case OpMullLane:
    826     s += MangleName("vmull", typestr, ClassS) + "(__a, " +
    827       SplatLane(nElts, "__b", "__c") + ");";
    828     break;
    829   case OpMlaN:
    830     s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    831     break;
    832   case OpMlaLane:
    833     s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    834     break;
    835   case OpMla:
    836     s += "__a + (__b * __c);";
    837     break;
    838   case OpMlalN:
    839     s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
    840       Duplicate(nElts, typestr, "__c") + ");";
    841     break;
    842   case OpMlalLane:
    843     s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
    844       SplatLane(nElts, "__c", "__d") + ");";
    845     break;
    846   case OpMlal:
    847     s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    848     break;
    849   case OpMlsN:
    850     s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    851     break;
    852   case OpMlsLane:
    853     s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    854     break;
    855   case OpMls:
    856     s += "__a - (__b * __c);";
    857     break;
    858   case OpMlslN:
    859     s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
    860       Duplicate(nElts, typestr, "__c") + ");";
    861     break;
    862   case OpMlslLane:
    863     s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
    864       SplatLane(nElts, "__c", "__d") + ");";
    865     break;
    866   case OpMlsl:
    867     s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    868     break;
    869   case OpQDMullLane:
    870     s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
    871       SplatLane(nElts, "__b", "__c") + ");";
    872     break;
    873   case OpQDMlalLane:
    874     s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
    875       SplatLane(nElts, "__c", "__d") + ");";
    876     break;
    877   case OpQDMlslLane:
    878     s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
    879       SplatLane(nElts, "__c", "__d") + ");";
    880     break;
    881   case OpQDMulhLane:
    882     s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
    883       SplatLane(nElts, "__b", "__c") + ");";
    884     break;
    885   case OpQRDMulhLane:
    886     s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
    887       SplatLane(nElts, "__b", "__c") + ");";
    888     break;
    889   case OpEq:
    890     s += "(" + ts + ")(__a == __b);";
    891     break;
    892   case OpGe:
    893     s += "(" + ts + ")(__a >= __b);";
    894     break;
    895   case OpLe:
    896     s += "(" + ts + ")(__a <= __b);";
    897     break;
    898   case OpGt:
    899     s += "(" + ts + ")(__a > __b);";
    900     break;
    901   case OpLt:
    902     s += "(" + ts + ")(__a < __b);";
    903     break;
    904   case OpNeg:
    905     s += " -__a;";
    906     break;
    907   case OpNot:
    908     s += " ~__a;";
    909     break;
    910   case OpAnd:
    911     s += "__a & __b;";
    912     break;
    913   case OpOr:
    914     s += "__a | __b;";
    915     break;
    916   case OpXor:
    917     s += "__a ^ __b;";
    918     break;
    919   case OpAndNot:
    920     s += "__a & ~__b;";
    921     break;
    922   case OpOrNot:
    923     s += "__a | ~__b;";
    924     break;
    925   case OpCast:
    926     s += "(" + ts + ")__a;";
    927     break;
    928   case OpConcat:
    929     s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
    930     s += ", (int64x1_t)__b, 0, 1);";
    931     break;
    932   case OpHi:
    933     s += "(" + ts +
    934       ")__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1);";
    935     break;
    936   case OpLo:
    937     s += "(" + ts +
    938       ")__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0);";
    939     break;
    940   case OpDup:
    941     s += Duplicate(nElts, typestr, "__a") + ";";
    942     break;
    943   case OpDupLane:
    944     s += SplatLane(nElts, "__a", "__b") + ";";
    945     break;
    946   case OpSelect:
    947     // ((0 & 1) | (~0 & 2))
    948     s += "(" + ts + ")";
    949     ts = TypeString(proto[1], typestr);
    950     s += "((__a & (" + ts + ")__b) | ";
    951     s += "(~__a & (" + ts + ")__c));";
    952     break;
    953   case OpRev16:
    954     s += "__builtin_shufflevector(__a, __a";
    955     for (unsigned i = 2; i <= nElts; i += 2)
    956       for (unsigned j = 0; j != 2; ++j)
    957         s += ", " + utostr(i - j - 1);
    958     s += ");";
    959     break;
    960   case OpRev32: {
    961     unsigned WordElts = nElts >> (1 + (int)quad);
    962     s += "__builtin_shufflevector(__a, __a";
    963     for (unsigned i = WordElts; i <= nElts; i += WordElts)
    964       for (unsigned j = 0; j != WordElts; ++j)
    965         s += ", " + utostr(i - j - 1);
    966     s += ");";
    967     break;
    968   }
    969   case OpRev64: {
    970     unsigned DblWordElts = nElts >> (int)quad;
    971     s += "__builtin_shufflevector(__a, __a";
    972     for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
    973       for (unsigned j = 0; j != DblWordElts; ++j)
    974         s += ", " + utostr(i - j - 1);
    975     s += ");";
    976     break;
    977   }
    978   case OpAbdl: {
    979     std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
    980     if (typestr[0] != 'U') {
    981       // vabd results are always unsigned and must be zero-extended.
    982       std::string utype = "U" + typestr.str();
    983       s += "(" + TypeString(proto[0], typestr) + ")";
    984       abd = "(" + TypeString('d', utype) + ")" + abd;
    985       s += Extend(utype, abd) + ";";
    986     } else {
    987       s += Extend(typestr, abd) + ";";
    988     }
    989     break;
    990   }
    991   case OpAba:
    992     s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
    993     break;
    994   case OpAbal: {
    995     s += "__a + ";
    996     std::string abd = MangleName("vabd", typestr, ClassS) + "(__b, __c)";
    997     if (typestr[0] != 'U') {
    998       // vabd results are always unsigned and must be zero-extended.
    999       std::string utype = "U" + typestr.str();
   1000       s += "(" + TypeString(proto[0], typestr) + ")";
   1001       abd = "(" + TypeString('d', utype) + ")" + abd;
   1002       s += Extend(utype, abd) + ";";
   1003     } else {
   1004       s += Extend(typestr, abd) + ";";
   1005     }
   1006     break;
   1007   }
   1008   default:
   1009     PrintFatalError("unknown OpKind!");
   1010   }
   1011   return s;
   1012 }
   1013 
   1014 static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
   1015   unsigned mod = proto[0];
   1016 
   1017   if (mod == 'v' || mod == 'f')
   1018     mod = proto[1];
   1019 
   1020   bool quad = false;
   1021   bool poly = false;
   1022   bool usgn = false;
   1023   bool scal = false;
   1024   bool cnst = false;
   1025   bool pntr = false;
   1026 
   1027   // Base type to get the type string for.
   1028   char type = ClassifyType(typestr, quad, poly, usgn);
   1029 
   1030   // Based on the modifying character, change the type and width if necessary.
   1031   type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
   1032 
   1033   NeonTypeFlags::EltType ET;
   1034   switch (type) {
   1035     case 'c':
   1036       ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8;
   1037       break;
   1038     case 's':
   1039       ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16;
   1040       break;
   1041     case 'i':
   1042       ET = NeonTypeFlags::Int32;
   1043       break;
   1044     case 'l':
   1045       ET = NeonTypeFlags::Int64;
   1046       break;
   1047     case 'h':
   1048       ET = NeonTypeFlags::Float16;
   1049       break;
   1050     case 'f':
   1051       ET = NeonTypeFlags::Float32;
   1052       break;
   1053     default:
   1054       PrintFatalError("unhandled type!");
   1055   }
   1056   NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g');
   1057   return Flags.getFlags();
   1058 }
   1059 
   1060 // Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
   1061 static std::string GenBuiltin(const std::string &name, const std::string &proto,
   1062                               StringRef typestr, ClassKind ck) {
   1063   std::string s;
   1064 
   1065   // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
   1066   // sret-like argument.
   1067   bool sret = (proto[0] >= '2' && proto[0] <= '4');
   1068 
   1069   bool define = UseMacro(proto);
   1070 
   1071   // Check if the prototype has a scalar operand with the type of the vector
   1072   // elements.  If not, bitcasting the args will take care of arg checking.
   1073   // The actual signedness etc. will be taken care of with special enums.
   1074   if (proto.find('s') == std::string::npos)
   1075     ck = ClassB;
   1076 
   1077   if (proto[0] != 'v') {
   1078     std::string ts = TypeString(proto[0], typestr);
   1079 
   1080     if (define) {
   1081       if (sret)
   1082         s += ts + " r; ";
   1083       else
   1084         s += "(" + ts + ")";
   1085     } else if (sret) {
   1086       s += ts + " r; ";
   1087     } else {
   1088       s += "return (" + ts + ")";
   1089     }
   1090   }
   1091 
   1092   bool splat = proto.find('a') != std::string::npos;
   1093 
   1094   s += "__builtin_neon_";
   1095   if (splat) {
   1096     // Call the non-splat builtin: chop off the "_n" suffix from the name.
   1097     std::string vname(name, 0, name.size()-2);
   1098     s += MangleName(vname, typestr, ck);
   1099   } else {
   1100     s += MangleName(name, typestr, ck);
   1101   }
   1102   s += "(";
   1103 
   1104   // Pass the address of the return variable as the first argument to sret-like
   1105   // builtins.
   1106   if (sret)
   1107     s += "&r, ";
   1108 
   1109   char arg = 'a';
   1110   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
   1111     std::string args = std::string(&arg, 1);
   1112 
   1113     // Use the local temporaries instead of the macro arguments.
   1114     args = "__" + args;
   1115 
   1116     bool argQuad = false;
   1117     bool argPoly = false;
   1118     bool argUsgn = false;
   1119     bool argScalar = false;
   1120     bool dummy = false;
   1121     char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
   1122     argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
   1123                       dummy, dummy);
   1124 
   1125     // Handle multiple-vector values specially, emitting each subvector as an
   1126     // argument to the __builtin.
   1127     if (proto[i] >= '2' && proto[i] <= '4') {
   1128       // Check if an explicit cast is needed.
   1129       if (argType != 'c' || argPoly || argUsgn)
   1130         args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;
   1131 
   1132       for (unsigned vi = 0, ve = proto[i] - '0'; vi != ve; ++vi) {
   1133         s += args + ".val[" + utostr(vi) + "]";
   1134         if ((vi + 1) < ve)
   1135           s += ", ";
   1136       }
   1137       if ((i + 1) < e)
   1138         s += ", ";
   1139 
   1140       continue;
   1141     }
   1142 
   1143     if (splat && (i + 1) == e)
   1144       args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);
   1145 
   1146     // Check if an explicit cast is needed.
   1147     if ((splat || !argScalar) &&
   1148         ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
   1149       std::string argTypeStr = "c";
   1150       if (ck != ClassB)
   1151         argTypeStr = argType;
   1152       if (argQuad)
   1153         argTypeStr = "Q" + argTypeStr;
   1154       args = "(" + TypeString('d', argTypeStr) + ")" + args;
   1155     }
   1156 
   1157     s += args;
   1158     if ((i + 1) < e)
   1159       s += ", ";
   1160   }
   1161 
   1162   // Extra constant integer to hold type class enum for this function, e.g. s8
   1163   if (ck == ClassB)
   1164     s += ", " + utostr(GetNeonEnum(proto, typestr));
   1165 
   1166   s += ");";
   1167 
   1168   if (proto[0] != 'v' && sret) {
   1169     if (define)
   1170       s += " r;";
   1171     else
   1172       s += " return r;";
   1173   }
   1174   return s;
   1175 }
   1176 
   1177 static std::string GenBuiltinDef(const std::string &name,
   1178                                  const std::string &proto,
   1179                                  StringRef typestr, ClassKind ck) {
   1180   std::string s("BUILTIN(__builtin_neon_");
   1181 
   1182   // If all types are the same size, bitcasting the args will take care
   1183   // of arg checking.  The actual signedness etc. will be taken care of with
   1184   // special enums.
   1185   if (proto.find('s') == std::string::npos)
   1186     ck = ClassB;
   1187 
   1188   s += MangleName(name, typestr, ck);
   1189   s += ", \"";
   1190 
   1191   for (unsigned i = 0, e = proto.size(); i != e; ++i)
   1192     s += BuiltinTypeString(proto[i], typestr, ck, i == 0);
   1193 
   1194   // Extra constant integer to hold type class enum for this function, e.g. s8
   1195   if (ck == ClassB)
   1196     s += "i";
   1197 
   1198   s += "\", \"n\")";
   1199   return s;
   1200 }
   1201 
   1202 static std::string GenIntrinsic(const std::string &name,
   1203                                 const std::string &proto,
   1204                                 StringRef outTypeStr, StringRef inTypeStr,
   1205                                 OpKind kind, ClassKind classKind) {
   1206   assert(!proto.empty() && "");
   1207   bool define = UseMacro(proto) && kind != OpUnavailable;
   1208   std::string s;
   1209 
   1210   // static always inline + return type
   1211   if (define)
   1212     s += "#define ";
   1213   else
   1214     s += "__ai " + TypeString(proto[0], outTypeStr) + " ";
   1215 
   1216   // Function name with type suffix
   1217   std::string mangledName = MangleName(name, outTypeStr, ClassS);
   1218   if (outTypeStr != inTypeStr) {
   1219     // If the input type is different (e.g., for vreinterpret), append a suffix
   1220     // for the input type.  String off a "Q" (quad) prefix so that MangleName
   1221     // does not insert another "q" in the name.
   1222     unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
   1223     StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
   1224     mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
   1225   }
   1226   s += mangledName;
   1227 
   1228   // Function arguments
   1229   s += GenArgs(proto, inTypeStr);
   1230 
   1231   // Definition.
   1232   if (define) {
   1233     s += " __extension__ ({ \\\n  ";
   1234     s += GenMacroLocals(proto, inTypeStr);
   1235   } else if (kind == OpUnavailable) {
   1236     s += " __attribute__((unavailable));\n";
   1237     return s;
   1238   } else
   1239     s += " {\n  ";
   1240 
   1241   if (kind != OpNone)
   1242     s += GenOpString(kind, proto, outTypeStr);
   1243   else
   1244     s += GenBuiltin(name, proto, outTypeStr, classKind);
   1245   if (define)
   1246     s += " })";
   1247   else
   1248     s += " }";
   1249   s += "\n";
   1250   return s;
   1251 }
   1252 
   1253 /// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
   1254 /// is comprised of type definitions and function declarations.
   1255 void NeonEmitter::run(raw_ostream &OS) {
   1256   OS <<
   1257     "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
   1258     "---===\n"
   1259     " *\n"
   1260     " * Permission is hereby granted, free of charge, to any person obtaining "
   1261     "a copy\n"
   1262     " * of this software and associated documentation files (the \"Software\"),"
   1263     " to deal\n"
   1264     " * in the Software without restriction, including without limitation the "
   1265     "rights\n"
   1266     " * to use, copy, modify, merge, publish, distribute, sublicense, "
   1267     "and/or sell\n"
   1268     " * copies of the Software, and to permit persons to whom the Software is\n"
   1269     " * furnished to do so, subject to the following conditions:\n"
   1270     " *\n"
   1271     " * The above copyright notice and this permission notice shall be "
   1272     "included in\n"
   1273     " * all copies or substantial portions of the Software.\n"
   1274     " *\n"
   1275     " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
   1276     "EXPRESS OR\n"
   1277     " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
   1278     "MERCHANTABILITY,\n"
   1279     " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
   1280     "SHALL THE\n"
   1281     " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
   1282     "OTHER\n"
   1283     " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
   1284     "ARISING FROM,\n"
   1285     " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
   1286     "DEALINGS IN\n"
   1287     " * THE SOFTWARE.\n"
   1288     " *\n"
   1289     " *===--------------------------------------------------------------------"
   1290     "---===\n"
   1291     " */\n\n";
   1292 
   1293   OS << "#ifndef __ARM_NEON_H\n";
   1294   OS << "#define __ARM_NEON_H\n\n";
   1295 
   1296   OS << "#ifndef __ARM_NEON__\n";
   1297   OS << "#error \"NEON support not enabled\"\n";
   1298   OS << "#endif\n\n";
   1299 
   1300   OS << "#include <stdint.h>\n\n";
   1301 
   1302   // Emit NEON-specific scalar typedefs.
   1303   OS << "typedef float float32_t;\n";
   1304   OS << "typedef int8_t poly8_t;\n";
   1305   OS << "typedef int16_t poly16_t;\n";
   1306   OS << "typedef uint16_t float16_t;\n";
   1307 
   1308   // Emit Neon vector typedefs.
   1309   std::string TypedefTypes("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfPcQPcPsQPs");
   1310   SmallVector<StringRef, 24> TDTypeVec;
   1311   ParseTypes(0, TypedefTypes, TDTypeVec);
   1312 
   1313   // Emit vector typedefs.
   1314   for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
   1315     bool dummy, quad = false, poly = false;
   1316     (void) ClassifyType(TDTypeVec[i], quad, poly, dummy);
   1317     if (poly)
   1318       OS << "typedef __attribute__((neon_polyvector_type(";
   1319     else
   1320       OS << "typedef __attribute__((neon_vector_type(";
   1321 
   1322     unsigned nElts = GetNumElements(TDTypeVec[i], quad);
   1323     OS << utostr(nElts) << "))) ";
   1324     if (nElts < 10)
   1325       OS << " ";
   1326 
   1327     OS << TypeString('s', TDTypeVec[i]);
   1328     OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";
   1329   }
   1330   OS << "\n";
   1331 
   1332   // Emit struct typedefs.
   1333   for (unsigned vi = 2; vi != 5; ++vi) {
   1334     for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
   1335       std::string ts = TypeString('d', TDTypeVec[i]);
   1336       std::string vs = TypeString('0' + vi, TDTypeVec[i]);
   1337       OS << "typedef struct " << vs << " {\n";
   1338       OS << "  " << ts << " val";
   1339       OS << "[" << utostr(vi) << "]";
   1340       OS << ";\n} ";
   1341       OS << vs << ";\n\n";
   1342     }
   1343   }
   1344 
   1345   OS<<"#define __ai static __attribute__((__always_inline__, __nodebug__))\n\n";
   1346 
   1347   std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
   1348 
   1349   // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
   1350   // intrinsics.  (Some of the saturating multiply instructions are also
   1351   // used to implement the corresponding "_lane" variants, but tablegen
   1352   // sorts the records into alphabetical order so that the "_lane" variants
   1353   // come after the intrinsics they use.)
   1354   emitIntrinsic(OS, Records.getDef("VMOVL"));
   1355   emitIntrinsic(OS, Records.getDef("VMULL"));
   1356   emitIntrinsic(OS, Records.getDef("VABD"));
   1357 
   1358   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
   1359     Record *R = RV[i];
   1360     if (R->getName() != "VMOVL" &&
   1361         R->getName() != "VMULL" &&
   1362         R->getName() != "VABD")
   1363       emitIntrinsic(OS, R);
   1364   }
   1365 
   1366   OS << "#undef __ai\n\n";
   1367   OS << "#endif /* __ARM_NEON_H */\n";
   1368 }
   1369 
   1370 /// emitIntrinsic - Write out the arm_neon.h header file definitions for the
   1371 /// intrinsics specified by record R.
   1372 void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R) {
   1373   std::string name = R->getValueAsString("Name");
   1374   std::string Proto = R->getValueAsString("Prototype");
   1375   std::string Types = R->getValueAsString("Types");
   1376 
   1377   SmallVector<StringRef, 16> TypeVec;
   1378   ParseTypes(R, Types, TypeVec);
   1379 
   1380   OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
   1381 
   1382   ClassKind classKind = ClassNone;
   1383   if (R->getSuperClasses().size() >= 2)
   1384     classKind = ClassMap[R->getSuperClasses()[1]];
   1385   if (classKind == ClassNone && kind == OpNone)
   1386     PrintFatalError(R->getLoc(), "Builtin has no class kind");
   1387 
   1388   for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
   1389     if (kind == OpReinterpret) {
   1390       bool outQuad = false;
   1391       bool dummy = false;
   1392       (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
   1393       for (unsigned srcti = 0, srcte = TypeVec.size();
   1394            srcti != srcte; ++srcti) {
   1395         bool inQuad = false;
   1396         (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
   1397         if (srcti == ti || inQuad != outQuad)
   1398           continue;
   1399         OS << GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
   1400                            OpCast, ClassS);
   1401       }
   1402     } else {
   1403       OS << GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti],
   1404                          kind, classKind);
   1405     }
   1406   }
   1407   OS << "\n";
   1408 }
   1409 
   1410 static unsigned RangeFromType(const char mod, StringRef typestr) {
   1411   // base type to get the type string for.
   1412   bool quad = false, dummy = false;
   1413   char type = ClassifyType(typestr, quad, dummy, dummy);
   1414   type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);
   1415 
   1416   switch (type) {
   1417     case 'c':
   1418       return (8 << (int)quad) - 1;
   1419     case 'h':
   1420     case 's':
   1421       return (4 << (int)quad) - 1;
   1422     case 'f':
   1423     case 'i':
   1424       return (2 << (int)quad) - 1;
   1425     case 'l':
   1426       return (1 << (int)quad) - 1;
   1427     default:
   1428       PrintFatalError("unhandled type!");
   1429   }
   1430 }
   1431 
   1432 /// runHeader - Emit a file with sections defining:
   1433 /// 1. the NEON section of BuiltinsARM.def.
   1434 /// 2. the SemaChecking code for the type overload checking.
   1435 /// 3. the SemaChecking code for validation of intrinsic immediate arguments.
   1436 void NeonEmitter::runHeader(raw_ostream &OS) {
   1437   std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
   1438 
   1439   StringMap<OpKind> EmittedMap;
   1440 
   1441   // Generate BuiltinsARM.def for NEON
   1442   OS << "#ifdef GET_NEON_BUILTINS\n";
   1443   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
   1444     Record *R = RV[i];
   1445     OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
   1446     if (k != OpNone)
   1447       continue;
   1448 
   1449     std::string Proto = R->getValueAsString("Prototype");
   1450 
   1451     // Functions with 'a' (the splat code) in the type prototype should not get
   1452     // their own builtin as they use the non-splat variant.
   1453     if (Proto.find('a') != std::string::npos)
   1454       continue;
   1455 
   1456     std::string Types = R->getValueAsString("Types");
   1457     SmallVector<StringRef, 16> TypeVec;
   1458     ParseTypes(R, Types, TypeVec);
   1459 
   1460     if (R->getSuperClasses().size() < 2)
   1461       PrintFatalError(R->getLoc(), "Builtin has no class kind");
   1462 
   1463     std::string name = R->getValueAsString("Name");
   1464     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
   1465 
   1466     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
   1467       // Generate the BuiltinsARM.def declaration for this builtin, ensuring
   1468       // that each unique BUILTIN() macro appears only once in the output
   1469       // stream.
   1470       std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
   1471       if (EmittedMap.count(bd))
   1472         continue;
   1473 
   1474       EmittedMap[bd] = OpNone;
   1475       OS << bd << "\n";
   1476     }
   1477   }
   1478   OS << "#endif\n\n";
   1479 
   1480   // Generate the overloaded type checking code for SemaChecking.cpp
   1481   OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
   1482   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
   1483     Record *R = RV[i];
   1484     OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
   1485     if (k != OpNone)
   1486       continue;
   1487 
   1488     std::string Proto = R->getValueAsString("Prototype");
   1489     std::string Types = R->getValueAsString("Types");
   1490     std::string name = R->getValueAsString("Name");
   1491 
   1492     // Functions with 'a' (the splat code) in the type prototype should not get
   1493     // their own builtin as they use the non-splat variant.
   1494     if (Proto.find('a') != std::string::npos)
   1495       continue;
   1496 
   1497     // Functions which have a scalar argument cannot be overloaded, no need to
   1498     // check them if we are emitting the type checking code.
   1499     if (Proto.find('s') != std::string::npos)
   1500       continue;
   1501 
   1502     SmallVector<StringRef, 16> TypeVec;
   1503     ParseTypes(R, Types, TypeVec);
   1504 
   1505     if (R->getSuperClasses().size() < 2)
   1506       PrintFatalError(R->getLoc(), "Builtin has no class kind");
   1507 
   1508     int si = -1, qi = -1;
   1509     uint64_t mask = 0, qmask = 0;
   1510     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
   1511       // Generate the switch case(s) for this builtin for the type validation.
   1512       bool quad = false, poly = false, usgn = false;
   1513       (void) ClassifyType(TypeVec[ti], quad, poly, usgn);
   1514 
   1515       if (quad) {
   1516         qi = ti;
   1517         qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
   1518       } else {
   1519         si = ti;
   1520         mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
   1521       }
   1522     }
   1523 
   1524     // Check if the builtin function has a pointer or const pointer argument.
   1525     int PtrArgNum = -1;
   1526     bool HasConstPtr = false;
   1527     for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
   1528       char ArgType = Proto[arg];
   1529       if (ArgType == 'c') {
   1530         HasConstPtr = true;
   1531         PtrArgNum = arg - 1;
   1532         break;
   1533       }
   1534       if (ArgType == 'p') {
   1535         PtrArgNum = arg - 1;
   1536         break;
   1537       }
   1538     }
   1539     // For sret builtins, adjust the pointer argument index.
   1540     if (PtrArgNum >= 0 && (Proto[0] >= '2' && Proto[0] <= '4'))
   1541       PtrArgNum += 1;
   1542 
   1543     // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
   1544     // and vst1_lane intrinsics.  Using a pointer to the vector element
   1545     // type with one of those operations causes codegen to select an aligned
   1546     // load/store instruction.  If you want an unaligned operation,
   1547     // the pointer argument needs to have less alignment than element type,
   1548     // so just accept any pointer type.
   1549     if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
   1550       PtrArgNum = -1;
   1551       HasConstPtr = false;
   1552     }
   1553 
   1554     if (mask) {
   1555       OS << "case ARM::BI__builtin_neon_"
   1556          << MangleName(name, TypeVec[si], ClassB)
   1557          << ": mask = " << "0x" << utohexstr(mask) << "ULL";
   1558       if (PtrArgNum >= 0)
   1559         OS << "; PtrArgNum = " << PtrArgNum;
   1560       if (HasConstPtr)
   1561         OS << "; HasConstPtr = true";
   1562       OS << "; break;\n";
   1563     }
   1564     if (qmask) {
   1565       OS << "case ARM::BI__builtin_neon_"
   1566          << MangleName(name, TypeVec[qi], ClassB)
   1567          << ": mask = " << "0x" << utohexstr(qmask) << "ULL";
   1568       if (PtrArgNum >= 0)
   1569         OS << "; PtrArgNum = " << PtrArgNum;
   1570       if (HasConstPtr)
   1571         OS << "; HasConstPtr = true";
   1572       OS << "; break;\n";
   1573     }
   1574   }
   1575   OS << "#endif\n\n";
   1576 
   1577   // Generate the intrinsic range checking code for shift/lane immediates.
   1578   OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
   1579   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
   1580     Record *R = RV[i];
   1581 
   1582     OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
   1583     if (k != OpNone)
   1584       continue;
   1585 
   1586     std::string name = R->getValueAsString("Name");
   1587     std::string Proto = R->getValueAsString("Prototype");
   1588     std::string Types = R->getValueAsString("Types");
   1589 
   1590     // Functions with 'a' (the splat code) in the type prototype should not get
   1591     // their own builtin as they use the non-splat variant.
   1592     if (Proto.find('a') != std::string::npos)
   1593       continue;
   1594 
   1595     // Functions which do not have an immediate do not need to have range
   1596     // checking code emitted.
   1597     size_t immPos = Proto.find('i');
   1598     if (immPos == std::string::npos)
   1599       continue;
   1600 
   1601     SmallVector<StringRef, 16> TypeVec;
   1602     ParseTypes(R, Types, TypeVec);
   1603 
   1604     if (R->getSuperClasses().size() < 2)
   1605       PrintFatalError(R->getLoc(), "Builtin has no class kind");
   1606 
   1607     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
   1608 
   1609     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
   1610       std::string namestr, shiftstr, rangestr;
   1611 
   1612       if (R->getValueAsBit("isVCVT_N")) {
   1613         // VCVT between floating- and fixed-point values takes an immediate
   1614         // in the range 1 to 32.
   1615         ck = ClassB;
   1616         rangestr = "l = 1; u = 31"; // upper bound = l + u
   1617       } else if (Proto.find('s') == std::string::npos) {
   1618         // Builtins which are overloaded by type will need to have their upper
   1619         // bound computed at Sema time based on the type constant.
   1620         ck = ClassB;
   1621         if (R->getValueAsBit("isShift")) {
   1622           shiftstr = ", true";
   1623 
   1624           // Right shifts have an 'r' in the name, left shifts do not.
   1625           if (name.find('r') != std::string::npos)
   1626             rangestr = "l = 1; ";
   1627         }
   1628         rangestr += "u = RFT(TV" + shiftstr + ")";
   1629       } else {
   1630         // The immediate generally refers to a lane in the preceding argument.
   1631         assert(immPos > 0 && "unexpected immediate operand");
   1632         rangestr = "u = " + utostr(RangeFromType(Proto[immPos-1], TypeVec[ti]));
   1633       }
   1634       // Make sure cases appear only once by uniquing them in a string map.
   1635       namestr = MangleName(name, TypeVec[ti], ck);
   1636       if (EmittedMap.count(namestr))
   1637         continue;
   1638       EmittedMap[namestr] = OpNone;
   1639 
   1640       // Calculate the index of the immediate that should be range checked.
   1641       unsigned immidx = 0;
   1642 
   1643       // Builtins that return a struct of multiple vectors have an extra
   1644       // leading arg for the struct return.
   1645       if (Proto[0] >= '2' && Proto[0] <= '4')
   1646         ++immidx;
   1647 
   1648       // Add one to the index for each argument until we reach the immediate
   1649       // to be checked.  Structs of vectors are passed as multiple arguments.
   1650       for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
   1651         switch (Proto[ii]) {
   1652           default:  immidx += 1; break;
   1653           case '2': immidx += 2; break;
   1654           case '3': immidx += 3; break;
   1655           case '4': immidx += 4; break;
   1656           case 'i': ie = ii + 1; break;
   1657         }
   1658       }
   1659       OS << "case ARM::BI__builtin_neon_" << MangleName(name, TypeVec[ti], ck)
   1660          << ": i = " << immidx << "; " << rangestr << "; break;\n";
   1661     }
   1662   }
   1663   OS << "#endif\n\n";
   1664 }
   1665 
   1666 /// GenTest - Write out a test for the intrinsic specified by the name and
   1667 /// type strings, including the embedded patterns for FileCheck to match.
   1668 static std::string GenTest(const std::string &name,
   1669                            const std::string &proto,
   1670                            StringRef outTypeStr, StringRef inTypeStr,
   1671                            bool isShift) {
   1672   assert(!proto.empty() && "");
   1673   std::string s;
   1674 
   1675   // Function name with type suffix
   1676   std::string mangledName = MangleName(name, outTypeStr, ClassS);
   1677   if (outTypeStr != inTypeStr) {
   1678     // If the input type is different (e.g., for vreinterpret), append a suffix
   1679     // for the input type.  String off a "Q" (quad) prefix so that MangleName
   1680     // does not insert another "q" in the name.
   1681     unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
   1682     StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
   1683     mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
   1684   }
   1685 
   1686   // Emit the FileCheck patterns.
   1687   s += "// CHECK: test_" + mangledName + "\n";
   1688   // s += "// CHECK: \n"; // FIXME: + expected instruction opcode.
   1689 
   1690   // Emit the start of the test function.
   1691   s += TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
   1692   char arg = 'a';
   1693   std::string comma;
   1694   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
   1695     // Do not create arguments for values that must be immediate constants.
   1696     if (proto[i] == 'i')
   1697       continue;
   1698     s += comma + TypeString(proto[i], inTypeStr) + " ";
   1699     s.push_back(arg);
   1700     comma = ", ";
   1701   }
   1702   s += ") {\n  ";
   1703 
   1704   if (proto[0] != 'v')
   1705     s += "return ";
   1706   s += mangledName + "(";
   1707   arg = 'a';
   1708   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
   1709     if (proto[i] == 'i') {
   1710       // For immediate operands, test the maximum value.
   1711       if (isShift)
   1712         s += "1"; // FIXME
   1713       else
   1714         // The immediate generally refers to a lane in the preceding argument.
   1715         s += utostr(RangeFromType(proto[i-1], inTypeStr));
   1716     } else {
   1717       s.push_back(arg);
   1718     }
   1719     if ((i + 1) < e)
   1720       s += ", ";
   1721   }
   1722   s += ");\n}\n\n";
   1723   return s;
   1724 }
   1725 
   1726 /// runTests - Write out a complete set of tests for all of the Neon
   1727 /// intrinsics.
   1728 void NeonEmitter::runTests(raw_ostream &OS) {
   1729   OS <<
   1730     "// RUN: %clang_cc1 -triple thumbv7-apple-darwin \\\n"
   1731     "// RUN:  -target-cpu cortex-a9 -ffreestanding -S -o - %s | FileCheck %s\n"
   1732     "\n"
   1733     "#include <arm_neon.h>\n"
   1734     "\n";
   1735 
   1736   std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
   1737   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
   1738     Record *R = RV[i];
   1739     std::string name = R->getValueAsString("Name");
   1740     std::string Proto = R->getValueAsString("Prototype");
   1741     std::string Types = R->getValueAsString("Types");
   1742     bool isShift = R->getValueAsBit("isShift");
   1743 
   1744     SmallVector<StringRef, 16> TypeVec;
   1745     ParseTypes(R, Types, TypeVec);
   1746 
   1747     OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
   1748     if (kind == OpUnavailable)
   1749       continue;
   1750     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
   1751       if (kind == OpReinterpret) {
   1752         bool outQuad = false;
   1753         bool dummy = false;
   1754         (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
   1755         for (unsigned srcti = 0, srcte = TypeVec.size();
   1756              srcti != srcte; ++srcti) {
   1757           bool inQuad = false;
   1758           (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
   1759           if (srcti == ti || inQuad != outQuad)
   1760             continue;
   1761           OS << GenTest(name, Proto, TypeVec[ti], TypeVec[srcti], isShift);
   1762         }
   1763       } else {
   1764         OS << GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift);
   1765       }
   1766     }
   1767     OS << "\n";
   1768   }
   1769 }
   1770 
   1771 namespace clang {
   1772 void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
   1773   NeonEmitter(Records).run(OS);
   1774 }
   1775 void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
   1776   NeonEmitter(Records).runHeader(OS);
   1777 }
   1778 void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
   1779   NeonEmitter(Records).runTests(OS);
   1780 }
   1781 } // End namespace clang
   1782