Home | History | Annotate | Download | only in TableGen
      1 //===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This tablegen backend is responsible for emitting arm_neon.h, which includes
     11 // a declaration and definition of each function specified by the ARM NEON
     12 // compiler interface.  See ARM document DUI0348B.
     13 //
     14 // Each NEON instruction is implemented in terms of 1 or more functions which
     15 // are suffixed with the element type of the input vectors.  Functions may be
     16 // implemented in terms of generic vector operations such as +, *, -, etc. or
     17 // by calling a __builtin_-prefixed function which will be handled by clang's
     18 // CodeGen library.
     19 //
     20 // Additional validation code can be generated by this file when runHeader() is
     21 // called, rather than the normal run() entry point.  A complete set of tests
     22 // for Neon intrinsics can be generated by calling the runTests() entry point.
     23 //
     24 //===----------------------------------------------------------------------===//
     25 
     26 #include "llvm/ADT/DenseMap.h"
     27 #include "llvm/ADT/SmallString.h"
     28 #include "llvm/ADT/SmallVector.h"
     29 #include "llvm/ADT/StringExtras.h"
     30 #include "llvm/ADT/StringMap.h"
     31 #include "llvm/Support/ErrorHandling.h"
     32 #include "llvm/TableGen/Error.h"
     33 #include "llvm/TableGen/Record.h"
     34 #include "llvm/TableGen/TableGenBackend.h"
     35 #include <string>
     36 using namespace llvm;
     37 
     /// OpKind - The operation kinds an intrinsic definition can name.  Each
     /// enumerator is registered under an "OP_*" string in NeonEmitter's OpMap.
     /// Enumerators are implicitly numbered in declaration order.
     enum OpKind {
       // OP_NONE / OP_UNAVAILABLE.
       OpNone, OpUnavailable,

       // OP_ADD* / OP_SUB*.
       OpAdd, OpAddl, OpAddw,
       OpSub, OpSubl, OpSubw,

       // OP_MUL / OP_MLA* / OP_MLS*.
       OpMul, OpMla, OpMlal, OpMls, OpMlsl,

       // The "_N" spellings of the above.
       OpMulN, OpMlaN, OpMlsN, OpMlalN, OpMlslN,

       // The "_LN" spellings.
       OpMulLane, OpMullLane, OpMlaLane, OpMlsLane, OpMlalLane, OpMlslLane,
       OpQDMullLane, OpQDMlalLane, OpQDMlslLane, OpQDMulhLane, OpQRDMulhLane,

       // OP_EQ .. OP_LT.
       OpEq, OpGe, OpLe, OpGt, OpLt,

       // OP_NEG .. OP_ORN.
       OpNeg, OpNot, OpAnd, OpOr, OpXor, OpAndNot, OpOrNot,

       // OP_CAST, OP_CONC, OP_DUP*, OP_HI, OP_LO, OP_SEL.
       OpCast, OpConcat, OpDup, OpDupLane, OpHi, OpLo, OpSelect,

       // OP_REV*, OP_REINT.
       OpRev16, OpRev32, OpRev64, OpReinterpret,

       // OP_ABDL, OP_ABA, OP_ABAL.
       OpAbdl, OpAba, OpAbal,

       // OP_DIV.
       OpDiv
     };
     96 
     /// ClassKind - How the type suffix of an intrinsic name is spelled.
     enum ClassKind {
       ClassNone,

       /// Generic integer instruction, e.g., "i8" suffix.
       ClassI,

       /// Signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix.
       ClassS,

       /// Width-specific instruction, e.g., "8" suffix.
       ClassW,

       /// Bitcast arguments with enum argument to specify type.
       ClassB,

       /// Logical instructions which are op instructions but for which no
       /// suffix must be emitted in the tests.
       ClassL,

       /// Instructions which are not tested since they are not true
       /// instructions.
       ClassNoTest
     };
    109 
    /// NeonTypeFlags - Packed description of the type an overloaded Neon builtin
    /// operates on: an element type in the low bits plus "unsigned" and "quad"
    /// bits.  The layout must be kept in sync with the flags in
    /// include/clang/Basic/TargetBuiltins.h.
    namespace {
    class NeonTypeFlags {
    public:
      /// Element type stored in the low bits of the flags word.
      enum EltType {
        Int8, Int16, Int32, Int64,
        Poly8, Poly16,
        Float16, Float32, Float64
      };

    private:
      enum {
        EltTypeMask = 0xf,    // bits holding the EltType
        UnsignedFlag = 0x10,  // set for unsigned element types
        QuadFlag = 0x20       // set for quad vectors
      };

      uint32_t Flags;

    public:
      /// Wrap an already-encoded flags word.
      NeonTypeFlags(unsigned F) : Flags(F) {}

      /// Encode an element type together with its unsigned/quad bits.
      NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad)
          : Flags(ET | (IsUnsigned ? UnsignedFlag : 0) | (IsQuad ? QuadFlag : 0)) {}

      /// Return the raw encoded word.
      uint32_t getFlags() const { return Flags; }
    };
    } // end anonymous namespace
    146 
    147 namespace {
    148 class NeonEmitter {
    149   RecordKeeper &Records;
    150   StringMap<OpKind> OpMap;
    151   DenseMap<Record*, ClassKind> ClassMap;
    152 
    153 public:
    154   NeonEmitter(RecordKeeper &R) : Records(R) {
    155     OpMap["OP_NONE"]  = OpNone;
    156     OpMap["OP_UNAVAILABLE"] = OpUnavailable;
    157     OpMap["OP_ADD"]   = OpAdd;
    158     OpMap["OP_ADDL"]  = OpAddl;
    159     OpMap["OP_ADDW"]  = OpAddw;
    160     OpMap["OP_SUB"]   = OpSub;
    161     OpMap["OP_SUBL"]  = OpSubl;
    162     OpMap["OP_SUBW"]  = OpSubw;
    163     OpMap["OP_MUL"]   = OpMul;
    164     OpMap["OP_MLA"]   = OpMla;
    165     OpMap["OP_MLAL"]  = OpMlal;
    166     OpMap["OP_MLS"]   = OpMls;
    167     OpMap["OP_MLSL"]  = OpMlsl;
    168     OpMap["OP_MUL_N"] = OpMulN;
    169     OpMap["OP_MLA_N"] = OpMlaN;
    170     OpMap["OP_MLS_N"] = OpMlsN;
    171     OpMap["OP_MLAL_N"] = OpMlalN;
    172     OpMap["OP_MLSL_N"] = OpMlslN;
    173     OpMap["OP_MUL_LN"]= OpMulLane;
    174     OpMap["OP_MULL_LN"] = OpMullLane;
    175     OpMap["OP_MLA_LN"]= OpMlaLane;
    176     OpMap["OP_MLS_LN"]= OpMlsLane;
    177     OpMap["OP_MLAL_LN"] = OpMlalLane;
    178     OpMap["OP_MLSL_LN"] = OpMlslLane;
    179     OpMap["OP_QDMULL_LN"] = OpQDMullLane;
    180     OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
    181     OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
    182     OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
    183     OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
    184     OpMap["OP_EQ"]    = OpEq;
    185     OpMap["OP_GE"]    = OpGe;
    186     OpMap["OP_LE"]    = OpLe;
    187     OpMap["OP_GT"]    = OpGt;
    188     OpMap["OP_LT"]    = OpLt;
    189     OpMap["OP_NEG"]   = OpNeg;
    190     OpMap["OP_NOT"]   = OpNot;
    191     OpMap["OP_AND"]   = OpAnd;
    192     OpMap["OP_OR"]    = OpOr;
    193     OpMap["OP_XOR"]   = OpXor;
    194     OpMap["OP_ANDN"]  = OpAndNot;
    195     OpMap["OP_ORN"]   = OpOrNot;
    196     OpMap["OP_CAST"]  = OpCast;
    197     OpMap["OP_CONC"]  = OpConcat;
    198     OpMap["OP_HI"]    = OpHi;
    199     OpMap["OP_LO"]    = OpLo;
    200     OpMap["OP_DUP"]   = OpDup;
    201     OpMap["OP_DUP_LN"] = OpDupLane;
    202     OpMap["OP_SEL"]   = OpSelect;
    203     OpMap["OP_REV16"] = OpRev16;
    204     OpMap["OP_REV32"] = OpRev32;
    205     OpMap["OP_REV64"] = OpRev64;
    206     OpMap["OP_REINT"] = OpReinterpret;
    207     OpMap["OP_ABDL"]  = OpAbdl;
    208     OpMap["OP_ABA"]   = OpAba;
    209     OpMap["OP_ABAL"]  = OpAbal;
    210     OpMap["OP_DIV"] = OpDiv;
    211 
    212     Record *SI = R.getClass("SInst");
    213     Record *II = R.getClass("IInst");
    214     Record *WI = R.getClass("WInst");
    215     Record *SOpI = R.getClass("SOpInst");
    216     Record *IOpI = R.getClass("IOpInst");
    217     Record *WOpI = R.getClass("WOpInst");
    218     Record *LOpI = R.getClass("LOpInst");
    219     Record *NoTestOpI = R.getClass("NoTestOpInst");
    220 
    221     ClassMap[SI] = ClassS;
    222     ClassMap[II] = ClassI;
    223     ClassMap[WI] = ClassW;
    224     ClassMap[SOpI] = ClassS;
    225     ClassMap[IOpI] = ClassI;
    226     ClassMap[WOpI] = ClassW;
    227     ClassMap[LOpI] = ClassL;
    228     ClassMap[NoTestOpI] = ClassNoTest;
    229   }
    230 
    231   // run - Emit arm_neon.h.inc
    232   void run(raw_ostream &o);
    233 
    234   // runHeader - Emit all the __builtin prototypes used in arm_neon.h
    235   void runHeader(raw_ostream &o);
    236 
    237   // runTests - Emit tests for all the Neon intrinsics.
    238   void runTests(raw_ostream &o);
    239 
    240 private:
    241   void emitIntrinsic(raw_ostream &OS, Record *R,
    242                      StringMap<ClassKind> &EmittedMap);
    243   void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap,
    244                       bool isA64GenBuiltinDef);
    245   void genOverloadTypeCheckCode(raw_ostream &OS,
    246                                 StringMap<ClassKind> &A64IntrinsicMap,
    247                                 bool isA64TypeCheck);
    248   void genIntrinsicRangeCheckCode(raw_ostream &OS,
    249                                   StringMap<ClassKind> &A64IntrinsicMap,
    250                                   bool isA64RangeCheck);
    251   void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
    252                      bool isA64TestGen);
    253 };
    254 } // end anonymous namespace
    255 
    256 /// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
    257 /// which each StringRef representing a single type declared in the string.
    258 /// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
    259 /// 2xfloat and 4xfloat respectively.
    260 static void ParseTypes(Record *r, std::string &s,
    261                        SmallVectorImpl<StringRef> &TV) {
    262   const char *data = s.data();
    263   int len = 0;
    264 
    265   for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
    266     if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U')
    267       continue;
    268 
    269     switch (data[len]) {
    270       case 'c':
    271       case 's':
    272       case 'i':
    273       case 'l':
    274       case 'h':
    275       case 'f':
    276       case 'd':
    277         break;
    278       default:
    279         PrintFatalError(r->getLoc(),
    280                       "Unexpected letter: " + std::string(data + len, 1));
    281     }
    282     TV.push_back(StringRef(data, len + 1));
    283     data += len + 1;
    284     len = -1;
    285   }
    286 }
    287 
    288 /// Widen - Convert a type code into the next wider type.  char -> short,
    289 /// short -> int, etc.
    290 static char Widen(const char t) {
    291   switch (t) {
    292     case 'c':
    293       return 's';
    294     case 's':
    295       return 'i';
    296     case 'i':
    297       return 'l';
    298     case 'h':
    299       return 'f';
    300     default:
    301       PrintFatalError("unhandled type in widen!");
    302   }
    303 }
    304 
    305 /// Narrow - Convert a type code into the next smaller type.  short -> char,
    306 /// float -> half float, etc.
    307 static char Narrow(const char t) {
    308   switch (t) {
    309     case 's':
    310       return 'c';
    311     case 'i':
    312       return 's';
    313     case 'l':
    314       return 'i';
    315     case 'f':
    316       return 'h';
    317     default:
    318       PrintFatalError("unhandled type in narrow!");
    319   }
    320 }
    321 
    322 /// For a particular StringRef, return the base type code, and whether it has
    323 /// the quad-vector, polynomial, or unsigned modifiers set.
    324 static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
    325   unsigned off = 0;
    326 
    327   // remember quad.
    328   if (ty[off] == 'Q') {
    329     quad = true;
    330     ++off;
    331   }
    332 
    333   // remember poly.
    334   if (ty[off] == 'P') {
    335     poly = true;
    336     ++off;
    337   }
    338 
    339   // remember unsigned.
    340   if (ty[off] == 'U') {
    341     usgn = true;
    342     ++off;
    343   }
    344 
    345   // base type to get the type string for.
    346   return ty[off];
    347 }
    348 
    349 /// ModType - Transform a type code and its modifiers based on a mod code. The
    350 /// mod code definitions may be found at the top of arm_neon.td.
    351 static char ModType(const char mod, char type, bool &quad, bool &poly,
    352                     bool &usgn, bool &scal, bool &cnst, bool &pntr) {
    353   switch (mod) {
    354     case 't':
    355       if (poly) {
    356         poly = false;
    357         usgn = true;
    358       }
    359       break;
    360     case 'u':
    361       usgn = true;
    362       poly = false;
    363       if (type == 'f')
    364         type = 'i';
    365       if (type == 'd')
    366         type = 'l';
    367       break;
    368     case 'x':
    369       usgn = false;
    370       poly = false;
    371       if (type == 'f')
    372         type = 'i';
    373       break;
    374     case 'f':
    375       if (type == 'h')
    376         quad = true;
    377       type = 'f';
    378       usgn = false;
    379       break;
    380     case 'g':
    381       quad = false;
    382       break;
    383     case 'w':
    384       type = Widen(type);
    385       quad = true;
    386       break;
    387     case 'n':
    388       type = Widen(type);
    389       break;
    390     case 'i':
    391       type = 'i';
    392       scal = true;
    393       break;
    394     case 'l':
    395       type = 'l';
    396       scal = true;
    397       usgn = true;
    398       break;
    399     case 's':
    400     case 'a':
    401       scal = true;
    402       break;
    403     case 'k':
    404       quad = true;
    405       break;
    406     case 'c':
    407       cnst = true;
    408     case 'p':
    409       pntr = true;
    410       scal = true;
    411       break;
    412     case 'h':
    413       type = Narrow(type);
    414       if (type == 'h')
    415         quad = false;
    416       break;
    417     case 'e':
    418       type = Narrow(type);
    419       usgn = true;
    420       break;
    421     default:
    422       break;
    423   }
    424   return type;
    425 }
    426 
    427 /// TypeString - for a modifier and type, generate the name of the typedef for
    428 /// that type.  QUc -> uint8x8_t.
    429 static std::string TypeString(const char mod, StringRef typestr) {
    430   bool quad = false;
    431   bool poly = false;
    432   bool usgn = false;
    433   bool scal = false;
    434   bool cnst = false;
    435   bool pntr = false;
    436 
    437   if (mod == 'v')
    438     return "void";
    439   if (mod == 'i')
    440     return "int";
    441 
    442   // base type to get the type string for.
    443   char type = ClassifyType(typestr, quad, poly, usgn);
    444 
    445   // Based on the modifying character, change the type and width if necessary.
    446   type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
    447 
    448   SmallString<128> s;
    449 
    450   if (usgn)
    451     s.push_back('u');
    452 
    453   switch (type) {
    454     case 'c':
    455       s += poly ? "poly8" : "int8";
    456       if (scal)
    457         break;
    458       s += quad ? "x16" : "x8";
    459       break;
    460     case 's':
    461       s += poly ? "poly16" : "int16";
    462       if (scal)
    463         break;
    464       s += quad ? "x8" : "x4";
    465       break;
    466     case 'i':
    467       s += "int32";
    468       if (scal)
    469         break;
    470       s += quad ? "x4" : "x2";
    471       break;
    472     case 'l':
    473       s += "int64";
    474       if (scal)
    475         break;
    476       s += quad ? "x2" : "x1";
    477       break;
    478     case 'h':
    479       s += "float16";
    480       if (scal)
    481         break;
    482       s += quad ? "x8" : "x4";
    483       break;
    484     case 'f':
    485       s += "float32";
    486       if (scal)
    487         break;
    488       s += quad ? "x4" : "x2";
    489       break;
    490     case 'd':
    491       s += "float64";
    492       if (scal)
    493         break;
    494       s += quad ? "x2" : "x1";
    495       break;
    496 
    497     default:
    498       PrintFatalError("unhandled type!");
    499   }
    500 
    501   if (mod == '2')
    502     s += "x2";
    503   if (mod == '3')
    504     s += "x3";
    505   if (mod == '4')
    506     s += "x4";
    507 
    508   // Append _t, finishing the type string typedef type.
    509   s += "_t";
    510 
    511   if (cnst)
    512     s += " const";
    513 
    514   if (pntr)
    515     s += " *";
    516 
    517   return s.str();
    518 }
    519 
    520 /// BuiltinTypeString - for a modifier and type, generate the clang
    521 /// BuiltinsARM.def prototype code for the function.  See the top of clang's
    522 /// Builtins.def for a description of the type strings.
    523 static std::string BuiltinTypeString(const char mod, StringRef typestr,
    524                                      ClassKind ck, bool ret) {
    525   bool quad = false;
    526   bool poly = false;
    527   bool usgn = false;
    528   bool scal = false;
    529   bool cnst = false;
    530   bool pntr = false;
    531 
    532   if (mod == 'v')
    533     return "v"; // void
    534   if (mod == 'i')
    535     return "i"; // int
    536 
    537   // base type to get the type string for.
    538   char type = ClassifyType(typestr, quad, poly, usgn);
    539 
    540   // Based on the modifying character, change the type and width if necessary.
    541   type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
    542 
    543   // All pointers are void* pointers.  Change type to 'v' now.
    544   if (pntr) {
    545     usgn = false;
    546     poly = false;
    547     type = 'v';
    548   }
    549   // Treat half-float ('h') types as unsigned short ('s') types.
    550   if (type == 'h') {
    551     type = 's';
    552     usgn = true;
    553   }
    554   usgn = usgn | poly | ((ck == ClassI || ck == ClassW) && scal && type != 'f');
    555 
    556   if (scal) {
    557     SmallString<128> s;
    558 
    559     if (usgn)
    560       s.push_back('U');
    561     else if (type == 'c')
    562       s.push_back('S'); // make chars explicitly signed
    563 
    564     if (type == 'l') // 64-bit long
    565       s += "LLi";
    566     else
    567       s.push_back(type);
    568 
    569     if (cnst)
    570       s.push_back('C');
    571     if (pntr)
    572       s.push_back('*');
    573     return s.str();
    574   }
    575 
    576   // Since the return value must be one type, return a vector type of the
    577   // appropriate width which we will bitcast.  An exception is made for
    578   // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
    579   // fashion, storing them to a pointer arg.
    580   if (ret) {
    581     if (mod >= '2' && mod <= '4')
    582       return "vv*"; // void result with void* first argument
    583     if (mod == 'f' || (ck != ClassB && type == 'f'))
    584       return quad ? "V4f" : "V2f";
    585     if (ck != ClassB && type == 's')
    586       return quad ? "V8s" : "V4s";
    587     if (ck != ClassB && type == 'i')
    588       return quad ? "V4i" : "V2i";
    589     if (ck != ClassB && type == 'l')
    590       return quad ? "V2LLi" : "V1LLi";
    591 
    592     return quad ? "V16Sc" : "V8Sc";
    593   }
    594 
    595   // Non-return array types are passed as individual vectors.
    596   if (mod == '2')
    597     return quad ? "V16ScV16Sc" : "V8ScV8Sc";
    598   if (mod == '3')
    599     return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
    600   if (mod == '4')
    601     return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";
    602 
    603   if (mod == 'f' || (ck != ClassB && type == 'f'))
    604     return quad ? "V4f" : "V2f";
    605   if (ck != ClassB && type == 's')
    606     return quad ? "V8s" : "V4s";
    607   if (ck != ClassB && type == 'i')
    608     return quad ? "V4i" : "V2i";
    609   if (ck != ClassB && type == 'l')
    610     return quad ? "V2LLi" : "V1LLi";
    611 
    612   return quad ? "V16Sc" : "V8Sc";
    613 }
    614 
    615 /// InstructionTypeCode - Computes the ARM argument character code and
    616 /// quad status for a specific type string and ClassKind.
    617 static void InstructionTypeCode(const StringRef &typeStr,
    618                                 const ClassKind ck,
    619                                 bool &quad,
    620                                 std::string &typeCode) {
    621   bool poly = false;
    622   bool usgn = false;
    623   char type = ClassifyType(typeStr, quad, poly, usgn);
    624 
    625   switch (type) {
    626   case 'c':
    627     switch (ck) {
    628     case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break;
    629     case ClassI: typeCode = "i8"; break;
    630     case ClassW: typeCode = "8"; break;
    631     default: break;
    632     }
    633     break;
    634   case 's':
    635     switch (ck) {
    636     case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break;
    637     case ClassI: typeCode = "i16"; break;
    638     case ClassW: typeCode = "16"; break;
    639     default: break;
    640     }
    641     break;
    642   case 'i':
    643     switch (ck) {
    644     case ClassS: typeCode = usgn ? "u32" : "s32"; break;
    645     case ClassI: typeCode = "i32"; break;
    646     case ClassW: typeCode = "32"; break;
    647     default: break;
    648     }
    649     break;
    650   case 'l':
    651     switch (ck) {
    652     case ClassS: typeCode = usgn ? "u64" : "s64"; break;
    653     case ClassI: typeCode = "i64"; break;
    654     case ClassW: typeCode = "64"; break;
    655     default: break;
    656     }
    657     break;
    658   case 'h':
    659     switch (ck) {
    660     case ClassS:
    661     case ClassI: typeCode = "f16"; break;
    662     case ClassW: typeCode = "16"; break;
    663     default: break;
    664     }
    665     break;
    666   case 'f':
    667     switch (ck) {
    668     case ClassS:
    669     case ClassI: typeCode = "f32"; break;
    670     case ClassW: typeCode = "32"; break;
    671     default: break;
    672     }
    673     break;
    674   case 'd':
    675     switch (ck) {
    676     case ClassS:
    677     case ClassI:
    678       typeCode += "f64";
    679       break;
    680     case ClassW:
    681       PrintFatalError("unhandled type!");
    682     default:
    683       break;
    684     }
    685     break;
    686   default:
    687     PrintFatalError("unhandled type!");
    688   }
    689 }
    690 
    691 /// MangleName - Append a type or width suffix to a base neon function name,
    692 /// and insert a 'q' in the appropriate location if the operation works on
    693 /// 128b rather than 64b.   E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
    694 static std::string MangleName(const std::string &name, StringRef typestr,
    695                               ClassKind ck) {
    696   if (name == "vcvt_f32_f16")
    697     return name;
    698 
    699   bool quad = false;
    700   std::string typeCode = "";
    701 
    702   InstructionTypeCode(typestr, ck, quad, typeCode);
    703 
    704   std::string s = name;
    705 
    706   if (typeCode.size() > 0) {
    707     s += "_" + typeCode;
    708   }
    709 
    710   if (ck == ClassB)
    711     s += "_v";
    712 
    713   // Insert a 'q' before the first '_' character so that it ends up before
    714   // _lane or _n on vector-scalar operations.
    715   if (quad) {
    716     size_t pos = s.find('_');
    717     s = s.insert(pos, "q");
    718   }
    719 
    720   return s;
    721 }
    722 
    723 static void PreprocessInstruction(const StringRef &Name,
    724                                   const std::string &InstName,
    725                                   std::string &Prefix,
    726                                   bool &HasNPostfix,
    727                                   bool &HasLanePostfix,
    728                                   bool &HasDupPostfix,
    729                                   bool &IsSpecialVCvt,
    730                                   size_t &TBNumber) {
    731   // All of our instruction name fields from arm_neon.td are of the form
    732   //   <instructionname>_...
    733   // Thus we grab our instruction name via computation of said Prefix.
    734   const size_t PrefixEnd = Name.find_first_of('_');
    735   // If InstName is passed in, we use that instead of our name Prefix.
    736   Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName;
    737 
    738   const StringRef Postfix = Name.slice(PrefixEnd, Name.size());
    739 
    740   HasNPostfix = Postfix.count("_n");
    741   HasLanePostfix = Postfix.count("_lane");
    742   HasDupPostfix = Postfix.count("_dup");
    743   IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt");
    744 
    745   if (InstName.compare("vtbl") == 0 ||
    746       InstName.compare("vtbx") == 0) {
    747     // If we have a vtblN/vtbxN instruction, use the instruction's ASCII
    748     // encoding to get its true value.
    749     TBNumber = Name[Name.size()-1] - 48;
    750   }
    751 }
    752 
    753 /// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have
    754 /// extracted, generate a FileCheck pattern for a Load Or Store
    755 static void
    756 GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef,
    757                                           const std::string& OutTypeCode,
    758                                           const bool &IsQuad,
    759                                           const bool &HasDupPostfix,
    760                                           const bool &HasLanePostfix,
    761                                           const size_t Count,
    762                                           std::string &RegisterSuffix) {
    763   const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1");
    764   // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang
    765   // will output a series of v{ld,st}1s, so we have to handle it specially.
    766   if ((Count == 3 || Count == 4) && IsQuad) {
    767     RegisterSuffix += "{";
    768     for (size_t i = 0; i < Count; i++) {
    769       RegisterSuffix += "d{{[0-9]+}}";
    770       if (HasDupPostfix) {
    771         RegisterSuffix += "[]";
    772       }
    773       if (HasLanePostfix) {
    774         RegisterSuffix += "[{{[0-9]+}}]";
    775       }
    776       if (i < Count-1) {
    777         RegisterSuffix += ", ";
    778       }
    779     }
    780     RegisterSuffix += "}";
    781   } else {
    782 
    783     // Handle normal loads and stores.
    784     RegisterSuffix += "{";
    785     for (size_t i = 0; i < Count; i++) {
    786       RegisterSuffix += "d{{[0-9]+}}";
    787       if (HasDupPostfix) {
    788         RegisterSuffix += "[]";
    789       }
    790       if (HasLanePostfix) {
    791         RegisterSuffix += "[{{[0-9]+}}]";
    792       }
    793       if (IsQuad && !HasLanePostfix) {
    794         RegisterSuffix += ", d{{[0-9]+}}";
    795         if (HasDupPostfix) {
    796           RegisterSuffix += "[]";
    797         }
    798       }
    799       if (i < Count-1) {
    800         RegisterSuffix += ", ";
    801       }
    802     }
    803     RegisterSuffix += "}, [r{{[0-9]+}}";
    804 
    805     // We only include the alignment hint if we have a vld1.*64 or
    806     // a dup/lane instruction.
    807     if (IsLDSTOne) {
    808       if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") {
    809         RegisterSuffix += ":" + OutTypeCode;
    810       }
    811     }
    812 
    813     RegisterSuffix += "]";
    814   }
    815 }
    816 
    817 static bool HasNPostfixAndScalarArgs(const StringRef &NameRef,
    818                                      const bool &HasNPostfix) {
    819   return (NameRef.count("vmla") ||
    820           NameRef.count("vmlal") ||
    821           NameRef.count("vmlsl") ||
    822           NameRef.count("vmull") ||
    823           NameRef.count("vqdmlal") ||
    824           NameRef.count("vqdmlsl") ||
    825           NameRef.count("vqdmulh") ||
    826           NameRef.count("vqdmull") ||
    827           NameRef.count("vqrdmulh")) && HasNPostfix;
    828 }
    829 
    830 static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef,
    831                                          const bool &HasLanePostfix) {
    832   return (NameRef.count("vmla") ||
    833           NameRef.count("vmls") ||
    834           NameRef.count("vmlal") ||
    835           NameRef.count("vmlsl") ||
    836           (NameRef.count("vmul") && NameRef.size() == 3)||
    837           NameRef.count("vqdmlal") ||
    838           NameRef.count("vqdmlsl") ||
    839           NameRef.count("vqdmulh") ||
    840           NameRef.count("vqrdmulh")) && HasLanePostfix;
    841 }
    842 
    843 static bool IsSpecialLaneMultiply(const StringRef &NameRef,
    844                                   const bool &HasLanePostfix,
    845                                   const bool &IsQuad) {
    846   const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh"))
    847                                && IsQuad;
    848   const bool IsVMull = NameRef.count("mull") && !IsQuad;
    849   return (IsVMulOrMulh || IsVMull) && HasLanePostfix;
    850 }
    851 
    852 static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
    853                                                      const std::string &Proto,
    854                                                      const bool &HasNPostfix,
    855                                                      const bool &IsQuad,
    856                                                      const bool &HasLanePostfix,
    857                                                      const bool &HasDupPostfix,
    858                                                      std::string &NormedProto) {
    859   // Handle generic case.
    860   const StringRef NameRef(Name);
    861   for (size_t i = 0, end = Proto.size(); i < end; i++) {
    862     switch (Proto[i]) {
    863     case 'u':
    864     case 'f':
    865     case 'd':
    866     case 's':
    867     case 'x':
    868     case 't':
    869     case 'n':
    870       NormedProto += IsQuad? 'q' : 'd';
    871       break;
    872     case 'w':
    873     case 'k':
    874       NormedProto += 'q';
    875       break;
    876     case 'g':
    877     case 'h':
    878     case 'e':
    879       NormedProto += 'd';
    880       break;
    881     case 'i':
    882       NormedProto += HasLanePostfix? 'a' : 'i';
    883       break;
    884     case 'a':
    885       if (HasLanePostfix) {
    886         NormedProto += 'a';
    887       } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) {
    888         NormedProto += IsQuad? 'q' : 'd';
    889       } else {
    890         NormedProto += 'i';
    891       }
    892       break;
    893     }
    894   }
    895 
    896   // Handle Special Cases.
    897   const bool IsNotVExt = !NameRef.count("vext");
    898   const bool IsVPADAL = NameRef.count("vpadal");
    899   const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef,
    900                                                            HasLanePostfix);
    901   const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix,
    902                                                       IsQuad);
    903 
    904   if (IsSpecialLaneMul) {
    905     // If
    906     NormedProto[2] = NormedProto[3];
    907     NormedProto.erase(3);
    908   } else if (NormedProto.size() == 4 &&
    909              NormedProto[0] == NormedProto[1] &&
    910              IsNotVExt) {
    911     // If NormedProto.size() == 4 and the first two proto characters are the
    912     // same, ignore the first.
    913     NormedProto = NormedProto.substr(1, 3);
    914   } else if (Is5OpLaneAccum) {
    915     // If we have a 5 op lane accumulator operation, we take characters 1,2,4
    916     std::string tmp = NormedProto.substr(1,2);
    917     tmp += NormedProto[4];
    918     NormedProto = tmp;
    919   } else if (IsVPADAL) {
    920     // If we have VPADAL, ignore the first character.
    921     NormedProto = NormedProto.substr(0, 2);
    922   } else if (NameRef.count("vdup") && NormedProto.size() > 2) {
    923     // If our instruction is a dup instruction, keep only the first and
    924     // last characters.
    925     std::string tmp = "";
    926     tmp += NormedProto[0];
    927     tmp += NormedProto[NormedProto.size()-1];
    928     NormedProto = tmp;
    929   }
    930 }
    931 
    932 /// GenerateRegisterCheckPatterns - Given a bunch of data we have
    933 /// extracted, generate a FileCheck pattern to check that an
    934 /// instruction's arguments are correct.
    935 static void GenerateRegisterCheckPattern(const std::string &Name,
    936                                          const std::string &Proto,
    937                                          const std::string &OutTypeCode,
    938                                          const bool &HasNPostfix,
    939                                          const bool &IsQuad,
    940                                          const bool &HasLanePostfix,
    941                                          const bool &HasDupPostfix,
    942                                          const size_t &TBNumber,
    943                                          std::string &RegisterSuffix) {
    944 
    945   RegisterSuffix = "";
    946 
    947   const StringRef NameRef(Name);
    948   const StringRef ProtoRef(Proto);
    949 
    950   if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) {
    951     return;
    952   }
    953 
    954   const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst");
    955   const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx");
    956 
    957   if (IsLoadStore) {
    958     // Grab N value from  v{ld,st}N using its ascii representation.
    959     const size_t Count = NameRef[3] - 48;
    960 
    961     GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad,
    962                                               HasDupPostfix, HasLanePostfix,
    963                                               Count, RegisterSuffix);
    964   } else if (IsTBXOrTBL) {
    965     RegisterSuffix += "d{{[0-9]+}}, {";
    966     for (size_t i = 0; i < TBNumber-1; i++) {
    967       RegisterSuffix += "d{{[0-9]+}}, ";
    968     }
    969     RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}";
    970   } else {
    971     // Handle a normal instruction.
    972     if (NameRef.count("vget") || NameRef.count("vset"))
    973       return;
    974 
    975     // We first normalize our proto, since we only need to emit 4
    976     // different types of checks, yet have more than 4 proto types
    977     // that map onto those 4 patterns.
    978     std::string NormalizedProto("");
    979     NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad,
    980                                              HasLanePostfix, HasDupPostfix,
    981                                              NormalizedProto);
    982 
    983     for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) {
    984       const char &c = NormalizedProto[i];
    985       switch (c) {
    986       case 'q':
    987         RegisterSuffix += "q{{[0-9]+}}, ";
    988         break;
    989 
    990       case 'd':
    991         RegisterSuffix += "d{{[0-9]+}}, ";
    992         break;
    993 
    994       case 'i':
    995         RegisterSuffix += "#{{[0-9]+}}, ";
    996         break;
    997 
    998       case 'a':
    999         RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], ";
   1000         break;
   1001       }
   1002     }
   1003 
   1004     // Remove extra ", ".
   1005     RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2);
   1006   }
   1007 }
   1008 
   1009 /// GenerateChecksForIntrinsic - Given a specific instruction name +
   1010 /// typestr + class kind, generate the proper set of FileCheck
   1011 /// Patterns to check for. We could just return a string, but instead
   1012 /// use a vector since it provides us with the extra flexibility of
   1013 /// emitting multiple checks, which comes in handy for certain cases
   1014 /// like mla where we want to check for 2 different instructions.
   1015 static void GenerateChecksForIntrinsic(const std::string &Name,
   1016                                        const std::string &Proto,
   1017                                        StringRef &OutTypeStr,
   1018                                        StringRef &InTypeStr,
   1019                                        ClassKind Ck,
   1020                                        const std::string &InstName,
   1021                                        bool IsHiddenLOp,
   1022                                        std::vector<std::string>& Result) {
   1023 
   1024   // If Ck is a ClassNoTest instruction, just return so no test is
   1025   // emitted.
   1026   if(Ck == ClassNoTest)
   1027     return;
   1028 
   1029   if (Name == "vcvt_f32_f16") {
   1030     Result.push_back("vcvt.f32.f16");
   1031     return;
   1032   }
   1033 
   1034 
   1035   // Now we preprocess our instruction given the data we have to get the
   1036   // data that we need.
   1037   // Create a StringRef for String Manipulation of our Name.
   1038   const StringRef NameRef(Name);
   1039   // Instruction Prefix.
   1040   std::string Prefix;
   1041   // The type code for our out type string.
   1042   std::string OutTypeCode;
   1043   // To handle our different cases, we need to check for different postfixes.
   1044   // Is our instruction a quad instruction.
   1045   bool IsQuad = false;
   1046   // Our instruction is of the form <instructionname>_n.
   1047   bool HasNPostfix = false;
   1048   // Our instruction is of the form <instructionname>_lane.
   1049   bool HasLanePostfix = false;
   1050   // Our instruction is of the form <instructionname>_dup.
   1051   bool HasDupPostfix  = false;
   1052   // Our instruction is a vcvt instruction which requires special handling.
   1053   bool IsSpecialVCvt = false;
   1054   // If we have a vtbxN or vtblN instruction, this is set to N.
   1055   size_t TBNumber = -1;
   1056   // Register Suffix
   1057   std::string RegisterSuffix;
   1058 
   1059   PreprocessInstruction(NameRef, InstName, Prefix,
   1060                         HasNPostfix, HasLanePostfix, HasDupPostfix,
   1061                         IsSpecialVCvt, TBNumber);
   1062 
   1063   InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode);
   1064   GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad,
   1065                                HasLanePostfix, HasDupPostfix, TBNumber,
   1066                                RegisterSuffix);
   1067 
   1068   // In the following section, we handle a bunch of special cases. You can tell
   1069   // a special case by the fact we are returning early.
   1070 
   1071   // If our instruction is a logical instruction without postfix or a
   1072   // hidden LOp just return the current Prefix.
   1073   if (Ck == ClassL || IsHiddenLOp) {
   1074     Result.push_back(Prefix + " " + RegisterSuffix);
   1075     return;
   1076   }
   1077 
   1078   // If we have a vmov, due to the many different cases, some of which
   1079   // vary within the different intrinsics generated for a single
   1080   // instruction type, just output a vmov. (e.g. given an instruction
   1081   // A, A.u32 might be vmov and A.u8 might be vmov.8).
   1082   //
   1083   // FIXME: Maybe something can be done about this. The two cases that we care
   1084   // about are vmov as an LType and vmov as a WType.
   1085   if (Prefix == "vmov") {
   1086     Result.push_back(Prefix + " " + RegisterSuffix);
   1087     return;
   1088   }
   1089 
   1090   // In the following section, we handle special cases.
   1091 
   1092   if (OutTypeCode == "64") {
   1093     // If we have a 64 bit vdup/vext and are handling an uint64x1_t
   1094     // type, the intrinsic will be optimized away, so just return
   1095     // nothing.  On the other hand if we are handling an uint64x2_t
   1096     // (i.e. quad instruction), vdup/vmov instructions should be
   1097     // emitted.
   1098     if (Prefix == "vdup" || Prefix == "vext") {
   1099       if (IsQuad) {
   1100         Result.push_back("{{vmov|vdup}}");
   1101       }
   1102       return;
   1103     }
   1104 
   1105     // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with
   1106     // multiple register operands.
   1107     bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3"
   1108                             || Prefix == "vld4";
   1109     bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3"
   1110                             || Prefix == "vst4";
   1111     if (MultiLoadPrefix || MultiStorePrefix) {
   1112       Result.push_back(NameRef.slice(0, 3).str() + "1.64");
   1113       return;
   1114     }
   1115 
   1116     // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of
   1117     // emitting said instructions. So return a check for
   1118     // vldr/vstr/vmov/str instead.
   1119     if (HasLanePostfix || HasDupPostfix) {
   1120       if (Prefix == "vst1") {
   1121         Result.push_back("{{str|vstr|vmov}}");
   1122         return;
   1123       } else if (Prefix == "vld1") {
   1124         Result.push_back("{{ldr|vldr|vmov}}");
   1125         return;
   1126       }
   1127     }
   1128   }
   1129 
   1130   // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are
   1131   // sometimes disassembled as vtrn.32. We use a regex to handle both
   1132   // cases.
   1133   if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") {
   1134     Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix);
   1135     return;
   1136   }
   1137 
   1138   // Currently on most ARM processors, we do not use vmla/vmls for
   1139   // quad floating point operations. Instead we output vmul + vadd. So
   1140   // check if we have one of those instructions and just output a
   1141   // check for vmul.
   1142   if (OutTypeCode == "f32") {
   1143     if (Prefix == "vmls") {
   1144       Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
   1145       Result.push_back("vsub." + OutTypeCode);
   1146       return;
   1147     } else if (Prefix == "vmla") {
   1148       Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
   1149       Result.push_back("vadd." + OutTypeCode);
   1150       return;
   1151     }
   1152   }
   1153 
   1154   // If we have vcvt, get the input type from the instruction name
   1155   // (which should be of the form instname_inputtype) and append it
   1156   // before the output type.
   1157   if (Prefix == "vcvt") {
   1158     const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1);
   1159     Prefix += "." + inTypeCode;
   1160   }
   1161 
   1162   // Append output type code to get our final mangled instruction.
   1163   Prefix += "." + OutTypeCode;
   1164 
   1165   Result.push_back(Prefix + " " + RegisterSuffix);
   1166 }
   1167 
   /// UseMacro - Decide from the prototype string whether the intrinsic has to
   /// be emitted as a preprocessor macro rather than an inline function.
   ///
   /// A macro is required when the prototype contains:
   ///   'i'        - an immediate argument, so that SemaChecking can range check
   ///                the immediate passed by the user;
   ///   'p' or 'c' - a pointer argument, to avoid hiding aligned attributes from
   ///                the pointer type.
   static bool UseMacro(const std::string &proto) {
     return proto.find_first_of("ipc") != std::string::npos;
   }
   1185 
   /// MacroArgUsedDirectly - For an intrinsic defined as a macro, report whether
   /// argument i is referenced directly rather than first being copied into a
   /// local temporary.  That is the case for constant ints ('i'), pointers ('p')
   /// and const pointers ('c').
   static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
     switch (proto[i]) {
     case 'i':
     case 'p':
     case 'c':
       return true;
     default:
       return false;
     }
   }
   1193 
   1194 // Generate the string "(argtype a, argtype b, ...)"
   1195 static std::string GenArgs(const std::string &proto, StringRef typestr) {
   1196   bool define = UseMacro(proto);
   1197   char arg = 'a';
   1198 
   1199   std::string s;
   1200   s += "(";
   1201 
   1202   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
   1203     if (define) {
   1204       // Some macro arguments are used directly instead of being assigned
   1205       // to local temporaries; prepend an underscore prefix to make their
   1206       // names consistent with the local temporaries.
   1207       if (MacroArgUsedDirectly(proto, i))
   1208         s += "__";
   1209     } else {
   1210       s += TypeString(proto[i], typestr) + " __";
   1211     }
   1212     s.push_back(arg);
   1213     if ((i + 1) < e)
   1214       s += ", ";
   1215   }
   1216 
   1217   s += ")";
   1218   return s;
   1219 }
   1220 
   1221 // Macro arguments are not type-checked like inline function arguments, so
   1222 // assign them to local temporaries to get the right type checking.
   1223 static std::string GenMacroLocals(const std::string &proto, StringRef typestr) {
   1224   char arg = 'a';
   1225   std::string s;
   1226   bool generatedLocal = false;
   1227 
   1228   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
   1229     // Do not create a temporary for an immediate argument.
   1230     // That would defeat the whole point of using a macro!
   1231     if (MacroArgUsedDirectly(proto, i))
   1232       continue;
   1233     generatedLocal = true;
   1234 
   1235     s += TypeString(proto[i], typestr) + " __";
   1236     s.push_back(arg);
   1237     s += " = (";
   1238     s.push_back(arg);
   1239     s += "); ";
   1240   }
   1241 
   1242   if (generatedLocal)
   1243     s += "\\\n  ";
   1244   return s;
   1245 }
   1246 
   1247 // Use the vmovl builtin to sign-extend or zero-extend a vector.
   1248 static std::string Extend(StringRef typestr, const std::string &a) {
   1249   std::string s;
   1250   s = MangleName("vmovl", typestr, ClassS);
   1251   s += "(" + a + ")";
   1252   return s;
   1253 }
   1254 
   1255 static std::string Duplicate(unsigned nElts, StringRef typestr,
   1256                              const std::string &a) {
   1257   std::string s;
   1258 
   1259   s = "(" + TypeString('d', typestr) + "){ ";
   1260   for (unsigned i = 0; i != nElts; ++i) {
   1261     s += a;
   1262     if ((i + 1) < nElts)
   1263       s += ", ";
   1264   }
   1265   s += " }";
   1266 
   1267   return s;
   1268 }
   1269 
   // Produce a __builtin_shufflevector call that shuffles vec with itself and
   // selects the index expression lane for each of the nElts result elements.
   static std::string SplatLane(unsigned nElts, const std::string &vec,
                                const std::string &lane) {
     std::string call("__builtin_shufflevector(");
     call += vec;
     call += ", ";
     call += vec;

     for (unsigned remaining = nElts; remaining != 0; --remaining) {
       call += ", ";
       call += lane;
     }

     call += ")";
     return call;
   }
   1278 
   1279 static unsigned GetNumElements(StringRef typestr, bool &quad) {
   1280   quad = false;
   1281   bool dummy = false;
   1282   char type = ClassifyType(typestr, quad, dummy, dummy);
   1283   unsigned nElts = 0;
   1284   switch (type) {
   1285   case 'c': nElts = 8; break;
   1286   case 's': nElts = 4; break;
   1287   case 'i': nElts = 2; break;
   1288   case 'l': nElts = 1; break;
   1289   case 'h': nElts = 4; break;
   1290   case 'f': nElts = 2; break;
   1291   case 'd':
   1292     nElts = 1;
   1293     break;
   1294   default:
   1295     PrintFatalError("unhandled type!");
   1296   }
   1297   if (quad) nElts <<= 1;
   1298   return nElts;
   1299 }
   1300 
   1301 // Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
   1302 static std::string GenOpString(OpKind op, const std::string &proto,
   1303                                StringRef typestr) {
   1304   bool quad;
   1305   unsigned nElts = GetNumElements(typestr, quad);
   1306   bool define = UseMacro(proto);
   1307 
   1308   std::string ts = TypeString(proto[0], typestr);
   1309   std::string s;
   1310   if (!define) {
   1311     s = "return ";
   1312   }
   1313 
   1314   switch(op) {
   1315   case OpAdd:
   1316     s += "__a + __b;";
   1317     break;
   1318   case OpAddl:
   1319     s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
   1320     break;
   1321   case OpAddw:
   1322     s += "__a + " + Extend(typestr, "__b") + ";";
   1323     break;
   1324   case OpSub:
   1325     s += "__a - __b;";
   1326     break;
   1327   case OpSubl:
   1328     s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
   1329     break;
   1330   case OpSubw:
   1331     s += "__a - " + Extend(typestr, "__b") + ";";
   1332     break;
   1333   case OpMulN:
   1334     s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
   1335     break;
   1336   case OpMulLane:
   1337     s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
   1338     break;
   1339   case OpMul:
   1340     s += "__a * __b;";
   1341     break;
   1342   case OpMullLane:
   1343     s += MangleName("vmull", typestr, ClassS) + "(__a, " +
   1344       SplatLane(nElts, "__b", "__c") + ");";
   1345     break;
   1346   case OpMlaN:
   1347     s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
   1348     break;
   1349   case OpMlaLane:
   1350     s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
   1351     break;
   1352   case OpMla:
   1353     s += "__a + (__b * __c);";
   1354     break;
   1355   case OpMlalN:
   1356     s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
   1357       Duplicate(nElts, typestr, "__c") + ");";
   1358     break;
   1359   case OpMlalLane:
   1360     s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
   1361       SplatLane(nElts, "__c", "__d") + ");";
   1362     break;
   1363   case OpMlal:
   1364     s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
   1365     break;
   1366   case OpMlsN:
   1367     s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
   1368     break;
   1369   case OpMlsLane:
   1370     s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
   1371     break;
   1372   case OpMls:
   1373     s += "__a - (__b * __c);";
   1374     break;
   1375   case OpMlslN:
   1376     s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
   1377       Duplicate(nElts, typestr, "__c") + ");";
   1378     break;
   1379   case OpMlslLane:
   1380     s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
   1381       SplatLane(nElts, "__c", "__d") + ");";
   1382     break;
   1383   case OpMlsl:
   1384     s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
   1385     break;
   1386   case OpQDMullLane:
   1387     s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
   1388       SplatLane(nElts, "__b", "__c") + ");";
   1389     break;
   1390   case OpQDMlalLane:
   1391     s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
   1392       SplatLane(nElts, "__c", "__d") + ");";
   1393     break;
   1394   case OpQDMlslLane:
   1395     s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
   1396       SplatLane(nElts, "__c", "__d") + ");";
   1397     break;
   1398   case OpQDMulhLane:
   1399     s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
   1400       SplatLane(nElts, "__b", "__c") + ");";
   1401     break;
   1402   case OpQRDMulhLane:
   1403     s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
   1404       SplatLane(nElts, "__b", "__c") + ");";
   1405     break;
   1406   case OpEq:
   1407     s += "(" + ts + ")(__a == __b);";
   1408     break;
   1409   case OpGe:
   1410     s += "(" + ts + ")(__a >= __b);";
   1411     break;
   1412   case OpLe:
   1413     s += "(" + ts + ")(__a <= __b);";
   1414     break;
   1415   case OpGt:
   1416     s += "(" + ts + ")(__a > __b);";
   1417     break;
   1418   case OpLt:
   1419     s += "(" + ts + ")(__a < __b);";
   1420     break;
   1421   case OpNeg:
   1422     s += " -__a;";
   1423     break;
   1424   case OpNot:
   1425     s += " ~__a;";
   1426     break;
   1427   case OpAnd:
   1428     s += "__a & __b;";
   1429     break;
   1430   case OpOr:
   1431     s += "__a | __b;";
   1432     break;
   1433   case OpXor:
   1434     s += "__a ^ __b;";
   1435     break;
   1436   case OpAndNot:
   1437     s += "__a & ~__b;";
   1438     break;
   1439   case OpOrNot:
   1440     s += "__a | ~__b;";
   1441     break;
   1442   case OpCast:
   1443     s += "(" + ts + ")__a;";
   1444     break;
   1445   case OpConcat:
   1446     s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
   1447     s += ", (int64x1_t)__b, 0, 1);";
   1448     break;
   1449   case OpHi:
   1450     // nElts is for the result vector, so the source is twice that number.
   1451     s += "__builtin_shufflevector(__a, __a";
   1452     for (unsigned i = nElts; i < nElts * 2; ++i)
   1453       s += ", " + utostr(i);
   1454     s+= ");";
   1455     break;
   1456   case OpLo:
   1457     s += "__builtin_shufflevector(__a, __a";
   1458     for (unsigned i = 0; i < nElts; ++i)
   1459       s += ", " + utostr(i);
   1460     s+= ");";
   1461     break;
   1462   case OpDup:
   1463     s += Duplicate(nElts, typestr, "__a") + ";";
   1464     break;
   1465   case OpDupLane:
   1466     s += SplatLane(nElts, "__a", "__b") + ";";
   1467     break;
   1468   case OpSelect:
   1469     // ((0 & 1) | (~0 & 2))
   1470     s += "(" + ts + ")";
   1471     ts = TypeString(proto[1], typestr);
   1472     s += "((__a & (" + ts + ")__b) | ";
   1473     s += "(~__a & (" + ts + ")__c));";
   1474     break;
   1475   case OpRev16:
   1476     s += "__builtin_shufflevector(__a, __a";
   1477     for (unsigned i = 2; i <= nElts; i += 2)
   1478       for (unsigned j = 0; j != 2; ++j)
   1479         s += ", " + utostr(i - j - 1);
   1480     s += ");";
   1481     break;
   1482   case OpRev32: {
   1483     unsigned WordElts = nElts >> (1 + (int)quad);
   1484     s += "__builtin_shufflevector(__a, __a";
   1485     for (unsigned i = WordElts; i <= nElts; i += WordElts)
   1486       for (unsigned j = 0; j != WordElts; ++j)
   1487         s += ", " + utostr(i - j - 1);
   1488     s += ");";
   1489     break;
   1490   }
   1491   case OpRev64: {
   1492     unsigned DblWordElts = nElts >> (int)quad;
   1493     s += "__builtin_shufflevector(__a, __a";
   1494     for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
   1495       for (unsigned j = 0; j != DblWordElts; ++j)
   1496         s += ", " + utostr(i - j - 1);
   1497     s += ");";
   1498     break;
   1499   }
   1500   case OpAbdl: {
   1501     std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
   1502     if (typestr[0] != 'U') {
   1503       // vabd results are always unsigned and must be zero-extended.
   1504       std::string utype = "U" + typestr.str();
   1505       s += "(" + TypeString(proto[0], typestr) + ")";
   1506       abd = "(" + TypeString('d', utype) + ")" + abd;
   1507       s += Extend(utype, abd) + ";";
   1508     } else {
   1509       s += Extend(typestr, abd) + ";";
   1510     }
   1511     break;
   1512   }
   1513   case OpAba:
   1514     s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
   1515     break;
   1516   case OpAbal: {
   1517     s += "__a + ";
   1518     std::string abd = MangleName("vabd", typestr, ClassS) + "(__b, __c)";
   1519     if (typestr[0] != 'U') {
   1520       // vabd results are always unsigned and must be zero-extended.
   1521       std::string utype = "U" + typestr.str();
   1522       s += "(" + TypeString(proto[0], typestr) + ")";
   1523       abd = "(" + TypeString('d', utype) + ")" + abd;
   1524       s += Extend(utype, abd) + ";";
   1525     } else {
   1526       s += Extend(typestr, abd) + ";";
   1527     }
   1528     break;
   1529   }
   1530   case OpDiv:
   1531     s += "__a / __b;";
   1532     break;
   1533   default:
   1534     PrintFatalError("unknown OpKind!");
   1535   }
   1536   return s;
   1537 }
   1538 
   1539 static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
   1540   unsigned mod = proto[0];
   1541 
   1542   if (mod == 'v' || mod == 'f')
   1543     mod = proto[1];
   1544 
   1545   bool quad = false;
   1546   bool poly = false;
   1547   bool usgn = false;
   1548   bool scal = false;
   1549   bool cnst = false;
   1550   bool pntr = false;
   1551 
   1552   // Base type to get the type string for.
   1553   char type = ClassifyType(typestr, quad, poly, usgn);
   1554 
   1555   // Based on the modifying character, change the type and width if necessary.
   1556   type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
   1557 
   1558   NeonTypeFlags::EltType ET;
   1559   switch (type) {
   1560     case 'c':
   1561       ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8;
   1562       break;
   1563     case 's':
   1564       ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16;
   1565       break;
   1566     case 'i':
   1567       ET = NeonTypeFlags::Int32;
   1568       break;
   1569     case 'l':
   1570       ET = NeonTypeFlags::Int64;
   1571       break;
   1572     case 'h':
   1573       ET = NeonTypeFlags::Float16;
   1574       break;
   1575     case 'f':
   1576       ET = NeonTypeFlags::Float32;
   1577       break;
   1578     case 'd':
   1579       ET = NeonTypeFlags::Float64;
   1580       break;
   1581     default:
   1582       PrintFatalError("unhandled type!");
   1583   }
   1584   NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g');
   1585   return Flags.getFlags();
   1586 }
   1587 
   1588 // Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
   1589 static std::string GenBuiltin(const std::string &name, const std::string &proto,
   1590                               StringRef typestr, ClassKind ck) {
   1591   std::string s;
   1592 
   1593   // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
   1594   // sret-like argument.
   1595   bool sret = (proto[0] >= '2' && proto[0] <= '4');
   1596 
   1597   bool define = UseMacro(proto);
   1598 
   1599   // Check if the prototype has a scalar operand with the type of the vector
   1600   // elements.  If not, bitcasting the args will take care of arg checking.
   1601   // The actual signedness etc. will be taken care of with special enums.
   1602   if (proto.find('s') == std::string::npos)
   1603     ck = ClassB;
   1604 
   1605   if (proto[0] != 'v') {
   1606     std::string ts = TypeString(proto[0], typestr);
   1607 
   1608     if (define) {
   1609       if (sret)
   1610         s += ts + " r; ";
   1611       else
   1612         s += "(" + ts + ")";
   1613     } else if (sret) {
   1614       s += ts + " r; ";
   1615     } else {
   1616       s += "return (" + ts + ")";
   1617     }
   1618   }
   1619 
   1620   bool splat = proto.find('a') != std::string::npos;
   1621 
   1622   s += "__builtin_neon_";
   1623   if (splat) {
   1624     // Call the non-splat builtin: chop off the "_n" suffix from the name.
   1625     std::string vname(name, 0, name.size()-2);
   1626     s += MangleName(vname, typestr, ck);
   1627   } else {
   1628     s += MangleName(name, typestr, ck);
   1629   }
   1630   s += "(";
   1631 
   1632   // Pass the address of the return variable as the first argument to sret-like
   1633   // builtins.
   1634   if (sret)
   1635     s += "&r, ";
   1636 
   1637   char arg = 'a';
   1638   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
   1639     std::string args = std::string(&arg, 1);
   1640 
   1641     // Use the local temporaries instead of the macro arguments.
   1642     args = "__" + args;
   1643 
   1644     bool argQuad = false;
   1645     bool argPoly = false;
   1646     bool argUsgn = false;
   1647     bool argScalar = false;
   1648     bool dummy = false;
   1649     char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
   1650     argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
   1651                       dummy, dummy);
   1652 
   1653     // Handle multiple-vector values specially, emitting each subvector as an
   1654     // argument to the __builtin.
   1655     if (proto[i] >= '2' && proto[i] <= '4') {
   1656       // Check if an explicit cast is needed.
   1657       if (argType != 'c' || argPoly || argUsgn)
   1658         args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;
   1659 
   1660       for (unsigned vi = 0, ve = proto[i] - '0'; vi != ve; ++vi) {
   1661         s += args + ".val[" + utostr(vi) + "]";
   1662         if ((vi + 1) < ve)
   1663           s += ", ";
   1664       }
   1665       if ((i + 1) < e)
   1666         s += ", ";
   1667 
   1668       continue;
   1669     }
   1670 
   1671     if (splat && (i + 1) == e)
   1672       args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);
   1673 
   1674     // Check if an explicit cast is needed.
   1675     if ((splat || !argScalar) &&
   1676         ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
   1677       std::string argTypeStr = "c";
   1678       if (ck != ClassB)
   1679         argTypeStr = argType;
   1680       if (argQuad)
   1681         argTypeStr = "Q" + argTypeStr;
   1682       args = "(" + TypeString('d', argTypeStr) + ")" + args;
   1683     }
   1684 
   1685     s += args;
   1686     if ((i + 1) < e)
   1687       s += ", ";
   1688   }
   1689 
   1690   // Extra constant integer to hold type class enum for this function, e.g. s8
   1691   if (ck == ClassB)
   1692     s += ", " + utostr(GetNeonEnum(proto, typestr));
   1693 
   1694   s += ");";
   1695 
   1696   if (proto[0] != 'v' && sret) {
   1697     if (define)
   1698       s += " r;";
   1699     else
   1700       s += " return r;";
   1701   }
   1702   return s;
   1703 }
   1704 
   1705 static std::string GenBuiltinDef(const std::string &name,
   1706                                  const std::string &proto,
   1707                                  StringRef typestr, ClassKind ck) {
   1708   std::string s("BUILTIN(__builtin_neon_");
   1709 
   1710   // If all types are the same size, bitcasting the args will take care
   1711   // of arg checking.  The actual signedness etc. will be taken care of with
   1712   // special enums.
   1713   if (proto.find('s') == std::string::npos)
   1714     ck = ClassB;
   1715 
   1716   s += MangleName(name, typestr, ck);
   1717   s += ", \"";
   1718 
   1719   for (unsigned i = 0, e = proto.size(); i != e; ++i)
   1720     s += BuiltinTypeString(proto[i], typestr, ck, i == 0);
   1721 
   1722   // Extra constant integer to hold type class enum for this function, e.g. s8
   1723   if (ck == ClassB)
   1724     s += "i";
   1725 
   1726   s += "\", \"n\")";
   1727   return s;
   1728 }
   1729 
   1730 static std::string GenIntrinsic(const std::string &name,
   1731                                 const std::string &proto,
   1732                                 StringRef outTypeStr, StringRef inTypeStr,
   1733                                 OpKind kind, ClassKind classKind) {
   1734   assert(!proto.empty() && "");
   1735   bool define = UseMacro(proto) && kind != OpUnavailable;
   1736   std::string s;
   1737 
   1738   // static always inline + return type
   1739   if (define)
   1740     s += "#define ";
   1741   else
   1742     s += "__ai " + TypeString(proto[0], outTypeStr) + " ";
   1743 
   1744   // Function name with type suffix
   1745   std::string mangledName = MangleName(name, outTypeStr, ClassS);
   1746   if (outTypeStr != inTypeStr) {
   1747     // If the input type is different (e.g., for vreinterpret), append a suffix
   1748     // for the input type.  String off a "Q" (quad) prefix so that MangleName
   1749     // does not insert another "q" in the name.
   1750     unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
   1751     StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
   1752     mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
   1753   }
   1754   s += mangledName;
   1755 
   1756   // Function arguments
   1757   s += GenArgs(proto, inTypeStr);
   1758 
   1759   // Definition.
   1760   if (define) {
   1761     s += " __extension__ ({ \\\n  ";
   1762     s += GenMacroLocals(proto, inTypeStr);
   1763   } else if (kind == OpUnavailable) {
   1764     s += " __attribute__((unavailable));\n";
   1765     return s;
   1766   } else
   1767     s += " {\n  ";
   1768 
   1769   if (kind != OpNone)
   1770     s += GenOpString(kind, proto, outTypeStr);
   1771   else
   1772     s += GenBuiltin(name, proto, outTypeStr, classKind);
   1773   if (define)
   1774     s += " })";
   1775   else
   1776     s += " }";
   1777   s += "\n";
   1778   return s;
   1779 }
   1780 
   1781 /// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
   1782 /// is comprised of type definitions and function declarations.
   1783 void NeonEmitter::run(raw_ostream &OS) {
   1784   OS <<
   1785     "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
   1786     "---===\n"
   1787     " *\n"
   1788     " * Permission is hereby granted, free of charge, to any person obtaining "
   1789     "a copy\n"
   1790     " * of this software and associated documentation files (the \"Software\"),"
   1791     " to deal\n"
   1792     " * in the Software without restriction, including without limitation the "
   1793     "rights\n"
   1794     " * to use, copy, modify, merge, publish, distribute, sublicense, "
   1795     "and/or sell\n"
   1796     " * copies of the Software, and to permit persons to whom the Software is\n"
   1797     " * furnished to do so, subject to the following conditions:\n"
   1798     " *\n"
   1799     " * The above copyright notice and this permission notice shall be "
   1800     "included in\n"
   1801     " * all copies or substantial portions of the Software.\n"
   1802     " *\n"
   1803     " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
   1804     "EXPRESS OR\n"
   1805     " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
   1806     "MERCHANTABILITY,\n"
   1807     " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
   1808     "SHALL THE\n"
   1809     " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
   1810     "OTHER\n"
   1811     " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
   1812     "ARISING FROM,\n"
   1813     " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
   1814     "DEALINGS IN\n"
   1815     " * THE SOFTWARE.\n"
   1816     " *\n"
   1817     " *===--------------------------------------------------------------------"
   1818     "---===\n"
   1819     " */\n\n";
   1820 
   1821   OS << "#ifndef __ARM_NEON_H\n";
   1822   OS << "#define __ARM_NEON_H\n\n";
   1823 
   1824   OS << "#if !defined(__ARM_NEON__) && !defined(__AARCH_FEATURE_ADVSIMD)\n";
   1825   OS << "#error \"NEON support not enabled\"\n";
   1826   OS << "#endif\n\n";
   1827 
   1828   OS << "#include <stdint.h>\n\n";
   1829 
   1830   // Emit NEON-specific scalar typedefs.
   1831   OS << "typedef float float32_t;\n";
   1832   OS << "typedef __fp16 float16_t;\n";
   1833 
   1834   OS << "#ifdef __aarch64__\n";
   1835   OS << "typedef double float64_t;\n";
   1836   OS << "#endif\n\n";
   1837 
   1838   // For now, signedness of polynomial types depends on target
   1839   OS << "#ifdef __aarch64__\n";
   1840   OS << "typedef uint8_t poly8_t;\n";
   1841   OS << "typedef uint16_t poly16_t;\n";
   1842   OS << "#else\n";
   1843   OS << "typedef int8_t poly8_t;\n";
   1844   OS << "typedef int16_t poly16_t;\n";
   1845   OS << "#endif\n";
   1846 
   1847   // Emit Neon vector typedefs.
   1848   std::string TypedefTypes(
   1849       "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfQdPcQPcPsQPs");
   1850   SmallVector<StringRef, 24> TDTypeVec;
   1851   ParseTypes(0, TypedefTypes, TDTypeVec);
   1852 
   1853   // Emit vector typedefs.
   1854   for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
   1855     bool dummy, quad = false, poly = false;
   1856     char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
   1857     bool isA64 = false;
   1858 
   1859     if (type == 'd' && quad)
   1860       isA64 = true;
   1861 
   1862     if (isA64)
   1863       OS << "#ifdef __aarch64__\n";
   1864 
   1865     if (poly)
   1866       OS << "typedef __attribute__((neon_polyvector_type(";
   1867     else
   1868       OS << "typedef __attribute__((neon_vector_type(";
   1869 
   1870     unsigned nElts = GetNumElements(TDTypeVec[i], quad);
   1871     OS << utostr(nElts) << "))) ";
   1872     if (nElts < 10)
   1873       OS << " ";
   1874 
   1875     OS << TypeString('s', TDTypeVec[i]);
   1876     OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";
   1877 
   1878     if (isA64)
   1879       OS << "#endif\n";
   1880   }
   1881   OS << "\n";
   1882 
   1883   // Emit struct typedefs.
   1884   for (unsigned vi = 2; vi != 5; ++vi) {
   1885     for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
   1886       bool dummy, quad = false, poly = false;
   1887       char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
   1888       bool isA64 = false;
   1889 
   1890       if (type == 'd' && quad)
   1891         isA64 = true;
   1892 
   1893       if (isA64)
   1894         OS << "#ifdef __aarch64__\n";
   1895 
   1896       std::string ts = TypeString('d', TDTypeVec[i]);
   1897       std::string vs = TypeString('0' + vi, TDTypeVec[i]);
   1898       OS << "typedef struct " << vs << " {\n";
   1899       OS << "  " << ts << " val";
   1900       OS << "[" << utostr(vi) << "]";
   1901       OS << ";\n} ";
   1902       OS << vs << ";\n";
   1903 
   1904       if (isA64)
   1905         OS << "#endif\n";
   1906 
   1907       OS << "\n";
   1908     }
   1909   }
   1910 
   1911   OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";
   1912 
   1913   std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
   1914 
   1915   StringMap<ClassKind> EmittedMap;
   1916 
   1917   // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
   1918   // intrinsics.  (Some of the saturating multiply instructions are also
   1919   // used to implement the corresponding "_lane" variants, but tablegen
   1920   // sorts the records into alphabetical order so that the "_lane" variants
   1921   // come after the intrinsics they use.)
   1922   emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap);
   1923   emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap);
   1924   emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap);
   1925 
   1926   // ARM intrinsics must be emitted before AArch64 intrinsics to ensure
   1927   // common intrinsics appear only once in the output stream.
   1928   // The check for uniquiness is done in emitIntrinsic.
   1929   // Emit ARM intrinsics.
   1930   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
   1931     Record *R = RV[i];
   1932 
   1933     // Skip AArch64 intrinsics; they will be emitted at the end.
   1934     bool isA64 = R->getValueAsBit("isA64");
   1935     if (isA64)
   1936       continue;
   1937 
   1938     if (R->getName() != "VMOVL" && R->getName() != "VMULL" &&
   1939         R->getName() != "VABD")
   1940       emitIntrinsic(OS, R, EmittedMap);
   1941   }
   1942 
   1943   // Emit AArch64-specific intrinsics.
   1944   OS << "#ifdef __aarch64__\n";
   1945 
   1946   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
   1947     Record *R = RV[i];
   1948 
   1949     // Skip ARM intrinsics already included above.
   1950     bool isA64 = R->getValueAsBit("isA64");
   1951     if (!isA64)
   1952       continue;
   1953 
   1954     emitIntrinsic(OS, R, EmittedMap);
   1955   }
   1956 
   1957   OS << "#endif\n\n";
   1958 
   1959   OS << "#undef __ai\n\n";
   1960   OS << "#endif /* __ARM_NEON_H */\n";
   1961 }
   1962 
   1963 /// emitIntrinsic - Write out the arm_neon.h header file definitions for the
   1964 /// intrinsics specified by record R checking for intrinsic uniqueness.
   1965 void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R,
   1966                                 StringMap<ClassKind> &EmittedMap) {
   1967   std::string name = R->getValueAsString("Name");
   1968   std::string Proto = R->getValueAsString("Prototype");
   1969   std::string Types = R->getValueAsString("Types");
   1970 
   1971   SmallVector<StringRef, 16> TypeVec;
   1972   ParseTypes(R, Types, TypeVec);
   1973 
   1974   OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
   1975 
   1976   ClassKind classKind = ClassNone;
   1977   if (R->getSuperClasses().size() >= 2)
   1978     classKind = ClassMap[R->getSuperClasses()[1]];
   1979   if (classKind == ClassNone && kind == OpNone)
   1980     PrintFatalError(R->getLoc(), "Builtin has no class kind");
   1981 
   1982   for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
   1983     if (kind == OpReinterpret) {
   1984       bool outQuad = false;
   1985       bool dummy = false;
   1986       (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
   1987       for (unsigned srcti = 0, srcte = TypeVec.size();
   1988            srcti != srcte; ++srcti) {
   1989         bool inQuad = false;
   1990         (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
   1991         if (srcti == ti || inQuad != outQuad)
   1992           continue;
   1993         std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
   1994                                      OpCast, ClassS);
   1995         if (EmittedMap.count(s))
   1996           continue;
   1997         EmittedMap[s] = ClassS;
   1998         OS << s;
   1999       }
   2000     } else {
   2001       std::string s =
   2002           GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind);
   2003       if (EmittedMap.count(s))
   2004         continue;
   2005       EmittedMap[s] = classKind;
   2006       OS << s;
   2007     }
   2008   }
   2009   OS << "\n";
   2010 }
   2011 
   2012 static unsigned RangeFromType(const char mod, StringRef typestr) {
   2013   // base type to get the type string for.
   2014   bool quad = false, dummy = false;
   2015   char type = ClassifyType(typestr, quad, dummy, dummy);
   2016   type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);
   2017 
   2018   switch (type) {
   2019     case 'c':
   2020       return (8 << (int)quad) - 1;
   2021     case 'h':
   2022     case 's':
   2023       return (4 << (int)quad) - 1;
   2024     case 'f':
   2025     case 'i':
   2026       return (2 << (int)quad) - 1;
   2027     case 'l':
   2028       return (1 << (int)quad) - 1;
   2029     default:
   2030       PrintFatalError("unhandled type!");
   2031   }
   2032 }
   2033 
   2034 /// Generate the ARM and AArch64 intrinsic range checking code for
   2035 /// shift/lane immediates, checking for unique declarations.
   2036 void
   2037 NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
   2038                                         StringMap<ClassKind> &A64IntrinsicMap,
   2039                                         bool isA64RangeCheck) {
   2040   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
   2041   StringMap<OpKind> EmittedMap;
   2042 
   2043   // Generate the intrinsic range checking code for shift/lane immediates.
   2044   if (isA64RangeCheck)
   2045     OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n";
   2046   else
   2047     OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
   2048 
   2049   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
   2050     Record *R = RV[i];
   2051 
   2052     OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
   2053     if (k != OpNone)
   2054       continue;
   2055 
   2056     std::string name = R->getValueAsString("Name");
   2057     std::string Proto = R->getValueAsString("Prototype");
   2058     std::string Types = R->getValueAsString("Types");
   2059 
   2060     // Functions with 'a' (the splat code) in the type prototype should not get
   2061     // their own builtin as they use the non-splat variant.
   2062     if (Proto.find('a') != std::string::npos)
   2063       continue;
   2064 
   2065     // Functions which do not have an immediate do not need to have range
   2066     // checking code emitted.
   2067     size_t immPos = Proto.find('i');
   2068     if (immPos == std::string::npos)
   2069       continue;
   2070 
   2071     SmallVector<StringRef, 16> TypeVec;
   2072     ParseTypes(R, Types, TypeVec);
   2073 
   2074     if (R->getSuperClasses().size() < 2)
   2075       PrintFatalError(R->getLoc(), "Builtin has no class kind");
   2076 
   2077     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
   2078 
   2079     // Do not include AArch64 range checks if not generating code for AArch64.
   2080     bool isA64 = R->getValueAsBit("isA64");
   2081     if (!isA64RangeCheck && isA64)
   2082       continue;
   2083 
   2084     // Include ARM range checks in AArch64 but only if ARM intrinsics are not
   2085     // redefined by AArch64 to handle new types.
   2086     if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(name)) {
   2087       ClassKind &A64CK = A64IntrinsicMap[name];
   2088       if (A64CK == ck && ck != ClassNone)
   2089         continue;
   2090     }
   2091 
   2092     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
   2093       std::string namestr, shiftstr, rangestr;
   2094 
   2095       if (R->getValueAsBit("isVCVT_N")) {
   2096         // VCVT between floating- and fixed-point values takes an immediate
   2097         // in the range 1 to 32.
   2098         ck = ClassB;
   2099         rangestr = "l = 1; u = 31"; // upper bound = l + u
   2100       } else if (Proto.find('s') == std::string::npos) {
   2101         // Builtins which are overloaded by type will need to have their upper
   2102         // bound computed at Sema time based on the type constant.
   2103         ck = ClassB;
   2104         if (R->getValueAsBit("isShift")) {
   2105           shiftstr = ", true";
   2106 
   2107           // Right shifts have an 'r' in the name, left shifts do not.
   2108           if (name.find('r') != std::string::npos)
   2109             rangestr = "l = 1; ";
   2110         }
   2111         rangestr += "u = RFT(TV" + shiftstr + ")";
   2112       } else {
   2113         // The immediate generally refers to a lane in the preceding argument.
   2114         assert(immPos > 0 && "unexpected immediate operand");
   2115         rangestr =
   2116             "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti]));
   2117       }
   2118       // Make sure cases appear only once by uniquing them in a string map.
   2119       namestr = MangleName(name, TypeVec[ti], ck);
   2120       if (EmittedMap.count(namestr))
   2121         continue;
   2122       EmittedMap[namestr] = OpNone;
   2123 
   2124       // Calculate the index of the immediate that should be range checked.
   2125       unsigned immidx = 0;
   2126 
   2127       // Builtins that return a struct of multiple vectors have an extra
   2128       // leading arg for the struct return.
   2129       if (Proto[0] >= '2' && Proto[0] <= '4')
   2130         ++immidx;
   2131 
   2132       // Add one to the index for each argument until we reach the immediate
   2133       // to be checked.  Structs of vectors are passed as multiple arguments.
   2134       for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
   2135         switch (Proto[ii]) {
   2136         default:
   2137           immidx += 1;
   2138           break;
   2139         case '2':
   2140           immidx += 2;
   2141           break;
   2142         case '3':
   2143           immidx += 3;
   2144           break;
   2145         case '4':
   2146           immidx += 4;
   2147           break;
   2148         case 'i':
   2149           ie = ii + 1;
   2150           break;
   2151         }
   2152       }
   2153       if (isA64RangeCheck)
   2154         OS << "case AArch64::BI__builtin_neon_";
   2155       else
   2156         OS << "case ARM::BI__builtin_neon_";
   2157       OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; "
   2158          << rangestr << "; break;\n";
   2159     }
   2160   }
   2161   OS << "#endif\n\n";
   2162 }
   2163 
   2164 /// Generate the ARM and AArch64 overloaded type checking code for
   2165 /// SemaChecking.cpp, checking for unique builtin declarations.
   2166 void
   2167 NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
   2168                                       StringMap<ClassKind> &A64IntrinsicMap,
   2169                                       bool isA64TypeCheck) {
   2170   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
   2171   StringMap<OpKind> EmittedMap;
   2172 
   2173   // Generate the overloaded type checking code for SemaChecking.cpp
   2174   if (isA64TypeCheck)
   2175     OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n";
   2176   else
   2177     OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
   2178 
   2179   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
   2180     Record *R = RV[i];
   2181     OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
   2182     if (k != OpNone)
   2183       continue;
   2184 
   2185     std::string Proto = R->getValueAsString("Prototype");
   2186     std::string Types = R->getValueAsString("Types");
   2187     std::string name = R->getValueAsString("Name");
   2188 
   2189     // Functions with 'a' (the splat code) in the type prototype should not get
   2190     // their own builtin as they use the non-splat variant.
   2191     if (Proto.find('a') != std::string::npos)
   2192       continue;
   2193 
   2194     // Functions which have a scalar argument cannot be overloaded, no need to
   2195     // check them if we are emitting the type checking code.
   2196     if (Proto.find('s') != std::string::npos)
   2197       continue;
   2198 
   2199     SmallVector<StringRef, 16> TypeVec;
   2200     ParseTypes(R, Types, TypeVec);
   2201 
   2202     if (R->getSuperClasses().size() < 2)
   2203       PrintFatalError(R->getLoc(), "Builtin has no class kind");
   2204 
   2205     // Do not include AArch64 type checks if not generating code for AArch64.
   2206     bool isA64 = R->getValueAsBit("isA64");
   2207     if (!isA64TypeCheck && isA64)
   2208       continue;
   2209 
   2210     // Include ARM  type check in AArch64 but only if ARM intrinsics
   2211     // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
   2212     // redefined in AArch64 to handle an additional 2 x f64 type.
   2213     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
   2214     if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(name)) {
   2215       ClassKind &A64CK = A64IntrinsicMap[name];
   2216       if (A64CK == ck && ck != ClassNone)
   2217         continue;
   2218     }
   2219 
   2220     int si = -1, qi = -1;
   2221     uint64_t mask = 0, qmask = 0;
   2222     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
   2223       // Generate the switch case(s) for this builtin for the type validation.
   2224       bool quad = false, poly = false, usgn = false;
   2225       (void) ClassifyType(TypeVec[ti], quad, poly, usgn);
   2226 
   2227       if (quad) {
   2228         qi = ti;
   2229         qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
   2230       } else {
   2231         si = ti;
   2232         mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
   2233       }
   2234     }
   2235 
   2236     // Check if the builtin function has a pointer or const pointer argument.
   2237     int PtrArgNum = -1;
   2238     bool HasConstPtr = false;
   2239     for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
   2240       char ArgType = Proto[arg];
   2241       if (ArgType == 'c') {
   2242         HasConstPtr = true;
   2243         PtrArgNum = arg - 1;
   2244         break;
   2245       }
   2246       if (ArgType == 'p') {
   2247         PtrArgNum = arg - 1;
   2248         break;
   2249       }
   2250     }
   2251     // For sret builtins, adjust the pointer argument index.
   2252     if (PtrArgNum >= 0 && (Proto[0] >= '2' && Proto[0] <= '4'))
   2253       PtrArgNum += 1;
   2254 
   2255     // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
   2256     // and vst1_lane intrinsics.  Using a pointer to the vector element
   2257     // type with one of those operations causes codegen to select an aligned
   2258     // load/store instruction.  If you want an unaligned operation,
   2259     // the pointer argument needs to have less alignment than element type,
   2260     // so just accept any pointer type.
   2261     if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
   2262       PtrArgNum = -1;
   2263       HasConstPtr = false;
   2264     }
   2265 
   2266     if (mask) {
   2267       if (isA64TypeCheck)
   2268         OS << "case AArch64::BI__builtin_neon_";
   2269       else
   2270         OS << "case ARM::BI__builtin_neon_";
   2271       OS << MangleName(name, TypeVec[si], ClassB) << ": mask = "
   2272          << "0x" << utohexstr(mask) << "ULL";
   2273       if (PtrArgNum >= 0)
   2274         OS << "; PtrArgNum = " << PtrArgNum;
   2275       if (HasConstPtr)
   2276         OS << "; HasConstPtr = true";
   2277       OS << "; break;\n";
   2278     }
   2279     if (qmask) {
   2280       if (isA64TypeCheck)
   2281         OS << "case AArch64::BI__builtin_neon_";
   2282       else
   2283         OS << "case ARM::BI__builtin_neon_";
   2284       OS << MangleName(name, TypeVec[qi], ClassB) << ": mask = "
   2285          << "0x" << utohexstr(qmask) << "ULL";
   2286       if (PtrArgNum >= 0)
   2287         OS << "; PtrArgNum = " << PtrArgNum;
   2288       if (HasConstPtr)
   2289         OS << "; HasConstPtr = true";
   2290       OS << "; break;\n";
   2291     }
   2292   }
   2293   OS << "#endif\n\n";
   2294 }
   2295 
   2296 /// genBuiltinsDef: Generate the BuiltinsARM.def and  BuiltinsAArch64.def
   2297 /// declaration of builtins, checking for unique builtin declarations.
   2298 void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
   2299                                  StringMap<ClassKind> &A64IntrinsicMap,
   2300                                  bool isA64GenBuiltinDef) {
   2301   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
   2302   StringMap<OpKind> EmittedMap;
   2303 
   2304   // Generate BuiltinsARM.def and BuiltinsAArch64.def
   2305   if (isA64GenBuiltinDef)
   2306     OS << "#ifdef GET_NEON_AARCH64_BUILTINS\n";
   2307   else
   2308     OS << "#ifdef GET_NEON_BUILTINS\n";
   2309 
   2310   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
   2311     Record *R = RV[i];
   2312     OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
   2313     if (k != OpNone)
   2314       continue;
   2315 
   2316     std::string Proto = R->getValueAsString("Prototype");
   2317     std::string name = R->getValueAsString("Name");
   2318 
   2319     // Functions with 'a' (the splat code) in the type prototype should not get
   2320     // their own builtin as they use the non-splat variant.
   2321     if (Proto.find('a') != std::string::npos)
   2322       continue;
   2323 
   2324     std::string Types = R->getValueAsString("Types");
   2325     SmallVector<StringRef, 16> TypeVec;
   2326     ParseTypes(R, Types, TypeVec);
   2327 
   2328     if (R->getSuperClasses().size() < 2)
   2329       PrintFatalError(R->getLoc(), "Builtin has no class kind");
   2330 
   2331     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
   2332 
   2333     // Do not include AArch64 BUILTIN() macros if not generating
   2334     // code for AArch64
   2335     bool isA64 = R->getValueAsBit("isA64");
   2336     if (!isA64GenBuiltinDef && isA64)
   2337       continue;
   2338 
   2339     // Include ARM  BUILTIN() macros  in AArch64 but only if ARM intrinsics
   2340     // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
   2341     // redefined in AArch64 to handle an additional 2 x f64 type.
   2342     if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(name)) {
   2343       ClassKind &A64CK = A64IntrinsicMap[name];
   2344       if (A64CK == ck && ck != ClassNone)
   2345         continue;
   2346     }
   2347 
   2348     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
   2349       // Generate the declaration for this builtin, ensuring
   2350       // that each unique BUILTIN() macro appears only once in the output
   2351       // stream.
   2352       std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
   2353       if (EmittedMap.count(bd))
   2354         continue;
   2355 
   2356       EmittedMap[bd] = OpNone;
   2357       OS << bd << "\n";
   2358     }
   2359   }
   2360   OS << "#endif\n\n";
   2361 }
   2362 
   2363 /// runHeader - Emit a file with sections defining:
   2364 /// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
   2365 /// 2. the SemaChecking code for the type overload checking.
   2366 /// 3. the SemaChecking code for validation of intrinsic immediate arguments.
   2367 void NeonEmitter::runHeader(raw_ostream &OS) {
   2368   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
   2369 
   2370   // build a map of AArch64 intriniscs to be used in uniqueness checks.
   2371   StringMap<ClassKind> A64IntrinsicMap;
   2372   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
   2373     Record *R = RV[i];
   2374 
   2375     bool isA64 = R->getValueAsBit("isA64");
   2376     if (!isA64)
   2377       continue;
   2378 
   2379     ClassKind CK = ClassNone;
   2380     if (R->getSuperClasses().size() >= 2)
   2381       CK = ClassMap[R->getSuperClasses()[1]];
   2382 
   2383     std::string Name = R->getValueAsString("Name");
   2384     if (A64IntrinsicMap.count(Name))
   2385       continue;
   2386     A64IntrinsicMap[Name] = CK;
   2387   }
   2388 
   2389   // Generate BuiltinsARM.def for ARM
   2390   genBuiltinsDef(OS, A64IntrinsicMap, false);
   2391 
   2392   // Generate BuiltinsAArch64.def for AArch64
   2393   genBuiltinsDef(OS, A64IntrinsicMap, true);
   2394 
   2395   // Generate ARM overloaded type checking code for SemaChecking.cpp
   2396   genOverloadTypeCheckCode(OS, A64IntrinsicMap, false);
   2397 
   2398   // Generate AArch64 overloaded type checking code for SemaChecking.cpp
   2399   genOverloadTypeCheckCode(OS, A64IntrinsicMap, true);
   2400 
   2401   // Generate ARM range checking code for shift/lane immediates.
   2402   genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, false);
   2403 
   2404   // Generate the AArch64 range checking code for shift/lane immediates.
   2405   genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, true);
   2406 }
   2407 
   2408 /// GenTest - Write out a test for the intrinsic specified by the name and
   2409 /// type strings, including the embedded patterns for FileCheck to match.
   2410 static std::string GenTest(const std::string &name,
   2411                            const std::string &proto,
   2412                            StringRef outTypeStr, StringRef inTypeStr,
   2413                            bool isShift, bool isHiddenLOp,
   2414                            ClassKind ck, const std::string &InstName,
   2415 						   bool isA64,
   2416 						   std::string & testFuncProto) {
   2417   assert(!proto.empty() && "");
   2418   std::string s;
   2419 
   2420   // Function name with type suffix
   2421   std::string mangledName = MangleName(name, outTypeStr, ClassS);
   2422   if (outTypeStr != inTypeStr) {
   2423     // If the input type is different (e.g., for vreinterpret), append a suffix
   2424     // for the input type.  String off a "Q" (quad) prefix so that MangleName
   2425     // does not insert another "q" in the name.
   2426     unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
   2427     StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
   2428     mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
   2429   }
   2430 
   2431   // todo: GenerateChecksForIntrinsic does not generate CHECK
   2432   // for aarch64 instructions yet
   2433   std::vector<std::string> FileCheckPatterns;
   2434   if (!isA64) {
   2435 	GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
   2436 							   isHiddenLOp, FileCheckPatterns);
   2437 	s+= "// CHECK_ARM: test_" + mangledName + "\n";
   2438   }
   2439   s += "// CHECK_AARCH64: test_" + mangledName + "\n";
   2440 
   2441   // Emit the FileCheck patterns.
   2442   // If for any reason we do not want to emit a check, mangledInst
   2443   // will be the empty string.
   2444   if (FileCheckPatterns.size()) {
   2445     for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(),
   2446                                                   e = FileCheckPatterns.end();
   2447          i != e;
   2448          ++i) {
   2449       s += "// CHECK_ARM: " + *i + "\n";
   2450     }
   2451   }
   2452 
   2453   // Emit the start of the test function.
   2454 
   2455   testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
   2456   char arg = 'a';
   2457   std::string comma;
   2458   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
   2459     // Do not create arguments for values that must be immediate constants.
   2460     if (proto[i] == 'i')
   2461       continue;
   2462     testFuncProto += comma + TypeString(proto[i], inTypeStr) + " ";
   2463     testFuncProto.push_back(arg);
   2464     comma = ", ";
   2465   }
   2466   testFuncProto += ")";
   2467 
   2468   s+= testFuncProto;
   2469   s+= " {\n  ";
   2470 
   2471   if (proto[0] != 'v')
   2472     s += "return ";
   2473   s += mangledName + "(";
   2474   arg = 'a';
   2475   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
   2476     if (proto[i] == 'i') {
   2477       // For immediate operands, test the maximum value.
   2478       if (isShift)
   2479         s += "1"; // FIXME
   2480       else
   2481         // The immediate generally refers to a lane in the preceding argument.
   2482         s += utostr(RangeFromType(proto[i-1], inTypeStr));
   2483     } else {
   2484       s.push_back(arg);
   2485     }
   2486     if ((i + 1) < e)
   2487       s += ", ";
   2488   }
   2489   s += ");\n}\n\n";
   2490   return s;
   2491 }
   2492 
   2493 /// Write out all intrinsic tests for the specified target, checking
   2494 /// for intrinsic test uniqueness.
   2495 void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
   2496                                 bool isA64GenTest) {
   2497   if (isA64GenTest)
   2498 	OS << "#ifdef __aarch64__\n";
   2499 
   2500   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
   2501   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
   2502     Record *R = RV[i];
   2503     std::string name = R->getValueAsString("Name");
   2504     std::string Proto = R->getValueAsString("Prototype");
   2505     std::string Types = R->getValueAsString("Types");
   2506     bool isShift = R->getValueAsBit("isShift");
   2507     std::string InstName = R->getValueAsString("InstName");
   2508     bool isHiddenLOp = R->getValueAsBit("isHiddenLInst");
   2509     bool isA64 = R->getValueAsBit("isA64");
   2510 
   2511     // do not include AArch64 intrinsic test if not generating
   2512     // code for AArch64
   2513     if (!isA64GenTest && isA64)
   2514       continue;
   2515 
   2516     SmallVector<StringRef, 16> TypeVec;
   2517     ParseTypes(R, Types, TypeVec);
   2518 
   2519     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
   2520     OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
   2521     if (kind == OpUnavailable)
   2522       continue;
   2523     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
   2524       if (kind == OpReinterpret) {
   2525         bool outQuad = false;
   2526         bool dummy = false;
   2527         (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
   2528         for (unsigned srcti = 0, srcte = TypeVec.size();
   2529              srcti != srcte; ++srcti) {
   2530           bool inQuad = false;
   2531           (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
   2532           if (srcti == ti || inQuad != outQuad)
   2533             continue;
   2534 		  std::string testFuncProto;
   2535           std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
   2536                                   isShift, isHiddenLOp, ck, InstName, isA64,
   2537 								  testFuncProto);
   2538           if (EmittedMap.count(testFuncProto))
   2539             continue;
   2540           EmittedMap[testFuncProto] = kind;
   2541           OS << s << "\n";
   2542         }
   2543       } else {
   2544 		std::string testFuncProto;
   2545         std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift,
   2546                                 isHiddenLOp, ck, InstName, isA64, testFuncProto);
   2547         if (EmittedMap.count(testFuncProto))
   2548           continue;
   2549         EmittedMap[testFuncProto] = kind;
   2550         OS << s << "\n";
   2551       }
   2552     }
   2553   }
   2554 
   2555   if (isA64GenTest)
   2556 	OS << "#endif\n";
   2557 }
   2558 /// runTests - Write out a complete set of tests for all of the Neon
   2559 /// intrinsics.
   2560 void NeonEmitter::runTests(raw_ostream &OS) {
   2561   OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi "
   2562         "apcs-gnu\\\n"
   2563         "// RUN:  -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
   2564         "// RUN:  | FileCheck %s -check-prefix=CHECK_ARM\n"
   2565 		"\n"
   2566 	    "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n"
   2567 	    "// RUN -target-feature +neon  -ffreestanding -S -o - %s \\\n"
   2568 	    "// RUN:  | FileCheck %s -check-prefix=CHECK_AARCH64\n"
   2569         "\n"
   2570         "// REQUIRES: long_tests\n"
   2571         "\n"
   2572         "#include <arm_neon.h>\n"
   2573         "\n";
   2574 
   2575   // ARM tests must be emitted before AArch64 tests to ensure
   2576   // tests for intrinsics that are common to ARM and AArch64
   2577   // appear only once in the output stream.
   2578   // The check for uniqueness is done in genTargetTest.
   2579   StringMap<OpKind> EmittedMap;
   2580 
   2581   genTargetTest(OS, EmittedMap, false);
   2582 
   2583   genTargetTest(OS, EmittedMap, true);
   2584 }
   2585 
   2586 namespace clang {
   2587 void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
   2588   NeonEmitter(Records).run(OS);
   2589 }
   2590 void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
   2591   NeonEmitter(Records).runHeader(OS);
   2592 }
   2593 void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
   2594   NeonEmitter(Records).runTests(OS);
   2595 }
   2596 } // End namespace clang
   2597