Home | History | Annotate | Download | only in TableGen
      1 //===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This tablegen backend is responsible for emitting arm_neon.h, which includes
     11 // a declaration and definition of each function specified by the ARM NEON
     12 // compiler interface.  See ARM document DUI0348B.
     13 //
     14 // Each NEON instruction is implemented in terms of 1 or more functions which
     15 // are suffixed with the element type of the input vectors.  Functions may be
     16 // implemented in terms of generic vector operations such as +, *, -, etc. or
     17 // by calling a __builtin_-prefixed function which will be handled by clang's
     18 // CodeGen library.
     19 //
     20 // Additional validation code can be generated by this file when runHeader() is
     21 // called, rather than the normal run() entry point.
     22 //
     23 // See also the documentation in include/clang/Basic/arm_neon.td.
     24 //
     25 //===----------------------------------------------------------------------===//
     26 
     27 #include "llvm/ADT/DenseMap.h"
     28 #include "llvm/ADT/SmallString.h"
     29 #include "llvm/ADT/SmallVector.h"
     30 #include "llvm/ADT/StringExtras.h"
     31 #include "llvm/ADT/StringMap.h"
     32 #include "llvm/Support/ErrorHandling.h"
     33 #include "llvm/TableGen/Error.h"
     34 #include "llvm/TableGen/Record.h"
     35 #include "llvm/TableGen/SetTheory.h"
     36 #include "llvm/TableGen/TableGenBackend.h"
     37 #include <string>
     38 #include <sstream>
     39 #include <vector>
     40 #include <map>
     41 #include <algorithm>
     42 using namespace llvm;
     43 
     44 namespace {
     45 
     46 // While globals are generally bad, this one allows us to perform assertions
     47 // liberally and somehow still trace them back to the def they indirectly
     48 // came from.
     49 static Record *CurrentRecord = nullptr;
     50 static void assert_with_loc(bool Assertion, const std::string &Str) {
     51   if (!Assertion) {
     52     if (CurrentRecord)
     53       PrintFatalError(CurrentRecord->getLoc(), Str);
     54     else
     55       PrintFatalError(Str);
     56   }
     57 }
     58 
     59 enum ClassKind {
     60   ClassNone,
     61   ClassI,     // generic integer instruction, e.g., "i8" suffix
     62   ClassS,     // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
     63   ClassW,     // width-specific instruction, e.g., "8" suffix
     64   ClassB,     // bitcast arguments with enum argument to specify type
     65   ClassL,     // Logical instructions which are op instructions
     66               // but we need to not emit any suffix for in our
     67               // tests.
     68   ClassNoTest // Instructions which we do not test since they are
     69               // not TRUE instructions.
     70 };
     71 
     72 /// NeonTypeFlags - Flags to identify the types for overloaded Neon
     73 /// builtins.  These must be kept in sync with the flags in
     74 /// include/clang/Basic/TargetBuiltins.h.
     75 namespace NeonTypeFlags {
     76 enum { EltTypeMask = 0xf, UnsignedFlag = 0x10, QuadFlag = 0x20 };
     77 
     78 enum EltType {
     79   Int8,
     80   Int16,
     81   Int32,
     82   Int64,
     83   Poly8,
     84   Poly16,
     85   Poly64,
     86   Poly128,
     87   Float16,
     88   Float32,
     89   Float64
     90 };
     91 }
     92 
     93 class Intrinsic;
     94 class NeonEmitter;
     95 class Type;
     96 class Variable;
     97 
     98 //===----------------------------------------------------------------------===//
     99 // TypeSpec
    100 //===----------------------------------------------------------------------===//
    101 
    102 /// A TypeSpec is just a simple wrapper around a string, but gets its own type
    103 /// for strong typing purposes.
    104 ///
    105 /// A TypeSpec can be used to create a type.
    106 class TypeSpec : public std::string {
    107 public:
    108   static std::vector<TypeSpec> fromTypeSpecs(StringRef Str) {
    109     std::vector<TypeSpec> Ret;
    110     TypeSpec Acc;
    111     for (char I : Str.str()) {
    112       if (islower(I)) {
    113         Acc.push_back(I);
    114         Ret.push_back(TypeSpec(Acc));
    115         Acc.clear();
    116       } else {
    117         Acc.push_back(I);
    118       }
    119     }
    120     return Ret;
    121   }
    122 };
    123 
    124 //===----------------------------------------------------------------------===//
    125 // Type
    126 //===----------------------------------------------------------------------===//
    127 
    128 /// A Type. Not much more to say here.
    129 class Type {
    130 private:
    131   TypeSpec TS;
    132 
    133   bool Float, Signed, Void, Poly, Constant, Pointer;
    134   // ScalarForMangling and NoManglingQ are really not suited to live here as
    135   // they are not related to the type. But they live in the TypeSpec (not the
    136   // prototype), so this is really the only place to store them.
    137   bool ScalarForMangling, NoManglingQ;
    138   unsigned Bitwidth, ElementBitwidth, NumVectors;
    139 
    140 public:
    141   Type()
    142       : Float(false), Signed(false), Void(true), Poly(false), Constant(false),
    143         Pointer(false), ScalarForMangling(false), NoManglingQ(false),
    144         Bitwidth(0), ElementBitwidth(0), NumVectors(0) {}
    145 
    146   Type(TypeSpec TS, char CharMod)
    147       : TS(TS), Float(false), Signed(false), Void(false), Poly(false),
    148         Constant(false), Pointer(false), ScalarForMangling(false),
    149         NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) {
    150     applyModifier(CharMod);
    151   }
    152 
    153   /// Returns a type representing "void".
    154   static Type getVoid() { return Type(); }
    155 
    156   bool operator==(const Type &Other) const { return str() == Other.str(); }
    157   bool operator!=(const Type &Other) const { return !operator==(Other); }
    158 
    159   //
    160   // Query functions
    161   //
    162   bool isScalarForMangling() const { return ScalarForMangling; }
    163   bool noManglingQ() const { return NoManglingQ; }
    164 
    165   bool isPointer() const { return Pointer; }
    166   bool isFloating() const { return Float; }
    167   bool isInteger() const { return !Float && !Poly; }
    168   bool isSigned() const { return Signed; }
    169   bool isScalar() const { return NumVectors == 0; }
    170   bool isVector() const { return NumVectors > 0; }
    171   bool isFloat() const { return Float && ElementBitwidth == 32; }
    172   bool isDouble() const { return Float && ElementBitwidth == 64; }
    173   bool isHalf() const { return Float && ElementBitwidth == 16; }
    174   bool isPoly() const { return Poly; }
    175   bool isChar() const { return ElementBitwidth == 8; }
    176   bool isShort() const { return !Float && ElementBitwidth == 16; }
    177   bool isInt() const { return !Float && ElementBitwidth == 32; }
    178   bool isLong() const { return !Float && ElementBitwidth == 64; }
    179   bool isVoid() const { return Void; }
    180   unsigned getNumElements() const { return Bitwidth / ElementBitwidth; }
    181   unsigned getSizeInBits() const { return Bitwidth; }
    182   unsigned getElementSizeInBits() const { return ElementBitwidth; }
    183   unsigned getNumVectors() const { return NumVectors; }
    184 
    185   //
    186   // Mutator functions
    187   //
    188   void makeUnsigned() { Signed = false; }
    189   void makeSigned() { Signed = true; }
    190   void makeInteger(unsigned ElemWidth, bool Sign) {
    191     Float = false;
    192     Poly = false;
    193     Signed = Sign;
    194     ElementBitwidth = ElemWidth;
    195   }
    196   void makeScalar() {
    197     Bitwidth = ElementBitwidth;
    198     NumVectors = 0;
    199   }
    200   void makeOneVector() {
    201     assert(isVector());
    202     NumVectors = 1;
    203   }
    204   void doubleLanes() {
    205     assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!");
    206     Bitwidth = 128;
    207   }
    208   void halveLanes() {
    209     assert_with_loc(Bitwidth != 64, "Can't get smaller than 64!");
    210     Bitwidth = 64;
    211   }
    212 
    213   /// Return the C string representation of a type, which is the typename
    214   /// defined in stdint.h or arm_neon.h.
    215   std::string str() const;
    216 
    217   /// Return the string representation of a type, which is an encoded
    218   /// string for passing to the BUILTIN() macro in Builtins.def.
    219   std::string builtin_str() const;
    220 
    221   /// Return the value in NeonTypeFlags for this type.
    222   unsigned getNeonEnum() const;
    223 
    224   /// Parse a type from a stdint.h or arm_neon.h typedef name,
    225   /// for example uint32x2_t or int64_t.
    226   static Type fromTypedefName(StringRef Name);
    227 
    228 private:
    229   /// Creates the type based on the typespec string in TS.
    230   /// Sets "Quad" to true if the "Q" or "H" modifiers were
    231   /// seen. This is needed by applyModifier as some modifiers
    232   /// only take effect if the type size was changed by "Q" or "H".
    233   void applyTypespec(bool &Quad);
    234   /// Applies a prototype modifier to the type.
    235   void applyModifier(char Mod);
    236 };
    237 
    238 //===----------------------------------------------------------------------===//
    239 // Variable
    240 //===----------------------------------------------------------------------===//
    241 
    242 /// A variable is a simple class that just has a type and a name.
    243 class Variable {
    244   Type T;
    245   std::string N;
    246 
    247 public:
    248   Variable() : T(Type::getVoid()), N("") {}
    249   Variable(Type T, std::string N) : T(T), N(N) {}
    250 
    251   Type getType() const { return T; }
    252   std::string getName() const { return "__" + N; }
    253 };
    254 
    255 //===----------------------------------------------------------------------===//
    256 // Intrinsic
    257 //===----------------------------------------------------------------------===//
    258 
    259 /// The main grunt class. This represents an instantiation of an intrinsic with
    260 /// a particular typespec and prototype.
    261 class Intrinsic {
    262   friend class DagEmitter;
    263 
    264   /// The Record this intrinsic was created from.
    265   Record *R;
    266   /// The unmangled name and prototype.
    267   std::string Name, Proto;
    268   /// The input and output typespecs. InTS == OutTS except when
    269   /// CartesianProductOfTypes is 1 - this is the case for vreinterpret.
    270   TypeSpec OutTS, InTS;
    271   /// The base class kind. Most intrinsics use ClassS, which has full type
    272   /// info for integers (s32/u32). Some use ClassI, which doesn't care about
    273   /// signedness (i32), while some (ClassB) have no type at all, only a width
    274   /// (32).
    275   ClassKind CK;
    276   /// The list of DAGs for the body. May be empty, in which case we should
    277   /// emit a builtin call.
    278   ListInit *Body;
    279   /// The architectural #ifdef guard.
    280   std::string Guard;
    281   /// Set if the Unvailable bit is 1. This means we don't generate a body,
    282   /// just an "unavailable" attribute on a declaration.
    283   bool IsUnavailable;
    284   /// Is this intrinsic safe for big-endian? or does it need its arguments
    285   /// reversing?
    286   bool BigEndianSafe;
    287 
    288   /// The types of return value [0] and parameters [1..].
    289   std::vector<Type> Types;
    290   /// The local variables defined.
    291   std::map<std::string, Variable> Variables;
    292   /// NeededEarly - set if any other intrinsic depends on this intrinsic.
    293   bool NeededEarly;
    294   /// UseMacro - set if we should implement using a macro or unset for a
    295   ///            function.
    296   bool UseMacro;
    297   /// The set of intrinsics that this intrinsic uses/requires.
    298   std::set<Intrinsic *> Dependencies;
    299   /// The "base type", which is Type('d', OutTS). InBaseType is only
    300   /// different if CartesianProductOfTypes = 1 (for vreinterpret).
    301   Type BaseType, InBaseType;
    302   /// The return variable.
    303   Variable RetVar;
    304   /// A postfix to apply to every variable. Defaults to "".
    305   std::string VariablePostfix;
    306 
    307   NeonEmitter &Emitter;
    308   std::stringstream OS;
    309 
    310 public:
    311   Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS,
    312             TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter,
    313             StringRef Guard, bool IsUnavailable, bool BigEndianSafe)
    314       : R(R), Name(Name.str()), Proto(Proto.str()), OutTS(OutTS), InTS(InTS),
    315         CK(CK), Body(Body), Guard(Guard.str()), IsUnavailable(IsUnavailable),
    316         BigEndianSafe(BigEndianSafe), NeededEarly(false), UseMacro(false),
    317         BaseType(OutTS, 'd'), InBaseType(InTS, 'd'), Emitter(Emitter) {
    318     // If this builtin takes an immediate argument, we need to #define it rather
    319     // than use a standard declaration, so that SemaChecking can range check
    320     // the immediate passed by the user.
    321     if (Proto.find('i') != std::string::npos)
    322       UseMacro = true;
    323 
    324     // Pointer arguments need to use macros to avoid hiding aligned attributes
    325     // from the pointer type.
    326     if (Proto.find('p') != std::string::npos ||
    327         Proto.find('c') != std::string::npos)
    328       UseMacro = true;
    329 
    330     // It is not permitted to pass or return an __fp16 by value, so intrinsics
    331     // taking a scalar float16_t must be implemented as macros.
    332     if (OutTS.find('h') != std::string::npos &&
    333         Proto.find('s') != std::string::npos)
    334       UseMacro = true;
    335 
    336     // Modify the TypeSpec per-argument to get a concrete Type, and create
    337     // known variables for each.
    338     // Types[0] is the return value.
    339     Types.push_back(Type(OutTS, Proto[0]));
    340     for (unsigned I = 1; I < Proto.size(); ++I)
    341       Types.push_back(Type(InTS, Proto[I]));
    342   }
    343 
    344   /// Get the Record that this intrinsic is based off.
    345   Record *getRecord() const { return R; }
    346   /// Get the set of Intrinsics that this intrinsic calls.
    347   /// this is the set of immediate dependencies, NOT the
    348   /// transitive closure.
    349   const std::set<Intrinsic *> &getDependencies() const { return Dependencies; }
    350   /// Get the architectural guard string (#ifdef).
    351   std::string getGuard() const { return Guard; }
    352   /// Get the non-mangled name.
    353   std::string getName() const { return Name; }
    354 
    355   /// Return true if the intrinsic takes an immediate operand.
    356   bool hasImmediate() const {
    357     return Proto.find('i') != std::string::npos;
    358   }
    359   /// Return the parameter index of the immediate operand.
    360   unsigned getImmediateIdx() const {
    361     assert(hasImmediate());
    362     unsigned Idx = Proto.find('i');
    363     assert(Idx > 0 && "Can't return an immediate!");
    364     return Idx - 1;
    365   }
    366 
    367   /// Return true if the intrinsic takes an splat operand.
    368   bool hasSplat() const { return Proto.find('a') != std::string::npos; }
    369   /// Return the parameter index of the splat operand.
    370   unsigned getSplatIdx() const {
    371     assert(hasSplat());
    372     unsigned Idx = Proto.find('a');
    373     assert(Idx > 0 && "Can't return a splat!");
    374     return Idx - 1;
    375   }
    376 
    377   unsigned getNumParams() const { return Proto.size() - 1; }
    378   Type getReturnType() const { return Types[0]; }
    379   Type getParamType(unsigned I) const { return Types[I + 1]; }
    380   Type getBaseType() const { return BaseType; }
    381   /// Return the raw prototype string.
    382   std::string getProto() const { return Proto; }
    383 
    384   /// Return true if the prototype has a scalar argument.
    385   /// This does not return true for the "splat" code ('a').
    386   bool protoHasScalar();
    387 
    388   /// Return the index that parameter PIndex will sit at
    389   /// in a generated function call. This is often just PIndex,
    390   /// but may not be as things such as multiple-vector operands
    391   /// and sret parameters need to be taken into accont.
    392   unsigned getGeneratedParamIdx(unsigned PIndex) {
    393     unsigned Idx = 0;
    394     if (getReturnType().getNumVectors() > 1)
    395       // Multiple vectors are passed as sret.
    396       ++Idx;
    397 
    398     for (unsigned I = 0; I < PIndex; ++I)
    399       Idx += std::max(1U, getParamType(I).getNumVectors());
    400 
    401     return Idx;
    402   }
    403 
    404   bool hasBody() const { return Body && Body->getValues().size() > 0; }
    405 
    406   void setNeededEarly() { NeededEarly = true; }
    407 
    408   bool operator<(const Intrinsic &Other) const {
    409     // Sort lexicographically on a two-tuple (Guard, Name)
    410     if (Guard != Other.Guard)
    411       return Guard < Other.Guard;
    412     return Name < Other.Name;
    413   }
    414 
    415   ClassKind getClassKind(bool UseClassBIfScalar = false) {
    416     if (UseClassBIfScalar && !protoHasScalar())
    417       return ClassB;
    418     return CK;
    419   }
    420 
    421   /// Return the name, mangled with type information.
    422   /// If ForceClassS is true, use ClassS (u32/s32) instead
    423   /// of the intrinsic's own type class.
    424   std::string getMangledName(bool ForceClassS = false);
    425   /// Return the type code for a builtin function call.
    426   std::string getInstTypeCode(Type T, ClassKind CK);
    427   /// Return the type string for a BUILTIN() macro in Builtins.def.
    428   std::string getBuiltinTypeStr();
    429 
    430   /// Generate the intrinsic, returning code.
    431   std::string generate();
    432   /// Perform type checking and populate the dependency graph, but
    433   /// don't generate code yet.
    434   void indexBody();
    435 
    436 private:
    437   std::string mangleName(std::string Name, ClassKind CK);
    438 
    439   void initVariables();
    440   std::string replaceParamsIn(std::string S);
    441 
    442   void emitBodyAsBuiltinCall();
    443 
    444   void generateImpl(bool ReverseArguments,
    445                     StringRef NamePrefix, StringRef CallPrefix);
    446   void emitReturn();
    447   void emitBody(StringRef CallPrefix);
    448   void emitShadowedArgs();
    449   void emitArgumentReversal();
    450   void emitReturnReversal();
    451   void emitReverseVariable(Variable &Dest, Variable &Src);
    452   void emitNewLine();
    453   void emitClosingBrace();
    454   void emitOpeningBrace();
    455   void emitPrototype(StringRef NamePrefix);
    456 
    457   class DagEmitter {
    458     Intrinsic &Intr;
    459     StringRef CallPrefix;
    460 
    461   public:
    462     DagEmitter(Intrinsic &Intr, StringRef CallPrefix) :
    463       Intr(Intr), CallPrefix(CallPrefix) {
    464     }
    465     std::pair<Type, std::string> emitDagArg(Init *Arg, std::string ArgName);
    466     std::pair<Type, std::string> emitDagSaveTemp(DagInit *DI);
    467     std::pair<Type, std::string> emitDagSplat(DagInit *DI);
    468     std::pair<Type, std::string> emitDagDup(DagInit *DI);
    469     std::pair<Type, std::string> emitDagShuffle(DagInit *DI);
    470     std::pair<Type, std::string> emitDagCast(DagInit *DI, bool IsBitCast);
    471     std::pair<Type, std::string> emitDagCall(DagInit *DI);
    472     std::pair<Type, std::string> emitDagNameReplace(DagInit *DI);
    473     std::pair<Type, std::string> emitDagLiteral(DagInit *DI);
    474     std::pair<Type, std::string> emitDagOp(DagInit *DI);
    475     std::pair<Type, std::string> emitDag(DagInit *DI);
    476   };
    477 
    478 };
    479 
    480 //===----------------------------------------------------------------------===//
    481 // NeonEmitter
    482 //===----------------------------------------------------------------------===//
    483 
    484 class NeonEmitter {
    485   RecordKeeper &Records;
    486   DenseMap<Record *, ClassKind> ClassMap;
    487   std::map<std::string, std::vector<Intrinsic *>> IntrinsicMap;
    488   unsigned UniqueNumber;
    489 
    490   void createIntrinsic(Record *R, SmallVectorImpl<Intrinsic *> &Out);
    491   void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs);
    492   void genOverloadTypeCheckCode(raw_ostream &OS,
    493                                 SmallVectorImpl<Intrinsic *> &Defs);
    494   void genIntrinsicRangeCheckCode(raw_ostream &OS,
    495                                   SmallVectorImpl<Intrinsic *> &Defs);
    496 
    497 public:
    498   /// Called by Intrinsic - this attempts to get an intrinsic that takes
    499   /// the given types as arguments.
    500   Intrinsic *getIntrinsic(StringRef Name, ArrayRef<Type> Types);
    501 
    502   /// Called by Intrinsic - returns a globally-unique number.
    503   unsigned getUniqueNumber() { return UniqueNumber++; }
    504 
    505   NeonEmitter(RecordKeeper &R) : Records(R), UniqueNumber(0) {
    506     Record *SI = R.getClass("SInst");
    507     Record *II = R.getClass("IInst");
    508     Record *WI = R.getClass("WInst");
    509     Record *SOpI = R.getClass("SOpInst");
    510     Record *IOpI = R.getClass("IOpInst");
    511     Record *WOpI = R.getClass("WOpInst");
    512     Record *LOpI = R.getClass("LOpInst");
    513     Record *NoTestOpI = R.getClass("NoTestOpInst");
    514 
    515     ClassMap[SI] = ClassS;
    516     ClassMap[II] = ClassI;
    517     ClassMap[WI] = ClassW;
    518     ClassMap[SOpI] = ClassS;
    519     ClassMap[IOpI] = ClassI;
    520     ClassMap[WOpI] = ClassW;
    521     ClassMap[LOpI] = ClassL;
    522     ClassMap[NoTestOpI] = ClassNoTest;
    523   }
    524 
    525   // run - Emit arm_neon.h.inc
    526   void run(raw_ostream &o);
    527 
    528   // runHeader - Emit all the __builtin prototypes used in arm_neon.h
    529   void runHeader(raw_ostream &o);
    530 
    531   // runTests - Emit tests for all the Neon intrinsics.
    532   void runTests(raw_ostream &o);
    533 };
    534 
    535 } // end anonymous namespace
    536 
    537 //===----------------------------------------------------------------------===//
    538 // Type implementation
    539 //===----------------------------------------------------------------------===//
    540 
    541 std::string Type::str() const {
    542   if (Void)
    543     return "void";
    544   std::string S;
    545 
    546   if (!Signed && isInteger())
    547     S += "u";
    548 
    549   if (Poly)
    550     S += "poly";
    551   else if (Float)
    552     S += "float";
    553   else
    554     S += "int";
    555 
    556   S += utostr(ElementBitwidth);
    557   if (isVector())
    558     S += "x" + utostr(getNumElements());
    559   if (NumVectors > 1)
    560     S += "x" + utostr(NumVectors);
    561   S += "_t";
    562 
    563   if (Constant)
    564     S += " const";
    565   if (Pointer)
    566     S += " *";
    567 
    568   return S;
    569 }
    570 
    571 std::string Type::builtin_str() const {
    572   std::string S;
    573   if (isVoid())
    574     return "v";
    575 
    576   if (Pointer)
    577     // All pointers are void pointers.
    578     S += "v";
    579   else if (isInteger())
    580     switch (ElementBitwidth) {
    581     case 8: S += "c"; break;
    582     case 16: S += "s"; break;
    583     case 32: S += "i"; break;
    584     case 64: S += "Wi"; break;
    585     case 128: S += "LLLi"; break;
    586     default: llvm_unreachable("Unhandled case!");
    587     }
    588   else
    589     switch (ElementBitwidth) {
    590     case 16: S += "h"; break;
    591     case 32: S += "f"; break;
    592     case 64: S += "d"; break;
    593     default: llvm_unreachable("Unhandled case!");
    594     }
    595 
    596   if (isChar() && !Pointer)
    597     // Make chars explicitly signed.
    598     S = "S" + S;
    599   else if (isInteger() && !Pointer && !Signed)
    600     S = "U" + S;
    601 
    602   if (isScalar()) {
    603     if (Constant) S += "C";
    604     if (Pointer) S += "*";
    605     return S;
    606   }
    607 
    608   std::string Ret;
    609   for (unsigned I = 0; I < NumVectors; ++I)
    610     Ret += "V" + utostr(getNumElements()) + S;
    611 
    612   return Ret;
    613 }
    614 
    615 unsigned Type::getNeonEnum() const {
    616   unsigned Addend;
    617   switch (ElementBitwidth) {
    618   case 8: Addend = 0; break;
    619   case 16: Addend = 1; break;
    620   case 32: Addend = 2; break;
    621   case 64: Addend = 3; break;
    622   case 128: Addend = 4; break;
    623   default: llvm_unreachable("Unhandled element bitwidth!");
    624   }
    625 
    626   unsigned Base = (unsigned)NeonTypeFlags::Int8 + Addend;
    627   if (Poly) {
    628     // Adjustment needed because Poly32 doesn't exist.
    629     if (Addend >= 2)
    630       --Addend;
    631     Base = (unsigned)NeonTypeFlags::Poly8 + Addend;
    632   }
    633   if (Float) {
    634     assert(Addend != 0 && "Float8 doesn't exist!");
    635     Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1);
    636   }
    637 
    638   if (Bitwidth == 128)
    639     Base |= (unsigned)NeonTypeFlags::QuadFlag;
    640   if (isInteger() && !Signed)
    641     Base |= (unsigned)NeonTypeFlags::UnsignedFlag;
    642 
    643   return Base;
    644 }
    645 
    646 Type Type::fromTypedefName(StringRef Name) {
    647   Type T;
    648   T.Void = false;
    649   T.Float = false;
    650   T.Poly = false;
    651 
    652   if (Name.front() == 'u') {
    653     T.Signed = false;
    654     Name = Name.drop_front();
    655   } else {
    656     T.Signed = true;
    657   }
    658 
    659   if (Name.startswith("float")) {
    660     T.Float = true;
    661     Name = Name.drop_front(5);
    662   } else if (Name.startswith("poly")) {
    663     T.Poly = true;
    664     Name = Name.drop_front(4);
    665   } else {
    666     assert(Name.startswith("int"));
    667     Name = Name.drop_front(3);
    668   }
    669 
    670   unsigned I = 0;
    671   for (I = 0; I < Name.size(); ++I) {
    672     if (!isdigit(Name[I]))
    673       break;
    674   }
    675   Name.substr(0, I).getAsInteger(10, T.ElementBitwidth);
    676   Name = Name.drop_front(I);
    677 
    678   T.Bitwidth = T.ElementBitwidth;
    679   T.NumVectors = 1;
    680 
    681   if (Name.front() == 'x') {
    682     Name = Name.drop_front();
    683     unsigned I = 0;
    684     for (I = 0; I < Name.size(); ++I) {
    685       if (!isdigit(Name[I]))
    686         break;
    687     }
    688     unsigned NumLanes;
    689     Name.substr(0, I).getAsInteger(10, NumLanes);
    690     Name = Name.drop_front(I);
    691     T.Bitwidth = T.ElementBitwidth * NumLanes;
    692   } else {
    693     // Was scalar.
    694     T.NumVectors = 0;
    695   }
    696   if (Name.front() == 'x') {
    697     Name = Name.drop_front();
    698     unsigned I = 0;
    699     for (I = 0; I < Name.size(); ++I) {
    700       if (!isdigit(Name[I]))
    701         break;
    702     }
    703     Name.substr(0, I).getAsInteger(10, T.NumVectors);
    704     Name = Name.drop_front(I);
    705   }
    706 
    707   assert(Name.startswith("_t") && "Malformed typedef!");
    708   return T;
    709 }
    710 
    711 void Type::applyTypespec(bool &Quad) {
    712   std::string S = TS;
    713   ScalarForMangling = false;
    714   Void = false;
    715   Poly = Float = false;
    716   ElementBitwidth = ~0U;
    717   Signed = true;
    718   NumVectors = 1;
    719 
    720   for (char I : S) {
    721     switch (I) {
    722     case 'S':
    723       ScalarForMangling = true;
    724       break;
    725     case 'H':
    726       NoManglingQ = true;
    727       Quad = true;
    728       break;
    729     case 'Q':
    730       Quad = true;
    731       break;
    732     case 'P':
    733       Poly = true;
    734       break;
    735     case 'U':
    736       Signed = false;
    737       break;
    738     case 'c':
    739       ElementBitwidth = 8;
    740       break;
    741     case 'h':
    742       Float = true;
    743     // Fall through
    744     case 's':
    745       ElementBitwidth = 16;
    746       break;
    747     case 'f':
    748       Float = true;
    749     // Fall through
    750     case 'i':
    751       ElementBitwidth = 32;
    752       break;
    753     case 'd':
    754       Float = true;
    755     // Fall through
    756     case 'l':
    757       ElementBitwidth = 64;
    758       break;
    759     case 'k':
    760       ElementBitwidth = 128;
    761       // Poly doesn't have a 128x1 type.
    762       if (Poly)
    763         NumVectors = 0;
    764       break;
    765     default:
    766       llvm_unreachable("Unhandled type code!");
    767     }
    768   }
    769   assert(ElementBitwidth != ~0U && "Bad element bitwidth!");
    770 
    771   Bitwidth = Quad ? 128 : 64;
    772 }
    773 
    774 void Type::applyModifier(char Mod) {
    775   bool AppliedQuad = false;
    776   applyTypespec(AppliedQuad);
    777 
    778   switch (Mod) {
    779   case 'v':
    780     Void = true;
    781     break;
    782   case 't':
    783     if (Poly) {
    784       Poly = false;
    785       Signed = false;
    786     }
    787     break;
    788   case 'b':
    789     Signed = false;
    790     Float = false;
    791     Poly = false;
    792     NumVectors = 0;
    793     Bitwidth = ElementBitwidth;
    794     break;
    795   case '$':
    796     Signed = true;
    797     Float = false;
    798     Poly = false;
    799     NumVectors = 0;
    800     Bitwidth = ElementBitwidth;
    801     break;
    802   case 'u':
    803     Signed = false;
    804     Poly = false;
    805     Float = false;
    806     break;
    807   case 'x':
    808     Signed = true;
    809     assert(!Poly && "'u' can't be used with poly types!");
    810     Float = false;
    811     break;
    812   case 'o':
    813     Bitwidth = ElementBitwidth = 64;
    814     NumVectors = 0;
    815     Float = true;
    816     break;
    817   case 'y':
    818     Bitwidth = ElementBitwidth = 32;
    819     NumVectors = 0;
    820     Float = true;
    821     break;
    822   case 'f':
    823     // Special case - if we're half-precision, a floating
    824     // point argument needs to be 128-bits (double size).
    825     if (isHalf())
    826       Bitwidth = 128;
    827     Float = true;
    828     ElementBitwidth = 32;
    829     break;
    830   case 'F':
    831     Float = true;
    832     ElementBitwidth = 64;
    833     break;
    834   case 'g':
    835     if (AppliedQuad)
    836       Bitwidth /= 2;
    837     break;
    838   case 'j':
    839     if (!AppliedQuad)
    840       Bitwidth *= 2;
    841     break;
    842   case 'w':
    843     ElementBitwidth *= 2;
    844     Bitwidth *= 2;
    845     break;
    846   case 'n':
    847     ElementBitwidth *= 2;
    848     break;
    849   case 'i':
    850     Float = false;
    851     Poly = false;
    852     ElementBitwidth = Bitwidth = 32;
    853     NumVectors = 0;
    854     Signed = true;
    855     break;
    856   case 'l':
    857     Float = false;
    858     Poly = false;
    859     ElementBitwidth = Bitwidth = 64;
    860     NumVectors = 0;
    861     Signed = false;
    862     break;
    863   case 'z':
    864     ElementBitwidth /= 2;
    865     Bitwidth = ElementBitwidth;
    866     NumVectors = 0;
    867     break;
    868   case 'r':
    869     ElementBitwidth *= 2;
    870     Bitwidth = ElementBitwidth;
    871     NumVectors = 0;
    872     break;
    873   case 's':
    874   case 'a':
    875     Bitwidth = ElementBitwidth;
    876     NumVectors = 0;
    877     break;
    878   case 'k':
    879     Bitwidth *= 2;
    880     break;
    881   case 'c':
    882     Constant = true;
    883   // Fall through
    884   case 'p':
    885     Pointer = true;
    886     Bitwidth = ElementBitwidth;
    887     NumVectors = 0;
    888     break;
    889   case 'h':
    890     ElementBitwidth /= 2;
    891     break;
    892   case 'q':
    893     ElementBitwidth /= 2;
    894     Bitwidth *= 2;
    895     break;
    896   case 'e':
    897     ElementBitwidth /= 2;
    898     Signed = false;
    899     break;
    900   case 'm':
    901     ElementBitwidth /= 2;
    902     Bitwidth /= 2;
    903     break;
    904   case 'd':
    905     break;
    906   case '2':
    907     NumVectors = 2;
    908     break;
    909   case '3':
    910     NumVectors = 3;
    911     break;
    912   case '4':
    913     NumVectors = 4;
    914     break;
    915   case 'B':
    916     NumVectors = 2;
    917     if (!AppliedQuad)
    918       Bitwidth *= 2;
    919     break;
    920   case 'C':
    921     NumVectors = 3;
    922     if (!AppliedQuad)
    923       Bitwidth *= 2;
    924     break;
    925   case 'D':
    926     NumVectors = 4;
    927     if (!AppliedQuad)
    928       Bitwidth *= 2;
    929     break;
    930   default:
    931     llvm_unreachable("Unhandled character!");
    932   }
    933 }
    934 
    935 //===----------------------------------------------------------------------===//
    936 // Intrinsic implementation
    937 //===----------------------------------------------------------------------===//
    938 
    939 std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) {
    940   char typeCode = '\0';
    941   bool printNumber = true;
    942 
    943   if (CK == ClassB)
    944     return "";
    945 
    946   if (T.isPoly())
    947     typeCode = 'p';
    948   else if (T.isInteger())
    949     typeCode = T.isSigned() ? 's' : 'u';
    950   else
    951     typeCode = 'f';
    952 
    953   if (CK == ClassI) {
    954     switch (typeCode) {
    955     default:
    956       break;
    957     case 's':
    958     case 'u':
    959     case 'p':
    960       typeCode = 'i';
    961       break;
    962     }
    963   }
    964   if (CK == ClassB) {
    965     typeCode = '\0';
    966   }
    967 
    968   std::string S;
    969   if (typeCode != '\0')
    970     S.push_back(typeCode);
    971   if (printNumber)
    972     S += utostr(T.getElementSizeInBits());
    973 
    974   return S;
    975 }
    976 
    977 std::string Intrinsic::getBuiltinTypeStr() {
    978   ClassKind LocalCK = getClassKind(true);
    979   std::string S;
    980 
    981   Type RetT = getReturnType();
    982   if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() &&
    983       !RetT.isFloating())
    984     RetT.makeInteger(RetT.getElementSizeInBits(), false);
    985 
    986   // Since the return value must be one type, return a vector type of the
    987   // appropriate width which we will bitcast.  An exception is made for
    988   // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
    989   // fashion, storing them to a pointer arg.
    990   if (RetT.getNumVectors() > 1) {
    991     S += "vv*"; // void result with void* first argument
    992   } else {
    993     if (RetT.isPoly())
    994       RetT.makeInteger(RetT.getElementSizeInBits(), false);
    995     if (!RetT.isScalar() && !RetT.isSigned())
    996       RetT.makeSigned();
    997 
    998     bool ForcedVectorFloatingType = Proto[0] == 'F' || Proto[0] == 'f';
    999     if (LocalCK == ClassB && !RetT.isScalar() && !ForcedVectorFloatingType)
   1000       // Cast to vector of 8-bit elements.
   1001       RetT.makeInteger(8, true);
   1002 
   1003     S += RetT.builtin_str();
   1004   }
   1005 
   1006   for (unsigned I = 0; I < getNumParams(); ++I) {
   1007     Type T = getParamType(I);
   1008     if (T.isPoly())
   1009       T.makeInteger(T.getElementSizeInBits(), false);
   1010 
   1011     bool ForcedFloatingType = Proto[I + 1] == 'F' || Proto[I + 1] == 'f';
   1012     if (LocalCK == ClassB && !T.isScalar() && !ForcedFloatingType)
   1013       T.makeInteger(8, true);
   1014     // Halves always get converted to 8-bit elements.
   1015     if (T.isHalf() && T.isVector() && !T.isScalarForMangling())
   1016       T.makeInteger(8, true);
   1017 
   1018     if (LocalCK == ClassI)
   1019       T.makeSigned();
   1020 
   1021     // Constant indices are always just "int".
   1022     if (hasImmediate() && getImmediateIdx() == I)
   1023       T.makeInteger(32, true);
   1024 
   1025     S += T.builtin_str();
   1026   }
   1027 
   1028   // Extra constant integer to hold type class enum for this function, e.g. s8
   1029   if (LocalCK == ClassB)
   1030     S += "i";
   1031 
   1032   return S;
   1033 }
   1034 
   1035 std::string Intrinsic::getMangledName(bool ForceClassS) {
   1036   // Check if the prototype has a scalar operand with the type of the vector
   1037   // elements.  If not, bitcasting the args will take care of arg checking.
   1038   // The actual signedness etc. will be taken care of with special enums.
   1039   ClassKind LocalCK = CK;
   1040   if (!protoHasScalar())
   1041     LocalCK = ClassB;
   1042 
   1043   return mangleName(Name, ForceClassS ? ClassS : LocalCK);
   1044 }
   1045 
   1046 std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) {
   1047   std::string typeCode = getInstTypeCode(BaseType, LocalCK);
   1048   std::string S = Name;
   1049 
   1050   if (Name == "vcvt_f32_f16" || Name == "vcvt_f32_f64" ||
   1051       Name == "vcvt_f64_f32")
   1052     return Name;
   1053 
   1054   if (typeCode.size() > 0) {
   1055     // If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN.
   1056     if (Name.size() >= 3 && isdigit(Name.back()) &&
   1057         Name[Name.length() - 2] == 'x' && Name[Name.length() - 3] == '_')
   1058       S.insert(S.length() - 3, "_" + typeCode);
   1059     else
   1060       S += "_" + typeCode;
   1061   }
   1062 
   1063   if (BaseType != InBaseType) {
   1064     // A reinterpret - out the input base type at the end.
   1065     S += "_" + getInstTypeCode(InBaseType, LocalCK);
   1066   }
   1067 
   1068   if (LocalCK == ClassB)
   1069     S += "_v";
   1070 
   1071   // Insert a 'q' before the first '_' character so that it ends up before
   1072   // _lane or _n on vector-scalar operations.
   1073   if (BaseType.getSizeInBits() == 128 && !BaseType.noManglingQ()) {
   1074     size_t Pos = S.find('_');
   1075     S.insert(Pos, "q");
   1076   }
   1077 
   1078   char Suffix = '\0';
   1079   if (BaseType.isScalarForMangling()) {
   1080     switch (BaseType.getElementSizeInBits()) {
   1081     case 8: Suffix = 'b'; break;
   1082     case 16: Suffix = 'h'; break;
   1083     case 32: Suffix = 's'; break;
   1084     case 64: Suffix = 'd'; break;
   1085     default: llvm_unreachable("Bad suffix!");
   1086     }
   1087   }
   1088   if (Suffix != '\0') {
   1089     size_t Pos = S.find('_');
   1090     S.insert(Pos, &Suffix, 1);
   1091   }
   1092 
   1093   return S;
   1094 }
   1095 
   1096 std::string Intrinsic::replaceParamsIn(std::string S) {
   1097   while (S.find('$') != std::string::npos) {
   1098     size_t Pos = S.find('$');
   1099     size_t End = Pos + 1;
   1100     while (isalpha(S[End]))
   1101       ++End;
   1102 
   1103     std::string VarName = S.substr(Pos + 1, End - Pos - 1);
   1104     assert_with_loc(Variables.find(VarName) != Variables.end(),
   1105                     "Variable not defined!");
   1106     S.replace(Pos, End - Pos, Variables.find(VarName)->second.getName());
   1107   }
   1108 
   1109   return S;
   1110 }
   1111 
   1112 void Intrinsic::initVariables() {
   1113   Variables.clear();
   1114 
   1115   // Modify the TypeSpec per-argument to get a concrete Type, and create
   1116   // known variables for each.
   1117   for (unsigned I = 1; I < Proto.size(); ++I) {
   1118     char NameC = '0' + (I - 1);
   1119     std::string Name = "p";
   1120     Name.push_back(NameC);
   1121 
   1122     Variables[Name] = Variable(Types[I], Name + VariablePostfix);
   1123   }
   1124   RetVar = Variable(Types[0], "ret" + VariablePostfix);
   1125 }
   1126 
   1127 void Intrinsic::emitPrototype(StringRef NamePrefix) {
   1128   if (UseMacro)
   1129     OS << "#define ";
   1130   else
   1131     OS << "__ai " << Types[0].str() << " ";
   1132 
   1133   OS << NamePrefix.str() << mangleName(Name, ClassS) << "(";
   1134 
   1135   for (unsigned I = 0; I < getNumParams(); ++I) {
   1136     if (I != 0)
   1137       OS << ", ";
   1138 
   1139     char NameC = '0' + I;
   1140     std::string Name = "p";
   1141     Name.push_back(NameC);
   1142     assert(Variables.find(Name) != Variables.end());
   1143     Variable &V = Variables[Name];
   1144 
   1145     if (!UseMacro)
   1146       OS << V.getType().str() << " ";
   1147     OS << V.getName();
   1148   }
   1149 
   1150   OS << ")";
   1151 }
   1152 
   1153 void Intrinsic::emitOpeningBrace() {
   1154   if (UseMacro)
   1155     OS << " __extension__ ({";
   1156   else
   1157     OS << " {";
   1158   emitNewLine();
   1159 }
   1160 
   1161 void Intrinsic::emitClosingBrace() {
   1162   if (UseMacro)
   1163     OS << "})";
   1164   else
   1165     OS << "}";
   1166 }
   1167 
   1168 void Intrinsic::emitNewLine() {
   1169   if (UseMacro)
   1170     OS << " \\\n";
   1171   else
   1172     OS << "\n";
   1173 }
   1174 
   1175 void Intrinsic::emitReverseVariable(Variable &Dest, Variable &Src) {
   1176   if (Dest.getType().getNumVectors() > 1) {
   1177     emitNewLine();
   1178 
   1179     for (unsigned K = 0; K < Dest.getType().getNumVectors(); ++K) {
   1180       OS << "  " << Dest.getName() << ".val[" << utostr(K) << "] = "
   1181          << "__builtin_shufflevector("
   1182          << Src.getName() << ".val[" << utostr(K) << "], "
   1183          << Src.getName() << ".val[" << utostr(K) << "]";
   1184       for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J)
   1185         OS << ", " << utostr(J);
   1186       OS << ");";
   1187       emitNewLine();
   1188     }
   1189   } else {
   1190     OS << "  " << Dest.getName()
   1191        << " = __builtin_shufflevector(" << Src.getName() << ", " << Src.getName();
   1192     for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J)
   1193       OS << ", " << utostr(J);
   1194     OS << ");";
   1195     emitNewLine();
   1196   }
   1197 }
   1198 
   1199 void Intrinsic::emitArgumentReversal() {
   1200   if (BigEndianSafe)
   1201     return;
   1202 
   1203   // Reverse all vector arguments.
   1204   for (unsigned I = 0; I < getNumParams(); ++I) {
   1205     std::string Name = "p" + utostr(I);
   1206     std::string NewName = "rev" + utostr(I);
   1207 
   1208     Variable &V = Variables[Name];
   1209     Variable NewV(V.getType(), NewName + VariablePostfix);
   1210 
   1211     if (!NewV.getType().isVector() || NewV.getType().getNumElements() == 1)
   1212       continue;
   1213 
   1214     OS << "  " << NewV.getType().str() << " " << NewV.getName() << ";";
   1215     emitReverseVariable(NewV, V);
   1216     V = NewV;
   1217   }
   1218 }
   1219 
   1220 void Intrinsic::emitReturnReversal() {
   1221   if (BigEndianSafe)
   1222     return;
   1223   if (!getReturnType().isVector() || getReturnType().isVoid() ||
   1224       getReturnType().getNumElements() == 1)
   1225     return;
   1226   emitReverseVariable(RetVar, RetVar);
   1227 }
   1228 
   1229 
   1230 void Intrinsic::emitShadowedArgs() {
   1231   // Macro arguments are not type-checked like inline function arguments,
   1232   // so assign them to local temporaries to get the right type checking.
   1233   if (!UseMacro)
   1234     return;
   1235 
   1236   for (unsigned I = 0; I < getNumParams(); ++I) {
   1237     // Do not create a temporary for an immediate argument.
   1238     // That would defeat the whole point of using a macro!
   1239     if (hasImmediate() && Proto[I+1] == 'i')
   1240       continue;
   1241     // Do not create a temporary for pointer arguments. The input
   1242     // pointer may have an alignment hint.
   1243     if (getParamType(I).isPointer())
   1244       continue;
   1245 
   1246     std::string Name = "p" + utostr(I);
   1247 
   1248     assert(Variables.find(Name) != Variables.end());
   1249     Variable &V = Variables[Name];
   1250 
   1251     std::string NewName = "s" + utostr(I);
   1252     Variable V2(V.getType(), NewName + VariablePostfix);
   1253 
   1254     OS << "  " << V2.getType().str() << " " << V2.getName() << " = "
   1255        << V.getName() << ";";
   1256     emitNewLine();
   1257 
   1258     V = V2;
   1259   }
   1260 }
   1261 
   1262 // We don't check 'a' in this function, because for builtin function the
   1263 // argument matching to 'a' uses a vector type splatted from a scalar type.
   1264 bool Intrinsic::protoHasScalar() {
   1265   return (Proto.find('s') != std::string::npos ||
   1266           Proto.find('z') != std::string::npos ||
   1267           Proto.find('r') != std::string::npos ||
   1268           Proto.find('b') != std::string::npos ||
   1269           Proto.find('$') != std::string::npos ||
   1270           Proto.find('y') != std::string::npos ||
   1271           Proto.find('o') != std::string::npos);
   1272 }
   1273 
   1274 void Intrinsic::emitBodyAsBuiltinCall() {
   1275   std::string S;
   1276 
   1277   // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
   1278   // sret-like argument.
   1279   bool SRet = getReturnType().getNumVectors() >= 2;
   1280 
   1281   StringRef N = Name;
   1282   if (hasSplat()) {
   1283     // Call the non-splat builtin: chop off the "_n" suffix from the name.
   1284     assert(N.endswith("_n"));
   1285     N = N.drop_back(2);
   1286   }
   1287 
   1288   ClassKind LocalCK = CK;
   1289   if (!protoHasScalar())
   1290     LocalCK = ClassB;
   1291 
   1292   if (!getReturnType().isVoid() && !SRet)
   1293     S += "(" + RetVar.getType().str() + ") ";
   1294 
   1295   S += "__builtin_neon_" + mangleName(N, LocalCK) + "(";
   1296 
   1297   if (SRet)
   1298     S += "&" + RetVar.getName() + ", ";
   1299 
   1300   for (unsigned I = 0; I < getNumParams(); ++I) {
   1301     Variable &V = Variables["p" + utostr(I)];
   1302     Type T = V.getType();
   1303 
   1304     // Handle multiple-vector values specially, emitting each subvector as an
   1305     // argument to the builtin.
   1306     if (T.getNumVectors() > 1) {
   1307       // Check if an explicit cast is needed.
   1308       std::string Cast;
   1309       if (T.isChar() || T.isPoly() || !T.isSigned()) {
   1310         Type T2 = T;
   1311         T2.makeOneVector();
   1312         T2.makeInteger(8, /*Signed=*/true);
   1313         Cast = "(" + T2.str() + ")";
   1314       }
   1315 
   1316       for (unsigned J = 0; J < T.getNumVectors(); ++J)
   1317         S += Cast + V.getName() + ".val[" + utostr(J) + "], ";
   1318       continue;
   1319     }
   1320 
   1321     std::string Arg;
   1322     Type CastToType = T;
   1323     if (hasSplat() && I == getSplatIdx()) {
   1324       Arg = "(" + BaseType.str() + ") {";
   1325       for (unsigned J = 0; J < BaseType.getNumElements(); ++J) {
   1326         if (J != 0)
   1327           Arg += ", ";
   1328         Arg += V.getName();
   1329       }
   1330       Arg += "}";
   1331 
   1332       CastToType = BaseType;
   1333     } else {
   1334       Arg = V.getName();
   1335     }
   1336 
   1337     // Check if an explicit cast is needed.
   1338     if (CastToType.isVector()) {
   1339       CastToType.makeInteger(8, true);
   1340       Arg = "(" + CastToType.str() + ")" + Arg;
   1341     }
   1342 
   1343     S += Arg + ", ";
   1344   }
   1345 
   1346   // Extra constant integer to hold type class enum for this function, e.g. s8
   1347   if (getClassKind(true) == ClassB) {
   1348     Type ThisTy = getReturnType();
   1349     if (Proto[0] == 'v' || Proto[0] == 'f' || Proto[0] == 'F')
   1350       ThisTy = getParamType(0);
   1351     if (ThisTy.isPointer())
   1352       ThisTy = getParamType(1);
   1353 
   1354     S += utostr(ThisTy.getNeonEnum());
   1355   } else {
   1356     // Remove extraneous ", ".
   1357     S.pop_back();
   1358     S.pop_back();
   1359   }
   1360   S += ");";
   1361 
   1362   std::string RetExpr;
   1363   if (!SRet && !RetVar.getType().isVoid())
   1364     RetExpr = RetVar.getName() + " = ";
   1365 
   1366   OS << "  " << RetExpr << S;
   1367   emitNewLine();
   1368 }
   1369 
   1370 void Intrinsic::emitBody(StringRef CallPrefix) {
   1371   std::vector<std::string> Lines;
   1372 
   1373   assert(RetVar.getType() == Types[0]);
   1374   // Create a return variable, if we're not void.
   1375   if (!RetVar.getType().isVoid()) {
   1376     OS << "  " << RetVar.getType().str() << " " << RetVar.getName() << ";";
   1377     emitNewLine();
   1378   }
   1379 
   1380   if (!Body || Body->getValues().size() == 0) {
   1381     // Nothing specific to output - must output a builtin.
   1382     emitBodyAsBuiltinCall();
   1383     return;
   1384   }
   1385 
   1386   // We have a list of "things to output". The last should be returned.
   1387   for (auto *I : Body->getValues()) {
   1388     if (StringInit *SI = dyn_cast<StringInit>(I)) {
   1389       Lines.push_back(replaceParamsIn(SI->getAsString()));
   1390     } else if (DagInit *DI = dyn_cast<DagInit>(I)) {
   1391       DagEmitter DE(*this, CallPrefix);
   1392       Lines.push_back(DE.emitDag(DI).second + ";");
   1393     }
   1394   }
   1395 
   1396   assert(Lines.size() && "Empty def?");
   1397   if (!RetVar.getType().isVoid())
   1398     Lines.back().insert(0, RetVar.getName() + " = ");
   1399 
   1400   for (auto &L : Lines) {
   1401     OS << "  " << L;
   1402     emitNewLine();
   1403   }
   1404 }
   1405 
   1406 void Intrinsic::emitReturn() {
   1407   if (RetVar.getType().isVoid())
   1408     return;
   1409   if (UseMacro)
   1410     OS << "  " << RetVar.getName() << ";";
   1411   else
   1412     OS << "  return " << RetVar.getName() << ";";
   1413   emitNewLine();
   1414 }
   1415 
   1416 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(DagInit *DI) {
   1417   // At this point we should only be seeing a def.
   1418   DefInit *DefI = cast<DefInit>(DI->getOperator());
   1419   std::string Op = DefI->getAsString();
   1420 
   1421   if (Op == "cast" || Op == "bitcast")
   1422     return emitDagCast(DI, Op == "bitcast");
   1423   if (Op == "shuffle")
   1424     return emitDagShuffle(DI);
   1425   if (Op == "dup")
   1426     return emitDagDup(DI);
   1427   if (Op == "splat")
   1428     return emitDagSplat(DI);
   1429   if (Op == "save_temp")
   1430     return emitDagSaveTemp(DI);
   1431   if (Op == "op")
   1432     return emitDagOp(DI);
   1433   if (Op == "call")
   1434     return emitDagCall(DI);
   1435   if (Op == "name_replace")
   1436     return emitDagNameReplace(DI);
   1437   if (Op == "literal")
   1438     return emitDagLiteral(DI);
   1439   assert_with_loc(false, "Unknown operation!");
   1440   return std::make_pair(Type::getVoid(), "");
   1441 }
   1442 
   1443 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagOp(DagInit *DI) {
   1444   std::string Op = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
   1445   if (DI->getNumArgs() == 2) {
   1446     // Unary op.
   1447     std::pair<Type, std::string> R =
   1448         emitDagArg(DI->getArg(1), DI->getArgName(1));
   1449     return std::make_pair(R.first, Op + R.second);
   1450   } else {
   1451     assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!");
   1452     std::pair<Type, std::string> R1 =
   1453         emitDagArg(DI->getArg(1), DI->getArgName(1));
   1454     std::pair<Type, std::string> R2 =
   1455         emitDagArg(DI->getArg(2), DI->getArgName(2));
   1456     assert_with_loc(R1.first == R2.first, "Argument type mismatch!");
   1457     return std::make_pair(R1.first, R1.second + " " + Op + " " + R2.second);
   1458   }
   1459 }
   1460 
   1461 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCall(DagInit *DI) {
   1462   std::vector<Type> Types;
   1463   std::vector<std::string> Values;
   1464   for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
   1465     std::pair<Type, std::string> R =
   1466         emitDagArg(DI->getArg(I + 1), DI->getArgName(I + 1));
   1467     Types.push_back(R.first);
   1468     Values.push_back(R.second);
   1469   }
   1470 
   1471   // Look up the called intrinsic.
   1472   std::string N;
   1473   if (StringInit *SI = dyn_cast<StringInit>(DI->getArg(0)))
   1474     N = SI->getAsUnquotedString();
   1475   else
   1476     N = emitDagArg(DI->getArg(0), "").second;
   1477   Intrinsic *Callee = Intr.Emitter.getIntrinsic(N, Types);
   1478   assert(Callee && "getIntrinsic should not return us nullptr!");
   1479 
   1480   // Make sure the callee is known as an early def.
   1481   Callee->setNeededEarly();
   1482   Intr.Dependencies.insert(Callee);
   1483 
   1484   // Now create the call itself.
   1485   std::string S = CallPrefix.str() + Callee->getMangledName(true) + "(";
   1486   for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
   1487     if (I != 0)
   1488       S += ", ";
   1489     S += Values[I];
   1490   }
   1491   S += ")";
   1492 
   1493   return std::make_pair(Callee->getReturnType(), S);
   1494 }
   1495 
   1496 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCast(DagInit *DI,
   1497                                                                 bool IsBitCast){
   1498   // (cast MOD* VAL) -> cast VAL to type given by MOD.
   1499   std::pair<Type, std::string> R = emitDagArg(
   1500       DI->getArg(DI->getNumArgs() - 1), DI->getArgName(DI->getNumArgs() - 1));
   1501   Type castToType = R.first;
   1502   for (unsigned ArgIdx = 0; ArgIdx < DI->getNumArgs() - 1; ++ArgIdx) {
   1503 
   1504     // MOD can take several forms:
   1505     //   1. $X - take the type of parameter / variable X.
   1506     //   2. The value "R" - take the type of the return type.
   1507     //   3. a type string
   1508     //   4. The value "U" or "S" to switch the signedness.
   1509     //   5. The value "H" or "D" to half or double the bitwidth.
   1510     //   6. The value "8" to convert to 8-bit (signed) integer lanes.
   1511     if (DI->getArgName(ArgIdx).size()) {
   1512       assert_with_loc(Intr.Variables.find(DI->getArgName(ArgIdx)) !=
   1513                       Intr.Variables.end(),
   1514                       "Variable not found");
   1515       castToType = Intr.Variables[DI->getArgName(ArgIdx)].getType();
   1516     } else {
   1517       StringInit *SI = dyn_cast<StringInit>(DI->getArg(ArgIdx));
   1518       assert_with_loc(SI, "Expected string type or $Name for cast type");
   1519 
   1520       if (SI->getAsUnquotedString() == "R") {
   1521         castToType = Intr.getReturnType();
   1522       } else if (SI->getAsUnquotedString() == "U") {
   1523         castToType.makeUnsigned();
   1524       } else if (SI->getAsUnquotedString() == "S") {
   1525         castToType.makeSigned();
   1526       } else if (SI->getAsUnquotedString() == "H") {
   1527         castToType.halveLanes();
   1528       } else if (SI->getAsUnquotedString() == "D") {
   1529         castToType.doubleLanes();
   1530       } else if (SI->getAsUnquotedString() == "8") {
   1531         castToType.makeInteger(8, true);
   1532       } else {
   1533         castToType = Type::fromTypedefName(SI->getAsUnquotedString());
   1534         assert_with_loc(!castToType.isVoid(), "Unknown typedef");
   1535       }
   1536     }
   1537   }
   1538 
   1539   std::string S;
   1540   if (IsBitCast) {
   1541     // Emit a reinterpret cast. The second operand must be an lvalue, so create
   1542     // a temporary.
   1543     std::string N = "reint";
   1544     unsigned I = 0;
   1545     while (Intr.Variables.find(N) != Intr.Variables.end())
   1546       N = "reint" + utostr(++I);
   1547     Intr.Variables[N] = Variable(R.first, N + Intr.VariablePostfix);
   1548 
   1549     Intr.OS << R.first.str() << " " << Intr.Variables[N].getName() << " = "
   1550             << R.second << ";";
   1551     Intr.emitNewLine();
   1552 
   1553     S = "*(" + castToType.str() + " *) &" + Intr.Variables[N].getName() + "";
   1554   } else {
   1555     // Emit a normal (static) cast.
   1556     S = "(" + castToType.str() + ")(" + R.second + ")";
   1557   }
   1558 
   1559   return std::make_pair(castToType, S);
   1560 }
   1561 
// Lower (shuffle arg1, arg2, sequence) to a __builtin_shufflevector call.
//
// Both vector arguments must have the same type.  The index sequence is
// evaluated with a SetTheory instance extended by the local operators and
// expander defined below; every resulting record must be named "sv<N>", and
// <N> becomes one shuffle index.  The returned type is the argument type with
// its lanes doubled or halved when the sequence has twice or half as many
// entries as the argument has elements.
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagShuffle(DagInit *DI){
  // See the documentation in arm_neon.td for a description of these operators.

  // "lowhalf": evaluates its operands and keeps the first half of the
  // resulting elements.
  class LowHalf : public SetTheory::Operator {
  public:
    virtual void anchor() {}
    virtual ~LowHalf() {}
    virtual void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
                       ArrayRef<SMLoc> Loc) {
      SetTheory::RecSet Elts2;
      ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc);
      Elts.insert(Elts2.begin(), Elts2.begin() + (Elts2.size() / 2));
    }
  };
  // "highhalf": evaluates its operands and keeps the second half of the
  // resulting elements.
  class HighHalf : public SetTheory::Operator {
  public:
    virtual void anchor() {}
    virtual ~HighHalf() {}
    virtual void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
                       ArrayRef<SMLoc> Loc) {
      SetTheory::RecSet Elts2;
      ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc);
      Elts.insert(Elts2.begin() + (Elts2.size() / 2), Elts2.end());
    }
  };
  // "rev": the first operand is an integer size; the remaining operands are
  // evaluated and reversed in consecutive groups of (size / ElementSize)
  // elements.
  class Rev : public SetTheory::Operator {
    unsigned ElementSize;

  public:
    Rev(unsigned ElementSize) : ElementSize(ElementSize) {}
    virtual void anchor() {}
    virtual ~Rev() {}
    virtual void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
                       ArrayRef<SMLoc> Loc) {
      SetTheory::RecSet Elts2;
      ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Elts2, Loc);

      // Number of elements per group that gets reversed.
      int64_t VectorSize = cast<IntInit>(Expr->getArg(0))->getValue();
      VectorSize /= ElementSize;

      std::vector<Record *> Revved;
      for (unsigned VI = 0; VI < Elts2.size(); VI += VectorSize) {
        for (int LI = VectorSize - 1; LI >= 0; --LI) {
          Revved.push_back(Elts2[VI + LI]);
        }
      }

      Elts.insert(Revved.begin(), Revved.end());
    }
  };
  // Expands the record "mask0" to sv0 .. sv(N-1) and "mask1" to
  // svN .. sv(2N-1); any other record is left unexpanded.
  class MaskExpander : public SetTheory::Expander {
    unsigned N;

  public:
    MaskExpander(unsigned N) : N(N) {}
    virtual void anchor() {}
    virtual ~MaskExpander() {}
    virtual void expand(SetTheory &ST, Record *R, SetTheory::RecSet &Elts) {
      unsigned Addend = 0;
      if (R->getName() == "mask0")
        Addend = 0;
      else if (R->getName() == "mask1")
        Addend = N;
      else
        return;
      for (unsigned I = 0; I < N; ++I)
        Elts.insert(R->getRecords().getDef("sv" + utostr(I + Addend)));
    }
  };

  // (shuffle arg1, arg2, sequence)
  std::pair<Type, std::string> Arg1 =
      emitDagArg(DI->getArg(0), DI->getArgName(0));
  std::pair<Type, std::string> Arg2 =
      emitDagArg(DI->getArg(1), DI->getArgName(1));
  assert_with_loc(Arg1.first == Arg2.first,
                  "Different types in arguments to shuffle!");

  // The mask expander and "rev" are parameterized on the first argument's
  // element count and element size respectively.
  SetTheory ST;
  LowHalf LH;
  HighHalf HH;
  MaskExpander ME(Arg1.first.getNumElements());
  Rev R(Arg1.first.getElementSizeInBits());
  SetTheory::RecSet Elts;
  ST.addOperator("lowhalf", &LH);
  ST.addOperator("highhalf", &HH);
  ST.addOperator("rev", &R);
  ST.addExpander("MaskExpand", &ME);
  ST.evaluate(DI->getArg(2), Elts, ArrayRef<SMLoc>());

  // Each element record is named "sv<index>"; strip the prefix to obtain the
  // shuffle index.
  std::string S = "__builtin_shufflevector(" + Arg1.second + ", " + Arg2.second;
  for (auto &E : Elts) {
    StringRef Name = E->getName();
    assert_with_loc(Name.startswith("sv"),
                    "Incorrect element kind in shuffle mask!");
    S += ", " + Name.drop_front(2).str();
  }
  S += ")";

  // Recalculate the return type - the shuffle may have halved or doubled it.
  Type T(Arg1.first);
  if (Elts.size() > T.getNumElements()) {
    assert_with_loc(
        Elts.size() == T.getNumElements() * 2,
        "Can only double or half the number of elements in a shuffle!");
    T.doubleLanes();
  } else if (Elts.size() < T.getNumElements()) {
    assert_with_loc(
        Elts.size() == T.getNumElements() / 2,
        "Can only double or half the number of elements in a shuffle!");
    T.halveLanes();
  }

  return std::make_pair(T, S);
}
   1676 
   1677 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDup(DagInit *DI) {
   1678   assert_with_loc(DI->getNumArgs() == 1, "dup() expects one argument");
   1679   std::pair<Type, std::string> A = emitDagArg(DI->getArg(0), DI->getArgName(0));
   1680   assert_with_loc(A.first.isScalar(), "dup() expects a scalar argument");
   1681 
   1682   Type T = Intr.getBaseType();
   1683   assert_with_loc(T.isVector(), "dup() used but default type is scalar!");
   1684   std::string S = "(" + T.str() + ") {";
   1685   for (unsigned I = 0; I < T.getNumElements(); ++I) {
   1686     if (I != 0)
   1687       S += ", ";
   1688     S += A.second;
   1689   }
   1690   S += "}";
   1691 
   1692   return std::make_pair(T, S);
   1693 }
   1694 
   1695 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSplat(DagInit *DI) {
   1696   assert_with_loc(DI->getNumArgs() == 2, "splat() expects two arguments");
   1697   std::pair<Type, std::string> A = emitDagArg(DI->getArg(0), DI->getArgName(0));
   1698   std::pair<Type, std::string> B = emitDagArg(DI->getArg(1), DI->getArgName(1));
   1699 
   1700   assert_with_loc(B.first.isScalar(),
   1701                   "splat() requires a scalar int as the second argument");
   1702 
   1703   std::string S = "__builtin_shufflevector(" + A.second + ", " + A.second;
   1704   for (unsigned I = 0; I < Intr.getBaseType().getNumElements(); ++I) {
   1705     S += ", " + B.second;
   1706   }
   1707   S += ")";
   1708 
   1709   return std::make_pair(Intr.getBaseType(), S);
   1710 }
   1711 
   1712 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSaveTemp(DagInit *DI) {
   1713   assert_with_loc(DI->getNumArgs() == 2, "save_temp() expects two arguments");
   1714   std::pair<Type, std::string> A = emitDagArg(DI->getArg(1), DI->getArgName(1));
   1715 
   1716   assert_with_loc(!A.first.isVoid(),
   1717                   "Argument to save_temp() must have non-void type!");
   1718 
   1719   std::string N = DI->getArgName(0);
   1720   assert_with_loc(N.size(), "save_temp() expects a name as the first argument");
   1721 
   1722   assert_with_loc(Intr.Variables.find(N) == Intr.Variables.end(),
   1723                   "Variable already defined!");
   1724   Intr.Variables[N] = Variable(A.first, N + Intr.VariablePostfix);
   1725 
   1726   std::string S =
   1727       A.first.str() + " " + Intr.Variables[N].getName() + " = " + A.second;
   1728 
   1729   return std::make_pair(Type::getVoid(), S);
   1730 }
   1731 
   1732 std::pair<Type, std::string>
   1733 Intrinsic::DagEmitter::emitDagNameReplace(DagInit *DI) {
   1734   std::string S = Intr.Name;
   1735 
   1736   assert_with_loc(DI->getNumArgs() == 2, "name_replace requires 2 arguments!");
   1737   std::string ToReplace = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
   1738   std::string ReplaceWith = cast<StringInit>(DI->getArg(1))->getAsUnquotedString();
   1739 
   1740   size_t Idx = S.find(ToReplace);
   1741 
   1742   assert_with_loc(Idx != std::string::npos, "name should contain '" + ToReplace + "'!");
   1743   S.replace(Idx, ToReplace.size(), ReplaceWith);
   1744 
   1745   return std::make_pair(Type::getVoid(), S);
   1746 }
   1747 
   1748 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagLiteral(DagInit *DI){
   1749   std::string Ty = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
   1750   std::string Value = cast<StringInit>(DI->getArg(1))->getAsUnquotedString();
   1751   return std::make_pair(Type::fromTypedefName(Ty), Value);
   1752 }
   1753 
   1754 std::pair<Type, std::string>
   1755 Intrinsic::DagEmitter::emitDagArg(Init *Arg, std::string ArgName) {
   1756   if (ArgName.size()) {
   1757     assert_with_loc(!Arg->isComplete(),
   1758                     "Arguments must either be DAGs or names, not both!");
   1759     assert_with_loc(Intr.Variables.find(ArgName) != Intr.Variables.end(),
   1760                     "Variable not defined!");
   1761     Variable &V = Intr.Variables[ArgName];
   1762     return std::make_pair(V.getType(), V.getName());
   1763   }
   1764 
   1765   assert(Arg && "Neither ArgName nor Arg?!");
   1766   DagInit *DI = dyn_cast<DagInit>(Arg);
   1767   assert_with_loc(DI, "Arguments must either be DAGs or names!");
   1768 
   1769   return emitDag(DI);
   1770 }
   1771 
   1772 std::string Intrinsic::generate() {
   1773   // Little endian intrinsics are simple and don't require any argument
   1774   // swapping.
   1775   OS << "#ifdef __LITTLE_ENDIAN__\n";
   1776 
   1777   generateImpl(false, "", "");
   1778 
   1779   OS << "#else\n";
   1780 
   1781   // Big endian intrinsics are more complex. The user intended these
   1782   // intrinsics to operate on a vector "as-if" loaded by (V)LDR,
   1783   // but we load as-if (V)LD1. So we should swap all arguments and
   1784   // swap the return value too.
   1785   //
   1786   // If we call sub-intrinsics, we should call a version that does
   1787   // not re-swap the arguments!
   1788   generateImpl(true, "", "__noswap_");
   1789 
   1790   // If we're needed early, create a non-swapping variant for
   1791   // big-endian.
   1792   if (NeededEarly) {
   1793     generateImpl(false, "__noswap_", "__noswap_");
   1794   }
   1795   OS << "#endif\n\n";
   1796 
   1797   return OS.str();
   1798 }
   1799 
   1800 void Intrinsic::generateImpl(bool ReverseArguments,
   1801                              StringRef NamePrefix, StringRef CallPrefix) {
   1802   CurrentRecord = R;
   1803 
   1804   // If we call a macro, our local variables may be corrupted due to
   1805   // lack of proper lexical scoping. So, add a globally unique postfix
   1806   // to every variable.
   1807   //
   1808   // indexBody() should have set up the Dependencies set by now.
   1809   for (auto *I : Dependencies)
   1810     if (I->UseMacro) {
   1811       VariablePostfix = "_" + utostr(Emitter.getUniqueNumber());
   1812       break;
   1813     }
   1814 
   1815   initVariables();
   1816 
   1817   emitPrototype(NamePrefix);
   1818 
   1819   if (IsUnavailable) {
   1820     OS << " __attribute__((unavailable));";
   1821   } else {
   1822     emitOpeningBrace();
   1823     emitShadowedArgs();
   1824     if (ReverseArguments)
   1825       emitArgumentReversal();
   1826     emitBody(CallPrefix);
   1827     if (ReverseArguments)
   1828       emitReturnReversal();
   1829     emitReturn();
   1830     emitClosingBrace();
   1831   }
   1832   OS << "\n";
   1833 
   1834   CurrentRecord = nullptr;
   1835 }
   1836 
   1837 void Intrinsic::indexBody() {
   1838   CurrentRecord = R;
   1839 
   1840   initVariables();
   1841   emitBody("");
   1842   OS.str("");
   1843 
   1844   CurrentRecord = nullptr;
   1845 }
   1846 
   1847 //===----------------------------------------------------------------------===//
   1848 // NeonEmitter implementation
   1849 //===----------------------------------------------------------------------===//
   1850 
   1851 Intrinsic *NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types) {
   1852   // First, look up the name in the intrinsic map.
   1853   assert_with_loc(IntrinsicMap.find(Name.str()) != IntrinsicMap.end(),
   1854                   ("Intrinsic '" + Name + "' not found!").str());
   1855   std::vector<Intrinsic *> &V = IntrinsicMap[Name.str()];
   1856   std::vector<Intrinsic *> GoodVec;
   1857 
   1858   // Create a string to print if we end up failing.
   1859   std::string ErrMsg = "looking up intrinsic '" + Name.str() + "(";
   1860   for (unsigned I = 0; I < Types.size(); ++I) {
   1861     if (I != 0)
   1862       ErrMsg += ", ";
   1863     ErrMsg += Types[I].str();
   1864   }
   1865   ErrMsg += ")'\n";
   1866   ErrMsg += "Available overloads:\n";
   1867 
   1868   // Now, look through each intrinsic implementation and see if the types are
   1869   // compatible.
   1870   for (auto *I : V) {
   1871     ErrMsg += "  - " + I->getReturnType().str() + " " + I->getMangledName();
   1872     ErrMsg += "(";
   1873     for (unsigned A = 0; A < I->getNumParams(); ++A) {
   1874       if (A != 0)
   1875         ErrMsg += ", ";
   1876       ErrMsg += I->getParamType(A).str();
   1877     }
   1878     ErrMsg += ")\n";
   1879 
   1880     if (I->getNumParams() != Types.size())
   1881       continue;
   1882 
   1883     bool Good = true;
   1884     for (unsigned Arg = 0; Arg < Types.size(); ++Arg) {
   1885       if (I->getParamType(Arg) != Types[Arg]) {
   1886         Good = false;
   1887         break;
   1888       }
   1889     }
   1890     if (Good)
   1891       GoodVec.push_back(I);
   1892   }
   1893 
   1894   assert_with_loc(GoodVec.size() > 0,
   1895                   "No compatible intrinsic found - " + ErrMsg);
   1896   assert_with_loc(GoodVec.size() == 1, "Multiple overloads found - " + ErrMsg);
   1897 
   1898   return GoodVec.front();
   1899 }
   1900 
   1901 void NeonEmitter::createIntrinsic(Record *R,
   1902                                   SmallVectorImpl<Intrinsic *> &Out) {
   1903   std::string Name = R->getValueAsString("Name");
   1904   std::string Proto = R->getValueAsString("Prototype");
   1905   std::string Types = R->getValueAsString("Types");
   1906   Record *OperationRec = R->getValueAsDef("Operation");
   1907   bool CartesianProductOfTypes = R->getValueAsBit("CartesianProductOfTypes");
   1908   bool BigEndianSafe  = R->getValueAsBit("BigEndianSafe");
   1909   std::string Guard = R->getValueAsString("ArchGuard");
   1910   bool IsUnavailable = OperationRec->getValueAsBit("Unavailable");
   1911 
   1912   // Set the global current record. This allows assert_with_loc to produce
   1913   // decent location information even when highly nested.
   1914   CurrentRecord = R;
   1915 
   1916   ListInit *Body = OperationRec->getValueAsListInit("Ops");
   1917 
   1918   std::vector<TypeSpec> TypeSpecs = TypeSpec::fromTypeSpecs(Types);
   1919 
   1920   ClassKind CK = ClassNone;
   1921   if (R->getSuperClasses().size() >= 2)
   1922     CK = ClassMap[R->getSuperClasses()[1]];
   1923 
   1924   std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs;
   1925   for (auto TS : TypeSpecs) {
   1926     if (CartesianProductOfTypes) {
   1927       Type DefaultT(TS, 'd');
   1928       for (auto SrcTS : TypeSpecs) {
   1929         Type DefaultSrcT(SrcTS, 'd');
   1930         if (TS == SrcTS ||
   1931             DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits())
   1932           continue;
   1933         NewTypeSpecs.push_back(std::make_pair(TS, SrcTS));
   1934       }
   1935     } else {
   1936       NewTypeSpecs.push_back(std::make_pair(TS, TS));
   1937     }
   1938   }
   1939 
   1940   std::sort(NewTypeSpecs.begin(), NewTypeSpecs.end());
   1941   std::unique(NewTypeSpecs.begin(), NewTypeSpecs.end());
   1942 
   1943   for (auto &I : NewTypeSpecs) {
   1944     Intrinsic *IT = new Intrinsic(R, Name, Proto, I.first, I.second, CK, Body,
   1945                                   *this, Guard, IsUnavailable, BigEndianSafe);
   1946 
   1947     IntrinsicMap[Name].push_back(IT);
   1948     Out.push_back(IT);
   1949   }
   1950 
   1951   CurrentRecord = nullptr;
   1952 }
   1953 
   1954 /// genBuiltinsDef: Generate the BuiltinsARM.def and  BuiltinsAArch64.def
   1955 /// declaration of builtins, checking for unique builtin declarations.
   1956 void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
   1957                                  SmallVectorImpl<Intrinsic *> &Defs) {
   1958   OS << "#ifdef GET_NEON_BUILTINS\n";
   1959 
   1960   // We only want to emit a builtin once, and we want to emit them in
   1961   // alphabetical order, so use a std::set.
   1962   std::set<std::string> Builtins;
   1963 
   1964   for (auto *Def : Defs) {
   1965     if (Def->hasBody())
   1966       continue;
   1967     // Functions with 'a' (the splat code) in the type prototype should not get
   1968     // their own builtin as they use the non-splat variant.
   1969     if (Def->hasSplat())
   1970       continue;
   1971 
   1972     std::string S = "BUILTIN(__builtin_neon_" + Def->getMangledName() + ", \"";
   1973 
   1974     S += Def->getBuiltinTypeStr();
   1975     S += "\", \"n\")";
   1976 
   1977     Builtins.insert(S);
   1978   }
   1979 
   1980   for (auto &S : Builtins)
   1981     OS << S << "\n";
   1982   OS << "#endif\n\n";
   1983 }
   1984 
   1985 /// Generate the ARM and AArch64 overloaded type checking code for
   1986 /// SemaChecking.cpp, checking for unique builtin declarations.
   1987 void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
   1988                                            SmallVectorImpl<Intrinsic *> &Defs) {
   1989   OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
   1990 
   1991   // We record each overload check line before emitting because subsequent Inst
   1992   // definitions may extend the number of permitted types (i.e. augment the
   1993   // Mask). Use std::map to avoid sorting the table by hash number.
   1994   struct OverloadInfo {
   1995     uint64_t Mask;
   1996     int PtrArgNum;
   1997     bool HasConstPtr;
   1998     OverloadInfo() : Mask(0ULL), PtrArgNum(0), HasConstPtr(false) {}
   1999   };
   2000   std::map<std::string, OverloadInfo> OverloadMap;
   2001 
   2002   for (auto *Def : Defs) {
   2003     // If the def has a body (that is, it has Operation DAGs), it won't call
   2004     // __builtin_neon_* so we don't need to generate a definition for it.
   2005     if (Def->hasBody())
   2006       continue;
   2007     // Functions with 'a' (the splat code) in the type prototype should not get
   2008     // their own builtin as they use the non-splat variant.
   2009     if (Def->hasSplat())
   2010       continue;
   2011     // Functions which have a scalar argument cannot be overloaded, no need to
   2012     // check them if we are emitting the type checking code.
   2013     if (Def->protoHasScalar())
   2014       continue;
   2015 
   2016     uint64_t Mask = 0ULL;
   2017     Type Ty = Def->getReturnType();
   2018     if (Def->getProto()[0] == 'v' || Def->getProto()[0] == 'f' ||
   2019         Def->getProto()[0] == 'F')
   2020       Ty = Def->getParamType(0);
   2021     if (Ty.isPointer())
   2022       Ty = Def->getParamType(1);
   2023 
   2024     Mask |= 1ULL << Ty.getNeonEnum();
   2025 
   2026     // Check if the function has a pointer or const pointer argument.
   2027     std::string Proto = Def->getProto();
   2028     int PtrArgNum = -1;
   2029     bool HasConstPtr = false;
   2030     for (unsigned I = 0; I < Def->getNumParams(); ++I) {
   2031       char ArgType = Proto[I + 1];
   2032       if (ArgType == 'c') {
   2033         HasConstPtr = true;
   2034         PtrArgNum = I;
   2035         break;
   2036       }
   2037       if (ArgType == 'p') {
   2038         PtrArgNum = I;
   2039         break;
   2040       }
   2041     }
   2042     // For sret builtins, adjust the pointer argument index.
   2043     if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1)
   2044       PtrArgNum += 1;
   2045 
   2046     std::string Name = Def->getName();
   2047     // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
   2048     // and vst1_lane intrinsics.  Using a pointer to the vector element
   2049     // type with one of those operations causes codegen to select an aligned
   2050     // load/store instruction.  If you want an unaligned operation,
   2051     // the pointer argument needs to have less alignment than element type,
   2052     // so just accept any pointer type.
   2053     if (Name == "vld1_lane" || Name == "vld1_dup" || Name == "vst1_lane") {
   2054       PtrArgNum = -1;
   2055       HasConstPtr = false;
   2056     }
   2057 
   2058     if (Mask) {
   2059       std::string Name = Def->getMangledName();
   2060       OverloadMap.insert(std::make_pair(Name, OverloadInfo()));
   2061       OverloadInfo &OI = OverloadMap[Name];
   2062       OI.Mask |= Mask;
   2063       OI.PtrArgNum |= PtrArgNum;
   2064       OI.HasConstPtr = HasConstPtr;
   2065     }
   2066   }
   2067 
   2068   for (auto &I : OverloadMap) {
   2069     OverloadInfo &OI = I.second;
   2070 
   2071     OS << "case NEON::BI__builtin_neon_" << I.first << ": ";
   2072     OS << "mask = 0x" << utohexstr(OI.Mask) << "ULL";
   2073     if (OI.PtrArgNum >= 0)
   2074       OS << "; PtrArgNum = " << OI.PtrArgNum;
   2075     if (OI.HasConstPtr)
   2076       OS << "; HasConstPtr = true";
   2077     OS << "; break;\n";
   2078   }
   2079   OS << "#endif\n\n";
   2080 }
   2081 
   2082 void
   2083 NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
   2084                                         SmallVectorImpl<Intrinsic *> &Defs) {
   2085   OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
   2086 
   2087   std::set<std::string> Emitted;
   2088 
   2089   for (auto *Def : Defs) {
   2090     if (Def->hasBody())
   2091       continue;
   2092     // Functions with 'a' (the splat code) in the type prototype should not get
   2093     // their own builtin as they use the non-splat variant.
   2094     if (Def->hasSplat())
   2095       continue;
   2096     // Functions which do not have an immediate do not ned to have range
   2097     // checking
   2098     // code emitted.
   2099     if (!Def->hasImmediate())
   2100       continue;
   2101     if (Emitted.find(Def->getMangledName()) != Emitted.end())
   2102       continue;
   2103 
   2104     std::string LowerBound, UpperBound;
   2105 
   2106     Record *R = Def->getRecord();
   2107     if (R->getValueAsBit("isVCVT_N")) {
   2108       // VCVT between floating- and fixed-point values takes an immediate
   2109       // in the range [1, 32) for f32 or [1, 64) for f64.
   2110       LowerBound = "1";
   2111       if (Def->getBaseType().getElementSizeInBits() == 32)
   2112         UpperBound = "31";
   2113       else
   2114         UpperBound = "63";
   2115     } else if (R->getValueAsBit("isScalarShift")) {
   2116       // Right shifts have an 'r' in the name, left shifts do not. Convert
   2117       // instructions have the same bounds and right shifts.
   2118       if (Def->getName().find('r') != std::string::npos ||
   2119           Def->getName().find("cvt") != std::string::npos)
   2120         LowerBound = "1";
   2121 
   2122       UpperBound = utostr(Def->getReturnType().getElementSizeInBits() - 1);
   2123     } else if (R->getValueAsBit("isShift")) {
   2124       // Builtins which are overloaded by type will need to have thier upper
   2125       // bound computed at Sema time based on the type constant.
   2126 
   2127       // Right shifts have an 'r' in the name, left shifts do not.
   2128       if (Def->getName().find('r') != std::string::npos)
   2129         LowerBound = "1";
   2130       UpperBound = "RFT(TV, true)";
   2131     } else if (Def->getClassKind(true) == ClassB) {
   2132       // ClassB intrinsics have a type (and hence lane number) that is only
   2133       // known at runtime.
   2134       if (R->getValueAsBit("isLaneQ"))
   2135         UpperBound = "RFT(TV, false, true)";
   2136       else
   2137         UpperBound = "RFT(TV, false, false)";
   2138     } else {
   2139       // The immediate generally refers to a lane in the preceding argument.
   2140       assert(Def->getImmediateIdx() > 0);
   2141       Type T = Def->getParamType(Def->getImmediateIdx() - 1);
   2142       UpperBound = utostr(T.getNumElements() - 1);
   2143     }
   2144 
   2145     // Calculate the index of the immediate that should be range checked.
   2146     unsigned Idx = Def->getNumParams();
   2147     if (Def->hasImmediate())
   2148       Idx = Def->getGeneratedParamIdx(Def->getImmediateIdx());
   2149 
   2150     OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ": "
   2151        << "i = " << Idx << ";";
   2152     if (LowerBound.size())
   2153       OS << " l = " << LowerBound << ";";
   2154     if (UpperBound.size())
   2155       OS << " u = " << UpperBound << ";";
   2156     OS << " break;\n";
   2157 
   2158     Emitted.insert(Def->getMangledName());
   2159   }
   2160 
   2161   OS << "#endif\n\n";
   2162 }
   2163 
   2164 /// runHeader - Emit a file with sections defining:
   2165 /// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
   2166 /// 2. the SemaChecking code for the type overload checking.
   2167 /// 3. the SemaChecking code for validation of intrinsic immediate arguments.
   2168 void NeonEmitter::runHeader(raw_ostream &OS) {
   2169   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
   2170 
   2171   SmallVector<Intrinsic *, 128> Defs;
   2172   for (auto *R : RV)
   2173     createIntrinsic(R, Defs);
   2174 
   2175   // Generate shared BuiltinsXXX.def
   2176   genBuiltinsDef(OS, Defs);
   2177 
   2178   // Generate ARM overloaded type checking code for SemaChecking.cpp
   2179   genOverloadTypeCheckCode(OS, Defs);
   2180 
   2181   // Generate ARM range checking code for shift/lane immediates.
   2182   genIntrinsicRangeCheckCode(OS, Defs);
   2183 }
   2184 
   2185 /// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
   2186 /// is comprised of type definitions and function declarations.
   2187 void NeonEmitter::run(raw_ostream &OS) {
   2188   OS << "/*===---- arm_neon.h - ARM Neon intrinsics "
   2189         "------------------------------"
   2190         "---===\n"
   2191         " *\n"
   2192         " * Permission is hereby granted, free of charge, to any person "
   2193         "obtaining "
   2194         "a copy\n"
   2195         " * of this software and associated documentation files (the "
   2196         "\"Software\"),"
   2197         " to deal\n"
   2198         " * in the Software without restriction, including without limitation "
   2199         "the "
   2200         "rights\n"
   2201         " * to use, copy, modify, merge, publish, distribute, sublicense, "
   2202         "and/or sell\n"
   2203         " * copies of the Software, and to permit persons to whom the Software "
   2204         "is\n"
   2205         " * furnished to do so, subject to the following conditions:\n"
   2206         " *\n"
   2207         " * The above copyright notice and this permission notice shall be "
   2208         "included in\n"
   2209         " * all copies or substantial portions of the Software.\n"
   2210         " *\n"
   2211         " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
   2212         "EXPRESS OR\n"
   2213         " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
   2214         "MERCHANTABILITY,\n"
   2215         " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
   2216         "SHALL THE\n"
   2217         " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
   2218         "OTHER\n"
   2219         " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
   2220         "ARISING FROM,\n"
   2221         " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
   2222         "DEALINGS IN\n"
   2223         " * THE SOFTWARE.\n"
   2224         " *\n"
   2225         " *===-----------------------------------------------------------------"
   2226         "---"
   2227         "---===\n"
   2228         " */\n\n";
   2229 
   2230   OS << "#ifndef __ARM_NEON_H\n";
   2231   OS << "#define __ARM_NEON_H\n\n";
   2232 
   2233   OS << "#if !defined(__ARM_NEON)\n";
   2234   OS << "#error \"NEON support not enabled\"\n";
   2235   OS << "#endif\n\n";
   2236 
   2237   OS << "#include <stdint.h>\n\n";
   2238 
   2239   // Emit NEON-specific scalar typedefs.
   2240   OS << "typedef float float32_t;\n";
   2241   OS << "typedef __fp16 float16_t;\n";
   2242 
   2243   OS << "#ifdef __aarch64__\n";
   2244   OS << "typedef double float64_t;\n";
   2245   OS << "#endif\n\n";
   2246 
   2247   // For now, signedness of polynomial types depends on target
   2248   OS << "#ifdef __aarch64__\n";
   2249   OS << "typedef uint8_t poly8_t;\n";
   2250   OS << "typedef uint16_t poly16_t;\n";
   2251   OS << "typedef uint64_t poly64_t;\n";
   2252   OS << "typedef __uint128_t poly128_t;\n";
   2253   OS << "#else\n";
   2254   OS << "typedef int8_t poly8_t;\n";
   2255   OS << "typedef int16_t poly16_t;\n";
   2256   OS << "#endif\n";
   2257 
   2258   // Emit Neon vector typedefs.
   2259   std::string TypedefTypes(
   2260       "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl");
   2261   std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes);
   2262 
   2263   // Emit vector typedefs.
   2264   bool InIfdef = false;
   2265   for (auto &TS : TDTypeVec) {
   2266     bool IsA64 = false;
   2267     Type T(TS, 'd');
   2268     if (T.isDouble() || (T.isPoly() && T.isLong()))
   2269       IsA64 = true;
   2270 
   2271     if (InIfdef && !IsA64) {
   2272       OS << "#endif\n";
   2273       InIfdef = false;
   2274     }
   2275     if (!InIfdef && IsA64) {
   2276       OS << "#ifdef __aarch64__\n";
   2277       InIfdef = true;
   2278     }
   2279 
   2280     if (T.isPoly())
   2281       OS << "typedef __attribute__((neon_polyvector_type(";
   2282     else
   2283       OS << "typedef __attribute__((neon_vector_type(";
   2284 
   2285     Type T2 = T;
   2286     T2.makeScalar();
   2287     OS << utostr(T.getNumElements()) << "))) ";
   2288     OS << T2.str();
   2289     OS << " " << T.str() << ";\n";
   2290   }
   2291   if (InIfdef)
   2292     OS << "#endif\n";
   2293   OS << "\n";
   2294 
   2295   // Emit struct typedefs.
   2296   InIfdef = false;
   2297   for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) {
   2298     for (auto &TS : TDTypeVec) {
   2299       bool IsA64 = false;
   2300       Type T(TS, 'd');
   2301       if (T.isDouble() || (T.isPoly() && T.isLong()))
   2302         IsA64 = true;
   2303 
   2304       if (InIfdef && !IsA64) {
   2305         OS << "#endif\n";
   2306         InIfdef = false;
   2307       }
   2308       if (!InIfdef && IsA64) {
   2309         OS << "#ifdef __aarch64__\n";
   2310         InIfdef = true;
   2311       }
   2312 
   2313       char M = '2' + (NumMembers - 2);
   2314       Type VT(TS, M);
   2315       OS << "typedef struct " << VT.str() << " {\n";
   2316       OS << "  " << T.str() << " val";
   2317       OS << "[" << utostr(NumMembers) << "]";
   2318       OS << ";\n} ";
   2319       OS << VT.str() << ";\n";
   2320       OS << "\n";
   2321     }
   2322   }
   2323   if (InIfdef)
   2324     OS << "#endif\n";
   2325   OS << "\n";
   2326 
   2327   OS << "#define __ai static inline __attribute__((__always_inline__, "
   2328         "__nodebug__))\n\n";
   2329 
   2330   SmallVector<Intrinsic *, 128> Defs;
   2331   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
   2332   for (auto *R : RV)
   2333     createIntrinsic(R, Defs);
   2334 
   2335   for (auto *I : Defs)
   2336     I->indexBody();
   2337 
   2338   std::stable_sort(
   2339       Defs.begin(), Defs.end(),
   2340       [](const Intrinsic *A, const Intrinsic *B) { return *A < *B; });
   2341 
   2342   // Only emit a def when its requirements have been met.
   2343   // FIXME: This loop could be made faster, but it's fast enough for now.
   2344   bool MadeProgress = true;
   2345   std::string InGuard = "";
   2346   while (!Defs.empty() && MadeProgress) {
   2347     MadeProgress = false;
   2348 
   2349     for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
   2350          I != Defs.end(); /*No step*/) {
   2351       bool DependenciesSatisfied = true;
   2352       for (auto *II : (*I)->getDependencies()) {
   2353         if (std::find(Defs.begin(), Defs.end(), II) != Defs.end())
   2354           DependenciesSatisfied = false;
   2355       }
   2356       if (!DependenciesSatisfied) {
   2357         // Try the next one.
   2358         ++I;
   2359         continue;
   2360       }
   2361 
   2362       // Emit #endif/#if pair if needed.
   2363       if ((*I)->getGuard() != InGuard) {
   2364         if (!InGuard.empty())
   2365           OS << "#endif\n";
   2366         InGuard = (*I)->getGuard();
   2367         if (!InGuard.empty())
   2368           OS << "#if " << InGuard << "\n";
   2369       }
   2370 
   2371       // Actually generate the intrinsic code.
   2372       OS << (*I)->generate();
   2373 
   2374       MadeProgress = true;
   2375       I = Defs.erase(I);
   2376     }
   2377   }
   2378   assert(Defs.empty() && "Some requirements were not satisfied!");
   2379   if (!InGuard.empty())
   2380     OS << "#endif\n";
   2381 
   2382   OS << "\n";
   2383   OS << "#undef __ai\n\n";
   2384   OS << "#endif /* __ARM_NEON_H */\n";
   2385 }
   2386 
   2387 namespace clang {
   2388 void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
   2389   NeonEmitter(Records).run(OS);
   2390 }
   2391 void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
   2392   NeonEmitter(Records).runHeader(OS);
   2393 }
   2394 void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
   2395   llvm_unreachable("Neon test generation no longer implemented!");
   2396 }
   2397 } // End namespace clang
   2398