1 //===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This tablegen backend is responsible for emitting arm_neon.h, which includes 11 // a declaration and definition of each function specified by the ARM NEON 12 // compiler interface. See ARM document DUI0348B. 13 // 14 // Each NEON instruction is implemented in terms of 1 or more functions which 15 // are suffixed with the element type of the input vectors. Functions may be 16 // implemented in terms of generic vector operations such as +, *, -, etc. or 17 // by calling a __builtin_-prefixed function which will be handled by clang's 18 // CodeGen library. 19 // 20 // Additional validation code can be generated by this file when runHeader() is 21 // called, rather than the normal run() entry point. 22 // 23 // See also the documentation in include/clang/Basic/arm_neon.td. 
24 // 25 //===----------------------------------------------------------------------===// 26 27 #include "llvm/ADT/DenseMap.h" 28 #include "llvm/ADT/STLExtras.h" 29 #include "llvm/ADT/SmallString.h" 30 #include "llvm/ADT/SmallVector.h" 31 #include "llvm/ADT/StringExtras.h" 32 #include "llvm/ADT/StringMap.h" 33 #include "llvm/Support/ErrorHandling.h" 34 #include "llvm/TableGen/Error.h" 35 #include "llvm/TableGen/Record.h" 36 #include "llvm/TableGen/SetTheory.h" 37 #include "llvm/TableGen/TableGenBackend.h" 38 #include <algorithm> 39 #include <deque> 40 #include <map> 41 #include <sstream> 42 #include <string> 43 #include <utility> 44 #include <vector> 45 using namespace llvm; 46 47 namespace { 48 49 // While globals are generally bad, this one allows us to perform assertions 50 // liberally and somehow still trace them back to the def they indirectly 51 // came from. 52 static Record *CurrentRecord = nullptr; 53 static void assert_with_loc(bool Assertion, const std::string &Str) { 54 if (!Assertion) { 55 if (CurrentRecord) 56 PrintFatalError(CurrentRecord->getLoc(), Str); 57 else 58 PrintFatalError(Str); 59 } 60 } 61 62 enum ClassKind { 63 ClassNone, 64 ClassI, // generic integer instruction, e.g., "i8" suffix 65 ClassS, // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix 66 ClassW, // width-specific instruction, e.g., "8" suffix 67 ClassB, // bitcast arguments with enum argument to specify type 68 ClassL, // Logical instructions which are op instructions 69 // but we need to not emit any suffix for in our 70 // tests. 71 ClassNoTest // Instructions which we do not test since they are 72 // not TRUE instructions. 73 }; 74 75 /// NeonTypeFlags - Flags to identify the types for overloaded Neon 76 /// builtins. These must be kept in sync with the flags in 77 /// include/clang/Basic/TargetBuiltins.h. 
namespace NeonTypeFlags {
// Bit layout of the value returned by Type::getNeonEnum(): the low four bits
// hold an EltType, and the two flag bits are OR'ed on top of it.
enum { EltTypeMask = 0xf, UnsignedFlag = 0x10, QuadFlag = 0x20 };

// Element type codes. The numeric order matters: Type::getNeonEnum() computes
// a value as "first enumerator of the family + offset".
enum EltType {
  Int8,
  Int16,
  Int32,
  Int64,
  Poly8,
  Poly16,
  Poly64,
  Poly128,
  Float16,
  Float32,
  Float64
};
}

class Intrinsic;
class NeonEmitter;
class Type;
class Variable;

//===----------------------------------------------------------------------===//
// TypeSpec
//===----------------------------------------------------------------------===//

/// A TypeSpec is just a simple wrapper around a string, but gets its own type
/// for strong typing purposes.
///
/// A TypeSpec can be used to create a type.
class TypeSpec : public std::string {
public:
  /// Split a concatenated typespec string into individual TypeSpecs.
  ///
  /// Characters are accumulated until a lowercase character is seen; that
  /// character terminates the current TypeSpec, which is then appended to the
  /// result. Any trailing characters that are not followed by a lowercase
  /// character are dropped.
  static std::vector<TypeSpec> fromTypeSpecs(StringRef Str) {
    std::vector<TypeSpec> Ret;
    TypeSpec Acc;
    for (char I : Str.str()) {
      if (islower(I)) {
        Acc.push_back(I);
        Ret.push_back(TypeSpec(Acc));
        Acc.clear();
      } else {
        Acc.push_back(I);
      }
    }
    return Ret;
  }
};

//===----------------------------------------------------------------------===//
// Type
//===----------------------------------------------------------------------===//

/// A concrete type, built from a TypeSpec plus a one-character prototype
/// modifier (or default-constructed as "void").
class Type {
private:
  /// The typespec this type was created from (empty for the default type).
  TypeSpec TS;

  bool Float, Signed, Immediate, Void, Poly, Constant, Pointer;
  // ScalarForMangling and NoManglingQ are really not suited to live here as
  // they are not related to the type. But they live in the TypeSpec (not the
  // prototype), so this is really the only place to store them.
  bool ScalarForMangling, NoManglingQ;
  /// Bitwidth is the size reported by getSizeInBits(); ElementBitwidth is the
  /// size of one element; NumVectors is 0 for a scalar and >0 for a vector.
  unsigned Bitwidth, ElementBitwidth, NumVectors;

public:
  /// Construct the "void" type: Void is set and every size is zero.
  Type()
      : Float(false), Signed(false), Immediate(false), Void(true), Poly(false),
        Constant(false), Pointer(false), ScalarForMangling(false),
        NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) {}

  /// Construct a type from typespec \p TS and then apply the prototype
  /// modifier \p CharMod to it.
  Type(TypeSpec TS, char CharMod)
      : TS(std::move(TS)), Float(false), Signed(false), Immediate(false),
        Void(false), Poly(false), Constant(false), Pointer(false),
        ScalarForMangling(false), NoManglingQ(false), Bitwidth(0),
        ElementBitwidth(0), NumVectors(0) {
    applyModifier(CharMod);
  }

  /// Returns a type representing "void".
  static Type getVoid() { return Type(); }

  // Two types compare equal when their C type names (str()) are identical.
  bool operator==(const Type &Other) const { return str() == Other.str(); }
  bool operator!=(const Type &Other) const { return !operator==(Other); }

  //
  // Query functions
  //
  bool isScalarForMangling() const { return ScalarForMangling; }
  bool noManglingQ() const { return NoManglingQ; }

  bool isPointer() const { return Pointer; }
  bool isFloating() const { return Float; }
  // "Integer" here means neither floating point nor polynomial.
  bool isInteger() const { return !Float && !Poly; }
  bool isSigned() const { return Signed; }
  bool isImmediate() const { return Immediate; }
  bool isScalar() const { return NumVectors == 0; }
  bool isVector() const { return NumVectors > 0; }
  bool isFloat() const { return Float && ElementBitwidth == 32; }
  bool isDouble() const { return Float && ElementBitwidth == 64; }
  bool isHalf() const { return Float && ElementBitwidth == 16; }
  bool isPoly() const { return Poly; }
  // Note: isChar() only checks the element width, so it is also true for
  // 8-bit polynomial elements.
  bool isChar() const { return ElementBitwidth == 8; }
  bool isShort() const { return !Float && ElementBitwidth == 16; }
  bool isInt() const { return !Float && ElementBitwidth == 32; }
  bool isLong() const { return !Float && ElementBitwidth == 64; }
  bool isVoid() const { return Void; }
  // Divides by ElementBitwidth, which is zero for a default-constructed
  // (void) type, so this must only be called on a type with a known element
  // size.
  unsigned getNumElements() const { return Bitwidth / ElementBitwidth; }
  unsigned getSizeInBits() const { return Bitwidth; }
  unsigned getElementSizeInBits() const { return ElementBitwidth; }
  unsigned getNumVectors() const { return NumVectors; }

  //
  // Mutator functions
  //
  void makeUnsigned() { Signed = false; }
  void makeSigned() { Signed = true; }
  /// Turn this type into a non-immediate integer with the given element width
  /// and signedness. Bitwidth and NumVectors are left untouched.
  void makeInteger(unsigned ElemWidth, bool Sign) {
    Float = false;
    Poly = false;
    Signed = Sign;
    Immediate = false;
    ElementBitwidth = ElemWidth;
  }
  /// Turn this type into a signed integer immediate with the given element
  /// width. Bitwidth and NumVectors are left untouched.
  void makeImmediate(unsigned ElemWidth) {
    Float = false;
    Poly = false;
    Signed = true;
    Immediate = true;
    ElementBitwidth = ElemWidth;
  }
  /// Collapse to a scalar of the current element type.
  void makeScalar() {
    Bitwidth = ElementBitwidth;
    NumVectors = 0;
  }
  /// Reduce a multi-vector type to a single vector; the type must already be
  /// a vector.
  void makeOneVector() {
    assert(isVector());
    NumVectors = 1;
  }
  /// Set the size to 128 bits; it is a fatal error if it already is 128.
  void doubleLanes() {
    assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!");
    Bitwidth = 128;
  }
  /// Set the size to 64 bits; it is a fatal error if it already is 64.
  void halveLanes() {
    assert_with_loc(Bitwidth != 64, "Can't get smaller than 64!");
    Bitwidth = 64;
  }

  /// Return the C string representation of a type, which is the typename
  /// defined in stdint.h or arm_neon.h.
  std::string str() const;

  /// Return the string representation of a type, which is an encoded
  /// string for passing to the BUILTIN() macro in Builtins.def.
  std::string builtin_str() const;

  /// Return the value in NeonTypeFlags for this type.
  unsigned getNeonEnum() const;

  /// Parse a type from a stdint.h or arm_neon.h typedef name,
  /// for example uint32x2_t or int64_t.
  static Type fromTypedefName(StringRef Name);

private:
  /// Creates the type based on the typespec string in TS.
  /// Sets "Quad" to true if the "Q" or "H" modifiers were
  /// seen. This is needed by applyModifier as some modifiers
  /// only take effect if the type size was changed by "Q" or "H".
  void applyTypespec(bool &Quad);
  /// Applies a prototype modifier to the type.
  void applyModifier(char Mod);
};

//===----------------------------------------------------------------------===//
// Variable
//===----------------------------------------------------------------------===//

/// A variable is a simple class that just has a type and a name.
class Variable {
  Type T;
  std::string N;

public:
  /// The default variable has type "void" and an empty name.
  Variable() : T(Type::getVoid()), N("") {}
  Variable(Type T, std::string N) : T(std::move(T)), N(std::move(N)) {}

  Type getType() const { return T; }
  /// The name as emitted into generated code: the stored name prefixed with
  /// two underscores.
  std::string getName() const { return "__" + N; }
};

//===----------------------------------------------------------------------===//
// Intrinsic
//===----------------------------------------------------------------------===//

/// The main grunt class. This represents an instantiation of an intrinsic with
/// a particular typespec and prototype.
class Intrinsic {
  friend class DagEmitter;

  /// The Record this intrinsic was created from.
  Record *R;
  /// The unmangled name and prototype.
  std::string Name, Proto;
  /// The input and output typespecs. InTS == OutTS except when
  /// CartesianProductOfTypes is 1 - this is the case for vreinterpret.
  TypeSpec OutTS, InTS;
  /// The base class kind. Most intrinsics use ClassS, which has full type
  /// info for integers (s32/u32). Some use ClassI, which doesn't care about
  /// signedness (i32), while some (ClassB) have no type at all, only a width
  /// (32).
  ClassKind CK;
  /// The list of DAGs for the body. May be empty, in which case we should
  /// emit a builtin call.
  ListInit *Body;
  /// The architectural #ifdef guard.
  std::string Guard;
  /// Set if the Unavailable bit is 1. This means we don't generate a body,
  /// just an "unavailable" attribute on a declaration.
  bool IsUnavailable;
  /// Is this intrinsic safe for big-endian, or do its arguments need
  /// reversing?
  bool BigEndianSafe;

  /// The types of return value [0] and parameters [1..].
  std::vector<Type> Types;
  /// The local variables defined.
  std::map<std::string, Variable> Variables;
  /// NeededEarly - set if any other intrinsic depends on this intrinsic.
  bool NeededEarly;
  /// UseMacro - set if we should implement using a macro or unset for a
  /// function.
  bool UseMacro;
  /// The set of intrinsics that this intrinsic uses/requires.
  std::set<Intrinsic *> Dependencies;
  /// The "base type", which is Type(OutTS, 'd'). InBaseType is only
  /// different if CartesianProductOfTypes = 1 (for vreinterpret).
  Type BaseType, InBaseType;
  /// The return variable.
  Variable RetVar;
  /// A postfix to apply to every variable. Defaults to "".
  std::string VariablePostfix;

  NeonEmitter &Emitter;
  /// Buffer that the emit* helpers write generated code into.
  std::stringstream OS;

public:
  /// Build an intrinsic instantiation. Besides storing its arguments, the
  /// constructor decides whether the intrinsic must be emitted as a macro and
  /// computes the concrete Type of the return value and of every parameter.
  ///
  /// Note that inside the body "Name", "Proto" and "Guard" refer to the
  /// StringRef parameters, which shadow the members of the same name.
  Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS,
            TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter,
            StringRef Guard, bool IsUnavailable, bool BigEndianSafe)
      : R(R), Name(Name.str()), Proto(Proto.str()), OutTS(OutTS), InTS(InTS),
        CK(CK), Body(Body), Guard(Guard.str()), IsUnavailable(IsUnavailable),
        BigEndianSafe(BigEndianSafe), NeededEarly(false), UseMacro(false),
        BaseType(OutTS, 'd'), InBaseType(InTS, 'd'), Emitter(Emitter) {
    // If this builtin takes an immediate argument, we need to #define it rather
    // than use a standard declaration, so that SemaChecking can range check
    // the immediate passed by the user.
    if (Proto.find('i') != std::string::npos)
      UseMacro = true;

    // Pointer arguments need to use macros to avoid hiding aligned attributes
    // from the pointer type.
    if (Proto.find('p') != std::string::npos ||
        Proto.find('c') != std::string::npos)
      UseMacro = true;

    // It is not permitted to pass or return an __fp16 by value, so intrinsics
    // taking a scalar float16_t must be implemented as macros.
    if (OutTS.find('h') != std::string::npos &&
        Proto.find('s') != std::string::npos)
      UseMacro = true;

    // Modify the TypeSpec per-argument to get a concrete Type, and create
    // known variables for each.
    // Types[0] is the return value.
    Types.emplace_back(OutTS, Proto[0]);
    for (unsigned I = 1; I < Proto.size(); ++I)
      Types.emplace_back(InTS, Proto[I]);
  }

  /// Get the Record that this intrinsic is based off.
  Record *getRecord() const { return R; }
  /// Get the set of Intrinsics that this intrinsic calls.
  /// This is the set of immediate dependencies, NOT the
  /// transitive closure.
  const std::set<Intrinsic *> &getDependencies() const { return Dependencies; }
  /// Get the architectural guard string (#ifdef).
  std::string getGuard() const { return Guard; }
  /// Get the non-mangled name.
  std::string getName() const { return Name; }

  /// Return true if the intrinsic takes an immediate operand.
  bool hasImmediate() const {
    return Proto.find('i') != std::string::npos;
  }
  /// Return the parameter index of the immediate operand.
  /// Proto[0] is the return type, hence the "- 1".
  unsigned getImmediateIdx() const {
    assert(hasImmediate());
    unsigned Idx = Proto.find('i');
    assert(Idx > 0 && "Can't return an immediate!");
    return Idx - 1;
  }

  /// Return true if the intrinsic takes a splat operand.
  bool hasSplat() const { return Proto.find('a') != std::string::npos; }
  /// Return the parameter index of the splat operand.
  /// Proto[0] is the return type, hence the "- 1".
  unsigned getSplatIdx() const {
    assert(hasSplat());
    unsigned Idx = Proto.find('a');
    assert(Idx > 0 && "Can't return a splat!");
    return Idx - 1;
  }

  /// The number of parameters: every prototype character except the first,
  /// which describes the return type.
  unsigned getNumParams() const { return Proto.size() - 1; }
  Type getReturnType() const { return Types[0]; }
  Type getParamType(unsigned I) const { return Types[I + 1]; }
  Type getBaseType() const { return BaseType; }
  /// Return the raw prototype string.
  std::string getProto() const { return Proto; }

  /// Return true if the prototype has a scalar argument.
  /// This does not return true for the "splat" code ('a').
  bool protoHasScalar() const;

  /// Return the index that parameter PIndex will sit at
  /// in a generated function call. This is often just PIndex,
  /// but may not be as things such as multiple-vector operands
  /// and sret parameters need to be taken into account.
  unsigned getGeneratedParamIdx(unsigned PIndex) {
    unsigned Idx = 0;
    if (getReturnType().getNumVectors() > 1)
      // Multiple vectors are passed as sret.
      ++Idx;

    // A scalar (0 vectors) still occupies one slot; a multi-vector parameter
    // occupies one slot per vector.
    for (unsigned I = 0; I < PIndex; ++I)
      Idx += std::max(1U, getParamType(I).getNumVectors());

    return Idx;
  }

  /// True if the record supplied a non-empty list of body DAGs.
  bool hasBody() const { return Body && Body->getValues().size() > 0; }

  void setNeededEarly() { NeededEarly = true; }

  bool operator<(const Intrinsic &Other) const {
    // Sort lexicographically on a two-tuple (Guard, Name)
    if (Guard != Other.Guard)
      return Guard < Other.Guard;
    return Name < Other.Name;
  }

  /// Return the class kind of this intrinsic. When \p UseClassBIfScalar is
  /// true and the prototype has NO scalar argument, ClassB is returned
  /// instead of the intrinsic's own kind.
  ClassKind getClassKind(bool UseClassBIfScalar = false) {
    if (UseClassBIfScalar && !protoHasScalar())
      return ClassB;
    return CK;
  }

  /// Return the name, mangled with type information.
  /// If ForceClassS is true, use ClassS (u32/s32) instead
  /// of the intrinsic's own type class.
  std::string getMangledName(bool ForceClassS = false) const;
  /// Return the type code for a builtin function call.
  std::string getInstTypeCode(Type T, ClassKind CK) const;
  /// Return the type string for a BUILTIN() macro in Builtins.def.
  std::string getBuiltinTypeStr();

  /// Generate the intrinsic, returning code.
  std::string generate();
  /// Perform type checking and populate the dependency graph, but
  /// don't generate code yet.
  void indexBody();

private:
  std::string mangleName(std::string Name, ClassKind CK) const;

  void initVariables();
  std::string replaceParamsIn(std::string S);

  void emitBodyAsBuiltinCall();

  void generateImpl(bool ReverseArguments,
                    StringRef NamePrefix, StringRef CallPrefix);
  void emitReturn();
  void emitBody(StringRef CallPrefix);
  void emitShadowedArgs();
  void emitArgumentReversal();
  void emitReturnReversal();
  void emitReverseVariable(Variable &Dest, Variable &Src);
  void emitNewLine();
  void emitClosingBrace();
  void emitOpeningBrace();
  void emitPrototype(StringRef NamePrefix);

  /// Helper that emits code for the DAG nodes of an intrinsic's body. Each
  /// emit function returns the Type of the emitted expression together with
  /// its text.
  class DagEmitter {
    Intrinsic &Intr;
    StringRef CallPrefix;

  public:
    DagEmitter(Intrinsic &Intr, StringRef CallPrefix) :
      Intr(Intr), CallPrefix(CallPrefix) {
    }
    std::pair<Type, std::string> emitDagArg(Init *Arg, std::string ArgName);
    std::pair<Type, std::string> emitDagSaveTemp(DagInit *DI);
    std::pair<Type, std::string> emitDagSplat(DagInit *DI);
    std::pair<Type, std::string> emitDagDup(DagInit *DI);
    std::pair<Type, std::string> emitDagShuffle(DagInit *DI);
    std::pair<Type, std::string> emitDagCast(DagInit *DI, bool IsBitCast);
    std::pair<Type, std::string> emitDagCall(DagInit *DI);
    std::pair<Type, std::string> emitDagNameReplace(DagInit *DI);
    std::pair<Type, std::string> emitDagLiteral(DagInit *DI);
    std::pair<Type, std::string> emitDagOp(DagInit *DI);
    std::pair<Type, std::string> emitDag(DagInit *DI);
  };

};

//===----------------------------------------------------------------------===// 494 // NeonEmitter 495 //===----------------------------------------------------------------------===// 496 497 class NeonEmitter { 498 RecordKeeper &Records; 499 DenseMap<Record *, ClassKind> ClassMap; 500 std::map<std::string, std::deque<Intrinsic>> IntrinsicMap; 501 unsigned UniqueNumber; 502 503 void createIntrinsic(Record *R, SmallVectorImpl<Intrinsic *> &Out); 504 void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs); 505 void genOverloadTypeCheckCode(raw_ostream &OS, 506 SmallVectorImpl<Intrinsic *> &Defs); 507 void genIntrinsicRangeCheckCode(raw_ostream &OS, 508 SmallVectorImpl<Intrinsic *> &Defs); 509 510 public: 511 /// Called by Intrinsic - this attempts to get an intrinsic that takes 512 /// the given types as arguments. 513 Intrinsic &getIntrinsic(StringRef Name, ArrayRef<Type> Types); 514 515 /// Called by Intrinsic - returns a globally-unique number. 516 unsigned getUniqueNumber() { return UniqueNumber++; } 517 518 NeonEmitter(RecordKeeper &R) : Records(R), UniqueNumber(0) { 519 Record *SI = R.getClass("SInst"); 520 Record *II = R.getClass("IInst"); 521 Record *WI = R.getClass("WInst"); 522 Record *SOpI = R.getClass("SOpInst"); 523 Record *IOpI = R.getClass("IOpInst"); 524 Record *WOpI = R.getClass("WOpInst"); 525 Record *LOpI = R.getClass("LOpInst"); 526 Record *NoTestOpI = R.getClass("NoTestOpInst"); 527 528 ClassMap[SI] = ClassS; 529 ClassMap[II] = ClassI; 530 ClassMap[WI] = ClassW; 531 ClassMap[SOpI] = ClassS; 532 ClassMap[IOpI] = ClassI; 533 ClassMap[WOpI] = ClassW; 534 ClassMap[LOpI] = ClassL; 535 ClassMap[NoTestOpI] = ClassNoTest; 536 } 537 538 // run - Emit arm_neon.h.inc 539 void run(raw_ostream &o); 540 541 // runHeader - Emit all the __builtin prototypes used in arm_neon.h 542 void runHeader(raw_ostream &o); 543 544 // runTests - Emit tests for all the Neon intrinsics. 
545 void runTests(raw_ostream &o); 546 }; 547 548 } // end anonymous namespace 549 550 //===----------------------------------------------------------------------===// 551 // Type implementation 552 //===----------------------------------------------------------------------===// 553 554 std::string Type::str() const { 555 if (Void) 556 return "void"; 557 std::string S; 558 559 if (!Signed && isInteger()) 560 S += "u"; 561 562 if (Poly) 563 S += "poly"; 564 else if (Float) 565 S += "float"; 566 else 567 S += "int"; 568 569 S += utostr(ElementBitwidth); 570 if (isVector()) 571 S += "x" + utostr(getNumElements()); 572 if (NumVectors > 1) 573 S += "x" + utostr(NumVectors); 574 S += "_t"; 575 576 if (Constant) 577 S += " const"; 578 if (Pointer) 579 S += " *"; 580 581 return S; 582 } 583 584 std::string Type::builtin_str() const { 585 std::string S; 586 if (isVoid()) 587 return "v"; 588 589 if (Pointer) 590 // All pointers are void pointers. 591 S += "v"; 592 else if (isInteger()) 593 switch (ElementBitwidth) { 594 case 8: S += "c"; break; 595 case 16: S += "s"; break; 596 case 32: S += "i"; break; 597 case 64: S += "Wi"; break; 598 case 128: S += "LLLi"; break; 599 default: llvm_unreachable("Unhandled case!"); 600 } 601 else 602 switch (ElementBitwidth) { 603 case 16: S += "h"; break; 604 case 32: S += "f"; break; 605 case 64: S += "d"; break; 606 default: llvm_unreachable("Unhandled case!"); 607 } 608 609 if (isChar() && !Pointer) 610 // Make chars explicitly signed. 611 S = "S" + S; 612 else if (isInteger() && !Pointer && !Signed) 613 S = "U" + S; 614 615 // Constant indices are "int", but have the "constant expression" modifier. 
616 if (isImmediate()) { 617 assert(isInteger() && isSigned()); 618 S = "I" + S; 619 } 620 621 if (isScalar()) { 622 if (Constant) S += "C"; 623 if (Pointer) S += "*"; 624 return S; 625 } 626 627 std::string Ret; 628 for (unsigned I = 0; I < NumVectors; ++I) 629 Ret += "V" + utostr(getNumElements()) + S; 630 631 return Ret; 632 } 633 634 unsigned Type::getNeonEnum() const { 635 unsigned Addend; 636 switch (ElementBitwidth) { 637 case 8: Addend = 0; break; 638 case 16: Addend = 1; break; 639 case 32: Addend = 2; break; 640 case 64: Addend = 3; break; 641 case 128: Addend = 4; break; 642 default: llvm_unreachable("Unhandled element bitwidth!"); 643 } 644 645 unsigned Base = (unsigned)NeonTypeFlags::Int8 + Addend; 646 if (Poly) { 647 // Adjustment needed because Poly32 doesn't exist. 648 if (Addend >= 2) 649 --Addend; 650 Base = (unsigned)NeonTypeFlags::Poly8 + Addend; 651 } 652 if (Float) { 653 assert(Addend != 0 && "Float8 doesn't exist!"); 654 Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1); 655 } 656 657 if (Bitwidth == 128) 658 Base |= (unsigned)NeonTypeFlags::QuadFlag; 659 if (isInteger() && !Signed) 660 Base |= (unsigned)NeonTypeFlags::UnsignedFlag; 661 662 return Base; 663 } 664 665 Type Type::fromTypedefName(StringRef Name) { 666 Type T; 667 T.Void = false; 668 T.Float = false; 669 T.Poly = false; 670 671 if (Name.front() == 'u') { 672 T.Signed = false; 673 Name = Name.drop_front(); 674 } else { 675 T.Signed = true; 676 } 677 678 if (Name.startswith("float")) { 679 T.Float = true; 680 Name = Name.drop_front(5); 681 } else if (Name.startswith("poly")) { 682 T.Poly = true; 683 Name = Name.drop_front(4); 684 } else { 685 assert(Name.startswith("int")); 686 Name = Name.drop_front(3); 687 } 688 689 unsigned I = 0; 690 for (I = 0; I < Name.size(); ++I) { 691 if (!isdigit(Name[I])) 692 break; 693 } 694 Name.substr(0, I).getAsInteger(10, T.ElementBitwidth); 695 Name = Name.drop_front(I); 696 697 T.Bitwidth = T.ElementBitwidth; 698 T.NumVectors = 1; 699 700 if 
(Name.front() == 'x') { 701 Name = Name.drop_front(); 702 unsigned I = 0; 703 for (I = 0; I < Name.size(); ++I) { 704 if (!isdigit(Name[I])) 705 break; 706 } 707 unsigned NumLanes; 708 Name.substr(0, I).getAsInteger(10, NumLanes); 709 Name = Name.drop_front(I); 710 T.Bitwidth = T.ElementBitwidth * NumLanes; 711 } else { 712 // Was scalar. 713 T.NumVectors = 0; 714 } 715 if (Name.front() == 'x') { 716 Name = Name.drop_front(); 717 unsigned I = 0; 718 for (I = 0; I < Name.size(); ++I) { 719 if (!isdigit(Name[I])) 720 break; 721 } 722 Name.substr(0, I).getAsInteger(10, T.NumVectors); 723 Name = Name.drop_front(I); 724 } 725 726 assert(Name.startswith("_t") && "Malformed typedef!"); 727 return T; 728 } 729 730 void Type::applyTypespec(bool &Quad) { 731 std::string S = TS; 732 ScalarForMangling = false; 733 Void = false; 734 Poly = Float = false; 735 ElementBitwidth = ~0U; 736 Signed = true; 737 NumVectors = 1; 738 739 for (char I : S) { 740 switch (I) { 741 case 'S': 742 ScalarForMangling = true; 743 break; 744 case 'H': 745 NoManglingQ = true; 746 Quad = true; 747 break; 748 case 'Q': 749 Quad = true; 750 break; 751 case 'P': 752 Poly = true; 753 break; 754 case 'U': 755 Signed = false; 756 break; 757 case 'c': 758 ElementBitwidth = 8; 759 break; 760 case 'h': 761 Float = true; 762 // Fall through 763 case 's': 764 ElementBitwidth = 16; 765 break; 766 case 'f': 767 Float = true; 768 // Fall through 769 case 'i': 770 ElementBitwidth = 32; 771 break; 772 case 'd': 773 Float = true; 774 // Fall through 775 case 'l': 776 ElementBitwidth = 64; 777 break; 778 case 'k': 779 ElementBitwidth = 128; 780 // Poly doesn't have a 128x1 type. 781 if (Poly) 782 NumVectors = 0; 783 break; 784 default: 785 llvm_unreachable("Unhandled type code!"); 786 } 787 } 788 assert(ElementBitwidth != ~0U && "Bad element bitwidth!"); 789 790 Bitwidth = Quad ? 
128 : 64; 791 } 792 793 void Type::applyModifier(char Mod) { 794 bool AppliedQuad = false; 795 applyTypespec(AppliedQuad); 796 797 switch (Mod) { 798 case 'v': 799 Void = true; 800 break; 801 case 't': 802 if (Poly) { 803 Poly = false; 804 Signed = false; 805 } 806 break; 807 case 'b': 808 Signed = false; 809 Float = false; 810 Poly = false; 811 NumVectors = 0; 812 Bitwidth = ElementBitwidth; 813 break; 814 case '$': 815 Signed = true; 816 Float = false; 817 Poly = false; 818 NumVectors = 0; 819 Bitwidth = ElementBitwidth; 820 break; 821 case 'u': 822 Signed = false; 823 Poly = false; 824 Float = false; 825 break; 826 case 'x': 827 Signed = true; 828 assert(!Poly && "'u' can't be used with poly types!"); 829 Float = false; 830 break; 831 case 'o': 832 Bitwidth = ElementBitwidth = 64; 833 NumVectors = 0; 834 Float = true; 835 break; 836 case 'y': 837 Bitwidth = ElementBitwidth = 32; 838 NumVectors = 0; 839 Float = true; 840 break; 841 case 'f': 842 Float = true; 843 ElementBitwidth = 32; 844 break; 845 case 'F': 846 Float = true; 847 ElementBitwidth = 64; 848 break; 849 case 'g': 850 if (AppliedQuad) 851 Bitwidth /= 2; 852 break; 853 case 'j': 854 if (!AppliedQuad) 855 Bitwidth *= 2; 856 break; 857 case 'w': 858 ElementBitwidth *= 2; 859 Bitwidth *= 2; 860 break; 861 case 'n': 862 ElementBitwidth *= 2; 863 break; 864 case 'i': 865 Float = false; 866 Poly = false; 867 ElementBitwidth = Bitwidth = 32; 868 NumVectors = 0; 869 Signed = true; 870 Immediate = true; 871 break; 872 case 'l': 873 Float = false; 874 Poly = false; 875 ElementBitwidth = Bitwidth = 64; 876 NumVectors = 0; 877 Signed = false; 878 Immediate = true; 879 break; 880 case 'z': 881 ElementBitwidth /= 2; 882 Bitwidth = ElementBitwidth; 883 NumVectors = 0; 884 break; 885 case 'r': 886 ElementBitwidth *= 2; 887 Bitwidth = ElementBitwidth; 888 NumVectors = 0; 889 break; 890 case 's': 891 case 'a': 892 Bitwidth = ElementBitwidth; 893 NumVectors = 0; 894 break; 895 case 'k': 896 Bitwidth *= 2; 897 break; 898 
case 'c': 899 Constant = true; 900 // Fall through 901 case 'p': 902 Pointer = true; 903 Bitwidth = ElementBitwidth; 904 NumVectors = 0; 905 break; 906 case 'h': 907 ElementBitwidth /= 2; 908 break; 909 case 'q': 910 ElementBitwidth /= 2; 911 Bitwidth *= 2; 912 break; 913 case 'e': 914 ElementBitwidth /= 2; 915 Signed = false; 916 break; 917 case 'm': 918 ElementBitwidth /= 2; 919 Bitwidth /= 2; 920 break; 921 case 'd': 922 break; 923 case '2': 924 NumVectors = 2; 925 break; 926 case '3': 927 NumVectors = 3; 928 break; 929 case '4': 930 NumVectors = 4; 931 break; 932 case 'B': 933 NumVectors = 2; 934 if (!AppliedQuad) 935 Bitwidth *= 2; 936 break; 937 case 'C': 938 NumVectors = 3; 939 if (!AppliedQuad) 940 Bitwidth *= 2; 941 break; 942 case 'D': 943 NumVectors = 4; 944 if (!AppliedQuad) 945 Bitwidth *= 2; 946 break; 947 default: 948 llvm_unreachable("Unhandled character!"); 949 } 950 } 951 952 //===----------------------------------------------------------------------===// 953 // Intrinsic implementation 954 //===----------------------------------------------------------------------===// 955 956 std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const { 957 char typeCode = '\0'; 958 bool printNumber = true; 959 960 if (CK == ClassB) 961 return ""; 962 963 if (T.isPoly()) 964 typeCode = 'p'; 965 else if (T.isInteger()) 966 typeCode = T.isSigned() ? 
's' : 'u'; 967 else 968 typeCode = 'f'; 969 970 if (CK == ClassI) { 971 switch (typeCode) { 972 default: 973 break; 974 case 's': 975 case 'u': 976 case 'p': 977 typeCode = 'i'; 978 break; 979 } 980 } 981 if (CK == ClassB) { 982 typeCode = '\0'; 983 } 984 985 std::string S; 986 if (typeCode != '\0') 987 S.push_back(typeCode); 988 if (printNumber) 989 S += utostr(T.getElementSizeInBits()); 990 991 return S; 992 } 993 994 static bool isFloatingPointProtoModifier(char Mod) { 995 return Mod == 'F' || Mod == 'f'; 996 } 997 998 std::string Intrinsic::getBuiltinTypeStr() { 999 ClassKind LocalCK = getClassKind(true); 1000 std::string S; 1001 1002 Type RetT = getReturnType(); 1003 if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() && 1004 !RetT.isFloating()) 1005 RetT.makeInteger(RetT.getElementSizeInBits(), false); 1006 1007 // Since the return value must be one type, return a vector type of the 1008 // appropriate width which we will bitcast. An exception is made for 1009 // returning structs of 2, 3, or 4 vectors which are returned in a sret-like 1010 // fashion, storing them to a pointer arg. 1011 if (RetT.getNumVectors() > 1) { 1012 S += "vv*"; // void result with void* first argument 1013 } else { 1014 if (RetT.isPoly()) 1015 RetT.makeInteger(RetT.getElementSizeInBits(), false); 1016 if (!RetT.isScalar() && !RetT.isSigned()) 1017 RetT.makeSigned(); 1018 1019 bool ForcedVectorFloatingType = isFloatingPointProtoModifier(Proto[0]); 1020 if (LocalCK == ClassB && !RetT.isScalar() && !ForcedVectorFloatingType) 1021 // Cast to vector of 8-bit elements. 
1022 RetT.makeInteger(8, true); 1023 1024 S += RetT.builtin_str(); 1025 } 1026 1027 for (unsigned I = 0; I < getNumParams(); ++I) { 1028 Type T = getParamType(I); 1029 if (T.isPoly()) 1030 T.makeInteger(T.getElementSizeInBits(), false); 1031 1032 bool ForcedFloatingType = isFloatingPointProtoModifier(Proto[I + 1]); 1033 if (LocalCK == ClassB && !T.isScalar() && !ForcedFloatingType) 1034 T.makeInteger(8, true); 1035 // Halves always get converted to 8-bit elements. 1036 if (T.isHalf() && T.isVector() && !T.isScalarForMangling()) 1037 T.makeInteger(8, true); 1038 1039 if (LocalCK == ClassI) 1040 T.makeSigned(); 1041 1042 if (hasImmediate() && getImmediateIdx() == I) 1043 T.makeImmediate(32); 1044 1045 S += T.builtin_str(); 1046 } 1047 1048 // Extra constant integer to hold type class enum for this function, e.g. s8 1049 if (LocalCK == ClassB) 1050 S += "i"; 1051 1052 return S; 1053 } 1054 1055 std::string Intrinsic::getMangledName(bool ForceClassS) const { 1056 // Check if the prototype has a scalar operand with the type of the vector 1057 // elements. If not, bitcasting the args will take care of arg checking. 1058 // The actual signedness etc. will be taken care of with special enums. 1059 ClassKind LocalCK = CK; 1060 if (!protoHasScalar()) 1061 LocalCK = ClassB; 1062 1063 return mangleName(Name, ForceClassS ? ClassS : LocalCK); 1064 } 1065 1066 std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) const { 1067 std::string typeCode = getInstTypeCode(BaseType, LocalCK); 1068 std::string S = Name; 1069 1070 if (Name == "vcvt_f16_f32" || Name == "vcvt_f32_f16" || 1071 Name == "vcvt_f32_f64" || Name == "vcvt_f64_f32") 1072 return Name; 1073 1074 if (typeCode.size() > 0) { 1075 // If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN. 
1076 if (Name.size() >= 3 && isdigit(Name.back()) && 1077 Name[Name.length() - 2] == 'x' && Name[Name.length() - 3] == '_') 1078 S.insert(S.length() - 3, "_" + typeCode); 1079 else 1080 S += "_" + typeCode; 1081 } 1082 1083 if (BaseType != InBaseType) { 1084 // A reinterpret - out the input base type at the end. 1085 S += "_" + getInstTypeCode(InBaseType, LocalCK); 1086 } 1087 1088 if (LocalCK == ClassB) 1089 S += "_v"; 1090 1091 // Insert a 'q' before the first '_' character so that it ends up before 1092 // _lane or _n on vector-scalar operations. 1093 if (BaseType.getSizeInBits() == 128 && !BaseType.noManglingQ()) { 1094 size_t Pos = S.find('_'); 1095 S.insert(Pos, "q"); 1096 } 1097 1098 char Suffix = '\0'; 1099 if (BaseType.isScalarForMangling()) { 1100 switch (BaseType.getElementSizeInBits()) { 1101 case 8: Suffix = 'b'; break; 1102 case 16: Suffix = 'h'; break; 1103 case 32: Suffix = 's'; break; 1104 case 64: Suffix = 'd'; break; 1105 default: llvm_unreachable("Bad suffix!"); 1106 } 1107 } 1108 if (Suffix != '\0') { 1109 size_t Pos = S.find('_'); 1110 S.insert(Pos, &Suffix, 1); 1111 } 1112 1113 return S; 1114 } 1115 1116 std::string Intrinsic::replaceParamsIn(std::string S) { 1117 while (S.find('$') != std::string::npos) { 1118 size_t Pos = S.find('$'); 1119 size_t End = Pos + 1; 1120 while (isalpha(S[End])) 1121 ++End; 1122 1123 std::string VarName = S.substr(Pos + 1, End - Pos - 1); 1124 assert_with_loc(Variables.find(VarName) != Variables.end(), 1125 "Variable not defined!"); 1126 S.replace(Pos, End - Pos, Variables.find(VarName)->second.getName()); 1127 } 1128 1129 return S; 1130 } 1131 1132 void Intrinsic::initVariables() { 1133 Variables.clear(); 1134 1135 // Modify the TypeSpec per-argument to get a concrete Type, and create 1136 // known variables for each. 
1137 for (unsigned I = 1; I < Proto.size(); ++I) { 1138 char NameC = '0' + (I - 1); 1139 std::string Name = "p"; 1140 Name.push_back(NameC); 1141 1142 Variables[Name] = Variable(Types[I], Name + VariablePostfix); 1143 } 1144 RetVar = Variable(Types[0], "ret" + VariablePostfix); 1145 } 1146 1147 void Intrinsic::emitPrototype(StringRef NamePrefix) { 1148 if (UseMacro) 1149 OS << "#define "; 1150 else 1151 OS << "__ai " << Types[0].str() << " "; 1152 1153 OS << NamePrefix.str() << mangleName(Name, ClassS) << "("; 1154 1155 for (unsigned I = 0; I < getNumParams(); ++I) { 1156 if (I != 0) 1157 OS << ", "; 1158 1159 char NameC = '0' + I; 1160 std::string Name = "p"; 1161 Name.push_back(NameC); 1162 assert(Variables.find(Name) != Variables.end()); 1163 Variable &V = Variables[Name]; 1164 1165 if (!UseMacro) 1166 OS << V.getType().str() << " "; 1167 OS << V.getName(); 1168 } 1169 1170 OS << ")"; 1171 } 1172 1173 void Intrinsic::emitOpeningBrace() { 1174 if (UseMacro) 1175 OS << " __extension__ ({"; 1176 else 1177 OS << " {"; 1178 emitNewLine(); 1179 } 1180 1181 void Intrinsic::emitClosingBrace() { 1182 if (UseMacro) 1183 OS << "})"; 1184 else 1185 OS << "}"; 1186 } 1187 1188 void Intrinsic::emitNewLine() { 1189 if (UseMacro) 1190 OS << " \\\n"; 1191 else 1192 OS << "\n"; 1193 } 1194 1195 void Intrinsic::emitReverseVariable(Variable &Dest, Variable &Src) { 1196 if (Dest.getType().getNumVectors() > 1) { 1197 emitNewLine(); 1198 1199 for (unsigned K = 0; K < Dest.getType().getNumVectors(); ++K) { 1200 OS << " " << Dest.getName() << ".val[" << K << "] = " 1201 << "__builtin_shufflevector(" 1202 << Src.getName() << ".val[" << K << "], " 1203 << Src.getName() << ".val[" << K << "]"; 1204 for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J) 1205 OS << ", " << J; 1206 OS << ");"; 1207 emitNewLine(); 1208 } 1209 } else { 1210 OS << " " << Dest.getName() 1211 << " = __builtin_shufflevector(" << Src.getName() << ", " << Src.getName(); 1212 for (int J = 
Dest.getType().getNumElements() - 1; J >= 0; --J) 1213 OS << ", " << J; 1214 OS << ");"; 1215 emitNewLine(); 1216 } 1217 } 1218 1219 void Intrinsic::emitArgumentReversal() { 1220 if (BigEndianSafe) 1221 return; 1222 1223 // Reverse all vector arguments. 1224 for (unsigned I = 0; I < getNumParams(); ++I) { 1225 std::string Name = "p" + utostr(I); 1226 std::string NewName = "rev" + utostr(I); 1227 1228 Variable &V = Variables[Name]; 1229 Variable NewV(V.getType(), NewName + VariablePostfix); 1230 1231 if (!NewV.getType().isVector() || NewV.getType().getNumElements() == 1) 1232 continue; 1233 1234 OS << " " << NewV.getType().str() << " " << NewV.getName() << ";"; 1235 emitReverseVariable(NewV, V); 1236 V = NewV; 1237 } 1238 } 1239 1240 void Intrinsic::emitReturnReversal() { 1241 if (BigEndianSafe) 1242 return; 1243 if (!getReturnType().isVector() || getReturnType().isVoid() || 1244 getReturnType().getNumElements() == 1) 1245 return; 1246 emitReverseVariable(RetVar, RetVar); 1247 } 1248 1249 1250 void Intrinsic::emitShadowedArgs() { 1251 // Macro arguments are not type-checked like inline function arguments, 1252 // so assign them to local temporaries to get the right type checking. 1253 if (!UseMacro) 1254 return; 1255 1256 for (unsigned I = 0; I < getNumParams(); ++I) { 1257 // Do not create a temporary for an immediate argument. 1258 // That would defeat the whole point of using a macro! 1259 if (hasImmediate() && Proto[I+1] == 'i') 1260 continue; 1261 // Do not create a temporary for pointer arguments. The input 1262 // pointer may have an alignment hint. 
1263 if (getParamType(I).isPointer()) 1264 continue; 1265 1266 std::string Name = "p" + utostr(I); 1267 1268 assert(Variables.find(Name) != Variables.end()); 1269 Variable &V = Variables[Name]; 1270 1271 std::string NewName = "s" + utostr(I); 1272 Variable V2(V.getType(), NewName + VariablePostfix); 1273 1274 OS << " " << V2.getType().str() << " " << V2.getName() << " = " 1275 << V.getName() << ";"; 1276 emitNewLine(); 1277 1278 V = V2; 1279 } 1280 } 1281 1282 // We don't check 'a' in this function, because for builtin function the 1283 // argument matching to 'a' uses a vector type splatted from a scalar type. 1284 bool Intrinsic::protoHasScalar() const { 1285 return (Proto.find('s') != std::string::npos || 1286 Proto.find('z') != std::string::npos || 1287 Proto.find('r') != std::string::npos || 1288 Proto.find('b') != std::string::npos || 1289 Proto.find('$') != std::string::npos || 1290 Proto.find('y') != std::string::npos || 1291 Proto.find('o') != std::string::npos); 1292 } 1293 1294 void Intrinsic::emitBodyAsBuiltinCall() { 1295 std::string S; 1296 1297 // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit 1298 // sret-like argument. 1299 bool SRet = getReturnType().getNumVectors() >= 2; 1300 1301 StringRef N = Name; 1302 if (hasSplat()) { 1303 // Call the non-splat builtin: chop off the "_n" suffix from the name. 1304 assert(N.endswith("_n")); 1305 N = N.drop_back(2); 1306 } 1307 1308 ClassKind LocalCK = CK; 1309 if (!protoHasScalar()) 1310 LocalCK = ClassB; 1311 1312 if (!getReturnType().isVoid() && !SRet) 1313 S += "(" + RetVar.getType().str() + ") "; 1314 1315 S += "__builtin_neon_" + mangleName(N, LocalCK) + "("; 1316 1317 if (SRet) 1318 S += "&" + RetVar.getName() + ", "; 1319 1320 for (unsigned I = 0; I < getNumParams(); ++I) { 1321 Variable &V = Variables["p" + utostr(I)]; 1322 Type T = V.getType(); 1323 1324 // Handle multiple-vector values specially, emitting each subvector as an 1325 // argument to the builtin. 
1326 if (T.getNumVectors() > 1) { 1327 // Check if an explicit cast is needed. 1328 std::string Cast; 1329 if (T.isChar() || T.isPoly() || !T.isSigned()) { 1330 Type T2 = T; 1331 T2.makeOneVector(); 1332 T2.makeInteger(8, /*Signed=*/true); 1333 Cast = "(" + T2.str() + ")"; 1334 } 1335 1336 for (unsigned J = 0; J < T.getNumVectors(); ++J) 1337 S += Cast + V.getName() + ".val[" + utostr(J) + "], "; 1338 continue; 1339 } 1340 1341 std::string Arg; 1342 Type CastToType = T; 1343 if (hasSplat() && I == getSplatIdx()) { 1344 Arg = "(" + BaseType.str() + ") {"; 1345 for (unsigned J = 0; J < BaseType.getNumElements(); ++J) { 1346 if (J != 0) 1347 Arg += ", "; 1348 Arg += V.getName(); 1349 } 1350 Arg += "}"; 1351 1352 CastToType = BaseType; 1353 } else { 1354 Arg = V.getName(); 1355 } 1356 1357 // Check if an explicit cast is needed. 1358 if (CastToType.isVector()) { 1359 CastToType.makeInteger(8, true); 1360 Arg = "(" + CastToType.str() + ")" + Arg; 1361 } 1362 1363 S += Arg + ", "; 1364 } 1365 1366 // Extra constant integer to hold type class enum for this function, e.g. s8 1367 if (getClassKind(true) == ClassB) { 1368 Type ThisTy = getReturnType(); 1369 if (Proto[0] == 'v' || isFloatingPointProtoModifier(Proto[0])) 1370 ThisTy = getParamType(0); 1371 if (ThisTy.isPointer()) 1372 ThisTy = getParamType(1); 1373 1374 S += utostr(ThisTy.getNeonEnum()); 1375 } else { 1376 // Remove extraneous ", ". 1377 S.pop_back(); 1378 S.pop_back(); 1379 } 1380 S += ");"; 1381 1382 std::string RetExpr; 1383 if (!SRet && !RetVar.getType().isVoid()) 1384 RetExpr = RetVar.getName() + " = "; 1385 1386 OS << " " << RetExpr << S; 1387 emitNewLine(); 1388 } 1389 1390 void Intrinsic::emitBody(StringRef CallPrefix) { 1391 std::vector<std::string> Lines; 1392 1393 assert(RetVar.getType() == Types[0]); 1394 // Create a return variable, if we're not void. 
1395 if (!RetVar.getType().isVoid()) { 1396 OS << " " << RetVar.getType().str() << " " << RetVar.getName() << ";"; 1397 emitNewLine(); 1398 } 1399 1400 if (!Body || Body->getValues().size() == 0) { 1401 // Nothing specific to output - must output a builtin. 1402 emitBodyAsBuiltinCall(); 1403 return; 1404 } 1405 1406 // We have a list of "things to output". The last should be returned. 1407 for (auto *I : Body->getValues()) { 1408 if (StringInit *SI = dyn_cast<StringInit>(I)) { 1409 Lines.push_back(replaceParamsIn(SI->getAsString())); 1410 } else if (DagInit *DI = dyn_cast<DagInit>(I)) { 1411 DagEmitter DE(*this, CallPrefix); 1412 Lines.push_back(DE.emitDag(DI).second + ";"); 1413 } 1414 } 1415 1416 assert(!Lines.empty() && "Empty def?"); 1417 if (!RetVar.getType().isVoid()) 1418 Lines.back().insert(0, RetVar.getName() + " = "); 1419 1420 for (auto &L : Lines) { 1421 OS << " " << L; 1422 emitNewLine(); 1423 } 1424 } 1425 1426 void Intrinsic::emitReturn() { 1427 if (RetVar.getType().isVoid()) 1428 return; 1429 if (UseMacro) 1430 OS << " " << RetVar.getName() << ";"; 1431 else 1432 OS << " return " << RetVar.getName() << ";"; 1433 emitNewLine(); 1434 } 1435 1436 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(DagInit *DI) { 1437 // At this point we should only be seeing a def. 
1438 DefInit *DefI = cast<DefInit>(DI->getOperator()); 1439 std::string Op = DefI->getAsString(); 1440 1441 if (Op == "cast" || Op == "bitcast") 1442 return emitDagCast(DI, Op == "bitcast"); 1443 if (Op == "shuffle") 1444 return emitDagShuffle(DI); 1445 if (Op == "dup") 1446 return emitDagDup(DI); 1447 if (Op == "splat") 1448 return emitDagSplat(DI); 1449 if (Op == "save_temp") 1450 return emitDagSaveTemp(DI); 1451 if (Op == "op") 1452 return emitDagOp(DI); 1453 if (Op == "call") 1454 return emitDagCall(DI); 1455 if (Op == "name_replace") 1456 return emitDagNameReplace(DI); 1457 if (Op == "literal") 1458 return emitDagLiteral(DI); 1459 assert_with_loc(false, "Unknown operation!"); 1460 return std::make_pair(Type::getVoid(), ""); 1461 } 1462 1463 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagOp(DagInit *DI) { 1464 std::string Op = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1465 if (DI->getNumArgs() == 2) { 1466 // Unary op. 1467 std::pair<Type, std::string> R = 1468 emitDagArg(DI->getArg(1), DI->getArgName(1)); 1469 return std::make_pair(R.first, Op + R.second); 1470 } else { 1471 assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!"); 1472 std::pair<Type, std::string> R1 = 1473 emitDagArg(DI->getArg(1), DI->getArgName(1)); 1474 std::pair<Type, std::string> R2 = 1475 emitDagArg(DI->getArg(2), DI->getArgName(2)); 1476 assert_with_loc(R1.first == R2.first, "Argument type mismatch!"); 1477 return std::make_pair(R1.first, R1.second + " " + Op + " " + R2.second); 1478 } 1479 } 1480 1481 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCall(DagInit *DI) { 1482 std::vector<Type> Types; 1483 std::vector<std::string> Values; 1484 for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) { 1485 std::pair<Type, std::string> R = 1486 emitDagArg(DI->getArg(I + 1), DI->getArgName(I + 1)); 1487 Types.push_back(R.first); 1488 Values.push_back(R.second); 1489 } 1490 1491 // Look up the called intrinsic. 
1492 std::string N; 1493 if (StringInit *SI = dyn_cast<StringInit>(DI->getArg(0))) 1494 N = SI->getAsUnquotedString(); 1495 else 1496 N = emitDagArg(DI->getArg(0), "").second; 1497 Intrinsic &Callee = Intr.Emitter.getIntrinsic(N, Types); 1498 1499 // Make sure the callee is known as an early def. 1500 Callee.setNeededEarly(); 1501 Intr.Dependencies.insert(&Callee); 1502 1503 // Now create the call itself. 1504 std::string S = CallPrefix.str() + Callee.getMangledName(true) + "("; 1505 for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) { 1506 if (I != 0) 1507 S += ", "; 1508 S += Values[I]; 1509 } 1510 S += ")"; 1511 1512 return std::make_pair(Callee.getReturnType(), S); 1513 } 1514 1515 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCast(DagInit *DI, 1516 bool IsBitCast){ 1517 // (cast MOD* VAL) -> cast VAL to type given by MOD. 1518 std::pair<Type, std::string> R = emitDagArg( 1519 DI->getArg(DI->getNumArgs() - 1), DI->getArgName(DI->getNumArgs() - 1)); 1520 Type castToType = R.first; 1521 for (unsigned ArgIdx = 0; ArgIdx < DI->getNumArgs() - 1; ++ArgIdx) { 1522 1523 // MOD can take several forms: 1524 // 1. $X - take the type of parameter / variable X. 1525 // 2. The value "R" - take the type of the return type. 1526 // 3. a type string 1527 // 4. The value "U" or "S" to switch the signedness. 1528 // 5. The value "H" or "D" to half or double the bitwidth. 1529 // 6. The value "8" to convert to 8-bit (signed) integer lanes. 
1530 if (DI->getArgName(ArgIdx).size()) { 1531 assert_with_loc(Intr.Variables.find(DI->getArgName(ArgIdx)) != 1532 Intr.Variables.end(), 1533 "Variable not found"); 1534 castToType = Intr.Variables[DI->getArgName(ArgIdx)].getType(); 1535 } else { 1536 StringInit *SI = dyn_cast<StringInit>(DI->getArg(ArgIdx)); 1537 assert_with_loc(SI, "Expected string type or $Name for cast type"); 1538 1539 if (SI->getAsUnquotedString() == "R") { 1540 castToType = Intr.getReturnType(); 1541 } else if (SI->getAsUnquotedString() == "U") { 1542 castToType.makeUnsigned(); 1543 } else if (SI->getAsUnquotedString() == "S") { 1544 castToType.makeSigned(); 1545 } else if (SI->getAsUnquotedString() == "H") { 1546 castToType.halveLanes(); 1547 } else if (SI->getAsUnquotedString() == "D") { 1548 castToType.doubleLanes(); 1549 } else if (SI->getAsUnquotedString() == "8") { 1550 castToType.makeInteger(8, true); 1551 } else { 1552 castToType = Type::fromTypedefName(SI->getAsUnquotedString()); 1553 assert_with_loc(!castToType.isVoid(), "Unknown typedef"); 1554 } 1555 } 1556 } 1557 1558 std::string S; 1559 if (IsBitCast) { 1560 // Emit a reinterpret cast. The second operand must be an lvalue, so create 1561 // a temporary. 1562 std::string N = "reint"; 1563 unsigned I = 0; 1564 while (Intr.Variables.find(N) != Intr.Variables.end()) 1565 N = "reint" + utostr(++I); 1566 Intr.Variables[N] = Variable(R.first, N + Intr.VariablePostfix); 1567 1568 Intr.OS << R.first.str() << " " << Intr.Variables[N].getName() << " = " 1569 << R.second << ";"; 1570 Intr.emitNewLine(); 1571 1572 S = "*(" + castToType.str() + " *) &" + Intr.Variables[N].getName() + ""; 1573 } else { 1574 // Emit a normal (static) cast. 1575 S = "(" + castToType.str() + ")(" + R.second + ")"; 1576 } 1577 1578 return std::make_pair(castToType, S); 1579 } 1580 1581 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagShuffle(DagInit *DI){ 1582 // See the documentation in arm_neon.td for a description of these operators. 
1583 class LowHalf : public SetTheory::Operator { 1584 public: 1585 void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, 1586 ArrayRef<SMLoc> Loc) override { 1587 SetTheory::RecSet Elts2; 1588 ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc); 1589 Elts.insert(Elts2.begin(), Elts2.begin() + (Elts2.size() / 2)); 1590 } 1591 }; 1592 class HighHalf : public SetTheory::Operator { 1593 public: 1594 void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, 1595 ArrayRef<SMLoc> Loc) override { 1596 SetTheory::RecSet Elts2; 1597 ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc); 1598 Elts.insert(Elts2.begin() + (Elts2.size() / 2), Elts2.end()); 1599 } 1600 }; 1601 class Rev : public SetTheory::Operator { 1602 unsigned ElementSize; 1603 1604 public: 1605 Rev(unsigned ElementSize) : ElementSize(ElementSize) {} 1606 void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, 1607 ArrayRef<SMLoc> Loc) override { 1608 SetTheory::RecSet Elts2; 1609 ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Elts2, Loc); 1610 1611 int64_t VectorSize = cast<IntInit>(Expr->getArg(0))->getValue(); 1612 VectorSize /= ElementSize; 1613 1614 std::vector<Record *> Revved; 1615 for (unsigned VI = 0; VI < Elts2.size(); VI += VectorSize) { 1616 for (int LI = VectorSize - 1; LI >= 0; --LI) { 1617 Revved.push_back(Elts2[VI + LI]); 1618 } 1619 } 1620 1621 Elts.insert(Revved.begin(), Revved.end()); 1622 } 1623 }; 1624 class MaskExpander : public SetTheory::Expander { 1625 unsigned N; 1626 1627 public: 1628 MaskExpander(unsigned N) : N(N) {} 1629 void expand(SetTheory &ST, Record *R, SetTheory::RecSet &Elts) override { 1630 unsigned Addend = 0; 1631 if (R->getName() == "mask0") 1632 Addend = 0; 1633 else if (R->getName() == "mask1") 1634 Addend = N; 1635 else 1636 return; 1637 for (unsigned I = 0; I < N; ++I) 1638 Elts.insert(R->getRecords().getDef("sv" + utostr(I + Addend))); 1639 } 1640 }; 1641 1642 // (shuffle arg1, arg2, sequence) 1643 std::pair<Type, 
std::string> Arg1 = 1644 emitDagArg(DI->getArg(0), DI->getArgName(0)); 1645 std::pair<Type, std::string> Arg2 = 1646 emitDagArg(DI->getArg(1), DI->getArgName(1)); 1647 assert_with_loc(Arg1.first == Arg2.first, 1648 "Different types in arguments to shuffle!"); 1649 1650 SetTheory ST; 1651 SetTheory::RecSet Elts; 1652 ST.addOperator("lowhalf", llvm::make_unique<LowHalf>()); 1653 ST.addOperator("highhalf", llvm::make_unique<HighHalf>()); 1654 ST.addOperator("rev", 1655 llvm::make_unique<Rev>(Arg1.first.getElementSizeInBits())); 1656 ST.addExpander("MaskExpand", 1657 llvm::make_unique<MaskExpander>(Arg1.first.getNumElements())); 1658 ST.evaluate(DI->getArg(2), Elts, None); 1659 1660 std::string S = "__builtin_shufflevector(" + Arg1.second + ", " + Arg2.second; 1661 for (auto &E : Elts) { 1662 StringRef Name = E->getName(); 1663 assert_with_loc(Name.startswith("sv"), 1664 "Incorrect element kind in shuffle mask!"); 1665 S += ", " + Name.drop_front(2).str(); 1666 } 1667 S += ")"; 1668 1669 // Recalculate the return type - the shuffle may have halved or doubled it. 
1670 Type T(Arg1.first); 1671 if (Elts.size() > T.getNumElements()) { 1672 assert_with_loc( 1673 Elts.size() == T.getNumElements() * 2, 1674 "Can only double or half the number of elements in a shuffle!"); 1675 T.doubleLanes(); 1676 } else if (Elts.size() < T.getNumElements()) { 1677 assert_with_loc( 1678 Elts.size() == T.getNumElements() / 2, 1679 "Can only double or half the number of elements in a shuffle!"); 1680 T.halveLanes(); 1681 } 1682 1683 return std::make_pair(T, S); 1684 } 1685 1686 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDup(DagInit *DI) { 1687 assert_with_loc(DI->getNumArgs() == 1, "dup() expects one argument"); 1688 std::pair<Type, std::string> A = emitDagArg(DI->getArg(0), DI->getArgName(0)); 1689 assert_with_loc(A.first.isScalar(), "dup() expects a scalar argument"); 1690 1691 Type T = Intr.getBaseType(); 1692 assert_with_loc(T.isVector(), "dup() used but default type is scalar!"); 1693 std::string S = "(" + T.str() + ") {"; 1694 for (unsigned I = 0; I < T.getNumElements(); ++I) { 1695 if (I != 0) 1696 S += ", "; 1697 S += A.second; 1698 } 1699 S += "}"; 1700 1701 return std::make_pair(T, S); 1702 } 1703 1704 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSplat(DagInit *DI) { 1705 assert_with_loc(DI->getNumArgs() == 2, "splat() expects two arguments"); 1706 std::pair<Type, std::string> A = emitDagArg(DI->getArg(0), DI->getArgName(0)); 1707 std::pair<Type, std::string> B = emitDagArg(DI->getArg(1), DI->getArgName(1)); 1708 1709 assert_with_loc(B.first.isScalar(), 1710 "splat() requires a scalar int as the second argument"); 1711 1712 std::string S = "__builtin_shufflevector(" + A.second + ", " + A.second; 1713 for (unsigned I = 0; I < Intr.getBaseType().getNumElements(); ++I) { 1714 S += ", " + B.second; 1715 } 1716 S += ")"; 1717 1718 return std::make_pair(Intr.getBaseType(), S); 1719 } 1720 1721 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSaveTemp(DagInit *DI) { 1722 assert_with_loc(DI->getNumArgs() == 
2, "save_temp() expects two arguments"); 1723 std::pair<Type, std::string> A = emitDagArg(DI->getArg(1), DI->getArgName(1)); 1724 1725 assert_with_loc(!A.first.isVoid(), 1726 "Argument to save_temp() must have non-void type!"); 1727 1728 std::string N = DI->getArgName(0); 1729 assert_with_loc(N.size(), "save_temp() expects a name as the first argument"); 1730 1731 assert_with_loc(Intr.Variables.find(N) == Intr.Variables.end(), 1732 "Variable already defined!"); 1733 Intr.Variables[N] = Variable(A.first, N + Intr.VariablePostfix); 1734 1735 std::string S = 1736 A.first.str() + " " + Intr.Variables[N].getName() + " = " + A.second; 1737 1738 return std::make_pair(Type::getVoid(), S); 1739 } 1740 1741 std::pair<Type, std::string> 1742 Intrinsic::DagEmitter::emitDagNameReplace(DagInit *DI) { 1743 std::string S = Intr.Name; 1744 1745 assert_with_loc(DI->getNumArgs() == 2, "name_replace requires 2 arguments!"); 1746 std::string ToReplace = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1747 std::string ReplaceWith = cast<StringInit>(DI->getArg(1))->getAsUnquotedString(); 1748 1749 size_t Idx = S.find(ToReplace); 1750 1751 assert_with_loc(Idx != std::string::npos, "name should contain '" + ToReplace + "'!"); 1752 S.replace(Idx, ToReplace.size(), ReplaceWith); 1753 1754 return std::make_pair(Type::getVoid(), S); 1755 } 1756 1757 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagLiteral(DagInit *DI){ 1758 std::string Ty = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1759 std::string Value = cast<StringInit>(DI->getArg(1))->getAsUnquotedString(); 1760 return std::make_pair(Type::fromTypedefName(Ty), Value); 1761 } 1762 1763 std::pair<Type, std::string> 1764 Intrinsic::DagEmitter::emitDagArg(Init *Arg, std::string ArgName) { 1765 if (ArgName.size()) { 1766 assert_with_loc(!Arg->isComplete(), 1767 "Arguments must either be DAGs or names, not both!"); 1768 assert_with_loc(Intr.Variables.find(ArgName) != Intr.Variables.end(), 1769 "Variable not 
defined!"); 1770 Variable &V = Intr.Variables[ArgName]; 1771 return std::make_pair(V.getType(), V.getName()); 1772 } 1773 1774 assert(Arg && "Neither ArgName nor Arg?!"); 1775 DagInit *DI = dyn_cast<DagInit>(Arg); 1776 assert_with_loc(DI, "Arguments must either be DAGs or names!"); 1777 1778 return emitDag(DI); 1779 } 1780 1781 std::string Intrinsic::generate() { 1782 // Little endian intrinsics are simple and don't require any argument 1783 // swapping. 1784 OS << "#ifdef __LITTLE_ENDIAN__\n"; 1785 1786 generateImpl(false, "", ""); 1787 1788 OS << "#else\n"; 1789 1790 // Big endian intrinsics are more complex. The user intended these 1791 // intrinsics to operate on a vector "as-if" loaded by (V)LDR, 1792 // but we load as-if (V)LD1. So we should swap all arguments and 1793 // swap the return value too. 1794 // 1795 // If we call sub-intrinsics, we should call a version that does 1796 // not re-swap the arguments! 1797 generateImpl(true, "", "__noswap_"); 1798 1799 // If we're needed early, create a non-swapping variant for 1800 // big-endian. 1801 if (NeededEarly) { 1802 generateImpl(false, "__noswap_", "__noswap_"); 1803 } 1804 OS << "#endif\n\n"; 1805 1806 return OS.str(); 1807 } 1808 1809 void Intrinsic::generateImpl(bool ReverseArguments, 1810 StringRef NamePrefix, StringRef CallPrefix) { 1811 CurrentRecord = R; 1812 1813 // If we call a macro, our local variables may be corrupted due to 1814 // lack of proper lexical scoping. So, add a globally unique postfix 1815 // to every variable. 1816 // 1817 // indexBody() should have set up the Dependencies set by now. 
1818 for (auto *I : Dependencies) 1819 if (I->UseMacro) { 1820 VariablePostfix = "_" + utostr(Emitter.getUniqueNumber()); 1821 break; 1822 } 1823 1824 initVariables(); 1825 1826 emitPrototype(NamePrefix); 1827 1828 if (IsUnavailable) { 1829 OS << " __attribute__((unavailable));"; 1830 } else { 1831 emitOpeningBrace(); 1832 emitShadowedArgs(); 1833 if (ReverseArguments) 1834 emitArgumentReversal(); 1835 emitBody(CallPrefix); 1836 if (ReverseArguments) 1837 emitReturnReversal(); 1838 emitReturn(); 1839 emitClosingBrace(); 1840 } 1841 OS << "\n"; 1842 1843 CurrentRecord = nullptr; 1844 } 1845 1846 void Intrinsic::indexBody() { 1847 CurrentRecord = R; 1848 1849 initVariables(); 1850 emitBody(""); 1851 OS.str(""); 1852 1853 CurrentRecord = nullptr; 1854 } 1855 1856 //===----------------------------------------------------------------------===// 1857 // NeonEmitter implementation 1858 //===----------------------------------------------------------------------===// 1859 1860 Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types) { 1861 // First, look up the name in the intrinsic map. 1862 assert_with_loc(IntrinsicMap.find(Name.str()) != IntrinsicMap.end(), 1863 ("Intrinsic '" + Name + "' not found!").str()); 1864 auto &V = IntrinsicMap.find(Name.str())->second; 1865 std::vector<Intrinsic *> GoodVec; 1866 1867 // Create a string to print if we end up failing. 1868 std::string ErrMsg = "looking up intrinsic '" + Name.str() + "("; 1869 for (unsigned I = 0; I < Types.size(); ++I) { 1870 if (I != 0) 1871 ErrMsg += ", "; 1872 ErrMsg += Types[I].str(); 1873 } 1874 ErrMsg += ")'\n"; 1875 ErrMsg += "Available overloads:\n"; 1876 1877 // Now, look through each intrinsic implementation and see if the types are 1878 // compatible. 
1879 for (auto &I : V) { 1880 ErrMsg += " - " + I.getReturnType().str() + " " + I.getMangledName(); 1881 ErrMsg += "("; 1882 for (unsigned A = 0; A < I.getNumParams(); ++A) { 1883 if (A != 0) 1884 ErrMsg += ", "; 1885 ErrMsg += I.getParamType(A).str(); 1886 } 1887 ErrMsg += ")\n"; 1888 1889 if (I.getNumParams() != Types.size()) 1890 continue; 1891 1892 bool Good = true; 1893 for (unsigned Arg = 0; Arg < Types.size(); ++Arg) { 1894 if (I.getParamType(Arg) != Types[Arg]) { 1895 Good = false; 1896 break; 1897 } 1898 } 1899 if (Good) 1900 GoodVec.push_back(&I); 1901 } 1902 1903 assert_with_loc(GoodVec.size() > 0, 1904 "No compatible intrinsic found - " + ErrMsg); 1905 assert_with_loc(GoodVec.size() == 1, "Multiple overloads found - " + ErrMsg); 1906 1907 return *GoodVec.front(); 1908 } 1909 1910 void NeonEmitter::createIntrinsic(Record *R, 1911 SmallVectorImpl<Intrinsic *> &Out) { 1912 std::string Name = R->getValueAsString("Name"); 1913 std::string Proto = R->getValueAsString("Prototype"); 1914 std::string Types = R->getValueAsString("Types"); 1915 Record *OperationRec = R->getValueAsDef("Operation"); 1916 bool CartesianProductOfTypes = R->getValueAsBit("CartesianProductOfTypes"); 1917 bool BigEndianSafe = R->getValueAsBit("BigEndianSafe"); 1918 std::string Guard = R->getValueAsString("ArchGuard"); 1919 bool IsUnavailable = OperationRec->getValueAsBit("Unavailable"); 1920 1921 // Set the global current record. This allows assert_with_loc to produce 1922 // decent location information even when highly nested. 
1923 CurrentRecord = R; 1924 1925 ListInit *Body = OperationRec->getValueAsListInit("Ops"); 1926 1927 std::vector<TypeSpec> TypeSpecs = TypeSpec::fromTypeSpecs(Types); 1928 1929 ClassKind CK = ClassNone; 1930 if (R->getSuperClasses().size() >= 2) 1931 CK = ClassMap[R->getSuperClasses()[1].first]; 1932 1933 std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs; 1934 for (auto TS : TypeSpecs) { 1935 if (CartesianProductOfTypes) { 1936 Type DefaultT(TS, 'd'); 1937 for (auto SrcTS : TypeSpecs) { 1938 Type DefaultSrcT(SrcTS, 'd'); 1939 if (TS == SrcTS || 1940 DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits()) 1941 continue; 1942 NewTypeSpecs.push_back(std::make_pair(TS, SrcTS)); 1943 } 1944 } else { 1945 NewTypeSpecs.push_back(std::make_pair(TS, TS)); 1946 } 1947 } 1948 1949 std::sort(NewTypeSpecs.begin(), NewTypeSpecs.end()); 1950 NewTypeSpecs.erase(std::unique(NewTypeSpecs.begin(), NewTypeSpecs.end()), 1951 NewTypeSpecs.end()); 1952 auto &Entry = IntrinsicMap[Name]; 1953 1954 for (auto &I : NewTypeSpecs) { 1955 Entry.emplace_back(R, Name, Proto, I.first, I.second, CK, Body, *this, 1956 Guard, IsUnavailable, BigEndianSafe); 1957 Out.push_back(&Entry.back()); 1958 } 1959 1960 CurrentRecord = nullptr; 1961 } 1962 1963 /// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def 1964 /// declaration of builtins, checking for unique builtin declarations. 1965 void NeonEmitter::genBuiltinsDef(raw_ostream &OS, 1966 SmallVectorImpl<Intrinsic *> &Defs) { 1967 OS << "#ifdef GET_NEON_BUILTINS\n"; 1968 1969 // We only want to emit a builtin once, and we want to emit them in 1970 // alphabetical order, so use a std::set. 1971 std::set<std::string> Builtins; 1972 1973 for (auto *Def : Defs) { 1974 if (Def->hasBody()) 1975 continue; 1976 // Functions with 'a' (the splat code) in the type prototype should not get 1977 // their own builtin as they use the non-splat variant. 
1978 if (Def->hasSplat()) 1979 continue; 1980 1981 std::string S = "BUILTIN(__builtin_neon_" + Def->getMangledName() + ", \""; 1982 1983 S += Def->getBuiltinTypeStr(); 1984 S += "\", \"n\")"; 1985 1986 Builtins.insert(S); 1987 } 1988 1989 for (auto &S : Builtins) 1990 OS << S << "\n"; 1991 OS << "#endif\n\n"; 1992 } 1993 1994 /// Generate the ARM and AArch64 overloaded type checking code for 1995 /// SemaChecking.cpp, checking for unique builtin declarations. 1996 void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS, 1997 SmallVectorImpl<Intrinsic *> &Defs) { 1998 OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n"; 1999 2000 // We record each overload check line before emitting because subsequent Inst 2001 // definitions may extend the number of permitted types (i.e. augment the 2002 // Mask). Use std::map to avoid sorting the table by hash number. 2003 struct OverloadInfo { 2004 uint64_t Mask; 2005 int PtrArgNum; 2006 bool HasConstPtr; 2007 OverloadInfo() : Mask(0ULL), PtrArgNum(0), HasConstPtr(false) {} 2008 }; 2009 std::map<std::string, OverloadInfo> OverloadMap; 2010 2011 for (auto *Def : Defs) { 2012 // If the def has a body (that is, it has Operation DAGs), it won't call 2013 // __builtin_neon_* so we don't need to generate a definition for it. 2014 if (Def->hasBody()) 2015 continue; 2016 // Functions with 'a' (the splat code) in the type prototype should not get 2017 // their own builtin as they use the non-splat variant. 2018 if (Def->hasSplat()) 2019 continue; 2020 // Functions which have a scalar argument cannot be overloaded, no need to 2021 // check them if we are emitting the type checking code. 
2022 if (Def->protoHasScalar()) 2023 continue; 2024 2025 uint64_t Mask = 0ULL; 2026 Type Ty = Def->getReturnType(); 2027 if (Def->getProto()[0] == 'v' || 2028 isFloatingPointProtoModifier(Def->getProto()[0])) 2029 Ty = Def->getParamType(0); 2030 if (Ty.isPointer()) 2031 Ty = Def->getParamType(1); 2032 2033 Mask |= 1ULL << Ty.getNeonEnum(); 2034 2035 // Check if the function has a pointer or const pointer argument. 2036 std::string Proto = Def->getProto(); 2037 int PtrArgNum = -1; 2038 bool HasConstPtr = false; 2039 for (unsigned I = 0; I < Def->getNumParams(); ++I) { 2040 char ArgType = Proto[I + 1]; 2041 if (ArgType == 'c') { 2042 HasConstPtr = true; 2043 PtrArgNum = I; 2044 break; 2045 } 2046 if (ArgType == 'p') { 2047 PtrArgNum = I; 2048 break; 2049 } 2050 } 2051 // For sret builtins, adjust the pointer argument index. 2052 if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1) 2053 PtrArgNum += 1; 2054 2055 std::string Name = Def->getName(); 2056 // Omit type checking for the pointer arguments of vld1_lane, vld1_dup, 2057 // and vst1_lane intrinsics. Using a pointer to the vector element 2058 // type with one of those operations causes codegen to select an aligned 2059 // load/store instruction. If you want an unaligned operation, 2060 // the pointer argument needs to have less alignment than element type, 2061 // so just accept any pointer type. 
2062 if (Name == "vld1_lane" || Name == "vld1_dup" || Name == "vst1_lane") { 2063 PtrArgNum = -1; 2064 HasConstPtr = false; 2065 } 2066 2067 if (Mask) { 2068 std::string Name = Def->getMangledName(); 2069 OverloadMap.insert(std::make_pair(Name, OverloadInfo())); 2070 OverloadInfo &OI = OverloadMap[Name]; 2071 OI.Mask |= Mask; 2072 OI.PtrArgNum |= PtrArgNum; 2073 OI.HasConstPtr = HasConstPtr; 2074 } 2075 } 2076 2077 for (auto &I : OverloadMap) { 2078 OverloadInfo &OI = I.second; 2079 2080 OS << "case NEON::BI__builtin_neon_" << I.first << ": "; 2081 OS << "mask = 0x" << utohexstr(OI.Mask) << "ULL"; 2082 if (OI.PtrArgNum >= 0) 2083 OS << "; PtrArgNum = " << OI.PtrArgNum; 2084 if (OI.HasConstPtr) 2085 OS << "; HasConstPtr = true"; 2086 OS << "; break;\n"; 2087 } 2088 OS << "#endif\n\n"; 2089 } 2090 2091 void 2092 NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS, 2093 SmallVectorImpl<Intrinsic *> &Defs) { 2094 OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n"; 2095 2096 std::set<std::string> Emitted; 2097 2098 for (auto *Def : Defs) { 2099 if (Def->hasBody()) 2100 continue; 2101 // Functions with 'a' (the splat code) in the type prototype should not get 2102 // their own builtin as they use the non-splat variant. 2103 if (Def->hasSplat()) 2104 continue; 2105 // Functions which do not have an immediate do not need to have range 2106 // checking code emitted. 2107 if (!Def->hasImmediate()) 2108 continue; 2109 if (Emitted.find(Def->getMangledName()) != Emitted.end()) 2110 continue; 2111 2112 std::string LowerBound, UpperBound; 2113 2114 Record *R = Def->getRecord(); 2115 if (R->getValueAsBit("isVCVT_N")) { 2116 // VCVT between floating- and fixed-point values takes an immediate 2117 // in the range [1, 32) for f32 or [1, 64) for f64. 
2118 LowerBound = "1"; 2119 if (Def->getBaseType().getElementSizeInBits() == 32) 2120 UpperBound = "31"; 2121 else 2122 UpperBound = "63"; 2123 } else if (R->getValueAsBit("isScalarShift")) { 2124 // Right shifts have an 'r' in the name, left shifts do not. Convert 2125 // instructions have the same bounds and right shifts. 2126 if (Def->getName().find('r') != std::string::npos || 2127 Def->getName().find("cvt") != std::string::npos) 2128 LowerBound = "1"; 2129 2130 UpperBound = utostr(Def->getReturnType().getElementSizeInBits() - 1); 2131 } else if (R->getValueAsBit("isShift")) { 2132 // Builtins which are overloaded by type will need to have their upper 2133 // bound computed at Sema time based on the type constant. 2134 2135 // Right shifts have an 'r' in the name, left shifts do not. 2136 if (Def->getName().find('r') != std::string::npos) 2137 LowerBound = "1"; 2138 UpperBound = "RFT(TV, true)"; 2139 } else if (Def->getClassKind(true) == ClassB) { 2140 // ClassB intrinsics have a type (and hence lane number) that is only 2141 // known at runtime. 2142 if (R->getValueAsBit("isLaneQ")) 2143 UpperBound = "RFT(TV, false, true)"; 2144 else 2145 UpperBound = "RFT(TV, false, false)"; 2146 } else { 2147 // The immediate generally refers to a lane in the preceding argument. 2148 assert(Def->getImmediateIdx() > 0); 2149 Type T = Def->getParamType(Def->getImmediateIdx() - 1); 2150 UpperBound = utostr(T.getNumElements() - 1); 2151 } 2152 2153 // Calculate the index of the immediate that should be range checked. 
2154 unsigned Idx = Def->getNumParams(); 2155 if (Def->hasImmediate()) 2156 Idx = Def->getGeneratedParamIdx(Def->getImmediateIdx()); 2157 2158 OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ": " 2159 << "i = " << Idx << ";"; 2160 if (LowerBound.size()) 2161 OS << " l = " << LowerBound << ";"; 2162 if (UpperBound.size()) 2163 OS << " u = " << UpperBound << ";"; 2164 OS << " break;\n"; 2165 2166 Emitted.insert(Def->getMangledName()); 2167 } 2168 2169 OS << "#endif\n\n"; 2170 } 2171 2172 /// runHeader - Emit a file with sections defining: 2173 /// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def. 2174 /// 2. the SemaChecking code for the type overload checking. 2175 /// 3. the SemaChecking code for validation of intrinsic immediate arguments. 2176 void NeonEmitter::runHeader(raw_ostream &OS) { 2177 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2178 2179 SmallVector<Intrinsic *, 128> Defs; 2180 for (auto *R : RV) 2181 createIntrinsic(R, Defs); 2182 2183 // Generate shared BuiltinsXXX.def 2184 genBuiltinsDef(OS, Defs); 2185 2186 // Generate ARM overloaded type checking code for SemaChecking.cpp 2187 genOverloadTypeCheckCode(OS, Defs); 2188 2189 // Generate ARM range checking code for shift/lane immediates. 2190 genIntrinsicRangeCheckCode(OS, Defs); 2191 } 2192 2193 /// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h 2194 /// is comprised of type definitions and function declarations. 
2195 void NeonEmitter::run(raw_ostream &OS) { 2196 OS << "/*===---- arm_neon.h - ARM Neon intrinsics " 2197 "------------------------------" 2198 "---===\n" 2199 " *\n" 2200 " * Permission is hereby granted, free of charge, to any person " 2201 "obtaining " 2202 "a copy\n" 2203 " * of this software and associated documentation files (the " 2204 "\"Software\")," 2205 " to deal\n" 2206 " * in the Software without restriction, including without limitation " 2207 "the " 2208 "rights\n" 2209 " * to use, copy, modify, merge, publish, distribute, sublicense, " 2210 "and/or sell\n" 2211 " * copies of the Software, and to permit persons to whom the Software " 2212 "is\n" 2213 " * furnished to do so, subject to the following conditions:\n" 2214 " *\n" 2215 " * The above copyright notice and this permission notice shall be " 2216 "included in\n" 2217 " * all copies or substantial portions of the Software.\n" 2218 " *\n" 2219 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, " 2220 "EXPRESS OR\n" 2221 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF " 2222 "MERCHANTABILITY,\n" 2223 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT " 2224 "SHALL THE\n" 2225 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR " 2226 "OTHER\n" 2227 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, " 2228 "ARISING FROM,\n" 2229 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER " 2230 "DEALINGS IN\n" 2231 " * THE SOFTWARE.\n" 2232 " *\n" 2233 " *===-----------------------------------------------------------------" 2234 "---" 2235 "---===\n" 2236 " */\n\n"; 2237 2238 OS << "#ifndef __ARM_NEON_H\n"; 2239 OS << "#define __ARM_NEON_H\n\n"; 2240 2241 OS << "#if !defined(__ARM_NEON)\n"; 2242 OS << "#error \"NEON support not enabled\"\n"; 2243 OS << "#endif\n\n"; 2244 2245 OS << "#include <stdint.h>\n\n"; 2246 2247 // Emit NEON-specific scalar typedefs. 
2248 OS << "typedef float float32_t;\n"; 2249 OS << "typedef __fp16 float16_t;\n"; 2250 2251 OS << "#ifdef __aarch64__\n"; 2252 OS << "typedef double float64_t;\n"; 2253 OS << "#endif\n\n"; 2254 2255 // For now, signedness of polynomial types depends on target 2256 OS << "#ifdef __aarch64__\n"; 2257 OS << "typedef uint8_t poly8_t;\n"; 2258 OS << "typedef uint16_t poly16_t;\n"; 2259 OS << "typedef uint64_t poly64_t;\n"; 2260 OS << "typedef __uint128_t poly128_t;\n"; 2261 OS << "#else\n"; 2262 OS << "typedef int8_t poly8_t;\n"; 2263 OS << "typedef int16_t poly16_t;\n"; 2264 OS << "#endif\n"; 2265 2266 // Emit Neon vector typedefs. 2267 std::string TypedefTypes( 2268 "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl"); 2269 std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes); 2270 2271 // Emit vector typedefs. 2272 bool InIfdef = false; 2273 for (auto &TS : TDTypeVec) { 2274 bool IsA64 = false; 2275 Type T(TS, 'd'); 2276 if (T.isDouble() || (T.isPoly() && T.isLong())) 2277 IsA64 = true; 2278 2279 if (InIfdef && !IsA64) { 2280 OS << "#endif\n"; 2281 InIfdef = false; 2282 } 2283 if (!InIfdef && IsA64) { 2284 OS << "#ifdef __aarch64__\n"; 2285 InIfdef = true; 2286 } 2287 2288 if (T.isPoly()) 2289 OS << "typedef __attribute__((neon_polyvector_type("; 2290 else 2291 OS << "typedef __attribute__((neon_vector_type("; 2292 2293 Type T2 = T; 2294 T2.makeScalar(); 2295 OS << utostr(T.getNumElements()) << "))) "; 2296 OS << T2.str(); 2297 OS << " " << T.str() << ";\n"; 2298 } 2299 if (InIfdef) 2300 OS << "#endif\n"; 2301 OS << "\n"; 2302 2303 // Emit struct typedefs. 
2304 InIfdef = false; 2305 for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) { 2306 for (auto &TS : TDTypeVec) { 2307 bool IsA64 = false; 2308 Type T(TS, 'd'); 2309 if (T.isDouble() || (T.isPoly() && T.isLong())) 2310 IsA64 = true; 2311 2312 if (InIfdef && !IsA64) { 2313 OS << "#endif\n"; 2314 InIfdef = false; 2315 } 2316 if (!InIfdef && IsA64) { 2317 OS << "#ifdef __aarch64__\n"; 2318 InIfdef = true; 2319 } 2320 2321 char M = '2' + (NumMembers - 2); 2322 Type VT(TS, M); 2323 OS << "typedef struct " << VT.str() << " {\n"; 2324 OS << " " << T.str() << " val"; 2325 OS << "[" << utostr(NumMembers) << "]"; 2326 OS << ";\n} "; 2327 OS << VT.str() << ";\n"; 2328 OS << "\n"; 2329 } 2330 } 2331 if (InIfdef) 2332 OS << "#endif\n"; 2333 OS << "\n"; 2334 2335 OS << "#define __ai static inline __attribute__((__always_inline__, " 2336 "__nodebug__))\n\n"; 2337 2338 SmallVector<Intrinsic *, 128> Defs; 2339 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2340 for (auto *R : RV) 2341 createIntrinsic(R, Defs); 2342 2343 for (auto *I : Defs) 2344 I->indexBody(); 2345 2346 std::stable_sort( 2347 Defs.begin(), Defs.end(), 2348 [](const Intrinsic *A, const Intrinsic *B) { return *A < *B; }); 2349 2350 // Only emit a def when its requirements have been met. 2351 // FIXME: This loop could be made faster, but it's fast enough for now. 2352 bool MadeProgress = true; 2353 std::string InGuard = ""; 2354 while (!Defs.empty() && MadeProgress) { 2355 MadeProgress = false; 2356 2357 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin(); 2358 I != Defs.end(); /*No step*/) { 2359 bool DependenciesSatisfied = true; 2360 for (auto *II : (*I)->getDependencies()) { 2361 if (std::find(Defs.begin(), Defs.end(), II) != Defs.end()) 2362 DependenciesSatisfied = false; 2363 } 2364 if (!DependenciesSatisfied) { 2365 // Try the next one. 2366 ++I; 2367 continue; 2368 } 2369 2370 // Emit #endif/#if pair if needed. 
2371 if ((*I)->getGuard() != InGuard) { 2372 if (!InGuard.empty()) 2373 OS << "#endif\n"; 2374 InGuard = (*I)->getGuard(); 2375 if (!InGuard.empty()) 2376 OS << "#if " << InGuard << "\n"; 2377 } 2378 2379 // Actually generate the intrinsic code. 2380 OS << (*I)->generate(); 2381 2382 MadeProgress = true; 2383 I = Defs.erase(I); 2384 } 2385 } 2386 assert(Defs.empty() && "Some requirements were not satisfied!"); 2387 if (!InGuard.empty()) 2388 OS << "#endif\n"; 2389 2390 OS << "\n"; 2391 OS << "#undef __ai\n\n"; 2392 OS << "#endif /* __ARM_NEON_H */\n"; 2393 } 2394 2395 namespace clang { 2396 void EmitNeon(RecordKeeper &Records, raw_ostream &OS) { 2397 NeonEmitter(Records).run(OS); 2398 } 2399 void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) { 2400 NeonEmitter(Records).runHeader(OS); 2401 } 2402 void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) { 2403 llvm_unreachable("Neon test generation no longer implemented!"); 2404 } 2405 } // End namespace clang 2406