//===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This tablegen backend is responsible for emitting arm_neon.h, which includes
// a declaration and definition of each function specified by the ARM NEON
// compiler interface.  See ARM document DUI0348B.
//
// Each NEON instruction is implemented in terms of 1 or more functions which
// are suffixed with the element type of the input vectors.  Functions may be
// implemented in terms of generic vector operations such as +, *, -, etc. or
// by calling a __builtin_-prefixed function which will be handled by clang's
// CodeGen library.
//
// Additional validation code can be generated by this file when runHeader() is
// called, rather than the normal run() entry point.
//
// See also the documentation in include/clang/Basic/arm_neon.td.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/SetTheory.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <algorithm>
#include <cassert>
#include <cctype>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
using namespace llvm;

namespace {

// While globals are generally bad, this one allows us to perform assertions
// liberally and somehow still trace them back to the def they indirectly
// came from.
49 static Record *CurrentRecord = nullptr; 50 static void assert_with_loc(bool Assertion, const std::string &Str) { 51 if (!Assertion) { 52 if (CurrentRecord) 53 PrintFatalError(CurrentRecord->getLoc(), Str); 54 else 55 PrintFatalError(Str); 56 } 57 } 58 59 enum ClassKind { 60 ClassNone, 61 ClassI, // generic integer instruction, e.g., "i8" suffix 62 ClassS, // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix 63 ClassW, // width-specific instruction, e.g., "8" suffix 64 ClassB, // bitcast arguments with enum argument to specify type 65 ClassL, // Logical instructions which are op instructions 66 // but we need to not emit any suffix for in our 67 // tests. 68 ClassNoTest // Instructions which we do not test since they are 69 // not TRUE instructions. 70 }; 71 72 /// NeonTypeFlags - Flags to identify the types for overloaded Neon 73 /// builtins. These must be kept in sync with the flags in 74 /// include/clang/Basic/TargetBuiltins.h. 75 namespace NeonTypeFlags { 76 enum { EltTypeMask = 0xf, UnsignedFlag = 0x10, QuadFlag = 0x20 }; 77 78 enum EltType { 79 Int8, 80 Int16, 81 Int32, 82 Int64, 83 Poly8, 84 Poly16, 85 Poly64, 86 Poly128, 87 Float16, 88 Float32, 89 Float64 90 }; 91 } 92 93 class Intrinsic; 94 class NeonEmitter; 95 class Type; 96 class Variable; 97 98 //===----------------------------------------------------------------------===// 99 // TypeSpec 100 //===----------------------------------------------------------------------===// 101 102 /// A TypeSpec is just a simple wrapper around a string, but gets its own type 103 /// for strong typing purposes. 104 /// 105 /// A TypeSpec can be used to create a type. 
106 class TypeSpec : public std::string { 107 public: 108 static std::vector<TypeSpec> fromTypeSpecs(StringRef Str) { 109 std::vector<TypeSpec> Ret; 110 TypeSpec Acc; 111 for (char I : Str.str()) { 112 if (islower(I)) { 113 Acc.push_back(I); 114 Ret.push_back(TypeSpec(Acc)); 115 Acc.clear(); 116 } else { 117 Acc.push_back(I); 118 } 119 } 120 return Ret; 121 } 122 }; 123 124 //===----------------------------------------------------------------------===// 125 // Type 126 //===----------------------------------------------------------------------===// 127 128 /// A Type. Not much more to say here. 129 class Type { 130 private: 131 TypeSpec TS; 132 133 bool Float, Signed, Void, Poly, Constant, Pointer; 134 // ScalarForMangling and NoManglingQ are really not suited to live here as 135 // they are not related to the type. But they live in the TypeSpec (not the 136 // prototype), so this is really the only place to store them. 137 bool ScalarForMangling, NoManglingQ; 138 unsigned Bitwidth, ElementBitwidth, NumVectors; 139 140 public: 141 Type() 142 : Float(false), Signed(false), Void(true), Poly(false), Constant(false), 143 Pointer(false), ScalarForMangling(false), NoManglingQ(false), 144 Bitwidth(0), ElementBitwidth(0), NumVectors(0) {} 145 146 Type(TypeSpec TS, char CharMod) 147 : TS(TS), Float(false), Signed(false), Void(false), Poly(false), 148 Constant(false), Pointer(false), ScalarForMangling(false), 149 NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) { 150 applyModifier(CharMod); 151 } 152 153 /// Returns a type representing "void". 
154 static Type getVoid() { return Type(); } 155 156 bool operator==(const Type &Other) const { return str() == Other.str(); } 157 bool operator!=(const Type &Other) const { return !operator==(Other); } 158 159 // 160 // Query functions 161 // 162 bool isScalarForMangling() const { return ScalarForMangling; } 163 bool noManglingQ() const { return NoManglingQ; } 164 165 bool isPointer() const { return Pointer; } 166 bool isFloating() const { return Float; } 167 bool isInteger() const { return !Float && !Poly; } 168 bool isSigned() const { return Signed; } 169 bool isScalar() const { return NumVectors == 0; } 170 bool isVector() const { return NumVectors > 0; } 171 bool isFloat() const { return Float && ElementBitwidth == 32; } 172 bool isDouble() const { return Float && ElementBitwidth == 64; } 173 bool isHalf() const { return Float && ElementBitwidth == 16; } 174 bool isPoly() const { return Poly; } 175 bool isChar() const { return ElementBitwidth == 8; } 176 bool isShort() const { return !Float && ElementBitwidth == 16; } 177 bool isInt() const { return !Float && ElementBitwidth == 32; } 178 bool isLong() const { return !Float && ElementBitwidth == 64; } 179 bool isVoid() const { return Void; } 180 unsigned getNumElements() const { return Bitwidth / ElementBitwidth; } 181 unsigned getSizeInBits() const { return Bitwidth; } 182 unsigned getElementSizeInBits() const { return ElementBitwidth; } 183 unsigned getNumVectors() const { return NumVectors; } 184 185 // 186 // Mutator functions 187 // 188 void makeUnsigned() { Signed = false; } 189 void makeSigned() { Signed = true; } 190 void makeInteger(unsigned ElemWidth, bool Sign) { 191 Float = false; 192 Poly = false; 193 Signed = Sign; 194 ElementBitwidth = ElemWidth; 195 } 196 void makeScalar() { 197 Bitwidth = ElementBitwidth; 198 NumVectors = 0; 199 } 200 void makeOneVector() { 201 assert(isVector()); 202 NumVectors = 1; 203 } 204 void doubleLanes() { 205 assert_with_loc(Bitwidth != 128, "Can't get bigger than 
128!"); 206 Bitwidth = 128; 207 } 208 void halveLanes() { 209 assert_with_loc(Bitwidth != 64, "Can't get smaller than 64!"); 210 Bitwidth = 64; 211 } 212 213 /// Return the C string representation of a type, which is the typename 214 /// defined in stdint.h or arm_neon.h. 215 std::string str() const; 216 217 /// Return the string representation of a type, which is an encoded 218 /// string for passing to the BUILTIN() macro in Builtins.def. 219 std::string builtin_str() const; 220 221 /// Return the value in NeonTypeFlags for this type. 222 unsigned getNeonEnum() const; 223 224 /// Parse a type from a stdint.h or arm_neon.h typedef name, 225 /// for example uint32x2_t or int64_t. 226 static Type fromTypedefName(StringRef Name); 227 228 private: 229 /// Creates the type based on the typespec string in TS. 230 /// Sets "Quad" to true if the "Q" or "H" modifiers were 231 /// seen. This is needed by applyModifier as some modifiers 232 /// only take effect if the type size was changed by "Q" or "H". 233 void applyTypespec(bool &Quad); 234 /// Applies a prototype modifier to the type. 235 void applyModifier(char Mod); 236 }; 237 238 //===----------------------------------------------------------------------===// 239 // Variable 240 //===----------------------------------------------------------------------===// 241 242 /// A variable is a simple class that just has a type and a name. 243 class Variable { 244 Type T; 245 std::string N; 246 247 public: 248 Variable() : T(Type::getVoid()), N("") {} 249 Variable(Type T, std::string N) : T(T), N(N) {} 250 251 Type getType() const { return T; } 252 std::string getName() const { return "__" + N; } 253 }; 254 255 //===----------------------------------------------------------------------===// 256 // Intrinsic 257 //===----------------------------------------------------------------------===// 258 259 /// The main grunt class. This represents an instantiation of an intrinsic with 260 /// a particular typespec and prototype. 
261 class Intrinsic { 262 friend class DagEmitter; 263 264 /// The Record this intrinsic was created from. 265 Record *R; 266 /// The unmangled name and prototype. 267 std::string Name, Proto; 268 /// The input and output typespecs. InTS == OutTS except when 269 /// CartesianProductOfTypes is 1 - this is the case for vreinterpret. 270 TypeSpec OutTS, InTS; 271 /// The base class kind. Most intrinsics use ClassS, which has full type 272 /// info for integers (s32/u32). Some use ClassI, which doesn't care about 273 /// signedness (i32), while some (ClassB) have no type at all, only a width 274 /// (32). 275 ClassKind CK; 276 /// The list of DAGs for the body. May be empty, in which case we should 277 /// emit a builtin call. 278 ListInit *Body; 279 /// The architectural #ifdef guard. 280 std::string Guard; 281 /// Set if the Unvailable bit is 1. This means we don't generate a body, 282 /// just an "unavailable" attribute on a declaration. 283 bool IsUnavailable; 284 /// Is this intrinsic safe for big-endian? or does it need its arguments 285 /// reversing? 286 bool BigEndianSafe; 287 288 /// The types of return value [0] and parameters [1..]. 289 std::vector<Type> Types; 290 /// The local variables defined. 291 std::map<std::string, Variable> Variables; 292 /// NeededEarly - set if any other intrinsic depends on this intrinsic. 293 bool NeededEarly; 294 /// UseMacro - set if we should implement using a macro or unset for a 295 /// function. 296 bool UseMacro; 297 /// The set of intrinsics that this intrinsic uses/requires. 298 std::set<Intrinsic *> Dependencies; 299 /// The "base type", which is Type('d', OutTS). InBaseType is only 300 /// different if CartesianProductOfTypes = 1 (for vreinterpret). 301 Type BaseType, InBaseType; 302 /// The return variable. 303 Variable RetVar; 304 /// A postfix to apply to every variable. Defaults to "". 
305 std::string VariablePostfix; 306 307 NeonEmitter &Emitter; 308 std::stringstream OS; 309 310 public: 311 Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS, 312 TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter, 313 StringRef Guard, bool IsUnavailable, bool BigEndianSafe) 314 : R(R), Name(Name.str()), Proto(Proto.str()), OutTS(OutTS), InTS(InTS), 315 CK(CK), Body(Body), Guard(Guard.str()), IsUnavailable(IsUnavailable), 316 BigEndianSafe(BigEndianSafe), NeededEarly(false), UseMacro(false), 317 BaseType(OutTS, 'd'), InBaseType(InTS, 'd'), Emitter(Emitter) { 318 // If this builtin takes an immediate argument, we need to #define it rather 319 // than use a standard declaration, so that SemaChecking can range check 320 // the immediate passed by the user. 321 if (Proto.find('i') != std::string::npos) 322 UseMacro = true; 323 324 // Pointer arguments need to use macros to avoid hiding aligned attributes 325 // from the pointer type. 326 if (Proto.find('p') != std::string::npos || 327 Proto.find('c') != std::string::npos) 328 UseMacro = true; 329 330 // It is not permitted to pass or return an __fp16 by value, so intrinsics 331 // taking a scalar float16_t must be implemented as macros. 332 if (OutTS.find('h') != std::string::npos && 333 Proto.find('s') != std::string::npos) 334 UseMacro = true; 335 336 // Modify the TypeSpec per-argument to get a concrete Type, and create 337 // known variables for each. 338 // Types[0] is the return value. 339 Types.push_back(Type(OutTS, Proto[0])); 340 for (unsigned I = 1; I < Proto.size(); ++I) 341 Types.push_back(Type(InTS, Proto[I])); 342 } 343 344 /// Get the Record that this intrinsic is based off. 345 Record *getRecord() const { return R; } 346 /// Get the set of Intrinsics that this intrinsic calls. 347 /// this is the set of immediate dependencies, NOT the 348 /// transitive closure. 
349 const std::set<Intrinsic *> &getDependencies() const { return Dependencies; } 350 /// Get the architectural guard string (#ifdef). 351 std::string getGuard() const { return Guard; } 352 /// Get the non-mangled name. 353 std::string getName() const { return Name; } 354 355 /// Return true if the intrinsic takes an immediate operand. 356 bool hasImmediate() const { 357 return Proto.find('i') != std::string::npos; 358 } 359 /// Return the parameter index of the immediate operand. 360 unsigned getImmediateIdx() const { 361 assert(hasImmediate()); 362 unsigned Idx = Proto.find('i'); 363 assert(Idx > 0 && "Can't return an immediate!"); 364 return Idx - 1; 365 } 366 367 /// Return true if the intrinsic takes an splat operand. 368 bool hasSplat() const { return Proto.find('a') != std::string::npos; } 369 /// Return the parameter index of the splat operand. 370 unsigned getSplatIdx() const { 371 assert(hasSplat()); 372 unsigned Idx = Proto.find('a'); 373 assert(Idx > 0 && "Can't return a splat!"); 374 return Idx - 1; 375 } 376 377 unsigned getNumParams() const { return Proto.size() - 1; } 378 Type getReturnType() const { return Types[0]; } 379 Type getParamType(unsigned I) const { return Types[I + 1]; } 380 Type getBaseType() const { return BaseType; } 381 /// Return the raw prototype string. 382 std::string getProto() const { return Proto; } 383 384 /// Return true if the prototype has a scalar argument. 385 /// This does not return true for the "splat" code ('a'). 386 bool protoHasScalar(); 387 388 /// Return the index that parameter PIndex will sit at 389 /// in a generated function call. This is often just PIndex, 390 /// but may not be as things such as multiple-vector operands 391 /// and sret parameters need to be taken into accont. 392 unsigned getGeneratedParamIdx(unsigned PIndex) { 393 unsigned Idx = 0; 394 if (getReturnType().getNumVectors() > 1) 395 // Multiple vectors are passed as sret. 
396 ++Idx; 397 398 for (unsigned I = 0; I < PIndex; ++I) 399 Idx += std::max(1U, getParamType(I).getNumVectors()); 400 401 return Idx; 402 } 403 404 bool hasBody() const { return Body && Body->getValues().size() > 0; } 405 406 void setNeededEarly() { NeededEarly = true; } 407 408 bool operator<(const Intrinsic &Other) const { 409 // Sort lexicographically on a two-tuple (Guard, Name) 410 if (Guard != Other.Guard) 411 return Guard < Other.Guard; 412 return Name < Other.Name; 413 } 414 415 ClassKind getClassKind(bool UseClassBIfScalar = false) { 416 if (UseClassBIfScalar && !protoHasScalar()) 417 return ClassB; 418 return CK; 419 } 420 421 /// Return the name, mangled with type information. 422 /// If ForceClassS is true, use ClassS (u32/s32) instead 423 /// of the intrinsic's own type class. 424 std::string getMangledName(bool ForceClassS = false); 425 /// Return the type code for a builtin function call. 426 std::string getInstTypeCode(Type T, ClassKind CK); 427 /// Return the type string for a BUILTIN() macro in Builtins.def. 428 std::string getBuiltinTypeStr(); 429 430 /// Generate the intrinsic, returning code. 431 std::string generate(); 432 /// Perform type checking and populate the dependency graph, but 433 /// don't generate code yet. 
434 void indexBody(); 435 436 private: 437 std::string mangleName(std::string Name, ClassKind CK); 438 439 void initVariables(); 440 std::string replaceParamsIn(std::string S); 441 442 void emitBodyAsBuiltinCall(); 443 444 void generateImpl(bool ReverseArguments, 445 StringRef NamePrefix, StringRef CallPrefix); 446 void emitReturn(); 447 void emitBody(StringRef CallPrefix); 448 void emitShadowedArgs(); 449 void emitArgumentReversal(); 450 void emitReturnReversal(); 451 void emitReverseVariable(Variable &Dest, Variable &Src); 452 void emitNewLine(); 453 void emitClosingBrace(); 454 void emitOpeningBrace(); 455 void emitPrototype(StringRef NamePrefix); 456 457 class DagEmitter { 458 Intrinsic &Intr; 459 StringRef CallPrefix; 460 461 public: 462 DagEmitter(Intrinsic &Intr, StringRef CallPrefix) : 463 Intr(Intr), CallPrefix(CallPrefix) { 464 } 465 std::pair<Type, std::string> emitDagArg(Init *Arg, std::string ArgName); 466 std::pair<Type, std::string> emitDagSaveTemp(DagInit *DI); 467 std::pair<Type, std::string> emitDagSplat(DagInit *DI); 468 std::pair<Type, std::string> emitDagDup(DagInit *DI); 469 std::pair<Type, std::string> emitDagShuffle(DagInit *DI); 470 std::pair<Type, std::string> emitDagCast(DagInit *DI, bool IsBitCast); 471 std::pair<Type, std::string> emitDagCall(DagInit *DI); 472 std::pair<Type, std::string> emitDagNameReplace(DagInit *DI); 473 std::pair<Type, std::string> emitDagLiteral(DagInit *DI); 474 std::pair<Type, std::string> emitDagOp(DagInit *DI); 475 std::pair<Type, std::string> emitDag(DagInit *DI); 476 }; 477 478 }; 479 480 //===----------------------------------------------------------------------===// 481 // NeonEmitter 482 //===----------------------------------------------------------------------===// 483 484 class NeonEmitter { 485 RecordKeeper &Records; 486 DenseMap<Record *, ClassKind> ClassMap; 487 std::map<std::string, std::vector<Intrinsic *>> IntrinsicMap; 488 unsigned UniqueNumber; 489 490 void createIntrinsic(Record *R, 
SmallVectorImpl<Intrinsic *> &Out); 491 void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs); 492 void genOverloadTypeCheckCode(raw_ostream &OS, 493 SmallVectorImpl<Intrinsic *> &Defs); 494 void genIntrinsicRangeCheckCode(raw_ostream &OS, 495 SmallVectorImpl<Intrinsic *> &Defs); 496 497 public: 498 /// Called by Intrinsic - this attempts to get an intrinsic that takes 499 /// the given types as arguments. 500 Intrinsic *getIntrinsic(StringRef Name, ArrayRef<Type> Types); 501 502 /// Called by Intrinsic - returns a globally-unique number. 503 unsigned getUniqueNumber() { return UniqueNumber++; } 504 505 NeonEmitter(RecordKeeper &R) : Records(R), UniqueNumber(0) { 506 Record *SI = R.getClass("SInst"); 507 Record *II = R.getClass("IInst"); 508 Record *WI = R.getClass("WInst"); 509 Record *SOpI = R.getClass("SOpInst"); 510 Record *IOpI = R.getClass("IOpInst"); 511 Record *WOpI = R.getClass("WOpInst"); 512 Record *LOpI = R.getClass("LOpInst"); 513 Record *NoTestOpI = R.getClass("NoTestOpInst"); 514 515 ClassMap[SI] = ClassS; 516 ClassMap[II] = ClassI; 517 ClassMap[WI] = ClassW; 518 ClassMap[SOpI] = ClassS; 519 ClassMap[IOpI] = ClassI; 520 ClassMap[WOpI] = ClassW; 521 ClassMap[LOpI] = ClassL; 522 ClassMap[NoTestOpI] = ClassNoTest; 523 } 524 525 // run - Emit arm_neon.h.inc 526 void run(raw_ostream &o); 527 528 // runHeader - Emit all the __builtin prototypes used in arm_neon.h 529 void runHeader(raw_ostream &o); 530 531 // runTests - Emit tests for all the Neon intrinsics. 
532 void runTests(raw_ostream &o); 533 }; 534 535 } // end anonymous namespace 536 537 //===----------------------------------------------------------------------===// 538 // Type implementation 539 //===----------------------------------------------------------------------===// 540 541 std::string Type::str() const { 542 if (Void) 543 return "void"; 544 std::string S; 545 546 if (!Signed && isInteger()) 547 S += "u"; 548 549 if (Poly) 550 S += "poly"; 551 else if (Float) 552 S += "float"; 553 else 554 S += "int"; 555 556 S += utostr(ElementBitwidth); 557 if (isVector()) 558 S += "x" + utostr(getNumElements()); 559 if (NumVectors > 1) 560 S += "x" + utostr(NumVectors); 561 S += "_t"; 562 563 if (Constant) 564 S += " const"; 565 if (Pointer) 566 S += " *"; 567 568 return S; 569 } 570 571 std::string Type::builtin_str() const { 572 std::string S; 573 if (isVoid()) 574 return "v"; 575 576 if (Pointer) 577 // All pointers are void pointers. 578 S += "v"; 579 else if (isInteger()) 580 switch (ElementBitwidth) { 581 case 8: S += "c"; break; 582 case 16: S += "s"; break; 583 case 32: S += "i"; break; 584 case 64: S += "Wi"; break; 585 case 128: S += "LLLi"; break; 586 default: llvm_unreachable("Unhandled case!"); 587 } 588 else 589 switch (ElementBitwidth) { 590 case 16: S += "h"; break; 591 case 32: S += "f"; break; 592 case 64: S += "d"; break; 593 default: llvm_unreachable("Unhandled case!"); 594 } 595 596 if (isChar() && !Pointer) 597 // Make chars explicitly signed. 
598 S = "S" + S; 599 else if (isInteger() && !Pointer && !Signed) 600 S = "U" + S; 601 602 if (isScalar()) { 603 if (Constant) S += "C"; 604 if (Pointer) S += "*"; 605 return S; 606 } 607 608 std::string Ret; 609 for (unsigned I = 0; I < NumVectors; ++I) 610 Ret += "V" + utostr(getNumElements()) + S; 611 612 return Ret; 613 } 614 615 unsigned Type::getNeonEnum() const { 616 unsigned Addend; 617 switch (ElementBitwidth) { 618 case 8: Addend = 0; break; 619 case 16: Addend = 1; break; 620 case 32: Addend = 2; break; 621 case 64: Addend = 3; break; 622 case 128: Addend = 4; break; 623 default: llvm_unreachable("Unhandled element bitwidth!"); 624 } 625 626 unsigned Base = (unsigned)NeonTypeFlags::Int8 + Addend; 627 if (Poly) { 628 // Adjustment needed because Poly32 doesn't exist. 629 if (Addend >= 2) 630 --Addend; 631 Base = (unsigned)NeonTypeFlags::Poly8 + Addend; 632 } 633 if (Float) { 634 assert(Addend != 0 && "Float8 doesn't exist!"); 635 Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1); 636 } 637 638 if (Bitwidth == 128) 639 Base |= (unsigned)NeonTypeFlags::QuadFlag; 640 if (isInteger() && !Signed) 641 Base |= (unsigned)NeonTypeFlags::UnsignedFlag; 642 643 return Base; 644 } 645 646 Type Type::fromTypedefName(StringRef Name) { 647 Type T; 648 T.Void = false; 649 T.Float = false; 650 T.Poly = false; 651 652 if (Name.front() == 'u') { 653 T.Signed = false; 654 Name = Name.drop_front(); 655 } else { 656 T.Signed = true; 657 } 658 659 if (Name.startswith("float")) { 660 T.Float = true; 661 Name = Name.drop_front(5); 662 } else if (Name.startswith("poly")) { 663 T.Poly = true; 664 Name = Name.drop_front(4); 665 } else { 666 assert(Name.startswith("int")); 667 Name = Name.drop_front(3); 668 } 669 670 unsigned I = 0; 671 for (I = 0; I < Name.size(); ++I) { 672 if (!isdigit(Name[I])) 673 break; 674 } 675 Name.substr(0, I).getAsInteger(10, T.ElementBitwidth); 676 Name = Name.drop_front(I); 677 678 T.Bitwidth = T.ElementBitwidth; 679 T.NumVectors = 1; 680 681 if 
(Name.front() == 'x') { 682 Name = Name.drop_front(); 683 unsigned I = 0; 684 for (I = 0; I < Name.size(); ++I) { 685 if (!isdigit(Name[I])) 686 break; 687 } 688 unsigned NumLanes; 689 Name.substr(0, I).getAsInteger(10, NumLanes); 690 Name = Name.drop_front(I); 691 T.Bitwidth = T.ElementBitwidth * NumLanes; 692 } else { 693 // Was scalar. 694 T.NumVectors = 0; 695 } 696 if (Name.front() == 'x') { 697 Name = Name.drop_front(); 698 unsigned I = 0; 699 for (I = 0; I < Name.size(); ++I) { 700 if (!isdigit(Name[I])) 701 break; 702 } 703 Name.substr(0, I).getAsInteger(10, T.NumVectors); 704 Name = Name.drop_front(I); 705 } 706 707 assert(Name.startswith("_t") && "Malformed typedef!"); 708 return T; 709 } 710 711 void Type::applyTypespec(bool &Quad) { 712 std::string S = TS; 713 ScalarForMangling = false; 714 Void = false; 715 Poly = Float = false; 716 ElementBitwidth = ~0U; 717 Signed = true; 718 NumVectors = 1; 719 720 for (char I : S) { 721 switch (I) { 722 case 'S': 723 ScalarForMangling = true; 724 break; 725 case 'H': 726 NoManglingQ = true; 727 Quad = true; 728 break; 729 case 'Q': 730 Quad = true; 731 break; 732 case 'P': 733 Poly = true; 734 break; 735 case 'U': 736 Signed = false; 737 break; 738 case 'c': 739 ElementBitwidth = 8; 740 break; 741 case 'h': 742 Float = true; 743 // Fall through 744 case 's': 745 ElementBitwidth = 16; 746 break; 747 case 'f': 748 Float = true; 749 // Fall through 750 case 'i': 751 ElementBitwidth = 32; 752 break; 753 case 'd': 754 Float = true; 755 // Fall through 756 case 'l': 757 ElementBitwidth = 64; 758 break; 759 case 'k': 760 ElementBitwidth = 128; 761 // Poly doesn't have a 128x1 type. 762 if (Poly) 763 NumVectors = 0; 764 break; 765 default: 766 llvm_unreachable("Unhandled type code!"); 767 } 768 } 769 assert(ElementBitwidth != ~0U && "Bad element bitwidth!"); 770 771 Bitwidth = Quad ? 
128 : 64; 772 } 773 774 void Type::applyModifier(char Mod) { 775 bool AppliedQuad = false; 776 applyTypespec(AppliedQuad); 777 778 switch (Mod) { 779 case 'v': 780 Void = true; 781 break; 782 case 't': 783 if (Poly) { 784 Poly = false; 785 Signed = false; 786 } 787 break; 788 case 'b': 789 Signed = false; 790 Float = false; 791 Poly = false; 792 NumVectors = 0; 793 Bitwidth = ElementBitwidth; 794 break; 795 case '$': 796 Signed = true; 797 Float = false; 798 Poly = false; 799 NumVectors = 0; 800 Bitwidth = ElementBitwidth; 801 break; 802 case 'u': 803 Signed = false; 804 Poly = false; 805 Float = false; 806 break; 807 case 'x': 808 Signed = true; 809 assert(!Poly && "'u' can't be used with poly types!"); 810 Float = false; 811 break; 812 case 'o': 813 Bitwidth = ElementBitwidth = 64; 814 NumVectors = 0; 815 Float = true; 816 break; 817 case 'y': 818 Bitwidth = ElementBitwidth = 32; 819 NumVectors = 0; 820 Float = true; 821 break; 822 case 'f': 823 // Special case - if we're half-precision, a floating 824 // point argument needs to be 128-bits (double size). 
825 if (isHalf()) 826 Bitwidth = 128; 827 Float = true; 828 ElementBitwidth = 32; 829 break; 830 case 'F': 831 Float = true; 832 ElementBitwidth = 64; 833 break; 834 case 'g': 835 if (AppliedQuad) 836 Bitwidth /= 2; 837 break; 838 case 'j': 839 if (!AppliedQuad) 840 Bitwidth *= 2; 841 break; 842 case 'w': 843 ElementBitwidth *= 2; 844 Bitwidth *= 2; 845 break; 846 case 'n': 847 ElementBitwidth *= 2; 848 break; 849 case 'i': 850 Float = false; 851 Poly = false; 852 ElementBitwidth = Bitwidth = 32; 853 NumVectors = 0; 854 Signed = true; 855 break; 856 case 'l': 857 Float = false; 858 Poly = false; 859 ElementBitwidth = Bitwidth = 64; 860 NumVectors = 0; 861 Signed = false; 862 break; 863 case 'z': 864 ElementBitwidth /= 2; 865 Bitwidth = ElementBitwidth; 866 NumVectors = 0; 867 break; 868 case 'r': 869 ElementBitwidth *= 2; 870 Bitwidth = ElementBitwidth; 871 NumVectors = 0; 872 break; 873 case 's': 874 case 'a': 875 Bitwidth = ElementBitwidth; 876 NumVectors = 0; 877 break; 878 case 'k': 879 Bitwidth *= 2; 880 break; 881 case 'c': 882 Constant = true; 883 // Fall through 884 case 'p': 885 Pointer = true; 886 Bitwidth = ElementBitwidth; 887 NumVectors = 0; 888 break; 889 case 'h': 890 ElementBitwidth /= 2; 891 break; 892 case 'q': 893 ElementBitwidth /= 2; 894 Bitwidth *= 2; 895 break; 896 case 'e': 897 ElementBitwidth /= 2; 898 Signed = false; 899 break; 900 case 'm': 901 ElementBitwidth /= 2; 902 Bitwidth /= 2; 903 break; 904 case 'd': 905 break; 906 case '2': 907 NumVectors = 2; 908 break; 909 case '3': 910 NumVectors = 3; 911 break; 912 case '4': 913 NumVectors = 4; 914 break; 915 case 'B': 916 NumVectors = 2; 917 if (!AppliedQuad) 918 Bitwidth *= 2; 919 break; 920 case 'C': 921 NumVectors = 3; 922 if (!AppliedQuad) 923 Bitwidth *= 2; 924 break; 925 case 'D': 926 NumVectors = 4; 927 if (!AppliedQuad) 928 Bitwidth *= 2; 929 break; 930 default: 931 llvm_unreachable("Unhandled character!"); 932 } 933 } 934 935 
//===----------------------------------------------------------------------===// 936 // Intrinsic implementation 937 //===----------------------------------------------------------------------===// 938 939 std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) { 940 char typeCode = '\0'; 941 bool printNumber = true; 942 943 if (CK == ClassB) 944 return ""; 945 946 if (T.isPoly()) 947 typeCode = 'p'; 948 else if (T.isInteger()) 949 typeCode = T.isSigned() ? 's' : 'u'; 950 else 951 typeCode = 'f'; 952 953 if (CK == ClassI) { 954 switch (typeCode) { 955 default: 956 break; 957 case 's': 958 case 'u': 959 case 'p': 960 typeCode = 'i'; 961 break; 962 } 963 } 964 if (CK == ClassB) { 965 typeCode = '\0'; 966 } 967 968 std::string S; 969 if (typeCode != '\0') 970 S.push_back(typeCode); 971 if (printNumber) 972 S += utostr(T.getElementSizeInBits()); 973 974 return S; 975 } 976 977 std::string Intrinsic::getBuiltinTypeStr() { 978 ClassKind LocalCK = getClassKind(true); 979 std::string S; 980 981 Type RetT = getReturnType(); 982 if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() && 983 !RetT.isFloating()) 984 RetT.makeInteger(RetT.getElementSizeInBits(), false); 985 986 // Since the return value must be one type, return a vector type of the 987 // appropriate width which we will bitcast. An exception is made for 988 // returning structs of 2, 3, or 4 vectors which are returned in a sret-like 989 // fashion, storing them to a pointer arg. 990 if (RetT.getNumVectors() > 1) { 991 S += "vv*"; // void result with void* first argument 992 } else { 993 if (RetT.isPoly()) 994 RetT.makeInteger(RetT.getElementSizeInBits(), false); 995 if (!RetT.isScalar() && !RetT.isSigned()) 996 RetT.makeSigned(); 997 998 bool ForcedVectorFloatingType = Proto[0] == 'F' || Proto[0] == 'f'; 999 if (LocalCK == ClassB && !RetT.isScalar() && !ForcedVectorFloatingType) 1000 // Cast to vector of 8-bit elements. 
1001 RetT.makeInteger(8, true); 1002 1003 S += RetT.builtin_str(); 1004 } 1005 1006 for (unsigned I = 0; I < getNumParams(); ++I) { 1007 Type T = getParamType(I); 1008 if (T.isPoly()) 1009 T.makeInteger(T.getElementSizeInBits(), false); 1010 1011 bool ForcedFloatingType = Proto[I + 1] == 'F' || Proto[I + 1] == 'f'; 1012 if (LocalCK == ClassB && !T.isScalar() && !ForcedFloatingType) 1013 T.makeInteger(8, true); 1014 // Halves always get converted to 8-bit elements. 1015 if (T.isHalf() && T.isVector() && !T.isScalarForMangling()) 1016 T.makeInteger(8, true); 1017 1018 if (LocalCK == ClassI) 1019 T.makeSigned(); 1020 1021 // Constant indices are always just "int". 1022 if (hasImmediate() && getImmediateIdx() == I) 1023 T.makeInteger(32, true); 1024 1025 S += T.builtin_str(); 1026 } 1027 1028 // Extra constant integer to hold type class enum for this function, e.g. s8 1029 if (LocalCK == ClassB) 1030 S += "i"; 1031 1032 return S; 1033 } 1034 1035 std::string Intrinsic::getMangledName(bool ForceClassS) { 1036 // Check if the prototype has a scalar operand with the type of the vector 1037 // elements. If not, bitcasting the args will take care of arg checking. 1038 // The actual signedness etc. will be taken care of with special enums. 1039 ClassKind LocalCK = CK; 1040 if (!protoHasScalar()) 1041 LocalCK = ClassB; 1042 1043 return mangleName(Name, ForceClassS ? ClassS : LocalCK); 1044 } 1045 1046 std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) { 1047 std::string typeCode = getInstTypeCode(BaseType, LocalCK); 1048 std::string S = Name; 1049 1050 if (Name == "vcvt_f32_f16" || Name == "vcvt_f32_f64" || 1051 Name == "vcvt_f64_f32") 1052 return Name; 1053 1054 if (typeCode.size() > 0) { 1055 // If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN. 
1056 if (Name.size() >= 3 && isdigit(Name.back()) && 1057 Name[Name.length() - 2] == 'x' && Name[Name.length() - 3] == '_') 1058 S.insert(S.length() - 3, "_" + typeCode); 1059 else 1060 S += "_" + typeCode; 1061 } 1062 1063 if (BaseType != InBaseType) { 1064 // A reinterpret - out the input base type at the end. 1065 S += "_" + getInstTypeCode(InBaseType, LocalCK); 1066 } 1067 1068 if (LocalCK == ClassB) 1069 S += "_v"; 1070 1071 // Insert a 'q' before the first '_' character so that it ends up before 1072 // _lane or _n on vector-scalar operations. 1073 if (BaseType.getSizeInBits() == 128 && !BaseType.noManglingQ()) { 1074 size_t Pos = S.find('_'); 1075 S.insert(Pos, "q"); 1076 } 1077 1078 char Suffix = '\0'; 1079 if (BaseType.isScalarForMangling()) { 1080 switch (BaseType.getElementSizeInBits()) { 1081 case 8: Suffix = 'b'; break; 1082 case 16: Suffix = 'h'; break; 1083 case 32: Suffix = 's'; break; 1084 case 64: Suffix = 'd'; break; 1085 default: llvm_unreachable("Bad suffix!"); 1086 } 1087 } 1088 if (Suffix != '\0') { 1089 size_t Pos = S.find('_'); 1090 S.insert(Pos, &Suffix, 1); 1091 } 1092 1093 return S; 1094 } 1095 1096 std::string Intrinsic::replaceParamsIn(std::string S) { 1097 while (S.find('$') != std::string::npos) { 1098 size_t Pos = S.find('$'); 1099 size_t End = Pos + 1; 1100 while (isalpha(S[End])) 1101 ++End; 1102 1103 std::string VarName = S.substr(Pos + 1, End - Pos - 1); 1104 assert_with_loc(Variables.find(VarName) != Variables.end(), 1105 "Variable not defined!"); 1106 S.replace(Pos, End - Pos, Variables.find(VarName)->second.getName()); 1107 } 1108 1109 return S; 1110 } 1111 1112 void Intrinsic::initVariables() { 1113 Variables.clear(); 1114 1115 // Modify the TypeSpec per-argument to get a concrete Type, and create 1116 // known variables for each. 
1117 for (unsigned I = 1; I < Proto.size(); ++I) { 1118 char NameC = '0' + (I - 1); 1119 std::string Name = "p"; 1120 Name.push_back(NameC); 1121 1122 Variables[Name] = Variable(Types[I], Name + VariablePostfix); 1123 } 1124 RetVar = Variable(Types[0], "ret" + VariablePostfix); 1125 } 1126 1127 void Intrinsic::emitPrototype(StringRef NamePrefix) { 1128 if (UseMacro) 1129 OS << "#define "; 1130 else 1131 OS << "__ai " << Types[0].str() << " "; 1132 1133 OS << NamePrefix.str() << mangleName(Name, ClassS) << "("; 1134 1135 for (unsigned I = 0; I < getNumParams(); ++I) { 1136 if (I != 0) 1137 OS << ", "; 1138 1139 char NameC = '0' + I; 1140 std::string Name = "p"; 1141 Name.push_back(NameC); 1142 assert(Variables.find(Name) != Variables.end()); 1143 Variable &V = Variables[Name]; 1144 1145 if (!UseMacro) 1146 OS << V.getType().str() << " "; 1147 OS << V.getName(); 1148 } 1149 1150 OS << ")"; 1151 } 1152 1153 void Intrinsic::emitOpeningBrace() { 1154 if (UseMacro) 1155 OS << " __extension__ ({"; 1156 else 1157 OS << " {"; 1158 emitNewLine(); 1159 } 1160 1161 void Intrinsic::emitClosingBrace() { 1162 if (UseMacro) 1163 OS << "})"; 1164 else 1165 OS << "}"; 1166 } 1167 1168 void Intrinsic::emitNewLine() { 1169 if (UseMacro) 1170 OS << " \\\n"; 1171 else 1172 OS << "\n"; 1173 } 1174 1175 void Intrinsic::emitReverseVariable(Variable &Dest, Variable &Src) { 1176 if (Dest.getType().getNumVectors() > 1) { 1177 emitNewLine(); 1178 1179 for (unsigned K = 0; K < Dest.getType().getNumVectors(); ++K) { 1180 OS << " " << Dest.getName() << ".val[" << utostr(K) << "] = " 1181 << "__builtin_shufflevector(" 1182 << Src.getName() << ".val[" << utostr(K) << "], " 1183 << Src.getName() << ".val[" << utostr(K) << "]"; 1184 for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J) 1185 OS << ", " << utostr(J); 1186 OS << ");"; 1187 emitNewLine(); 1188 } 1189 } else { 1190 OS << " " << Dest.getName() 1191 << " = __builtin_shufflevector(" << Src.getName() << ", " << Src.getName(); 1192 
for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J) 1193 OS << ", " << utostr(J); 1194 OS << ");"; 1195 emitNewLine(); 1196 } 1197 } 1198 1199 void Intrinsic::emitArgumentReversal() { 1200 if (BigEndianSafe) 1201 return; 1202 1203 // Reverse all vector arguments. 1204 for (unsigned I = 0; I < getNumParams(); ++I) { 1205 std::string Name = "p" + utostr(I); 1206 std::string NewName = "rev" + utostr(I); 1207 1208 Variable &V = Variables[Name]; 1209 Variable NewV(V.getType(), NewName + VariablePostfix); 1210 1211 if (!NewV.getType().isVector() || NewV.getType().getNumElements() == 1) 1212 continue; 1213 1214 OS << " " << NewV.getType().str() << " " << NewV.getName() << ";"; 1215 emitReverseVariable(NewV, V); 1216 V = NewV; 1217 } 1218 } 1219 1220 void Intrinsic::emitReturnReversal() { 1221 if (BigEndianSafe) 1222 return; 1223 if (!getReturnType().isVector() || getReturnType().isVoid() || 1224 getReturnType().getNumElements() == 1) 1225 return; 1226 emitReverseVariable(RetVar, RetVar); 1227 } 1228 1229 1230 void Intrinsic::emitShadowedArgs() { 1231 // Macro arguments are not type-checked like inline function arguments, 1232 // so assign them to local temporaries to get the right type checking. 1233 if (!UseMacro) 1234 return; 1235 1236 for (unsigned I = 0; I < getNumParams(); ++I) { 1237 // Do not create a temporary for an immediate argument. 1238 // That would defeat the whole point of using a macro! 1239 if (hasImmediate() && Proto[I+1] == 'i') 1240 continue; 1241 // Do not create a temporary for pointer arguments. The input 1242 // pointer may have an alignment hint. 
1243 if (getParamType(I).isPointer()) 1244 continue; 1245 1246 std::string Name = "p" + utostr(I); 1247 1248 assert(Variables.find(Name) != Variables.end()); 1249 Variable &V = Variables[Name]; 1250 1251 std::string NewName = "s" + utostr(I); 1252 Variable V2(V.getType(), NewName + VariablePostfix); 1253 1254 OS << " " << V2.getType().str() << " " << V2.getName() << " = " 1255 << V.getName() << ";"; 1256 emitNewLine(); 1257 1258 V = V2; 1259 } 1260 } 1261 1262 // We don't check 'a' in this function, because for builtin function the 1263 // argument matching to 'a' uses a vector type splatted from a scalar type. 1264 bool Intrinsic::protoHasScalar() { 1265 return (Proto.find('s') != std::string::npos || 1266 Proto.find('z') != std::string::npos || 1267 Proto.find('r') != std::string::npos || 1268 Proto.find('b') != std::string::npos || 1269 Proto.find('$') != std::string::npos || 1270 Proto.find('y') != std::string::npos || 1271 Proto.find('o') != std::string::npos); 1272 } 1273 1274 void Intrinsic::emitBodyAsBuiltinCall() { 1275 std::string S; 1276 1277 // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit 1278 // sret-like argument. 1279 bool SRet = getReturnType().getNumVectors() >= 2; 1280 1281 StringRef N = Name; 1282 if (hasSplat()) { 1283 // Call the non-splat builtin: chop off the "_n" suffix from the name. 1284 assert(N.endswith("_n")); 1285 N = N.drop_back(2); 1286 } 1287 1288 ClassKind LocalCK = CK; 1289 if (!protoHasScalar()) 1290 LocalCK = ClassB; 1291 1292 if (!getReturnType().isVoid() && !SRet) 1293 S += "(" + RetVar.getType().str() + ") "; 1294 1295 S += "__builtin_neon_" + mangleName(N, LocalCK) + "("; 1296 1297 if (SRet) 1298 S += "&" + RetVar.getName() + ", "; 1299 1300 for (unsigned I = 0; I < getNumParams(); ++I) { 1301 Variable &V = Variables["p" + utostr(I)]; 1302 Type T = V.getType(); 1303 1304 // Handle multiple-vector values specially, emitting each subvector as an 1305 // argument to the builtin. 
1306 if (T.getNumVectors() > 1) { 1307 // Check if an explicit cast is needed. 1308 std::string Cast; 1309 if (T.isChar() || T.isPoly() || !T.isSigned()) { 1310 Type T2 = T; 1311 T2.makeOneVector(); 1312 T2.makeInteger(8, /*Signed=*/true); 1313 Cast = "(" + T2.str() + ")"; 1314 } 1315 1316 for (unsigned J = 0; J < T.getNumVectors(); ++J) 1317 S += Cast + V.getName() + ".val[" + utostr(J) + "], "; 1318 continue; 1319 } 1320 1321 std::string Arg; 1322 Type CastToType = T; 1323 if (hasSplat() && I == getSplatIdx()) { 1324 Arg = "(" + BaseType.str() + ") {"; 1325 for (unsigned J = 0; J < BaseType.getNumElements(); ++J) { 1326 if (J != 0) 1327 Arg += ", "; 1328 Arg += V.getName(); 1329 } 1330 Arg += "}"; 1331 1332 CastToType = BaseType; 1333 } else { 1334 Arg = V.getName(); 1335 } 1336 1337 // Check if an explicit cast is needed. 1338 if (CastToType.isVector()) { 1339 CastToType.makeInteger(8, true); 1340 Arg = "(" + CastToType.str() + ")" + Arg; 1341 } 1342 1343 S += Arg + ", "; 1344 } 1345 1346 // Extra constant integer to hold type class enum for this function, e.g. s8 1347 if (getClassKind(true) == ClassB) { 1348 Type ThisTy = getReturnType(); 1349 if (Proto[0] == 'v' || Proto[0] == 'f' || Proto[0] == 'F') 1350 ThisTy = getParamType(0); 1351 if (ThisTy.isPointer()) 1352 ThisTy = getParamType(1); 1353 1354 S += utostr(ThisTy.getNeonEnum()); 1355 } else { 1356 // Remove extraneous ", ". 1357 S.pop_back(); 1358 S.pop_back(); 1359 } 1360 S += ");"; 1361 1362 std::string RetExpr; 1363 if (!SRet && !RetVar.getType().isVoid()) 1364 RetExpr = RetVar.getName() + " = "; 1365 1366 OS << " " << RetExpr << S; 1367 emitNewLine(); 1368 } 1369 1370 void Intrinsic::emitBody(StringRef CallPrefix) { 1371 std::vector<std::string> Lines; 1372 1373 assert(RetVar.getType() == Types[0]); 1374 // Create a return variable, if we're not void. 
if (!RetVar.getType().isVoid()) {
    OS << " " << RetVar.getType().str() << " " << RetVar.getName() << ";";
    emitNewLine();
  }

  if (!Body || Body->getValues().size() == 0) {
    // Nothing specific to output - must output a builtin.
    emitBodyAsBuiltinCall();
    return;
  }

  // We have a list of "things to output". The last should be returned.
  // Strings get their $variables substituted; DAGs are expanded by a
  // DagEmitter. Entries of any other kind are silently ignored.
  for (auto *I : Body->getValues()) {
    if (StringInit *SI = dyn_cast<StringInit>(I)) {
      Lines.push_back(replaceParamsIn(SI->getAsString()));
    } else if (DagInit *DI = dyn_cast<DagInit>(I)) {
      DagEmitter DE(*this, CallPrefix);
      Lines.push_back(DE.emitDag(DI).second + ";");
    }
  }

  assert(Lines.size() && "Empty def?");
  // The final line produces the result, so turn it into an assignment.
  if (!RetVar.getType().isVoid())
    Lines.back().insert(0, RetVar.getName() + " = ");

  for (auto &L : Lines) {
    OS << " " << L;
    emitNewLine();
  }
}

/// Emit the statement that yields the result: a bare expression statement
/// inside a macro, a "return" statement inside a function. Nothing is emitted
/// for void intrinsics.
void Intrinsic::emitReturn() {
  if (RetVar.getType().isVoid())
    return;
  if (UseMacro)
    OS << " " << RetVar.getName() << ";";
  else
    OS << " return " << RetVar.getName() << ";";
  emitNewLine();
}

/// Dispatch a DAG to the handler for its operator.
///
/// \returns the (type, expression text) pair produced by the handler. An
/// unknown operator is reported through assert_with_loc.
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(DagInit *DI) {
  // At this point we should only be seeing a def.
  DefInit *DefI = cast<DefInit>(DI->getOperator());
  std::string Op = DefI->getAsString();

  if (Op == "cast" || Op == "bitcast")
    return emitDagCast(DI, Op == "bitcast");
  if (Op == "shuffle")
    return emitDagShuffle(DI);
  if (Op == "dup")
    return emitDagDup(DI);
  if (Op == "splat")
    return emitDagSplat(DI);
  if (Op == "save_temp")
    return emitDagSaveTemp(DI);
  if (Op == "op")
    return emitDagOp(DI);
  if (Op == "call")
    return emitDagCall(DI);
  if (Op == "name_replace")
    return emitDagNameReplace(DI);
  if (Op == "literal")
    return emitDagLiteral(DI);
  assert_with_loc(false, "Unknown operation!");
  return std::make_pair(Type::getVoid(), "");
}

/// Emit (op "OP" a) as "OPa" or (op "OP" a b) as "a OP b".
///
/// The result takes the type of the first operand; for the binary form both
/// operand types must compare equal.
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagOp(DagInit *DI) {
  std::string Op = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
  if (DI->getNumArgs() == 2) {
    // Unary op.
    std::pair<Type, std::string> R =
        emitDagArg(DI->getArg(1), DI->getArgName(1));
    return std::make_pair(R.first, Op + R.second);
  } else {
    assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!");
    std::pair<Type, std::string> R1 =
        emitDagArg(DI->getArg(1), DI->getArgName(1));
    std::pair<Type, std::string> R2 =
        emitDagArg(DI->getArg(2), DI->getArgName(2));
    assert_with_loc(R1.first == R2.first, "Argument type mismatch!");
    return std::make_pair(R1.first, R1.second + " " + Op + " " + R2.second);
  }
}

std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCall(DagInit *DI) {
  // Argument 0 names the callee; the remaining arguments are its operands.
  std::vector<Type> Types;
  std::vector<std::string> Values;
  for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
    std::pair<Type, std::string> R =
        emitDagArg(DI->getArg(I + 1), DI->getArgName(I + 1));
    Types.push_back(R.first);
    Values.push_back(R.second);
  }

  // Look up the called intrinsic.
std::string N;
  // The callee name is either a string literal or the result of evaluating a
  // nested DAG.
  if (StringInit *SI = dyn_cast<StringInit>(DI->getArg(0)))
    N = SI->getAsUnquotedString();
  else
    N = emitDagArg(DI->getArg(0), "").second;
  Intrinsic *Callee = Intr.Emitter.getIntrinsic(N, Types);
  assert(Callee && "getIntrinsic should not return us nullptr!");

  // Make sure the callee is known as an early def.
  Callee->setNeededEarly();
  Intr.Dependencies.insert(Callee);

  // Now create the call itself.
  std::string S = CallPrefix.str() + Callee->getMangledName(true) + "(";
  for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
    if (I != 0)
      S += ", ";
    S += Values[I];
  }
  S += ")";

  return std::make_pair(Callee->getReturnType(), S);
}

/// Emit a cast of the last DAG argument to the type described by the
/// preceding arguments, which are applied left to right.
///
/// \param IsBitCast when true the value is first stored in a fresh "reint"
///        temporary (the declaration is written straight to Intr.OS) and the
///        result reinterprets that temporary through a pointer cast;
///        otherwise a plain C-style cast expression is produced.
/// \returns the resulting type and the cast expression text.
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCast(DagInit *DI,
                                                                bool IsBitCast){
  // (cast MOD* VAL) -> cast VAL to type given by MOD.
  std::pair<Type, std::string> R = emitDagArg(
      DI->getArg(DI->getNumArgs() - 1), DI->getArgName(DI->getNumArgs() - 1));
  Type castToType = R.first;
  for (unsigned ArgIdx = 0; ArgIdx < DI->getNumArgs() - 1; ++ArgIdx) {

    // MOD can take several forms:
    //   1. $X    - take the type of parameter / variable X.
    //   2. The value "R" - take the type of the return type.
    //   3. a type string
    //   4. The value "U" or "S" to switch the signedness.
    //   5. The value "H" or "D" to halve or double the lanes
    //      (halveLanes() / doubleLanes()).
    //   6. The value "8" to convert to 8-bit (signed) integer lanes.
    if (DI->getArgName(ArgIdx).size()) {
      assert_with_loc(Intr.Variables.find(DI->getArgName(ArgIdx)) !=
                      Intr.Variables.end(),
                      "Variable not found");
      castToType = Intr.Variables[DI->getArgName(ArgIdx)].getType();
    } else {
      StringInit *SI = dyn_cast<StringInit>(DI->getArg(ArgIdx));
      assert_with_loc(SI, "Expected string type or $Name for cast type");

      if (SI->getAsUnquotedString() == "R") {
        castToType = Intr.getReturnType();
      } else if (SI->getAsUnquotedString() == "U") {
        castToType.makeUnsigned();
      } else if (SI->getAsUnquotedString() == "S") {
        castToType.makeSigned();
      } else if (SI->getAsUnquotedString() == "H") {
        castToType.halveLanes();
      } else if (SI->getAsUnquotedString() == "D") {
        castToType.doubleLanes();
      } else if (SI->getAsUnquotedString() == "8") {
        castToType.makeInteger(8, true);
      } else {
        // Anything else must be a typedef name.
        castToType = Type::fromTypedefName(SI->getAsUnquotedString());
        assert_with_loc(!castToType.isVoid(), "Unknown typedef");
      }
    }
  }

  std::string S;
  if (IsBitCast) {
    // Emit a reinterpret cast. The second operand must be an lvalue, so create
    // a temporary.
    // Pick the first unused name among "reint", "reint1", "reint2", ...
    std::string N = "reint";
    unsigned I = 0;
    while (Intr.Variables.find(N) != Intr.Variables.end())
      N = "reint" + utostr(++I);
    Intr.Variables[N] = Variable(R.first, N + Intr.VariablePostfix);

    // The temporary's declaration is written out immediately, ahead of the
    // expression text that is returned to the caller.
    Intr.OS << R.first.str() << " " << Intr.Variables[N].getName() << " = "
            << R.second << ";";
    Intr.emitNewLine();

    S = "*(" + castToType.str() + " *) &" + Intr.Variables[N].getName() + "";
  } else {
    // Emit a normal (static) cast.
    S = "(" + castToType.str() + ")(" + R.second + ")";
  }

  return std::make_pair(castToType, S);
}

std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagShuffle(DagInit *DI){
  // See the documentation in arm_neon.td for a description of these operators.
class LowHalf : public SetTheory::Operator {
  public:
    virtual void anchor() {}
    virtual ~LowHalf() {}
    // Evaluate all arguments and keep the first half of the resulting set.
    virtual void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
                       ArrayRef<SMLoc> Loc) {
      SetTheory::RecSet Elts2;
      ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc);
      Elts.insert(Elts2.begin(), Elts2.begin() + (Elts2.size() / 2));
    }
  };
  class HighHalf : public SetTheory::Operator {
  public:
    virtual void anchor() {}
    virtual ~HighHalf() {}
    // Evaluate all arguments and keep the second half of the resulting set.
    virtual void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
                       ArrayRef<SMLoc> Loc) {
      SetTheory::RecSet Elts2;
      ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc);
      Elts.insert(Elts2.begin() + (Elts2.size() / 2), Elts2.end());
    }
  };
  class Rev : public SetTheory::Operator {
    unsigned ElementSize;

  public:
    Rev(unsigned ElementSize) : ElementSize(ElementSize) {}
    virtual void anchor() {}
    virtual ~Rev() {}
    // Argument 0 is an integer size; dividing it by ElementSize gives the
    // group length. The remaining arguments are evaluated and each
    // consecutive group of that many elements is reversed.
    // NOTE(review): nothing here checks that the element count is a multiple
    // of the group length, or that the group length is non-zero - confirm the
    // .td inputs guarantee both.
    virtual void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
                       ArrayRef<SMLoc> Loc) {
      SetTheory::RecSet Elts2;
      ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Elts2, Loc);

      int64_t VectorSize = cast<IntInit>(Expr->getArg(0))->getValue();
      VectorSize /= ElementSize;

      std::vector<Record *> Revved;
      for (unsigned VI = 0; VI < Elts2.size(); VI += VectorSize) {
        for (int LI = VectorSize - 1; LI >= 0; --LI) {
          Revved.push_back(Elts2[VI + LI]);
        }
      }

      Elts.insert(Revved.begin(), Revved.end());
    }
  };
  class MaskExpander : public SetTheory::Expander {
    unsigned N;

  public:
    MaskExpander(unsigned N) : N(N) {}
    virtual void anchor() {}
    virtual ~MaskExpander() {}
    // Expand "mask0" to the defs sv0..sv(N-1) and "mask1" to svN..sv(2N-1).
    // Records with any other name contribute nothing.
    virtual void expand(SetTheory &ST, Record *R, SetTheory::RecSet &Elts) {
      unsigned Addend = 0;
      if (R->getName() == "mask0")
        Addend = 0;
      else if (R->getName() == "mask1")
        Addend = N;
      else
        return;
      for (unsigned I = 0; I < N; ++I)
        Elts.insert(R->getRecords().getDef("sv" + utostr(I + Addend)));
    }
  };

  // (shuffle arg1, arg2, sequence)
  std::pair<Type, std::string> Arg1 =
      emitDagArg(DI->getArg(0), DI->getArgName(0));
  std::pair<Type, std::string> Arg2 =
      emitDagArg(DI->getArg(1), DI->getArgName(1));
  assert_with_loc(Arg1.first == Arg2.first,
                  "Different types in arguments to shuffle!");

  // Evaluate the sequence with a SetTheory instance that knows the operators
  // and the expander defined above.
  SetTheory ST;
  LowHalf LH;
  HighHalf HH;
  MaskExpander ME(Arg1.first.getNumElements());
  Rev R(Arg1.first.getElementSizeInBits());
  SetTheory::RecSet Elts;
  ST.addOperator("lowhalf", &LH);
  ST.addOperator("highhalf", &HH);
  ST.addOperator("rev", &R);
  ST.addExpander("MaskExpand", &ME);
  ST.evaluate(DI->getArg(2), Elts, ArrayRef<SMLoc>());

  // Each resulting record is named "sv<index>"; the index becomes a shuffle
  // mask operand.
  std::string S = "__builtin_shufflevector(" + Arg1.second + ", " + Arg2.second;
  for (auto &E : Elts) {
    StringRef Name = E->getName();
    assert_with_loc(Name.startswith("sv"),
                    "Incorrect element kind in shuffle mask!");
    S += ", " + Name.drop_front(2).str();
  }
  S += ")";

  // Recalculate the return type - the shuffle may have halved or doubled it.
  Type T(Arg1.first);
  if (Elts.size() > T.getNumElements()) {
    assert_with_loc(
        Elts.size() == T.getNumElements() * 2,
        "Can only double or half the number of elements in a shuffle!");
    T.doubleLanes();
  } else if (Elts.size() < T.getNumElements()) {
    assert_with_loc(
        Elts.size() == T.getNumElements() / 2,
        "Can only double or half the number of elements in a shuffle!");
    T.halveLanes();
  }

  return std::make_pair(T, S);
}

/// Emit (dup x): a vector literal of the intrinsic's base type with the
/// scalar x repeated in every element.
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDup(DagInit *DI) {
  assert_with_loc(DI->getNumArgs() == 1, "dup() expects one argument");
  std::pair<Type, std::string> A = emitDagArg(DI->getArg(0), DI->getArgName(0));
  assert_with_loc(A.first.isScalar(), "dup() expects a scalar argument");

  Type T = Intr.getBaseType();
  assert_with_loc(T.isVector(), "dup() used but default type is scalar!");
  std::string S = "(" + T.str() + ") {";
  for (unsigned I = 0; I < T.getNumElements(); ++I) {
    if (I != 0)
      S += ", ";
    S += A.second;
  }
  S += "}";

  return std::make_pair(T, S);
}

/// Emit (splat v, idx): a __builtin_shufflevector of v with itself whose mask
/// repeats idx once per element of the intrinsic's base type.
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSplat(DagInit *DI) {
  assert_with_loc(DI->getNumArgs() == 2, "splat() expects two arguments");
  std::pair<Type, std::string> A = emitDagArg(DI->getArg(0), DI->getArgName(0));
  std::pair<Type, std::string> B = emitDagArg(DI->getArg(1), DI->getArgName(1));

  assert_with_loc(B.first.isScalar(),
                  "splat() requires a scalar int as the second argument");

  std::string S = "__builtin_shufflevector(" + A.second + ", " + A.second;
  for (unsigned I = 0; I < Intr.getBaseType().getNumElements(); ++I) {
    S += ", " + B.second;
  }
  S += ")";

  return std::make_pair(Intr.getBaseType(), S);
}

/// Emit (save_temp $name, value): declare a new variable "name" initialised
/// with value and register it in the intrinsic's variable table.
///
/// The returned type is void, since the result is a declaration rather than
/// an expression.
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSaveTemp(DagInit *DI) {
  assert_with_loc(DI->getNumArgs() == 2, "save_temp() expects two arguments");
  std::pair<Type, std::string> A = emitDagArg(DI->getArg(1), DI->getArgName(1));

  assert_with_loc(!A.first.isVoid(),
                  "Argument to save_temp() must have non-void type!");

  std::string N = DI->getArgName(0);
  assert_with_loc(N.size(), "save_temp() expects a name as the first argument");

  assert_with_loc(Intr.Variables.find(N) == Intr.Variables.end(),
                  "Variable already defined!");
  Intr.Variables[N] = Variable(A.first, N + Intr.VariablePostfix);

  std::string S =
      A.first.str() + " " + Intr.Variables[N].getName() + " = " + A.second;

  return std::make_pair(Type::getVoid(), S);
}

/// Emit (name_replace "from", "to"): the intrinsic's name with the first
/// occurrence of "from" replaced by "to". It is a fatal error if the name
/// does not contain "from".
std::pair<Type, std::string>
Intrinsic::DagEmitter::emitDagNameReplace(DagInit *DI) {
  std::string S = Intr.Name;

  assert_with_loc(DI->getNumArgs() == 2, "name_replace requires 2 arguments!");
  std::string ToReplace = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
  std::string ReplaceWith = cast<StringInit>(DI->getArg(1))->getAsUnquotedString();

  size_t Idx = S.find(ToReplace);

  assert_with_loc(Idx != std::string::npos, "name should contain '" + ToReplace + "'!");
  S.replace(Idx, ToReplace.size(), ReplaceWith);

  return std::make_pair(Type::getVoid(), S);
}

/// Emit (literal "type", "value"): value verbatim, typed by the typedef name
/// given as the first argument.
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagLiteral(DagInit *DI){
  std::string Ty = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
  std::string Value = cast<StringInit>(DI->getArg(1))->getAsUnquotedString();
  return std::make_pair(Type::fromTypedefName(Ty), Value);
}

/// Evaluate one DAG argument.
///
/// A named argument resolves to the variable registered under that name; an
/// unnamed argument must itself be a DAG and is evaluated recursively.
std::pair<Type, std::string>
Intrinsic::DagEmitter::emitDagArg(Init *Arg, std::string ArgName) {
  if (ArgName.size()) {
    assert_with_loc(!Arg->isComplete(),
                    "Arguments must either be DAGs or names, not both!");
    assert_with_loc(Intr.Variables.find(ArgName) != Intr.Variables.end(),
                    "Variable not defined!");
    Variable &V = Intr.Variables[ArgName];
    return std::make_pair(V.getType(), V.getName());
  }

  assert(Arg && "Neither ArgName nor Arg?!");
  DagInit *DI = dyn_cast<DagInit>(Arg);
  assert_with_loc(DI, "Arguments must either be DAGs or names!");

  return emitDag(DI);
}

/// Generate the full text for this intrinsic: a little-endian definition, and
/// under "#else" a big-endian definition that reverses its arguments plus,
/// when NeededEarly is set, a "__noswap_" variant that does not.
///
/// \returns everything accumulated in OS.
std::string Intrinsic::generate() {
  // Little endian intrinsics are simple and don't require any argument
  // swapping.
  OS << "#ifdef __LITTLE_ENDIAN__\n";

  generateImpl(false, "", "");

  OS << "#else\n";

  // Big endian intrinsics are more complex. The user intended these
  // intrinsics to operate on a vector "as-if" loaded by (V)LDR,
  // but we load as-if (V)LD1. So we should swap all arguments and
  // swap the return value too.
  //
  // If we call sub-intrinsics, we should call a version that does
  // not re-swap the arguments!
  generateImpl(true, "", "__noswap_");

  // If we're needed early, create a non-swapping variant for
  // big-endian.
  if (NeededEarly) {
    generateImpl(false, "__noswap_", "__noswap_");
  }
  OS << "#endif\n\n";

  return OS.str();
}

void Intrinsic::generateImpl(bool ReverseArguments,
                             StringRef NamePrefix, StringRef CallPrefix) {
  CurrentRecord = R;

  // If we call a macro, our local variables may be corrupted due to
  // lack of proper lexical scoping. So, add a globally unique postfix
  // to every variable.
  //
  // indexBody() should have set up the Dependencies set by now.
1809 for (auto *I : Dependencies) 1810 if (I->UseMacro) { 1811 VariablePostfix = "_" + utostr(Emitter.getUniqueNumber()); 1812 break; 1813 } 1814 1815 initVariables(); 1816 1817 emitPrototype(NamePrefix); 1818 1819 if (IsUnavailable) { 1820 OS << " __attribute__((unavailable));"; 1821 } else { 1822 emitOpeningBrace(); 1823 emitShadowedArgs(); 1824 if (ReverseArguments) 1825 emitArgumentReversal(); 1826 emitBody(CallPrefix); 1827 if (ReverseArguments) 1828 emitReturnReversal(); 1829 emitReturn(); 1830 emitClosingBrace(); 1831 } 1832 OS << "\n"; 1833 1834 CurrentRecord = nullptr; 1835 } 1836 1837 void Intrinsic::indexBody() { 1838 CurrentRecord = R; 1839 1840 initVariables(); 1841 emitBody(""); 1842 OS.str(""); 1843 1844 CurrentRecord = nullptr; 1845 } 1846 1847 //===----------------------------------------------------------------------===// 1848 // NeonEmitter implementation 1849 //===----------------------------------------------------------------------===// 1850 1851 Intrinsic *NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types) { 1852 // First, look up the name in the intrinsic map. 1853 assert_with_loc(IntrinsicMap.find(Name.str()) != IntrinsicMap.end(), 1854 ("Intrinsic '" + Name + "' not found!").str()); 1855 std::vector<Intrinsic *> &V = IntrinsicMap[Name.str()]; 1856 std::vector<Intrinsic *> GoodVec; 1857 1858 // Create a string to print if we end up failing. 1859 std::string ErrMsg = "looking up intrinsic '" + Name.str() + "("; 1860 for (unsigned I = 0; I < Types.size(); ++I) { 1861 if (I != 0) 1862 ErrMsg += ", "; 1863 ErrMsg += Types[I].str(); 1864 } 1865 ErrMsg += ")'\n"; 1866 ErrMsg += "Available overloads:\n"; 1867 1868 // Now, look through each intrinsic implementation and see if the types are 1869 // compatible. 
1870 for (auto *I : V) { 1871 ErrMsg += " - " + I->getReturnType().str() + " " + I->getMangledName(); 1872 ErrMsg += "("; 1873 for (unsigned A = 0; A < I->getNumParams(); ++A) { 1874 if (A != 0) 1875 ErrMsg += ", "; 1876 ErrMsg += I->getParamType(A).str(); 1877 } 1878 ErrMsg += ")\n"; 1879 1880 if (I->getNumParams() != Types.size()) 1881 continue; 1882 1883 bool Good = true; 1884 for (unsigned Arg = 0; Arg < Types.size(); ++Arg) { 1885 if (I->getParamType(Arg) != Types[Arg]) { 1886 Good = false; 1887 break; 1888 } 1889 } 1890 if (Good) 1891 GoodVec.push_back(I); 1892 } 1893 1894 assert_with_loc(GoodVec.size() > 0, 1895 "No compatible intrinsic found - " + ErrMsg); 1896 assert_with_loc(GoodVec.size() == 1, "Multiple overloads found - " + ErrMsg); 1897 1898 return GoodVec.front(); 1899 } 1900 1901 void NeonEmitter::createIntrinsic(Record *R, 1902 SmallVectorImpl<Intrinsic *> &Out) { 1903 std::string Name = R->getValueAsString("Name"); 1904 std::string Proto = R->getValueAsString("Prototype"); 1905 std::string Types = R->getValueAsString("Types"); 1906 Record *OperationRec = R->getValueAsDef("Operation"); 1907 bool CartesianProductOfTypes = R->getValueAsBit("CartesianProductOfTypes"); 1908 bool BigEndianSafe = R->getValueAsBit("BigEndianSafe"); 1909 std::string Guard = R->getValueAsString("ArchGuard"); 1910 bool IsUnavailable = OperationRec->getValueAsBit("Unavailable"); 1911 1912 // Set the global current record. This allows assert_with_loc to produce 1913 // decent location information even when highly nested. 
1914 CurrentRecord = R; 1915 1916 ListInit *Body = OperationRec->getValueAsListInit("Ops"); 1917 1918 std::vector<TypeSpec> TypeSpecs = TypeSpec::fromTypeSpecs(Types); 1919 1920 ClassKind CK = ClassNone; 1921 if (R->getSuperClasses().size() >= 2) 1922 CK = ClassMap[R->getSuperClasses()[1]]; 1923 1924 std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs; 1925 for (auto TS : TypeSpecs) { 1926 if (CartesianProductOfTypes) { 1927 Type DefaultT(TS, 'd'); 1928 for (auto SrcTS : TypeSpecs) { 1929 Type DefaultSrcT(SrcTS, 'd'); 1930 if (TS == SrcTS || 1931 DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits()) 1932 continue; 1933 NewTypeSpecs.push_back(std::make_pair(TS, SrcTS)); 1934 } 1935 } else { 1936 NewTypeSpecs.push_back(std::make_pair(TS, TS)); 1937 } 1938 } 1939 1940 std::sort(NewTypeSpecs.begin(), NewTypeSpecs.end()); 1941 std::unique(NewTypeSpecs.begin(), NewTypeSpecs.end()); 1942 1943 for (auto &I : NewTypeSpecs) { 1944 Intrinsic *IT = new Intrinsic(R, Name, Proto, I.first, I.second, CK, Body, 1945 *this, Guard, IsUnavailable, BigEndianSafe); 1946 1947 IntrinsicMap[Name].push_back(IT); 1948 Out.push_back(IT); 1949 } 1950 1951 CurrentRecord = nullptr; 1952 } 1953 1954 /// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def 1955 /// declaration of builtins, checking for unique builtin declarations. 1956 void NeonEmitter::genBuiltinsDef(raw_ostream &OS, 1957 SmallVectorImpl<Intrinsic *> &Defs) { 1958 OS << "#ifdef GET_NEON_BUILTINS\n"; 1959 1960 // We only want to emit a builtin once, and we want to emit them in 1961 // alphabetical order, so use a std::set. 1962 std::set<std::string> Builtins; 1963 1964 for (auto *Def : Defs) { 1965 if (Def->hasBody()) 1966 continue; 1967 // Functions with 'a' (the splat code) in the type prototype should not get 1968 // their own builtin as they use the non-splat variant. 
1969 if (Def->hasSplat()) 1970 continue; 1971 1972 std::string S = "BUILTIN(__builtin_neon_" + Def->getMangledName() + ", \""; 1973 1974 S += Def->getBuiltinTypeStr(); 1975 S += "\", \"n\")"; 1976 1977 Builtins.insert(S); 1978 } 1979 1980 for (auto &S : Builtins) 1981 OS << S << "\n"; 1982 OS << "#endif\n\n"; 1983 } 1984 1985 /// Generate the ARM and AArch64 overloaded type checking code for 1986 /// SemaChecking.cpp, checking for unique builtin declarations. 1987 void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS, 1988 SmallVectorImpl<Intrinsic *> &Defs) { 1989 OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n"; 1990 1991 // We record each overload check line before emitting because subsequent Inst 1992 // definitions may extend the number of permitted types (i.e. augment the 1993 // Mask). Use std::map to avoid sorting the table by hash number. 1994 struct OverloadInfo { 1995 uint64_t Mask; 1996 int PtrArgNum; 1997 bool HasConstPtr; 1998 OverloadInfo() : Mask(0ULL), PtrArgNum(0), HasConstPtr(false) {} 1999 }; 2000 std::map<std::string, OverloadInfo> OverloadMap; 2001 2002 for (auto *Def : Defs) { 2003 // If the def has a body (that is, it has Operation DAGs), it won't call 2004 // __builtin_neon_* so we don't need to generate a definition for it. 2005 if (Def->hasBody()) 2006 continue; 2007 // Functions with 'a' (the splat code) in the type prototype should not get 2008 // their own builtin as they use the non-splat variant. 2009 if (Def->hasSplat()) 2010 continue; 2011 // Functions which have a scalar argument cannot be overloaded, no need to 2012 // check them if we are emitting the type checking code. 
2013 if (Def->protoHasScalar()) 2014 continue; 2015 2016 uint64_t Mask = 0ULL; 2017 Type Ty = Def->getReturnType(); 2018 if (Def->getProto()[0] == 'v' || Def->getProto()[0] == 'f' || 2019 Def->getProto()[0] == 'F') 2020 Ty = Def->getParamType(0); 2021 if (Ty.isPointer()) 2022 Ty = Def->getParamType(1); 2023 2024 Mask |= 1ULL << Ty.getNeonEnum(); 2025 2026 // Check if the function has a pointer or const pointer argument. 2027 std::string Proto = Def->getProto(); 2028 int PtrArgNum = -1; 2029 bool HasConstPtr = false; 2030 for (unsigned I = 0; I < Def->getNumParams(); ++I) { 2031 char ArgType = Proto[I + 1]; 2032 if (ArgType == 'c') { 2033 HasConstPtr = true; 2034 PtrArgNum = I; 2035 break; 2036 } 2037 if (ArgType == 'p') { 2038 PtrArgNum = I; 2039 break; 2040 } 2041 } 2042 // For sret builtins, adjust the pointer argument index. 2043 if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1) 2044 PtrArgNum += 1; 2045 2046 std::string Name = Def->getName(); 2047 // Omit type checking for the pointer arguments of vld1_lane, vld1_dup, 2048 // and vst1_lane intrinsics. Using a pointer to the vector element 2049 // type with one of those operations causes codegen to select an aligned 2050 // load/store instruction. If you want an unaligned operation, 2051 // the pointer argument needs to have less alignment than element type, 2052 // so just accept any pointer type. 
2053 if (Name == "vld1_lane" || Name == "vld1_dup" || Name == "vst1_lane") { 2054 PtrArgNum = -1; 2055 HasConstPtr = false; 2056 } 2057 2058 if (Mask) { 2059 std::string Name = Def->getMangledName(); 2060 OverloadMap.insert(std::make_pair(Name, OverloadInfo())); 2061 OverloadInfo &OI = OverloadMap[Name]; 2062 OI.Mask |= Mask; 2063 OI.PtrArgNum |= PtrArgNum; 2064 OI.HasConstPtr = HasConstPtr; 2065 } 2066 } 2067 2068 for (auto &I : OverloadMap) { 2069 OverloadInfo &OI = I.second; 2070 2071 OS << "case NEON::BI__builtin_neon_" << I.first << ": "; 2072 OS << "mask = 0x" << utohexstr(OI.Mask) << "ULL"; 2073 if (OI.PtrArgNum >= 0) 2074 OS << "; PtrArgNum = " << OI.PtrArgNum; 2075 if (OI.HasConstPtr) 2076 OS << "; HasConstPtr = true"; 2077 OS << "; break;\n"; 2078 } 2079 OS << "#endif\n\n"; 2080 } 2081 2082 void 2083 NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS, 2084 SmallVectorImpl<Intrinsic *> &Defs) { 2085 OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n"; 2086 2087 std::set<std::string> Emitted; 2088 2089 for (auto *Def : Defs) { 2090 if (Def->hasBody()) 2091 continue; 2092 // Functions with 'a' (the splat code) in the type prototype should not get 2093 // their own builtin as they use the non-splat variant. 2094 if (Def->hasSplat()) 2095 continue; 2096 // Functions which do not have an immediate do not ned to have range 2097 // checking 2098 // code emitted. 2099 if (!Def->hasImmediate()) 2100 continue; 2101 if (Emitted.find(Def->getMangledName()) != Emitted.end()) 2102 continue; 2103 2104 std::string LowerBound, UpperBound; 2105 2106 Record *R = Def->getRecord(); 2107 if (R->getValueAsBit("isVCVT_N")) { 2108 // VCVT between floating- and fixed-point values takes an immediate 2109 // in the range [1, 32) for f32 or [1, 64) for f64. 
2110 LowerBound = "1"; 2111 if (Def->getBaseType().getElementSizeInBits() == 32) 2112 UpperBound = "31"; 2113 else 2114 UpperBound = "63"; 2115 } else if (R->getValueAsBit("isScalarShift")) { 2116 // Right shifts have an 'r' in the name, left shifts do not. Convert 2117 // instructions have the same bounds and right shifts. 2118 if (Def->getName().find('r') != std::string::npos || 2119 Def->getName().find("cvt") != std::string::npos) 2120 LowerBound = "1"; 2121 2122 UpperBound = utostr(Def->getReturnType().getElementSizeInBits() - 1); 2123 } else if (R->getValueAsBit("isShift")) { 2124 // Builtins which are overloaded by type will need to have thier upper 2125 // bound computed at Sema time based on the type constant. 2126 2127 // Right shifts have an 'r' in the name, left shifts do not. 2128 if (Def->getName().find('r') != std::string::npos) 2129 LowerBound = "1"; 2130 UpperBound = "RFT(TV, true)"; 2131 } else if (Def->getClassKind(true) == ClassB) { 2132 // ClassB intrinsics have a type (and hence lane number) that is only 2133 // known at runtime. 2134 if (R->getValueAsBit("isLaneQ")) 2135 UpperBound = "RFT(TV, false, true)"; 2136 else 2137 UpperBound = "RFT(TV, false, false)"; 2138 } else { 2139 // The immediate generally refers to a lane in the preceding argument. 2140 assert(Def->getImmediateIdx() > 0); 2141 Type T = Def->getParamType(Def->getImmediateIdx() - 1); 2142 UpperBound = utostr(T.getNumElements() - 1); 2143 } 2144 2145 // Calculate the index of the immediate that should be range checked. 
2146 unsigned Idx = Def->getNumParams(); 2147 if (Def->hasImmediate()) 2148 Idx = Def->getGeneratedParamIdx(Def->getImmediateIdx()); 2149 2150 OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ": " 2151 << "i = " << Idx << ";"; 2152 if (LowerBound.size()) 2153 OS << " l = " << LowerBound << ";"; 2154 if (UpperBound.size()) 2155 OS << " u = " << UpperBound << ";"; 2156 OS << " break;\n"; 2157 2158 Emitted.insert(Def->getMangledName()); 2159 } 2160 2161 OS << "#endif\n\n"; 2162 } 2163 2164 /// runHeader - Emit a file with sections defining: 2165 /// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def. 2166 /// 2. the SemaChecking code for the type overload checking. 2167 /// 3. the SemaChecking code for validation of intrinsic immediate arguments. 2168 void NeonEmitter::runHeader(raw_ostream &OS) { 2169 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2170 2171 SmallVector<Intrinsic *, 128> Defs; 2172 for (auto *R : RV) 2173 createIntrinsic(R, Defs); 2174 2175 // Generate shared BuiltinsXXX.def 2176 genBuiltinsDef(OS, Defs); 2177 2178 // Generate ARM overloaded type checking code for SemaChecking.cpp 2179 genOverloadTypeCheckCode(OS, Defs); 2180 2181 // Generate ARM range checking code for shift/lane immediates. 2182 genIntrinsicRangeCheckCode(OS, Defs); 2183 } 2184 2185 /// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h 2186 /// is comprised of type definitions and function declarations. 
2187 void NeonEmitter::run(raw_ostream &OS) { 2188 OS << "/*===---- arm_neon.h - ARM Neon intrinsics " 2189 "------------------------------" 2190 "---===\n" 2191 " *\n" 2192 " * Permission is hereby granted, free of charge, to any person " 2193 "obtaining " 2194 "a copy\n" 2195 " * of this software and associated documentation files (the " 2196 "\"Software\")," 2197 " to deal\n" 2198 " * in the Software without restriction, including without limitation " 2199 "the " 2200 "rights\n" 2201 " * to use, copy, modify, merge, publish, distribute, sublicense, " 2202 "and/or sell\n" 2203 " * copies of the Software, and to permit persons to whom the Software " 2204 "is\n" 2205 " * furnished to do so, subject to the following conditions:\n" 2206 " *\n" 2207 " * The above copyright notice and this permission notice shall be " 2208 "included in\n" 2209 " * all copies or substantial portions of the Software.\n" 2210 " *\n" 2211 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, " 2212 "EXPRESS OR\n" 2213 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF " 2214 "MERCHANTABILITY,\n" 2215 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT " 2216 "SHALL THE\n" 2217 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR " 2218 "OTHER\n" 2219 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, " 2220 "ARISING FROM,\n" 2221 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER " 2222 "DEALINGS IN\n" 2223 " * THE SOFTWARE.\n" 2224 " *\n" 2225 " *===-----------------------------------------------------------------" 2226 "---" 2227 "---===\n" 2228 " */\n\n"; 2229 2230 OS << "#ifndef __ARM_NEON_H\n"; 2231 OS << "#define __ARM_NEON_H\n\n"; 2232 2233 OS << "#if !defined(__ARM_NEON)\n"; 2234 OS << "#error \"NEON support not enabled\"\n"; 2235 OS << "#endif\n\n"; 2236 2237 OS << "#include <stdint.h>\n\n"; 2238 2239 // Emit NEON-specific scalar typedefs. 
2240 OS << "typedef float float32_t;\n"; 2241 OS << "typedef __fp16 float16_t;\n"; 2242 2243 OS << "#ifdef __aarch64__\n"; 2244 OS << "typedef double float64_t;\n"; 2245 OS << "#endif\n\n"; 2246 2247 // For now, signedness of polynomial types depends on target 2248 OS << "#ifdef __aarch64__\n"; 2249 OS << "typedef uint8_t poly8_t;\n"; 2250 OS << "typedef uint16_t poly16_t;\n"; 2251 OS << "typedef uint64_t poly64_t;\n"; 2252 OS << "typedef __uint128_t poly128_t;\n"; 2253 OS << "#else\n"; 2254 OS << "typedef int8_t poly8_t;\n"; 2255 OS << "typedef int16_t poly16_t;\n"; 2256 OS << "#endif\n"; 2257 2258 // Emit Neon vector typedefs. 2259 std::string TypedefTypes( 2260 "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl"); 2261 std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes); 2262 2263 // Emit vector typedefs. 2264 bool InIfdef = false; 2265 for (auto &TS : TDTypeVec) { 2266 bool IsA64 = false; 2267 Type T(TS, 'd'); 2268 if (T.isDouble() || (T.isPoly() && T.isLong())) 2269 IsA64 = true; 2270 2271 if (InIfdef && !IsA64) { 2272 OS << "#endif\n"; 2273 InIfdef = false; 2274 } 2275 if (!InIfdef && IsA64) { 2276 OS << "#ifdef __aarch64__\n"; 2277 InIfdef = true; 2278 } 2279 2280 if (T.isPoly()) 2281 OS << "typedef __attribute__((neon_polyvector_type("; 2282 else 2283 OS << "typedef __attribute__((neon_vector_type("; 2284 2285 Type T2 = T; 2286 T2.makeScalar(); 2287 OS << utostr(T.getNumElements()) << "))) "; 2288 OS << T2.str(); 2289 OS << " " << T.str() << ";\n"; 2290 } 2291 if (InIfdef) 2292 OS << "#endif\n"; 2293 OS << "\n"; 2294 2295 // Emit struct typedefs. 
2296 InIfdef = false; 2297 for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) { 2298 for (auto &TS : TDTypeVec) { 2299 bool IsA64 = false; 2300 Type T(TS, 'd'); 2301 if (T.isDouble() || (T.isPoly() && T.isLong())) 2302 IsA64 = true; 2303 2304 if (InIfdef && !IsA64) { 2305 OS << "#endif\n"; 2306 InIfdef = false; 2307 } 2308 if (!InIfdef && IsA64) { 2309 OS << "#ifdef __aarch64__\n"; 2310 InIfdef = true; 2311 } 2312 2313 char M = '2' + (NumMembers - 2); 2314 Type VT(TS, M); 2315 OS << "typedef struct " << VT.str() << " {\n"; 2316 OS << " " << T.str() << " val"; 2317 OS << "[" << utostr(NumMembers) << "]"; 2318 OS << ";\n} "; 2319 OS << VT.str() << ";\n"; 2320 OS << "\n"; 2321 } 2322 } 2323 if (InIfdef) 2324 OS << "#endif\n"; 2325 OS << "\n"; 2326 2327 OS << "#define __ai static inline __attribute__((__always_inline__, " 2328 "__nodebug__))\n\n"; 2329 2330 SmallVector<Intrinsic *, 128> Defs; 2331 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2332 for (auto *R : RV) 2333 createIntrinsic(R, Defs); 2334 2335 for (auto *I : Defs) 2336 I->indexBody(); 2337 2338 std::stable_sort( 2339 Defs.begin(), Defs.end(), 2340 [](const Intrinsic *A, const Intrinsic *B) { return *A < *B; }); 2341 2342 // Only emit a def when its requirements have been met. 2343 // FIXME: This loop could be made faster, but it's fast enough for now. 2344 bool MadeProgress = true; 2345 std::string InGuard = ""; 2346 while (!Defs.empty() && MadeProgress) { 2347 MadeProgress = false; 2348 2349 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin(); 2350 I != Defs.end(); /*No step*/) { 2351 bool DependenciesSatisfied = true; 2352 for (auto *II : (*I)->getDependencies()) { 2353 if (std::find(Defs.begin(), Defs.end(), II) != Defs.end()) 2354 DependenciesSatisfied = false; 2355 } 2356 if (!DependenciesSatisfied) { 2357 // Try the next one. 2358 ++I; 2359 continue; 2360 } 2361 2362 // Emit #endif/#if pair if needed. 
2363 if ((*I)->getGuard() != InGuard) { 2364 if (!InGuard.empty()) 2365 OS << "#endif\n"; 2366 InGuard = (*I)->getGuard(); 2367 if (!InGuard.empty()) 2368 OS << "#if " << InGuard << "\n"; 2369 } 2370 2371 // Actually generate the intrinsic code. 2372 OS << (*I)->generate(); 2373 2374 MadeProgress = true; 2375 I = Defs.erase(I); 2376 } 2377 } 2378 assert(Defs.empty() && "Some requirements were not satisfied!"); 2379 if (!InGuard.empty()) 2380 OS << "#endif\n"; 2381 2382 OS << "\n"; 2383 OS << "#undef __ai\n\n"; 2384 OS << "#endif /* __ARM_NEON_H */\n"; 2385 } 2386 2387 namespace clang { 2388 void EmitNeon(RecordKeeper &Records, raw_ostream &OS) { 2389 NeonEmitter(Records).run(OS); 2390 } 2391 void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) { 2392 NeonEmitter(Records).runHeader(OS); 2393 } 2394 void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) { 2395 llvm_unreachable("Neon test generation no longer implemented!"); 2396 } 2397 } // End namespace clang 2398