1 //===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This tablegen backend is responsible for emitting arm_neon.h, which includes 11 // a declaration and definition of each function specified by the ARM NEON 12 // compiler interface. See ARM document DUI0348B. 13 // 14 // Each NEON instruction is implemented in terms of 1 or more functions which 15 // are suffixed with the element type of the input vectors. Functions may be 16 // implemented in terms of generic vector operations such as +, *, -, etc. or 17 // by calling a __builtin_-prefixed function which will be handled by clang's 18 // CodeGen library. 19 // 20 // Additional validation code can be generated by this file when runHeader() is 21 // called, rather than the normal run() entry point. A complete set of tests 22 // for Neon intrinsics can be generated by calling the runTests() entry point. 
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <string>
using namespace llvm;

/// OpKind - Selects how the body of an intrinsic is generated.  OpNone means
/// the body is a call to a __builtin_neon_ function (see GenBuiltin);
/// OpUnavailable produces a declaration marked unavailable (see GenIntrinsic);
/// every other value is expanded by GenOpString into an inline expression.
enum OpKind {
  OpNone,
  OpUnavailable,
  // Element-wise arithmetic; the "l"/"w" forms extend operands via vmovl.
  OpAdd,
  OpAddl,
  OpAddw,
  OpSub,
  OpSubl,
  OpSubw,
  OpMul,
  OpMla,
  OpMlal,
  OpMls,
  OpMlsl,
  // "N" forms duplicate a scalar operand across all elements.
  OpMulN,
  OpMlaN,
  OpMlsN,
  OpMlalN,
  OpMlslN,
  // "Lane" forms splat one lane of a vector operand with a shuffle.
  OpMulLane,
  OpMullLane,
  OpMlaLane,
  OpMlsLane,
  OpMlalLane,
  OpMlslLane,
  OpQDMullLane,
  OpQDMlalLane,
  OpQDMlslLane,
  OpQDMulhLane,
  OpQRDMulhLane,
  // Comparisons; the result is cast to the return type.
  OpEq,
  OpGe,
  OpLe,
  OpGt,
  OpLt,
  // Unary and bitwise operators.
  OpNeg,
  OpNot,
  OpAnd,
  OpOr,
  OpXor,
  OpAndNot,
  OpOrNot,
  // Casts, shuffles and element rearrangement.
  OpCast,
  OpConcat,
  OpDup,
  OpDupLane,
  OpHi,
  OpLo,
  OpSelect,
  OpRev16,
  OpRev32,
  OpRev64,
  OpReinterpret,
  // Absolute-difference forms built on top of vabd.
  OpAbdl,
  OpAba,
  OpAbal
};

/// ClassKind - Controls which suffix MangleName appends to an intrinsic name
/// and how BuiltinTypeString / GenBuiltin treat the arguments.
enum ClassKind {
  ClassNone,
  ClassI,           // generic integer instruction, e.g., "i8" suffix
  ClassS,           // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
  ClassW,           // width-specific instruction, e.g., "8" suffix
  ClassB            // bitcast arguments with enum argument to specify type
};

/// NeonTypeFlags - Flags to identify the types for overloaded Neon
/// builtins.  These must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
107 namespace { 108 class NeonTypeFlags { 109 enum { 110 EltTypeMask = 0xf, 111 UnsignedFlag = 0x10, 112 QuadFlag = 0x20 113 }; 114 uint32_t Flags; 115 116 public: 117 enum EltType { 118 Int8, 119 Int16, 120 Int32, 121 Int64, 122 Poly8, 123 Poly16, 124 Float16, 125 Float32 126 }; 127 128 NeonTypeFlags(unsigned F) : Flags(F) {} 129 NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad) : Flags(ET) { 130 if (IsUnsigned) 131 Flags |= UnsignedFlag; 132 if (IsQuad) 133 Flags |= QuadFlag; 134 } 135 136 uint32_t getFlags() const { return Flags; } 137 }; 138 } // end anonymous namespace 139 140 namespace { 141 class NeonEmitter { 142 RecordKeeper &Records; 143 StringMap<OpKind> OpMap; 144 DenseMap<Record*, ClassKind> ClassMap; 145 146 public: 147 NeonEmitter(RecordKeeper &R) : Records(R) { 148 OpMap["OP_NONE"] = OpNone; 149 OpMap["OP_UNAVAILABLE"] = OpUnavailable; 150 OpMap["OP_ADD"] = OpAdd; 151 OpMap["OP_ADDL"] = OpAddl; 152 OpMap["OP_ADDW"] = OpAddw; 153 OpMap["OP_SUB"] = OpSub; 154 OpMap["OP_SUBL"] = OpSubl; 155 OpMap["OP_SUBW"] = OpSubw; 156 OpMap["OP_MUL"] = OpMul; 157 OpMap["OP_MLA"] = OpMla; 158 OpMap["OP_MLAL"] = OpMlal; 159 OpMap["OP_MLS"] = OpMls; 160 OpMap["OP_MLSL"] = OpMlsl; 161 OpMap["OP_MUL_N"] = OpMulN; 162 OpMap["OP_MLA_N"] = OpMlaN; 163 OpMap["OP_MLS_N"] = OpMlsN; 164 OpMap["OP_MLAL_N"] = OpMlalN; 165 OpMap["OP_MLSL_N"] = OpMlslN; 166 OpMap["OP_MUL_LN"]= OpMulLane; 167 OpMap["OP_MULL_LN"] = OpMullLane; 168 OpMap["OP_MLA_LN"]= OpMlaLane; 169 OpMap["OP_MLS_LN"]= OpMlsLane; 170 OpMap["OP_MLAL_LN"] = OpMlalLane; 171 OpMap["OP_MLSL_LN"] = OpMlslLane; 172 OpMap["OP_QDMULL_LN"] = OpQDMullLane; 173 OpMap["OP_QDMLAL_LN"] = OpQDMlalLane; 174 OpMap["OP_QDMLSL_LN"] = OpQDMlslLane; 175 OpMap["OP_QDMULH_LN"] = OpQDMulhLane; 176 OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane; 177 OpMap["OP_EQ"] = OpEq; 178 OpMap["OP_GE"] = OpGe; 179 OpMap["OP_LE"] = OpLe; 180 OpMap["OP_GT"] = OpGt; 181 OpMap["OP_LT"] = OpLt; 182 OpMap["OP_NEG"] = OpNeg; 183 OpMap["OP_NOT"] = OpNot; 184 
OpMap["OP_AND"] = OpAnd; 185 OpMap["OP_OR"] = OpOr; 186 OpMap["OP_XOR"] = OpXor; 187 OpMap["OP_ANDN"] = OpAndNot; 188 OpMap["OP_ORN"] = OpOrNot; 189 OpMap["OP_CAST"] = OpCast; 190 OpMap["OP_CONC"] = OpConcat; 191 OpMap["OP_HI"] = OpHi; 192 OpMap["OP_LO"] = OpLo; 193 OpMap["OP_DUP"] = OpDup; 194 OpMap["OP_DUP_LN"] = OpDupLane; 195 OpMap["OP_SEL"] = OpSelect; 196 OpMap["OP_REV16"] = OpRev16; 197 OpMap["OP_REV32"] = OpRev32; 198 OpMap["OP_REV64"] = OpRev64; 199 OpMap["OP_REINT"] = OpReinterpret; 200 OpMap["OP_ABDL"] = OpAbdl; 201 OpMap["OP_ABA"] = OpAba; 202 OpMap["OP_ABAL"] = OpAbal; 203 204 Record *SI = R.getClass("SInst"); 205 Record *II = R.getClass("IInst"); 206 Record *WI = R.getClass("WInst"); 207 ClassMap[SI] = ClassS; 208 ClassMap[II] = ClassI; 209 ClassMap[WI] = ClassW; 210 } 211 212 // run - Emit arm_neon.h.inc 213 void run(raw_ostream &o); 214 215 // runHeader - Emit all the __builtin prototypes used in arm_neon.h 216 void runHeader(raw_ostream &o); 217 218 // runTests - Emit tests for all the Neon intrinsics. 219 void runTests(raw_ostream &o); 220 221 private: 222 void emitIntrinsic(raw_ostream &OS, Record *R); 223 }; 224 } // end anonymous namespace 225 226 /// ParseTypes - break down a string such as "fQf" into a vector of StringRefs, 227 /// which each StringRef representing a single type declared in the string. 228 /// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing 229 /// 2xfloat and 4xfloat respectively. 
230 static void ParseTypes(Record *r, std::string &s, 231 SmallVectorImpl<StringRef> &TV) { 232 const char *data = s.data(); 233 int len = 0; 234 235 for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) { 236 if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U') 237 continue; 238 239 switch (data[len]) { 240 case 'c': 241 case 's': 242 case 'i': 243 case 'l': 244 case 'h': 245 case 'f': 246 break; 247 default: 248 PrintFatalError(r->getLoc(), 249 "Unexpected letter: " + std::string(data + len, 1)); 250 } 251 TV.push_back(StringRef(data, len + 1)); 252 data += len + 1; 253 len = -1; 254 } 255 } 256 257 /// Widen - Convert a type code into the next wider type. char -> short, 258 /// short -> int, etc. 259 static char Widen(const char t) { 260 switch (t) { 261 case 'c': 262 return 's'; 263 case 's': 264 return 'i'; 265 case 'i': 266 return 'l'; 267 case 'h': 268 return 'f'; 269 default: 270 PrintFatalError("unhandled type in widen!"); 271 } 272 } 273 274 /// Narrow - Convert a type code into the next smaller type. short -> char, 275 /// float -> half float, etc. 276 static char Narrow(const char t) { 277 switch (t) { 278 case 's': 279 return 'c'; 280 case 'i': 281 return 's'; 282 case 'l': 283 return 'i'; 284 case 'f': 285 return 'h'; 286 default: 287 PrintFatalError("unhandled type in narrow!"); 288 } 289 } 290 291 /// For a particular StringRef, return the base type code, and whether it has 292 /// the quad-vector, polynomial, or unsigned modifiers set. 293 static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) { 294 unsigned off = 0; 295 296 // remember quad. 297 if (ty[off] == 'Q') { 298 quad = true; 299 ++off; 300 } 301 302 // remember poly. 303 if (ty[off] == 'P') { 304 poly = true; 305 ++off; 306 } 307 308 // remember unsigned. 309 if (ty[off] == 'U') { 310 usgn = true; 311 ++off; 312 } 313 314 // base type to get the type string for. 
315 return ty[off]; 316 } 317 318 /// ModType - Transform a type code and its modifiers based on a mod code. The 319 /// mod code definitions may be found at the top of arm_neon.td. 320 static char ModType(const char mod, char type, bool &quad, bool &poly, 321 bool &usgn, bool &scal, bool &cnst, bool &pntr) { 322 switch (mod) { 323 case 't': 324 if (poly) { 325 poly = false; 326 usgn = true; 327 } 328 break; 329 case 'u': 330 usgn = true; 331 poly = false; 332 if (type == 'f') 333 type = 'i'; 334 break; 335 case 'x': 336 usgn = false; 337 poly = false; 338 if (type == 'f') 339 type = 'i'; 340 break; 341 case 'f': 342 if (type == 'h') 343 quad = true; 344 type = 'f'; 345 usgn = false; 346 break; 347 case 'g': 348 quad = false; 349 break; 350 case 'w': 351 type = Widen(type); 352 quad = true; 353 break; 354 case 'n': 355 type = Widen(type); 356 break; 357 case 'i': 358 type = 'i'; 359 scal = true; 360 break; 361 case 'l': 362 type = 'l'; 363 scal = true; 364 usgn = true; 365 break; 366 case 's': 367 case 'a': 368 scal = true; 369 break; 370 case 'k': 371 quad = true; 372 break; 373 case 'c': 374 cnst = true; 375 case 'p': 376 pntr = true; 377 scal = true; 378 break; 379 case 'h': 380 type = Narrow(type); 381 if (type == 'h') 382 quad = false; 383 break; 384 case 'e': 385 type = Narrow(type); 386 usgn = true; 387 break; 388 default: 389 break; 390 } 391 return type; 392 } 393 394 /// TypeString - for a modifier and type, generate the name of the typedef for 395 /// that type. QUc -> uint8x8_t. 396 static std::string TypeString(const char mod, StringRef typestr) { 397 bool quad = false; 398 bool poly = false; 399 bool usgn = false; 400 bool scal = false; 401 bool cnst = false; 402 bool pntr = false; 403 404 if (mod == 'v') 405 return "void"; 406 if (mod == 'i') 407 return "int"; 408 409 // base type to get the type string for. 410 char type = ClassifyType(typestr, quad, poly, usgn); 411 412 // Based on the modifying character, change the type and width if necessary. 
413 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr); 414 415 SmallString<128> s; 416 417 if (usgn) 418 s.push_back('u'); 419 420 switch (type) { 421 case 'c': 422 s += poly ? "poly8" : "int8"; 423 if (scal) 424 break; 425 s += quad ? "x16" : "x8"; 426 break; 427 case 's': 428 s += poly ? "poly16" : "int16"; 429 if (scal) 430 break; 431 s += quad ? "x8" : "x4"; 432 break; 433 case 'i': 434 s += "int32"; 435 if (scal) 436 break; 437 s += quad ? "x4" : "x2"; 438 break; 439 case 'l': 440 s += "int64"; 441 if (scal) 442 break; 443 s += quad ? "x2" : "x1"; 444 break; 445 case 'h': 446 s += "float16"; 447 if (scal) 448 break; 449 s += quad ? "x8" : "x4"; 450 break; 451 case 'f': 452 s += "float32"; 453 if (scal) 454 break; 455 s += quad ? "x4" : "x2"; 456 break; 457 default: 458 PrintFatalError("unhandled type!"); 459 } 460 461 if (mod == '2') 462 s += "x2"; 463 if (mod == '3') 464 s += "x3"; 465 if (mod == '4') 466 s += "x4"; 467 468 // Append _t, finishing the type string typedef type. 469 s += "_t"; 470 471 if (cnst) 472 s += " const"; 473 474 if (pntr) 475 s += " *"; 476 477 return s.str(); 478 } 479 480 /// BuiltinTypeString - for a modifier and type, generate the clang 481 /// BuiltinsARM.def prototype code for the function. See the top of clang's 482 /// Builtins.def for a description of the type strings. 483 static std::string BuiltinTypeString(const char mod, StringRef typestr, 484 ClassKind ck, bool ret) { 485 bool quad = false; 486 bool poly = false; 487 bool usgn = false; 488 bool scal = false; 489 bool cnst = false; 490 bool pntr = false; 491 492 if (mod == 'v') 493 return "v"; // void 494 if (mod == 'i') 495 return "i"; // int 496 497 // base type to get the type string for. 498 char type = ClassifyType(typestr, quad, poly, usgn); 499 500 // Based on the modifying character, change the type and width if necessary. 501 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr); 502 503 // All pointers are void* pointers. 
Change type to 'v' now. 504 if (pntr) { 505 usgn = false; 506 poly = false; 507 type = 'v'; 508 } 509 // Treat half-float ('h') types as unsigned short ('s') types. 510 if (type == 'h') { 511 type = 's'; 512 usgn = true; 513 } 514 usgn = usgn | poly | ((ck == ClassI || ck == ClassW) && scal && type != 'f'); 515 516 if (scal) { 517 SmallString<128> s; 518 519 if (usgn) 520 s.push_back('U'); 521 else if (type == 'c') 522 s.push_back('S'); // make chars explicitly signed 523 524 if (type == 'l') // 64-bit long 525 s += "LLi"; 526 else 527 s.push_back(type); 528 529 if (cnst) 530 s.push_back('C'); 531 if (pntr) 532 s.push_back('*'); 533 return s.str(); 534 } 535 536 // Since the return value must be one type, return a vector type of the 537 // appropriate width which we will bitcast. An exception is made for 538 // returning structs of 2, 3, or 4 vectors which are returned in a sret-like 539 // fashion, storing them to a pointer arg. 540 if (ret) { 541 if (mod >= '2' && mod <= '4') 542 return "vv*"; // void result with void* first argument 543 if (mod == 'f' || (ck != ClassB && type == 'f')) 544 return quad ? "V4f" : "V2f"; 545 if (ck != ClassB && type == 's') 546 return quad ? "V8s" : "V4s"; 547 if (ck != ClassB && type == 'i') 548 return quad ? "V4i" : "V2i"; 549 if (ck != ClassB && type == 'l') 550 return quad ? "V2LLi" : "V1LLi"; 551 552 return quad ? "V16Sc" : "V8Sc"; 553 } 554 555 // Non-return array types are passed as individual vectors. 556 if (mod == '2') 557 return quad ? "V16ScV16Sc" : "V8ScV8Sc"; 558 if (mod == '3') 559 return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc"; 560 if (mod == '4') 561 return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc"; 562 563 if (mod == 'f' || (ck != ClassB && type == 'f')) 564 return quad ? "V4f" : "V2f"; 565 if (ck != ClassB && type == 's') 566 return quad ? "V8s" : "V4s"; 567 if (ck != ClassB && type == 'i') 568 return quad ? "V4i" : "V2i"; 569 if (ck != ClassB && type == 'l') 570 return quad ? 
"V2LLi" : "V1LLi"; 571 572 return quad ? "V16Sc" : "V8Sc"; 573 } 574 575 /// MangleName - Append a type or width suffix to a base neon function name, 576 /// and insert a 'q' in the appropriate location if the operation works on 577 /// 128b rather than 64b. E.g. turn "vst2_lane" into "vst2q_lane_f32", etc. 578 static std::string MangleName(const std::string &name, StringRef typestr, 579 ClassKind ck) { 580 if (name == "vcvt_f32_f16") 581 return name; 582 583 bool quad = false; 584 bool poly = false; 585 bool usgn = false; 586 char type = ClassifyType(typestr, quad, poly, usgn); 587 588 std::string s = name; 589 590 switch (type) { 591 case 'c': 592 switch (ck) { 593 case ClassS: s += poly ? "_p8" : usgn ? "_u8" : "_s8"; break; 594 case ClassI: s += "_i8"; break; 595 case ClassW: s += "_8"; break; 596 default: break; 597 } 598 break; 599 case 's': 600 switch (ck) { 601 case ClassS: s += poly ? "_p16" : usgn ? "_u16" : "_s16"; break; 602 case ClassI: s += "_i16"; break; 603 case ClassW: s += "_16"; break; 604 default: break; 605 } 606 break; 607 case 'i': 608 switch (ck) { 609 case ClassS: s += usgn ? "_u32" : "_s32"; break; 610 case ClassI: s += "_i32"; break; 611 case ClassW: s += "_32"; break; 612 default: break; 613 } 614 break; 615 case 'l': 616 switch (ck) { 617 case ClassS: s += usgn ? "_u64" : "_s64"; break; 618 case ClassI: s += "_i64"; break; 619 case ClassW: s += "_64"; break; 620 default: break; 621 } 622 break; 623 case 'h': 624 switch (ck) { 625 case ClassS: 626 case ClassI: s += "_f16"; break; 627 case ClassW: s += "_16"; break; 628 default: break; 629 } 630 break; 631 case 'f': 632 switch (ck) { 633 case ClassS: 634 case ClassI: s += "_f32"; break; 635 case ClassW: s += "_32"; break; 636 default: break; 637 } 638 break; 639 default: 640 PrintFatalError("unhandled type!"); 641 } 642 if (ck == ClassB) 643 s += "_v"; 644 645 // Insert a 'q' before the first '_' character so that it ends up before 646 // _lane or _n on vector-scalar operations. 
647 if (quad) { 648 size_t pos = s.find('_'); 649 s = s.insert(pos, "q"); 650 } 651 return s; 652 } 653 654 /// UseMacro - Examine the prototype string to determine if the intrinsic 655 /// should be defined as a preprocessor macro instead of an inline function. 656 static bool UseMacro(const std::string &proto) { 657 // If this builtin takes an immediate argument, we need to #define it rather 658 // than use a standard declaration, so that SemaChecking can range check 659 // the immediate passed by the user. 660 if (proto.find('i') != std::string::npos) 661 return true; 662 663 // Pointer arguments need to use macros to avoid hiding aligned attributes 664 // from the pointer type. 665 if (proto.find('p') != std::string::npos || 666 proto.find('c') != std::string::npos) 667 return true; 668 669 return false; 670 } 671 672 /// MacroArgUsedDirectly - Return true if argument i for an intrinsic that is 673 /// defined as a macro should be accessed directly instead of being first 674 /// assigned to a local temporary. 675 static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) { 676 // True for constant ints (i), pointers (p) and const pointers (c). 677 return (proto[i] == 'i' || proto[i] == 'p' || proto[i] == 'c'); 678 } 679 680 // Generate the string "(argtype a, argtype b, ...)" 681 static std::string GenArgs(const std::string &proto, StringRef typestr) { 682 bool define = UseMacro(proto); 683 char arg = 'a'; 684 685 std::string s; 686 s += "("; 687 688 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 689 if (define) { 690 // Some macro arguments are used directly instead of being assigned 691 // to local temporaries; prepend an underscore prefix to make their 692 // names consistent with the local temporaries. 
693 if (MacroArgUsedDirectly(proto, i)) 694 s += "__"; 695 } else { 696 s += TypeString(proto[i], typestr) + " __"; 697 } 698 s.push_back(arg); 699 if ((i + 1) < e) 700 s += ", "; 701 } 702 703 s += ")"; 704 return s; 705 } 706 707 // Macro arguments are not type-checked like inline function arguments, so 708 // assign them to local temporaries to get the right type checking. 709 static std::string GenMacroLocals(const std::string &proto, StringRef typestr) { 710 char arg = 'a'; 711 std::string s; 712 bool generatedLocal = false; 713 714 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 715 // Do not create a temporary for an immediate argument. 716 // That would defeat the whole point of using a macro! 717 if (MacroArgUsedDirectly(proto, i)) 718 continue; 719 generatedLocal = true; 720 721 s += TypeString(proto[i], typestr) + " __"; 722 s.push_back(arg); 723 s += " = ("; 724 s.push_back(arg); 725 s += "); "; 726 } 727 728 if (generatedLocal) 729 s += "\\\n "; 730 return s; 731 } 732 733 // Use the vmovl builtin to sign-extend or zero-extend a vector. 
734 static std::string Extend(StringRef typestr, const std::string &a) { 735 std::string s; 736 s = MangleName("vmovl", typestr, ClassS); 737 s += "(" + a + ")"; 738 return s; 739 } 740 741 static std::string Duplicate(unsigned nElts, StringRef typestr, 742 const std::string &a) { 743 std::string s; 744 745 s = "(" + TypeString('d', typestr) + "){ "; 746 for (unsigned i = 0; i != nElts; ++i) { 747 s += a; 748 if ((i + 1) < nElts) 749 s += ", "; 750 } 751 s += " }"; 752 753 return s; 754 } 755 756 static std::string SplatLane(unsigned nElts, const std::string &vec, 757 const std::string &lane) { 758 std::string s = "__builtin_shufflevector(" + vec + ", " + vec; 759 for (unsigned i = 0; i < nElts; ++i) 760 s += ", " + lane; 761 s += ")"; 762 return s; 763 } 764 765 static unsigned GetNumElements(StringRef typestr, bool &quad) { 766 quad = false; 767 bool dummy = false; 768 char type = ClassifyType(typestr, quad, dummy, dummy); 769 unsigned nElts = 0; 770 switch (type) { 771 case 'c': nElts = 8; break; 772 case 's': nElts = 4; break; 773 case 'i': nElts = 2; break; 774 case 'l': nElts = 1; break; 775 case 'h': nElts = 4; break; 776 case 'f': nElts = 2; break; 777 default: 778 PrintFatalError("unhandled type!"); 779 } 780 if (quad) nElts <<= 1; 781 return nElts; 782 } 783 784 // Generate the definition for this intrinsic, e.g. "a + b" for OpAdd. 
785 static std::string GenOpString(OpKind op, const std::string &proto, 786 StringRef typestr) { 787 bool quad; 788 unsigned nElts = GetNumElements(typestr, quad); 789 bool define = UseMacro(proto); 790 791 std::string ts = TypeString(proto[0], typestr); 792 std::string s; 793 if (!define) { 794 s = "return "; 795 } 796 797 switch(op) { 798 case OpAdd: 799 s += "__a + __b;"; 800 break; 801 case OpAddl: 802 s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";"; 803 break; 804 case OpAddw: 805 s += "__a + " + Extend(typestr, "__b") + ";"; 806 break; 807 case OpSub: 808 s += "__a - __b;"; 809 break; 810 case OpSubl: 811 s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";"; 812 break; 813 case OpSubw: 814 s += "__a - " + Extend(typestr, "__b") + ";"; 815 break; 816 case OpMulN: 817 s += "__a * " + Duplicate(nElts, typestr, "__b") + ";"; 818 break; 819 case OpMulLane: 820 s += "__a * " + SplatLane(nElts, "__b", "__c") + ";"; 821 break; 822 case OpMul: 823 s += "__a * __b;"; 824 break; 825 case OpMullLane: 826 s += MangleName("vmull", typestr, ClassS) + "(__a, " + 827 SplatLane(nElts, "__b", "__c") + ");"; 828 break; 829 case OpMlaN: 830 s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");"; 831 break; 832 case OpMlaLane: 833 s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");"; 834 break; 835 case OpMla: 836 s += "__a + (__b * __c);"; 837 break; 838 case OpMlalN: 839 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " + 840 Duplicate(nElts, typestr, "__c") + ");"; 841 break; 842 case OpMlalLane: 843 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " + 844 SplatLane(nElts, "__c", "__d") + ");"; 845 break; 846 case OpMlal: 847 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);"; 848 break; 849 case OpMlsN: 850 s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");"; 851 break; 852 case OpMlsLane: 853 s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");"; 854 break; 855 case 
OpMls: 856 s += "__a - (__b * __c);"; 857 break; 858 case OpMlslN: 859 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " + 860 Duplicate(nElts, typestr, "__c") + ");"; 861 break; 862 case OpMlslLane: 863 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " + 864 SplatLane(nElts, "__c", "__d") + ");"; 865 break; 866 case OpMlsl: 867 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);"; 868 break; 869 case OpQDMullLane: 870 s += MangleName("vqdmull", typestr, ClassS) + "(__a, " + 871 SplatLane(nElts, "__b", "__c") + ");"; 872 break; 873 case OpQDMlalLane: 874 s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " + 875 SplatLane(nElts, "__c", "__d") + ");"; 876 break; 877 case OpQDMlslLane: 878 s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " + 879 SplatLane(nElts, "__c", "__d") + ");"; 880 break; 881 case OpQDMulhLane: 882 s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " + 883 SplatLane(nElts, "__b", "__c") + ");"; 884 break; 885 case OpQRDMulhLane: 886 s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " + 887 SplatLane(nElts, "__b", "__c") + ");"; 888 break; 889 case OpEq: 890 s += "(" + ts + ")(__a == __b);"; 891 break; 892 case OpGe: 893 s += "(" + ts + ")(__a >= __b);"; 894 break; 895 case OpLe: 896 s += "(" + ts + ")(__a <= __b);"; 897 break; 898 case OpGt: 899 s += "(" + ts + ")(__a > __b);"; 900 break; 901 case OpLt: 902 s += "(" + ts + ")(__a < __b);"; 903 break; 904 case OpNeg: 905 s += " -__a;"; 906 break; 907 case OpNot: 908 s += " ~__a;"; 909 break; 910 case OpAnd: 911 s += "__a & __b;"; 912 break; 913 case OpOr: 914 s += "__a | __b;"; 915 break; 916 case OpXor: 917 s += "__a ^ __b;"; 918 break; 919 case OpAndNot: 920 s += "__a & ~__b;"; 921 break; 922 case OpOrNot: 923 s += "__a | ~__b;"; 924 break; 925 case OpCast: 926 s += "(" + ts + ")__a;"; 927 break; 928 case OpConcat: 929 s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a"; 930 s += ", (int64x1_t)__b, 0, 1);"; 931 break; 932 
case OpHi: 933 s += "(" + ts + 934 ")__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1);"; 935 break; 936 case OpLo: 937 s += "(" + ts + 938 ")__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0);"; 939 break; 940 case OpDup: 941 s += Duplicate(nElts, typestr, "__a") + ";"; 942 break; 943 case OpDupLane: 944 s += SplatLane(nElts, "__a", "__b") + ";"; 945 break; 946 case OpSelect: 947 // ((0 & 1) | (~0 & 2)) 948 s += "(" + ts + ")"; 949 ts = TypeString(proto[1], typestr); 950 s += "((__a & (" + ts + ")__b) | "; 951 s += "(~__a & (" + ts + ")__c));"; 952 break; 953 case OpRev16: 954 s += "__builtin_shufflevector(__a, __a"; 955 for (unsigned i = 2; i <= nElts; i += 2) 956 for (unsigned j = 0; j != 2; ++j) 957 s += ", " + utostr(i - j - 1); 958 s += ");"; 959 break; 960 case OpRev32: { 961 unsigned WordElts = nElts >> (1 + (int)quad); 962 s += "__builtin_shufflevector(__a, __a"; 963 for (unsigned i = WordElts; i <= nElts; i += WordElts) 964 for (unsigned j = 0; j != WordElts; ++j) 965 s += ", " + utostr(i - j - 1); 966 s += ");"; 967 break; 968 } 969 case OpRev64: { 970 unsigned DblWordElts = nElts >> (int)quad; 971 s += "__builtin_shufflevector(__a, __a"; 972 for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts) 973 for (unsigned j = 0; j != DblWordElts; ++j) 974 s += ", " + utostr(i - j - 1); 975 s += ");"; 976 break; 977 } 978 case OpAbdl: { 979 std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)"; 980 if (typestr[0] != 'U') { 981 // vabd results are always unsigned and must be zero-extended. 
982 std::string utype = "U" + typestr.str(); 983 s += "(" + TypeString(proto[0], typestr) + ")"; 984 abd = "(" + TypeString('d', utype) + ")" + abd; 985 s += Extend(utype, abd) + ";"; 986 } else { 987 s += Extend(typestr, abd) + ";"; 988 } 989 break; 990 } 991 case OpAba: 992 s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);"; 993 break; 994 case OpAbal: { 995 s += "__a + "; 996 std::string abd = MangleName("vabd", typestr, ClassS) + "(__b, __c)"; 997 if (typestr[0] != 'U') { 998 // vabd results are always unsigned and must be zero-extended. 999 std::string utype = "U" + typestr.str(); 1000 s += "(" + TypeString(proto[0], typestr) + ")"; 1001 abd = "(" + TypeString('d', utype) + ")" + abd; 1002 s += Extend(utype, abd) + ";"; 1003 } else { 1004 s += Extend(typestr, abd) + ";"; 1005 } 1006 break; 1007 } 1008 default: 1009 PrintFatalError("unknown OpKind!"); 1010 } 1011 return s; 1012 } 1013 1014 static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) { 1015 unsigned mod = proto[0]; 1016 1017 if (mod == 'v' || mod == 'f') 1018 mod = proto[1]; 1019 1020 bool quad = false; 1021 bool poly = false; 1022 bool usgn = false; 1023 bool scal = false; 1024 bool cnst = false; 1025 bool pntr = false; 1026 1027 // Base type to get the type string for. 1028 char type = ClassifyType(typestr, quad, poly, usgn); 1029 1030 // Based on the modifying character, change the type and width if necessary. 1031 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr); 1032 1033 NeonTypeFlags::EltType ET; 1034 switch (type) { 1035 case 'c': 1036 ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8; 1037 break; 1038 case 's': 1039 ET = poly ? 
NeonTypeFlags::Poly16 : NeonTypeFlags::Int16; 1040 break; 1041 case 'i': 1042 ET = NeonTypeFlags::Int32; 1043 break; 1044 case 'l': 1045 ET = NeonTypeFlags::Int64; 1046 break; 1047 case 'h': 1048 ET = NeonTypeFlags::Float16; 1049 break; 1050 case 'f': 1051 ET = NeonTypeFlags::Float32; 1052 break; 1053 default: 1054 PrintFatalError("unhandled type!"); 1055 } 1056 NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g'); 1057 return Flags.getFlags(); 1058 } 1059 1060 // Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a) 1061 static std::string GenBuiltin(const std::string &name, const std::string &proto, 1062 StringRef typestr, ClassKind ck) { 1063 std::string s; 1064 1065 // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit 1066 // sret-like argument. 1067 bool sret = (proto[0] >= '2' && proto[0] <= '4'); 1068 1069 bool define = UseMacro(proto); 1070 1071 // Check if the prototype has a scalar operand with the type of the vector 1072 // elements. If not, bitcasting the args will take care of arg checking. 1073 // The actual signedness etc. will be taken care of with special enums. 1074 if (proto.find('s') == std::string::npos) 1075 ck = ClassB; 1076 1077 if (proto[0] != 'v') { 1078 std::string ts = TypeString(proto[0], typestr); 1079 1080 if (define) { 1081 if (sret) 1082 s += ts + " r; "; 1083 else 1084 s += "(" + ts + ")"; 1085 } else if (sret) { 1086 s += ts + " r; "; 1087 } else { 1088 s += "return (" + ts + ")"; 1089 } 1090 } 1091 1092 bool splat = proto.find('a') != std::string::npos; 1093 1094 s += "__builtin_neon_"; 1095 if (splat) { 1096 // Call the non-splat builtin: chop off the "_n" suffix from the name. 1097 std::string vname(name, 0, name.size()-2); 1098 s += MangleName(vname, typestr, ck); 1099 } else { 1100 s += MangleName(name, typestr, ck); 1101 } 1102 s += "("; 1103 1104 // Pass the address of the return variable as the first argument to sret-like 1105 // builtins. 
1106 if (sret) 1107 s += "&r, "; 1108 1109 char arg = 'a'; 1110 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 1111 std::string args = std::string(&arg, 1); 1112 1113 // Use the local temporaries instead of the macro arguments. 1114 args = "__" + args; 1115 1116 bool argQuad = false; 1117 bool argPoly = false; 1118 bool argUsgn = false; 1119 bool argScalar = false; 1120 bool dummy = false; 1121 char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn); 1122 argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar, 1123 dummy, dummy); 1124 1125 // Handle multiple-vector values specially, emitting each subvector as an 1126 // argument to the __builtin. 1127 if (proto[i] >= '2' && proto[i] <= '4') { 1128 // Check if an explicit cast is needed. 1129 if (argType != 'c' || argPoly || argUsgn) 1130 args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args; 1131 1132 for (unsigned vi = 0, ve = proto[i] - '0'; vi != ve; ++vi) { 1133 s += args + ".val[" + utostr(vi) + "]"; 1134 if ((vi + 1) < ve) 1135 s += ", "; 1136 } 1137 if ((i + 1) < e) 1138 s += ", "; 1139 1140 continue; 1141 } 1142 1143 if (splat && (i + 1) == e) 1144 args = Duplicate(GetNumElements(typestr, argQuad), typestr, args); 1145 1146 // Check if an explicit cast is needed. 1147 if ((splat || !argScalar) && 1148 ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) { 1149 std::string argTypeStr = "c"; 1150 if (ck != ClassB) 1151 argTypeStr = argType; 1152 if (argQuad) 1153 argTypeStr = "Q" + argTypeStr; 1154 args = "(" + TypeString('d', argTypeStr) + ")" + args; 1155 } 1156 1157 s += args; 1158 if ((i + 1) < e) 1159 s += ", "; 1160 } 1161 1162 // Extra constant integer to hold type class enum for this function, e.g. 
s8 1163 if (ck == ClassB) 1164 s += ", " + utostr(GetNeonEnum(proto, typestr)); 1165 1166 s += ");"; 1167 1168 if (proto[0] != 'v' && sret) { 1169 if (define) 1170 s += " r;"; 1171 else 1172 s += " return r;"; 1173 } 1174 return s; 1175 } 1176 1177 static std::string GenBuiltinDef(const std::string &name, 1178 const std::string &proto, 1179 StringRef typestr, ClassKind ck) { 1180 std::string s("BUILTIN(__builtin_neon_"); 1181 1182 // If all types are the same size, bitcasting the args will take care 1183 // of arg checking. The actual signedness etc. will be taken care of with 1184 // special enums. 1185 if (proto.find('s') == std::string::npos) 1186 ck = ClassB; 1187 1188 s += MangleName(name, typestr, ck); 1189 s += ", \""; 1190 1191 for (unsigned i = 0, e = proto.size(); i != e; ++i) 1192 s += BuiltinTypeString(proto[i], typestr, ck, i == 0); 1193 1194 // Extra constant integer to hold type class enum for this function, e.g. s8 1195 if (ck == ClassB) 1196 s += "i"; 1197 1198 s += "\", \"n\")"; 1199 return s; 1200 } 1201 1202 static std::string GenIntrinsic(const std::string &name, 1203 const std::string &proto, 1204 StringRef outTypeStr, StringRef inTypeStr, 1205 OpKind kind, ClassKind classKind) { 1206 assert(!proto.empty() && ""); 1207 bool define = UseMacro(proto) && kind != OpUnavailable; 1208 std::string s; 1209 1210 // static always inline + return type 1211 if (define) 1212 s += "#define "; 1213 else 1214 s += "__ai " + TypeString(proto[0], outTypeStr) + " "; 1215 1216 // Function name with type suffix 1217 std::string mangledName = MangleName(name, outTypeStr, ClassS); 1218 if (outTypeStr != inTypeStr) { 1219 // If the input type is different (e.g., for vreinterpret), append a suffix 1220 // for the input type. String off a "Q" (quad) prefix so that MangleName 1221 // does not insert another "q" in the name. 1222 unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 
/// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h
/// is comprised of type definitions and function declarations.
///
/// Output order: license banner, include guard and __ARM_NEON__ check,
/// scalar typedefs, vector typedefs, multi-vector struct typedefs, the __ai
/// helper macro, every intrinsic derived from "Inst", then the closing
/// #undef / #endif.
///
/// \param OS  stream that receives the generated header text.
void NeonEmitter::run(raw_ostream &OS) {
  // License banner, emitted verbatim at the top of the generated header.
  OS <<
    "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
    "---===\n"
    " *\n"
    " * Permission is hereby granted, free of charge, to any person obtaining "
    "a copy\n"
    " * of this software and associated documentation files (the \"Software\"),"
    " to deal\n"
    " * in the Software without restriction, including without limitation the "
    "rights\n"
    " * to use, copy, modify, merge, publish, distribute, sublicense, "
    "and/or sell\n"
    " * copies of the Software, and to permit persons to whom the Software is\n"
    " * furnished to do so, subject to the following conditions:\n"
    " *\n"
    " * The above copyright notice and this permission notice shall be "
    "included in\n"
    " * all copies or substantial portions of the Software.\n"
    " *\n"
    " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
    "EXPRESS OR\n"
    " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
    "MERCHANTABILITY,\n"
    " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
    "SHALL THE\n"
    " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
    "OTHER\n"
    " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
    "ARISING FROM,\n"
    " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
    "DEALINGS IN\n"
    " * THE SOFTWARE.\n"
    " *\n"
    " *===--------------------------------------------------------------------"
    "---===\n"
    " */\n\n";

  // Include guard for the generated header.
  OS << "#ifndef __ARM_NEON_H\n";
  OS << "#define __ARM_NEON_H\n\n";

  // The generated header refuses to compile unless __ARM_NEON__ is defined.
  OS << "#ifndef __ARM_NEON__\n";
  OS << "#error \"NEON support not enabled\"\n";
  OS << "#endif\n\n";

  OS << "#include <stdint.h>\n\n";

  // Emit NEON-specific scalar typedefs.
  OS << "typedef float float32_t;\n";
  OS << "typedef int8_t poly8_t;\n";
  OS << "typedef int16_t poly16_t;\n";
  OS << "typedef uint16_t float16_t;\n";

  // Emit Neon vector typedefs.
  // TypedefTypes is a concatenation of type codes that ParseTypes splits into
  // one StringRef per type. The StringRefs presumably point into
  // TypedefTypes, so that string must outlive TDTypeVec -- TODO confirm
  // against ParseTypes.
  std::string TypedefTypes("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfPcQPcPsQPs");
  SmallVector<StringRef, 24> TDTypeVec;
  ParseTypes(0, TypedefTypes, TDTypeVec);

  // Emit vector typedefs.
  for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
    // Only quad and poly are consumed here; dummy is passed uninitialized and
    // is presumably only written by ClassifyType -- TODO confirm.
    bool dummy, quad = false, poly = false;
    (void) ClassifyType(TDTypeVec[i], quad, poly, dummy);
    if (poly)
      OS << "typedef __attribute__((neon_polyvector_type(";
    else
      OS << "typedef __attribute__((neon_vector_type(";

    unsigned nElts = GetNumElements(TDTypeVec[i], quad);
    OS << utostr(nElts) << "))) ";
    // Single-digit element counts get one extra space of padding.
    if (nElts < 10)
      OS << " ";

    // 's' and 'd' are TypeString modifiers: the 's' result is the typedef's
    // underlying type and the 'd' result is the new typedef name.
    OS << TypeString('s', TDTypeVec[i]);
    OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";
  }
  OS << "\n";

  // Emit struct typedefs.
  // For vi = 2, 3, 4 and every vector type, emit a struct holding an array
  // "val" of vi vectors. '0' + vi yields the modifier characters '2'..'4'.
  for (unsigned vi = 2; vi != 5; ++vi) {
    for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
      std::string ts = TypeString('d', TDTypeVec[i]);
      std::string vs = TypeString('0' + vi, TDTypeVec[i]);
      OS << "typedef struct " << vs << " {\n";
      OS << " " << ts << " val";
      OS << "[" << utostr(vi) << "]";
      OS << ";\n} ";
      OS << vs << ";\n\n";
    }
  }

  // __ai is the function prefix used by the non-macro intrinsics; it is
  // #undef'd again at the end of the header.
  OS<<"#define __ai static __attribute__((__always_inline__, __nodebug__))\n\n";

  std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");

  // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
  // intrinsics. (Some of the saturating multiply instructions are also
  // used to implement the corresponding "_lane" variants, but tablegen
  // sorts the records into alphabetical order so that the "_lane" variants
  // come after the intrinsics they use.)
  // NOTE(review): the results of getDef are passed on without a null check,
  // so this relies on VMOVL, VMULL and VABD being defined in the input.
  emitIntrinsic(OS, Records.getDef("VMOVL"));
  emitIntrinsic(OS, Records.getDef("VMULL"));
  emitIntrinsic(OS, Records.getDef("VABD"));

  // Emit everything else, skipping the three records already written above.
  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    if (R->getName() != "VMOVL" &&
        R->getName() != "VMULL" &&
        R->getName() != "VABD")
      emitIntrinsic(OS, R);
  }

  OS << "#undef __ai\n\n";
  OS << "#endif /* __ARM_NEON_H */\n";
}
1372 void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R) { 1373 std::string name = R->getValueAsString("Name"); 1374 std::string Proto = R->getValueAsString("Prototype"); 1375 std::string Types = R->getValueAsString("Types"); 1376 1377 SmallVector<StringRef, 16> TypeVec; 1378 ParseTypes(R, Types, TypeVec); 1379 1380 OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()]; 1381 1382 ClassKind classKind = ClassNone; 1383 if (R->getSuperClasses().size() >= 2) 1384 classKind = ClassMap[R->getSuperClasses()[1]]; 1385 if (classKind == ClassNone && kind == OpNone) 1386 PrintFatalError(R->getLoc(), "Builtin has no class kind"); 1387 1388 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 1389 if (kind == OpReinterpret) { 1390 bool outQuad = false; 1391 bool dummy = false; 1392 (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy); 1393 for (unsigned srcti = 0, srcte = TypeVec.size(); 1394 srcti != srcte; ++srcti) { 1395 bool inQuad = false; 1396 (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy); 1397 if (srcti == ti || inQuad != outQuad) 1398 continue; 1399 OS << GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti], 1400 OpCast, ClassS); 1401 } 1402 } else { 1403 OS << GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], 1404 kind, classKind); 1405 } 1406 } 1407 OS << "\n"; 1408 } 1409 1410 static unsigned RangeFromType(const char mod, StringRef typestr) { 1411 // base type to get the type string for. 
/// runHeader - Emit a file with sections defining:
/// 1. the NEON section of BuiltinsARM.def.
/// 2. the SemaChecking code for the type overload checking.
/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
///
/// Each section is wrapped in its own #ifdef (GET_NEON_BUILTINS,
/// GET_NEON_OVERLOAD_CHECK, GET_NEON_IMMEDIATE_CHECK). All three passes walk
/// the same list of "Inst" records and skip records whose operation is not
/// OpNone as well as records whose prototype contains the splat code 'a'.
///
/// \param OS  stream that receives the generated text.
void NeonEmitter::runHeader(raw_ostream &OS) {
  std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");

  // Used purely as a set of already-emitted strings; the mapped OpKind value
  // is never read. It is shared by passes 1 and 3: pass 1 inserts complete
  // BUILTIN(...) lines, pass 3 inserts mangled names.
  StringMap<OpKind> EmittedMap;

  // Generate BuiltinsARM.def for NEON
  OS << "#ifdef GET_NEON_BUILTINS\n";
  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    // Only records without a generic operation map onto a __builtin.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string Proto = R->getValueAsString("Prototype");

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    std::string Types = R->getValueAsString("Types");
    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    // The class kind is looked up from the second superclass below.
    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    std::string name = R->getValueAsString("Name");
    ClassKind ck = ClassMap[R->getSuperClasses()[1]];

    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      // Generate the BuiltinsARM.def declaration for this builtin, ensuring
      // that each unique BUILTIN() macro appears only once in the output
      // stream.
      std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
      if (EmittedMap.count(bd))
        continue;

      EmittedMap[bd] = OpNone;
      OS << bd << "\n";
    }
  }
  OS << "#endif\n\n";

  // Generate the overloaded type checking code for SemaChecking.cpp
  OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string Proto = R->getValueAsString("Prototype");
    std::string Types = R->getValueAsString("Types");
    std::string name = R->getValueAsString("Name");

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    // Functions which have a scalar argument cannot be overloaded, no need to
    // check them if we are emitting the type checking code.
    if (Proto.find('s') != std::string::npos)
      continue;

    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    // si / qi remember the index of the last non-quad / quad type seen; they
    // are used below only to mangle the case label. mask / qmask accumulate
    // one bit per type, at the bit position returned by GetNeonEnum.
    int si = -1, qi = -1;
    uint64_t mask = 0, qmask = 0;
    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      // Generate the switch case(s) for this builtin for the type validation.
      bool quad = false, poly = false, usgn = false;
      (void) ClassifyType(TypeVec[ti], quad, poly, usgn);

      if (quad) {
        qi = ti;
        qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
      } else {
        si = ti;
        mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
      }
    }

    // Check if the builtin function has a pointer or const pointer argument.
    // Only the first 'c' (const pointer) or 'p' (pointer) argument is
    // recorded; PtrArgNum is its zero-based argument index.
    int PtrArgNum = -1;
    bool HasConstPtr = false;
    for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
      char ArgType = Proto[arg];
      if (ArgType == 'c') {
        HasConstPtr = true;
        PtrArgNum = arg - 1;
        break;
      }
      if (ArgType == 'p') {
        PtrArgNum = arg - 1;
        break;
      }
    }
    // For sret builtins, adjust the pointer argument index.
    // (A return modifier of '2'..'4' means a struct of vectors is returned,
    // which adds one leading argument.)
    if (PtrArgNum >= 0 && (Proto[0] >= '2' && Proto[0] <= '4'))
      PtrArgNum += 1;

    // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
    // and vst1_lane intrinsics. Using a pointer to the vector element
    // type with one of those operations causes codegen to select an aligned
    // load/store instruction. If you want an unaligned operation,
    // the pointer argument needs to have less alignment than element type,
    // so just accept any pointer type.
    if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
      PtrArgNum = -1;
      HasConstPtr = false;
    }

    // One case for the non-quad types. mask is only modified together with
    // si, so si is a valid index whenever mask is nonzero.
    if (mask) {
      OS << "case ARM::BI__builtin_neon_"
         << MangleName(name, TypeVec[si], ClassB)
         << ": mask = " << "0x" << utohexstr(mask) << "ULL";
      if (PtrArgNum >= 0)
        OS << "; PtrArgNum = " << PtrArgNum;
      if (HasConstPtr)
        OS << "; HasConstPtr = true";
      OS << "; break;\n";
    }
    // And one case for the quad types, mangled from the last quad type seen.
    if (qmask) {
      OS << "case ARM::BI__builtin_neon_"
         << MangleName(name, TypeVec[qi], ClassB)
         << ": mask = " << "0x" << utohexstr(qmask) << "ULL";
      if (PtrArgNum >= 0)
        OS << "; PtrArgNum = " << PtrArgNum;
      if (HasConstPtr)
        OS << "; HasConstPtr = true";
      OS << "; break;\n";
    }
  }
  OS << "#endif\n\n";

  // Generate the intrinsic range checking code for shift/lane immediates.
  OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string name = R->getValueAsString("Name");
    std::string Proto = R->getValueAsString("Prototype");
    std::string Types = R->getValueAsString("Types");

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    // Functions which do not have an immediate do not need to have range
    // checking code emitted.
    size_t immPos = Proto.find('i');
    if (immPos == std::string::npos)
      continue;

    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    // ck is declared outside the type loop and may be overwritten with ClassB
    // inside it. The conditions that do so depend only on the record, not on
    // the type, so the value is the same for every iteration.
    ClassKind ck = ClassMap[R->getSuperClasses()[1]];

    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      std::string namestr, shiftstr, rangestr;

      if (R->getValueAsBit("isVCVT_N")) {
        // VCVT between floating- and fixed-point values takes an immediate
        // in the range 1 to 32.
        ck = ClassB;
        rangestr = "l = 1; u = 31"; // upper bound = l + u
      } else if (Proto.find('s') == std::string::npos) {
        // Builtins which are overloaded by type will need to have their upper
        // bound computed at Sema time based on the type constant.
        ck = ClassB;
        if (R->getValueAsBit("isShift")) {
          shiftstr = ", true";

          // Right shifts have an 'r' in the name, left shifts do not.
          if (name.find('r') != std::string::npos)
            rangestr = "l = 1; ";
        }
        rangestr += "u = RFT(TV" + shiftstr + ")";
      } else {
        // The immediate generally refers to a lane in the preceding argument.
        assert(immPos > 0 && "unexpected immediate operand");
        rangestr = "u = " + utostr(RangeFromType(Proto[immPos-1], TypeVec[ti]));
      }
      // Make sure cases appear only once by uniquing them in a string map.
      namestr = MangleName(name, TypeVec[ti], ck);
      if (EmittedMap.count(namestr))
        continue;
      EmittedMap[namestr] = OpNone;

      // Calculate the index of the immediate that should be range checked.
      unsigned immidx = 0;

      // Builtins that return a struct of multiple vectors have an extra
      // leading arg for the struct return.
      if (Proto[0] >= '2' && Proto[0] <= '4')
        ++immidx;

      // Add one to the index for each argument until we reach the immediate
      // to be checked. Structs of vectors are passed as multiple arguments.
      // The 'i' case shrinks the loop bound to ii + 1, which ends the loop
      // after the current iteration without counting the immediate itself.
      for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
        switch (Proto[ii]) {
          default: immidx += 1; break;
          case '2': immidx += 2; break;
          case '3': immidx += 3; break;
          case '4': immidx += 4; break;
          case 'i': ie = ii + 1; break;
        }
      }
      OS << "case ARM::BI__builtin_neon_" << MangleName(name, TypeVec[ti], ck)
         << ": i = " << immidx << "; " << rangestr << "; break;\n";
    }
  }
  OS << "#endif\n\n";
}
Structs of vectors are passed as multiple arguments. 1650 for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) { 1651 switch (Proto[ii]) { 1652 default: immidx += 1; break; 1653 case '2': immidx += 2; break; 1654 case '3': immidx += 3; break; 1655 case '4': immidx += 4; break; 1656 case 'i': ie = ii + 1; break; 1657 } 1658 } 1659 OS << "case ARM::BI__builtin_neon_" << MangleName(name, TypeVec[ti], ck) 1660 << ": i = " << immidx << "; " << rangestr << "; break;\n"; 1661 } 1662 } 1663 OS << "#endif\n\n"; 1664 } 1665 1666 /// GenTest - Write out a test for the intrinsic specified by the name and 1667 /// type strings, including the embedded patterns for FileCheck to match. 1668 static std::string GenTest(const std::string &name, 1669 const std::string &proto, 1670 StringRef outTypeStr, StringRef inTypeStr, 1671 bool isShift) { 1672 assert(!proto.empty() && ""); 1673 std::string s; 1674 1675 // Function name with type suffix 1676 std::string mangledName = MangleName(name, outTypeStr, ClassS); 1677 if (outTypeStr != inTypeStr) { 1678 // If the input type is different (e.g., for vreinterpret), append a suffix 1679 // for the input type. String off a "Q" (quad) prefix so that MangleName 1680 // does not insert another "q" in the name. 1681 unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0); 1682 StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff); 1683 mangledName = MangleName(mangledName, inTypeNoQuad, ClassS); 1684 } 1685 1686 // Emit the FileCheck patterns. 1687 s += "// CHECK: test_" + mangledName + "\n"; 1688 // s += "// CHECK: \n"; // FIXME: + expected instruction opcode. 1689 1690 // Emit the start of the test function. 1691 s += TypeString(proto[0], outTypeStr) + " test_" + mangledName + "("; 1692 char arg = 'a'; 1693 std::string comma; 1694 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 1695 // Do not create arguments for values that must be immediate constants. 
1696 if (proto[i] == 'i') 1697 continue; 1698 s += comma + TypeString(proto[i], inTypeStr) + " "; 1699 s.push_back(arg); 1700 comma = ", "; 1701 } 1702 s += ") {\n "; 1703 1704 if (proto[0] != 'v') 1705 s += "return "; 1706 s += mangledName + "("; 1707 arg = 'a'; 1708 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 1709 if (proto[i] == 'i') { 1710 // For immediate operands, test the maximum value. 1711 if (isShift) 1712 s += "1"; // FIXME 1713 else 1714 // The immediate generally refers to a lane in the preceding argument. 1715 s += utostr(RangeFromType(proto[i-1], inTypeStr)); 1716 } else { 1717 s.push_back(arg); 1718 } 1719 if ((i + 1) < e) 1720 s += ", "; 1721 } 1722 s += ");\n}\n\n"; 1723 return s; 1724 } 1725 1726 /// runTests - Write out a complete set of tests for all of the Neon 1727 /// intrinsics. 1728 void NeonEmitter::runTests(raw_ostream &OS) { 1729 OS << 1730 "// RUN: %clang_cc1 -triple thumbv7-apple-darwin \\\n" 1731 "// RUN: -target-cpu cortex-a9 -ffreestanding -S -o - %s | FileCheck %s\n" 1732 "\n" 1733 "#include <arm_neon.h>\n" 1734 "\n"; 1735 1736 std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst"); 1737 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 1738 Record *R = RV[i]; 1739 std::string name = R->getValueAsString("Name"); 1740 std::string Proto = R->getValueAsString("Prototype"); 1741 std::string Types = R->getValueAsString("Types"); 1742 bool isShift = R->getValueAsBit("isShift"); 1743 1744 SmallVector<StringRef, 16> TypeVec; 1745 ParseTypes(R, Types, TypeVec); 1746 1747 OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()]; 1748 if (kind == OpUnavailable) 1749 continue; 1750 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 1751 if (kind == OpReinterpret) { 1752 bool outQuad = false; 1753 bool dummy = false; 1754 (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy); 1755 for (unsigned srcti = 0, srcte = TypeVec.size(); 1756 srcti != srcte; ++srcti) { 1757 bool inQuad = false; 1758 
(void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy); 1759 if (srcti == ti || inQuad != outQuad) 1760 continue; 1761 OS << GenTest(name, Proto, TypeVec[ti], TypeVec[srcti], isShift); 1762 } 1763 } else { 1764 OS << GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift); 1765 } 1766 } 1767 OS << "\n"; 1768 } 1769 } 1770 1771 namespace clang { 1772 void EmitNeon(RecordKeeper &Records, raw_ostream &OS) { 1773 NeonEmitter(Records).run(OS); 1774 } 1775 void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) { 1776 NeonEmitter(Records).runHeader(OS); 1777 } 1778 void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) { 1779 NeonEmitter(Records).runTests(OS); 1780 } 1781 } // End namespace clang 1782