      1 //===- subzero/src/IceTargetLowering.cpp - Basic lowering implementation --===//
      2 //
      3 //                        The Subzero Code Generator
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 ///
     10 /// \file
     11 /// \brief Implements the skeleton of the TargetLowering class.
     12 ///
      13 /// Specifically, this invokes the appropriate lowering method for a given
      14 /// instruction kind and drives global register allocation. It also implements
     15 /// the non-deleted instruction iteration in LoweringContext.
     16 ///
     17 //===----------------------------------------------------------------------===//
     18 
     19 #include "IceTargetLowering.h"
     20 
     21 #include "IceBitVector.h"
     22 #include "IceCfg.h" // setError()
     23 #include "IceCfgNode.h"
     24 #include "IceGlobalContext.h"
     25 #include "IceGlobalInits.h"
     26 #include "IceInstVarIter.h"
     27 #include "IceLiveness.h"
     28 #include "IceOperand.h"
     29 #include "IceRegAlloc.h"
     30 
     31 #include <string>
     32 #include <vector>
     33 
     34 #define TARGET_LOWERING_CLASS_FOR(t) Target_##t
     35 
     36 // We prevent target-specific implementation details from leaking outside their
     37 // implementations by forbidding #include of target-specific header files
     38 // anywhere outside their own files. To create target-specific objects
     39 // (TargetLowering, TargetDataLowering, and TargetHeaderLowering) we use the
     40 // following named constructors. For reference, each target Foo needs to
      41 // implement the following named constructors, initializer, and helpers:
     42 //
     43 // namespace Foo {
     44 //   unique_ptr<Ice::TargetLowering> createTargetLowering(Ice::Cfg *);
     45 //   unique_ptr<Ice::TargetDataLowering>
     46 //       createTargetDataLowering(Ice::GlobalContext*);
     47 //   unique_ptr<Ice::TargetHeaderLowering>
     48 //       createTargetHeaderLowering(Ice::GlobalContext *);
     49 //   void staticInit(::Ice::GlobalContext *);
     50 // }
     51 #define SUBZERO_TARGET(X)                                                      \
     52   namespace X {                                                                \
     53   std::unique_ptr<::Ice::TargetLowering>                                       \
     54   createTargetLowering(::Ice::Cfg *Func);                                      \
     55   std::unique_ptr<::Ice::TargetDataLowering>                                   \
     56   createTargetDataLowering(::Ice::GlobalContext *Ctx);                         \
     57   std::unique_ptr<::Ice::TargetHeaderLowering>                                 \
     58   createTargetHeaderLowering(::Ice::GlobalContext *Ctx);                       \
     59   void staticInit(::Ice::GlobalContext *Ctx);                                  \
     60   bool shouldBePooled(const ::Ice::Constant *C);                               \
     61   ::Ice::Type getPointerType();                                                \
     62   } // end of namespace X
     63 #include "SZTargets.def"
     64 #undef SUBZERO_TARGET
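// Illustrative sketch only (a hypothetical target "Foo", not part of the
// build): given the declarations expanded above, the target's own translation
// unit would provide matching definitions along these lines.
//
//   namespace Foo {
//   std::unique_ptr<::Ice::TargetLowering>
//   createTargetLowering(::Ice::Cfg *Func) {
//     return ::Foo::TargetFoo::create(Func); // TargetFoo is hypothetical.
//   }
//   std::unique_ptr<::Ice::TargetDataLowering>
//   createTargetDataLowering(::Ice::GlobalContext *Ctx) { /* ... */ }
//   std::unique_ptr<::Ice::TargetHeaderLowering>
//   createTargetHeaderLowering(::Ice::GlobalContext *Ctx) { /* ... */ }
//   void staticInit(::Ice::GlobalContext *Ctx) { /* ... */ }
//   bool shouldBePooled(const ::Ice::Constant *C) { /* ... */ }
//   ::Ice::Type getPointerType() { /* ... */ }
//   } // end of namespace Foo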
     65 
     66 namespace Ice {
     67 void LoweringContext::init(CfgNode *N) {
     68   Node = N;
     69   End = getNode()->getInsts().end();
     70   rewind();
     71   advanceForward(Next);
     72 }
     73 
     74 void LoweringContext::rewind() {
     75   Begin = getNode()->getInsts().begin();
     76   Cur = Begin;
     77   skipDeleted(Cur);
     78   Next = Cur;
     79   availabilityReset();
     80 }
     81 
     82 void LoweringContext::insert(Inst *Instr) {
     83   getNode()->getInsts().insert(Next, Instr);
     84   LastInserted = Instr;
     85 }
     86 
     87 void LoweringContext::skipDeleted(InstList::iterator &I) const {
     88   while (I != End && I->isDeleted())
     89     ++I;
     90 }
     91 
     92 void LoweringContext::advanceForward(InstList::iterator &I) const {
     93   if (I != End) {
     94     ++I;
     95     skipDeleted(I);
     96   }
     97 }
     98 
     99 Inst *LoweringContext::getLastInserted() const {
    100   assert(LastInserted);
    101   return LastInserted;
    102 }
    103 
    104 void LoweringContext::availabilityReset() {
    105   LastDest = nullptr;
    106   LastSrc = nullptr;
    107 }
    108 
    109 void LoweringContext::availabilityUpdate() {
    110   availabilityReset();
    111   Inst *Instr = LastInserted;
    112   if (Instr == nullptr)
    113     return;
    114   if (!Instr->isVarAssign())
    115     return;
    116   // Since isVarAssign() is true, the source operand must be a Variable.
    117   LastDest = Instr->getDest();
    118   LastSrc = llvm::cast<Variable>(Instr->getSrc(0));
    119 }
    120 
    121 Variable *LoweringContext::availabilityGet(Operand *Src) const {
    122   assert(Src);
    123   if (Src == LastDest)
    124     return LastSrc;
    125   return nullptr;
    126 }
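
// Illustrative example of the availability bookkeeping above (not actual code
// from this file): if the last inserted instruction is an assignment
//   b = a   // isVarAssign() is true
// then availabilityUpdate() records LastDest == b and LastSrc == a, so a
// subsequent availabilityGet(b) returns a and the target lowering may use a
// directly instead of re-reading b.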
    127 
    128 namespace {
    129 
    130 void printRegisterSet(Ostream &Str, const SmallBitVector &Bitset,
    131                       std::function<std::string(RegNumT)> getRegName,
    132                       const std::string &LineIndentString) {
    133   constexpr size_t RegistersPerLine = 16;
    134   size_t Count = 0;
    135   for (RegNumT RegNum : RegNumBVIter(Bitset)) {
    136     if (Count == 0) {
    137       Str << LineIndentString;
    138     } else {
    139       Str << ",";
    140     }
    141     if (Count > 0 && Count % RegistersPerLine == 0)
    142       Str << "\n" << LineIndentString;
    143     ++Count;
    144     Str << getRegName(RegNum);
    145   }
    146   if (Count)
    147     Str << "\n";
    148 }
    149 
    150 // Splits "<class>:<reg>" into "<class>" plus "<reg>".  If there is no <class>
    151 // component, the result is "" plus "<reg>".
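// Example (illustrative register and class names):
//   std::string RClass, RName;
//   splitToClassAndName("i32:eax", &RClass, &RName); // RClass=="i32", RName=="eax"
//   splitToClassAndName("eax", &RClass, &RName);     // RClass=="",    RName=="eax"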
    152 void splitToClassAndName(const std::string &RegName, std::string *SplitRegClass,
    153                          std::string *SplitRegName) {
    154   constexpr const char Separator[] = ":";
    155   constexpr size_t SeparatorWidth = llvm::array_lengthof(Separator) - 1;
    156   size_t Pos = RegName.find(Separator);
    157   if (Pos == std::string::npos) {
    158     *SplitRegClass = "";
    159     *SplitRegName = RegName;
    160   } else {
    161     *SplitRegClass = RegName.substr(0, Pos);
    162     *SplitRegName = RegName.substr(Pos + SeparatorWidth);
    163   }
    164 }
    165 
    166 LLVM_ATTRIBUTE_NORETURN void badTargetFatalError(TargetArch Target) {
    167   llvm::report_fatal_error("Unsupported target: " +
    168                            std::string(targetArchString(Target)));
    169 }
    170 
    171 } // end of anonymous namespace
    172 
    173 void TargetLowering::filterTypeToRegisterSet(
    174     GlobalContext *Ctx, int32_t NumRegs, SmallBitVector TypeToRegisterSet[],
    175     size_t TypeToRegisterSetSize,
    176     std::function<std::string(RegNumT)> getRegName,
    177     std::function<const char *(RegClass)> getRegClassName) {
    178   std::vector<SmallBitVector> UseSet(TypeToRegisterSetSize,
    179                                      SmallBitVector(NumRegs));
    180   std::vector<SmallBitVector> ExcludeSet(TypeToRegisterSetSize,
    181                                          SmallBitVector(NumRegs));
    182 
    183   std::unordered_map<std::string, RegNumT> RegNameToIndex;
    184   for (int32_t RegIndex = 0; RegIndex < NumRegs; ++RegIndex) {
    185     const auto RegNum = RegNumT::fromInt(RegIndex);
    186     RegNameToIndex[getRegName(RegNum)] = RegNum;
    187   }
    188 
    189   std::vector<std::string> BadRegNames;
    190 
    191   // The processRegList function iterates across the RegNames vector.  Each
    192   // entry in the vector is a string of the form "<reg>" or "<class>:<reg>".
     193   // The register class and register number are computed, and the corresponding
     194   // bit is copied from TypeToRegisterSet[][] into RegSet[][].  If "<class>:" is
     195   // missing, then the bit is copied for all classes.
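  //
  // For example (illustrative names), the entries {"eax", "i32:ecx"} would
  // copy the bit for eax in every class and the bit for ecx only in the class
  // whose getRegClassName() result is "i32".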
    196   auto processRegList = [&](const std::vector<std::string> &RegNames,
    197                             std::vector<SmallBitVector> &RegSet) {
    198     for (const std::string &RegClassAndName : RegNames) {
    199       std::string RClass;
    200       std::string RName;
    201       splitToClassAndName(RegClassAndName, &RClass, &RName);
    202       if (!RegNameToIndex.count(RName)) {
    203         BadRegNames.push_back(RName);
    204         continue;
    205       }
    206       const int32_t RegIndex = RegNameToIndex.at(RName);
    207       for (SizeT TypeIndex = 0; TypeIndex < TypeToRegisterSetSize;
    208            ++TypeIndex) {
    209         if (RClass.empty() ||
    210             RClass == getRegClassName(static_cast<RegClass>(TypeIndex))) {
    211           RegSet[TypeIndex][RegIndex] = TypeToRegisterSet[TypeIndex][RegIndex];
    212         }
    213       }
    214     }
    215   };
    216 
    217   processRegList(getFlags().getUseRestrictedRegisters(), UseSet);
    218   processRegList(getFlags().getExcludedRegisters(), ExcludeSet);
    219 
    220   if (!BadRegNames.empty()) {
    221     std::string Buffer;
    222     llvm::raw_string_ostream StrBuf(Buffer);
    223     StrBuf << "Unrecognized use/exclude registers:";
    224     for (const auto &RegName : BadRegNames)
    225       StrBuf << " " << RegName;
    226     llvm::report_fatal_error(StrBuf.str());
    227   }
    228 
    229   // Apply filters.
    230   for (size_t TypeIndex = 0; TypeIndex < TypeToRegisterSetSize; ++TypeIndex) {
    231     SmallBitVector *TypeBitSet = &TypeToRegisterSet[TypeIndex];
    232     SmallBitVector *UseBitSet = &UseSet[TypeIndex];
    233     SmallBitVector *ExcludeBitSet = &ExcludeSet[TypeIndex];
    234     if (UseBitSet->any())
    235       *TypeBitSet = *UseBitSet;
    236     (*TypeBitSet).reset(*ExcludeBitSet);
    237   }
    238 
    239   // Display filtered register sets, if requested.
    240   if (BuildDefs::dump() && NumRegs &&
    241       (getFlags().getVerbose() & IceV_AvailableRegs)) {
    242     Ostream &Str = Ctx->getStrDump();
    243     const std::string Indent = "  ";
    244     const std::string IndentTwice = Indent + Indent;
    245     Str << "Registers available for register allocation:\n";
    246     for (size_t TypeIndex = 0; TypeIndex < TypeToRegisterSetSize; ++TypeIndex) {
    247       Str << Indent << getRegClassName(static_cast<RegClass>(TypeIndex))
    248           << ":\n";
    249       printRegisterSet(Str, TypeToRegisterSet[TypeIndex], getRegName,
    250                        IndentTwice);
    251     }
    252     Str << "\n";
    253   }
    254 }
    255 
    256 std::unique_ptr<TargetLowering>
    257 TargetLowering::createLowering(TargetArch Target, Cfg *Func) {
    258   switch (Target) {
    259   default:
    260     badTargetFatalError(Target);
    261 #define SUBZERO_TARGET(X)                                                      \
    262   case TARGET_LOWERING_CLASS_FOR(X):                                           \
    263     return ::X::createTargetLowering(Func);
    264 #include "SZTargets.def"
    265 #undef SUBZERO_TARGET
    266   }
    267 }
    268 
    269 void TargetLowering::staticInit(GlobalContext *Ctx) {
    270   const TargetArch Target = getFlags().getTargetArch();
    271   // Call the specified target's static initializer.
    272   switch (Target) {
    273   default:
    274     badTargetFatalError(Target);
    275 #define SUBZERO_TARGET(X)                                                      \
    276   case TARGET_LOWERING_CLASS_FOR(X): {                                         \
    277     static bool InitGuard##X = false;                                          \
    278     if (InitGuard##X) {                                                        \
    279       return;                                                                  \
    280     }                                                                          \
    281     InitGuard##X = true;                                                       \
    282     ::X::staticInit(Ctx);                                                      \
    283   } break;
    284 #include "SZTargets.def"
    285 #undef SUBZERO_TARGET
    286   }
    287 }
    288 
    289 bool TargetLowering::shouldBePooled(const Constant *C) {
    290   const TargetArch Target = getFlags().getTargetArch();
    291   switch (Target) {
    292   default:
    293     return false;
    294 #define SUBZERO_TARGET(X)                                                      \
    295   case TARGET_LOWERING_CLASS_FOR(X):                                           \
    296     return ::X::shouldBePooled(C);
    297 #include "SZTargets.def"
    298 #undef SUBZERO_TARGET
    299   }
    300 }
    301 
    302 ::Ice::Type TargetLowering::getPointerType() {
    303   const TargetArch Target = getFlags().getTargetArch();
    304   switch (Target) {
    305   default:
    306     return ::Ice::IceType_void;
    307 #define SUBZERO_TARGET(X)                                                      \
    308   case TARGET_LOWERING_CLASS_FOR(X):                                           \
    309     return ::X::getPointerType();
    310 #include "SZTargets.def"
    311 #undef SUBZERO_TARGET
    312   }
    313 }
    314 
    315 TargetLowering::SandboxType
    316 TargetLowering::determineSandboxTypeFromFlags(const ClFlags &Flags) {
    317   assert(!Flags.getUseSandboxing() || !Flags.getUseNonsfi());
    318   if (Flags.getUseNonsfi()) {
    319     return TargetLowering::ST_Nonsfi;
    320   }
    321   if (Flags.getUseSandboxing()) {
    322     return TargetLowering::ST_NaCl;
    323   }
    324   return TargetLowering::ST_None;
    325 }
    326 
    327 TargetLowering::TargetLowering(Cfg *Func)
    328     : Func(Func), Ctx(Func->getContext()),
    329       SandboxingType(determineSandboxTypeFromFlags(getFlags())) {}
    330 
    331 TargetLowering::AutoBundle::AutoBundle(TargetLowering *Target,
    332                                        InstBundleLock::Option Option)
    333     : Target(Target), NeedSandboxing(getFlags().getUseSandboxing()) {
    334   assert(!Target->AutoBundling);
    335   Target->AutoBundling = true;
    336   if (NeedSandboxing) {
    337     Target->_bundle_lock(Option);
    338   }
    339 }
    340 
    341 TargetLowering::AutoBundle::~AutoBundle() {
    342   assert(Target->AutoBundling);
    343   Target->AutoBundling = false;
    344   if (NeedSandboxing) {
    345     Target->_bundle_unlock();
    346   }
    347 }
    348 
    349 void TargetLowering::genTargetHelperCalls() {
    350   TimerMarker T(TimerStack::TT_genHelpers, Func);
    351   Utils::BoolFlagSaver _(GeneratingTargetHelpers, true);
    352   for (CfgNode *Node : Func->getNodes()) {
    353     Context.init(Node);
    354     while (!Context.atEnd()) {
    355       PostIncrLoweringContext _(Context);
    356       genTargetHelperCallFor(iteratorToInst(Context.getCur()));
    357     }
    358   }
    359 }
    360 
    361 void TargetLowering::doAddressOpt() {
    362   doAddressOptOther();
    363   if (llvm::isa<InstLoad>(*Context.getCur()))
    364     doAddressOptLoad();
    365   else if (llvm::isa<InstStore>(*Context.getCur()))
    366     doAddressOptStore();
    367   else if (auto *Intrinsic =
    368                llvm::dyn_cast<InstIntrinsicCall>(&*Context.getCur())) {
    369     if (Intrinsic->getIntrinsicInfo().ID == Intrinsics::LoadSubVector)
    370       doAddressOptLoadSubVector();
    371     else if (Intrinsic->getIntrinsicInfo().ID == Intrinsics::StoreSubVector)
    372       doAddressOptStoreSubVector();
    373   }
    374   Context.advanceCur();
    375   Context.advanceNext();
    376 }
    377 
    378 void TargetLowering::doNopInsertion(RandomNumberGenerator &RNG) {
    379   Inst *I = iteratorToInst(Context.getCur());
    380   bool ShouldSkip = llvm::isa<InstFakeUse>(I) || llvm::isa<InstFakeDef>(I) ||
    381                     llvm::isa<InstFakeKill>(I) || I->isRedundantAssign() ||
    382                     I->isDeleted();
    383   if (!ShouldSkip) {
    384     int Probability = getFlags().getNopProbabilityAsPercentage();
    385     for (int I = 0; I < getFlags().getMaxNopsPerInstruction(); ++I) {
    386       randomlyInsertNop(Probability / 100.0, RNG);
    387     }
    388   }
    389 }
    390 
    391 // Lowers a single instruction according to the information in Context, by
    392 // checking the Context.Cur instruction kind and calling the appropriate
    393 // lowering method. The lowering method should insert target instructions at
     394 // the Context.Next insertion point, and should not delete the Context.Cur
    395 // instruction or advance Context.Cur.
    396 //
    397 // The lowering method may look ahead in the instruction stream as desired, and
    398 // lower additional instructions in conjunction with the current one, for
    399 // example fusing a compare and branch. If it does, it should advance
    400 // Context.Cur to point to the next non-deleted instruction to process, and it
    401 // should delete any additional instructions it consumes.
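//
// Illustrative example (target-independent sketch): when lowering
//   t = icmp eq a, b
//   br t, label %T, label %F
// a target may emit one fused compare-and-branch sequence while lowering the
// icmp; it then deletes the br and advances Context.Cur past it.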
    402 void TargetLowering::lower() {
    403   assert(!Context.atEnd());
    404   Inst *Instr = iteratorToInst(Context.getCur());
    405   Instr->deleteIfDead();
    406   if (!Instr->isDeleted() && !llvm::isa<InstFakeDef>(Instr) &&
    407       !llvm::isa<InstFakeUse>(Instr)) {
    408     // Mark the current instruction as deleted before lowering, otherwise the
    409     // Dest variable will likely get marked as non-SSA. See
     410     // Variable::setDefinition(). However, just pass through FakeDef and
    411     // FakeUse instructions that might have been inserted prior to lowering.
    412     Instr->setDeleted();
    413     switch (Instr->getKind()) {
    414     case Inst::Alloca:
    415       lowerAlloca(llvm::cast<InstAlloca>(Instr));
    416       break;
    417     case Inst::Arithmetic:
    418       lowerArithmetic(llvm::cast<InstArithmetic>(Instr));
    419       break;
    420     case Inst::Assign:
    421       lowerAssign(llvm::cast<InstAssign>(Instr));
    422       break;
    423     case Inst::Br:
    424       lowerBr(llvm::cast<InstBr>(Instr));
    425       break;
    426     case Inst::Breakpoint:
    427       lowerBreakpoint(llvm::cast<InstBreakpoint>(Instr));
    428       break;
    429     case Inst::Call:
    430       lowerCall(llvm::cast<InstCall>(Instr));
    431       break;
    432     case Inst::Cast:
    433       lowerCast(llvm::cast<InstCast>(Instr));
    434       break;
    435     case Inst::ExtractElement:
    436       lowerExtractElement(llvm::cast<InstExtractElement>(Instr));
    437       break;
    438     case Inst::Fcmp:
    439       lowerFcmp(llvm::cast<InstFcmp>(Instr));
    440       break;
    441     case Inst::Icmp:
    442       lowerIcmp(llvm::cast<InstIcmp>(Instr));
    443       break;
    444     case Inst::InsertElement:
    445       lowerInsertElement(llvm::cast<InstInsertElement>(Instr));
    446       break;
    447     case Inst::IntrinsicCall: {
    448       auto *Call = llvm::cast<InstIntrinsicCall>(Instr);
    449       if (Call->getIntrinsicInfo().ReturnsTwice)
    450         setCallsReturnsTwice(true);
    451       lowerIntrinsicCall(Call);
    452       break;
    453     }
    454     case Inst::Load:
    455       lowerLoad(llvm::cast<InstLoad>(Instr));
    456       break;
    457     case Inst::Phi:
    458       lowerPhi(llvm::cast<InstPhi>(Instr));
    459       break;
    460     case Inst::Ret:
    461       lowerRet(llvm::cast<InstRet>(Instr));
    462       break;
    463     case Inst::Select:
    464       lowerSelect(llvm::cast<InstSelect>(Instr));
    465       break;
    466     case Inst::ShuffleVector:
    467       lowerShuffleVector(llvm::cast<InstShuffleVector>(Instr));
    468       break;
    469     case Inst::Store:
    470       lowerStore(llvm::cast<InstStore>(Instr));
    471       break;
    472     case Inst::Switch:
    473       lowerSwitch(llvm::cast<InstSwitch>(Instr));
    474       break;
    475     case Inst::Unreachable:
    476       lowerUnreachable(llvm::cast<InstUnreachable>(Instr));
    477       break;
    478     default:
    479       lowerOther(Instr);
    480       break;
    481     }
    482 
    483     postLower();
    484   }
    485 
    486   Context.advanceCur();
    487   Context.advanceNext();
    488 }
    489 
    490 void TargetLowering::lowerInst(CfgNode *Node, InstList::iterator Next,
    491                                InstHighLevel *Instr) {
    492   // TODO(stichnot): Consider modifying the design/implementation to avoid
    493   // multiple init() calls when using lowerInst() to lower several instructions
    494   // in the same node.
    495   Context.init(Node);
    496   Context.setNext(Next);
    497   Context.insert(Instr);
    498   --Next;
    499   assert(iteratorToInst(Next) == Instr);
    500   Context.setCur(Next);
    501   lower();
    502 }
    503 
    504 void TargetLowering::lowerOther(const Inst *Instr) {
    505   (void)Instr;
    506   Func->setError("Can't lower unsupported instruction type");
    507 }
    508 
    509 // Drives register allocation, allowing all physical registers (except perhaps
    510 // for the frame pointer) to be allocated. This set of registers could
    511 // potentially be parameterized if we want to restrict registers e.g. for
    512 // performance testing.
    513 void TargetLowering::regAlloc(RegAllocKind Kind) {
    514   TimerMarker T(TimerStack::TT_regAlloc, Func);
    515   LinearScan LinearScan(Func);
    516   RegSetMask RegInclude = RegSet_None;
    517   RegSetMask RegExclude = RegSet_None;
    518   RegInclude |= RegSet_CallerSave;
    519   RegInclude |= RegSet_CalleeSave;
    520   if (hasFramePointer())
    521     RegExclude |= RegSet_FramePointer;
    522   SmallBitVector RegMask = getRegisterSet(RegInclude, RegExclude);
    523   bool Repeat = (Kind == RAK_Global && getFlags().getRepeatRegAlloc());
    524   CfgSet<Variable *> EmptySet;
    525   do {
    526     LinearScan.init(Kind, EmptySet);
    527     LinearScan.scan(RegMask, getFlags().getRandomizeRegisterAllocation());
    528     if (!LinearScan.hasEvictions())
    529       Repeat = false;
    530     Kind = RAK_SecondChance;
    531   } while (Repeat);
    532   // TODO(stichnot): Run the register allocator one more time to do stack slot
    533   // coalescing.  The idea would be to initialize the Unhandled list with the
    534   // set of Variables that have no register and a non-empty live range, and
    535   // model an infinite number of registers.  Maybe use the register aliasing
    536   // mechanism to get better packing of narrower slots.
    537   if (getFlags().getSplitGlobalVars())
    538     postRegallocSplitting(RegMask);
    539 }
    540 
    541 namespace {
    542 CfgVector<Inst *> getInstructionsInRange(CfgNode *Node, InstNumberT Start,
    543                                          InstNumberT End) {
    544   CfgVector<Inst *> Result;
    545   bool Started = false;
    546   auto Process = [&](InstList &Insts) {
    547     for (auto &Instr : Insts) {
    548       if (Instr.isDeleted()) {
    549         continue;
    550       }
    551       if (Instr.getNumber() == Start) {
    552         Started = true;
    553       }
    554       if (Started) {
    555         Result.emplace_back(&Instr);
    556       }
    557       if (Instr.getNumber() == End) {
    558         break;
    559       }
    560     }
    561   };
    562   Process(Node->getPhis());
    563   Process(Node->getInsts());
    564   // TODO(manasijm): Investigate why checking >= End significantly changes
    565   // output. Should not happen when renumbering produces monotonically
    566   // increasing instruction numbers and live ranges begin and end on non-deleted
    567   // instructions.
    568   return Result;
    569 }
     570 } // end of anonymous namespace
    571 
    572 void TargetLowering::postRegallocSplitting(const SmallBitVector &RegMask) {
     573   // Splits the live ranges of global (multi-block) variables and runs the
    574   // register allocator to find registers for as many of the new variables as
    575   // possible.
    576   // TODO(manasijm): Merge the small liveranges back into multi-block ones when
    577   // the variables get the same register. This will reduce the amount of new
    578   // instructions inserted. This might involve a full dataflow analysis.
    579   // Also, modify the preference mechanism in the register allocator to match.
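  //
  // Sketch of the intended effect (illustrative): if a multi-block variable v
  // has a heavily used segment in one node, a new single-segment variable v2
  // is created for that segment, v's uses and defs in the segment are
  // rewritten to v2, and lowered moves are inserted: v2 <- v before v2's first
  // use and v <- v2 after v2's last def. If v2 then wins a register, that
  // segment runs out of a register instead of a stack slot.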
    580 
    581   TimerMarker _(TimerStack::TT_splitGlobalVars, Func);
    582   CfgSet<Variable *> SplitCandidates;
    583 
    584   // Find variables that do not have registers but are allowed to. Also skip
    585   // variables with single segment live ranges as they are not split further in
    586   // this function.
    587   for (Variable *Var : Func->getVariables()) {
    588     if (!Var->mustNotHaveReg() && !Var->hasReg()) {
    589       if (Var->getLiveRange().getNumSegments() > 1)
    590         SplitCandidates.insert(Var);
    591     }
    592   }
    593   if (SplitCandidates.empty())
    594     return;
    595 
    596   CfgSet<Variable *> ExtraVars;
    597 
    598   struct UseInfo {
    599     Variable *Replacing = nullptr;
    600     Inst *FirstUse = nullptr;
    601     Inst *LastDef = nullptr;
    602     SizeT UseCount = 0;
    603   };
    604   CfgUnorderedMap<Variable *, UseInfo> VarInfo;
    605   // Split the live ranges of the viable variables by node.
    606   // Compute metadata (UseInfo) for each of the resulting variables.
    607   for (auto *Var : SplitCandidates) {
    608     for (auto &Segment : Var->getLiveRange().getSegments()) {
    609       UseInfo Info;
    610       Info.Replacing = Var;
    611       auto *Node = Var->getLiveRange().getNodeForSegment(Segment.first);
    612 
    613       for (auto *Instr :
    614            getInstructionsInRange(Node, Segment.first, Segment.second)) {
    615         for (SizeT i = 0; i < Instr->getSrcSize(); ++i) {
    616           // It's safe to iterate over the top-level src operands rather than
    617           // using FOREACH_VAR_IN_INST(), because any variables inside e.g.
    618           // mem operands should already have registers.
    619           if (auto *Var = llvm::dyn_cast<Variable>(Instr->getSrc(i))) {
    620             if (Var == Info.Replacing) {
    621               if (Info.FirstUse == nullptr && !llvm::isa<InstPhi>(Instr)) {
    622                 Info.FirstUse = Instr;
    623               }
    624               Info.UseCount++;
    625             }
    626           }
    627         }
    628         if (Instr->getDest() == Info.Replacing && !llvm::isa<InstPhi>(Instr)) {
    629           Info.LastDef = Instr;
    630         }
    631       }
    632 
    633       static constexpr SizeT MinUseThreshold = 3;
     634       // Skip if the variable has fewer than MinUseThreshold uses in the segment.
    635       if (Info.UseCount < MinUseThreshold)
    636         continue;
    637 
    638       if (!Info.FirstUse && !Info.LastDef) {
    639         continue;
    640       }
    641 
    642       LiveRange LR;
    643       LR.addSegment(Segment);
    644       Variable *NewVar = Func->makeVariable(Var->getType());
    645 
    646       NewVar->setLiveRange(LR);
    647 
    648       VarInfo[NewVar] = Info;
    649 
    650       ExtraVars.insert(NewVar);
    651     }
    652   }
    653   // Run the register allocator with all these new variables included
    654   LinearScan RegAlloc(Func);
    655   RegAlloc.init(RAK_Global, SplitCandidates);
    656   RegAlloc.scan(RegMask, getFlags().getRandomizeRegisterAllocation());
    657 
    658   // Modify the Cfg to use the new variables that now have registers.
    659   for (auto *ExtraVar : ExtraVars) {
    660     if (!ExtraVar->hasReg()) {
    661       continue;
    662     }
    663 
    664     auto &Info = VarInfo[ExtraVar];
    665 
    666     assert(ExtraVar->getLiveRange().getSegments().size() == 1);
    667     auto Segment = ExtraVar->getLiveRange().getSegments()[0];
    668 
    669     auto *Node =
    670         Info.Replacing->getLiveRange().getNodeForSegment(Segment.first);
    671 
    672     auto RelevantInsts =
    673         getInstructionsInRange(Node, Segment.first, Segment.second);
    674 
    675     if (RelevantInsts.empty())
    676       continue;
    677 
    678     // Replace old variables
    679     for (auto *Instr : RelevantInsts) {
    680       if (llvm::isa<InstPhi>(Instr))
    681         continue;
    682       // TODO(manasijm): Figure out how to safely enable replacing phi dest
     683       // variables. The issue is that we cannot insert low-level mov
    684       // instructions into the PhiList.
    685       for (SizeT i = 0; i < Instr->getSrcSize(); ++i) {
    686         // FOREACH_VAR_IN_INST() not needed. Same logic as above.
    687         if (auto *Var = llvm::dyn_cast<Variable>(Instr->getSrc(i))) {
    688           if (Var == Info.Replacing) {
    689             Instr->replaceSource(i, ExtraVar);
    690           }
    691         }
    692       }
    693       if (Instr->getDest() == Info.Replacing) {
    694         Instr->replaceDest(ExtraVar);
    695       }
    696     }
    697 
    698     assert(Info.FirstUse != Info.LastDef);
    699     assert(Info.FirstUse || Info.LastDef);
    700 
    701     // Insert spill code
    702     if (Info.FirstUse != nullptr) {
    703       auto *NewInst =
    704           Func->getTarget()->createLoweredMove(ExtraVar, Info.Replacing);
    705       Node->getInsts().insert(instToIterator(Info.FirstUse), NewInst);
    706     }
    707     if (Info.LastDef != nullptr) {
    708       auto *NewInst =
    709           Func->getTarget()->createLoweredMove(Info.Replacing, ExtraVar);
    710       Node->getInsts().insertAfter(instToIterator(Info.LastDef), NewInst);
    711     }
    712   }
    713 }
    714 
    715 void TargetLowering::markRedefinitions() {
    716   // Find (non-SSA) instructions where the Dest variable appears in some source
    717   // operand, and set the IsDestRedefined flag to keep liveness analysis
    718   // consistent.
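  // For instance (illustrative), a lowered two-address form such as
  //   t = add t, 1
  // redefines t; marking the instruction with setDestRedefined() tells the
  // liveness analysis that this definition does not start a new value.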
    719   for (auto Instr = Context.getCur(), E = Context.getNext(); Instr != E;
    720        ++Instr) {
    721     if (Instr->isDeleted())
    722       continue;
    723     Variable *Dest = Instr->getDest();
    724     if (Dest == nullptr)
    725       continue;
    726     FOREACH_VAR_IN_INST(Var, *Instr) {
    727       if (Var == Dest) {
    728         Instr->setDestRedefined();
    729         break;
    730       }
    731     }
    732   }
    733 }
    734 
    735 void TargetLowering::addFakeDefUses(const Inst *Instr) {
    736   FOREACH_VAR_IN_INST(Var, *Instr) {
    737     if (auto *Var64 = llvm::dyn_cast<Variable64On32>(Var)) {
    738       Context.insert<InstFakeUse>(Var64->getLo());
    739       Context.insert<InstFakeUse>(Var64->getHi());
    740     } else if (auto *VarVec = llvm::dyn_cast<VariableVecOn32>(Var)) {
    741       for (Variable *Var : VarVec->getContainers()) {
    742         Context.insert<InstFakeUse>(Var);
    743       }
    744     } else {
    745       Context.insert<InstFakeUse>(Var);
    746     }
    747   }
    748   Variable *Dest = Instr->getDest();
    749   if (Dest == nullptr)
    750     return;
    751   if (auto *Var64 = llvm::dyn_cast<Variable64On32>(Dest)) {
    752     Context.insert<InstFakeDef>(Var64->getLo());
    753     Context.insert<InstFakeDef>(Var64->getHi());
    754   } else if (auto *VarVec = llvm::dyn_cast<VariableVecOn32>(Dest)) {
    755     for (Variable *Var : VarVec->getContainers()) {
    756       Context.insert<InstFakeDef>(Var);
    757     }
    758   } else {
    759     Context.insert<InstFakeDef>(Dest);
    760   }
    761 }
    762 
    763 void TargetLowering::sortVarsByAlignment(VarList &Dest,
    764                                          const VarList &Source) const {
    765   Dest = Source;
    766   // Instead of std::sort, we could do a bucket sort with log2(alignment) as
    767   // the buckets, if performance is an issue.
    768   std::sort(Dest.begin(), Dest.end(),
    769             [this](const Variable *V1, const Variable *V2) {
    770               const size_t WidthV1 = typeWidthInBytesOnStack(V1->getType());
    771               const size_t WidthV2 = typeWidthInBytesOnStack(V2->getType());
    772               if (WidthV1 == WidthV2)
    773                 return V1->getIndex() < V2->getIndex();
    774               return WidthV1 > WidthV2;
    775             });
    776 }
    777 
    778 void TargetLowering::getVarStackSlotParams(
    779     VarList &SortedSpilledVariables, SmallBitVector &RegsUsed,
    780     size_t *GlobalsSize, size_t *SpillAreaSizeBytes,
    781     uint32_t *SpillAreaAlignmentBytes, uint32_t *LocalsSlotsAlignmentBytes,
    782     std::function<bool(Variable *)> TargetVarHook) {
    783   const VariablesMetadata *VMetadata = Func->getVMetadata();
    784   BitVector IsVarReferenced(Func->getNumVariables());
    785   for (CfgNode *Node : Func->getNodes()) {
    786     for (Inst &Instr : Node->getInsts()) {
    787       if (Instr.isDeleted())
    788         continue;
    789       if (const Variable *Var = Instr.getDest())
    790         IsVarReferenced[Var->getIndex()] = true;
    791       FOREACH_VAR_IN_INST(Var, Instr) {
    792         IsVarReferenced[Var->getIndex()] = true;
    793       }
    794     }
    795   }
    796 
    797   // If SimpleCoalescing is false, each variable without a register gets its
    798   // own unique stack slot, which leads to large stack frames. If
    799   // SimpleCoalescing is true, then each "global" variable without a register
    800   // gets its own slot, but "local" variable slots are reused across basic
    801   // blocks. E.g., if A and B are local to block 1 and C is local to block 2,
    802   // then C may share a slot with A or B.
    803   //
    804   // We cannot coalesce stack slots if this function calls a "returns twice"
    805   // function. In that case, basic blocks may be revisited, and variables local
    806   // to those basic blocks are actually live until after the called function
    807   // returns a second time.
    808   const bool SimpleCoalescing = !callsReturnsTwice();
    809 
    810   CfgVector<size_t> LocalsSize(Func->getNumNodes());
    811   const VarList &Variables = Func->getVariables();
    812   VarList SpilledVariables;
    813   for (Variable *Var : Variables) {
    814     if (Var->hasReg()) {
    815       // Don't consider a rematerializable variable to be an actual register use
    816       // (specifically of the frame pointer).  Otherwise, the prolog may decide
    817       // to save the frame pointer twice - once because of the explicit need for
    818       // a frame pointer, and once because of an active use of a callee-save
    819       // register.
    820       if (!Var->isRematerializable())
    821         RegsUsed[Var->getRegNum()] = true;
    822       continue;
    823     }
    824     // An argument either does not need a stack slot (if passed in a register)
    825     // or already has one (if passed on the stack).
    826     if (Var->getIsArg()) {
    827       if (!Var->hasReg()) {
    828         assert(!Var->hasStackOffset());
    829         Var->setHasStackOffset();
    830       }
    831       continue;
    832     }
    833     // An unreferenced variable doesn't need a stack slot.
    834     if (!IsVarReferenced[Var->getIndex()])
    835       continue;
     836     // Let the target check for a target-specific variable; such a variable may
     837     // end up sharing stack slots and thus need no accounting here.
    838     if (TargetVarHook(Var))
    839       continue;
    840     assert(!Var->hasStackOffset());
    841     Var->setHasStackOffset();
    842     SpilledVariables.push_back(Var);
    843   }
    844 
    845   SortedSpilledVariables.reserve(SpilledVariables.size());
    846   sortVarsByAlignment(SortedSpilledVariables, SpilledVariables);
    847 
    848   for (Variable *Var : SortedSpilledVariables) {
    849     size_t Increment = typeWidthInBytesOnStack(Var->getType());
    850     // We have sorted by alignment, so the first variable we encounter that is
    851     // located in each area determines the max alignment for the area.
    852     if (!*SpillAreaAlignmentBytes)
    853       *SpillAreaAlignmentBytes = Increment;
    854     if (SimpleCoalescing && VMetadata->isTracked(Var)) {
    855       if (VMetadata->isMultiBlock(Var)) {
    856         *GlobalsSize += Increment;
    857       } else {
    858         SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
    859         LocalsSize[NodeIndex] += Increment;
    860         if (LocalsSize[NodeIndex] > *SpillAreaSizeBytes)
    861           *SpillAreaSizeBytes = LocalsSize[NodeIndex];
    862         if (!*LocalsSlotsAlignmentBytes)
    863           *LocalsSlotsAlignmentBytes = Increment;
    864       }
    865     } else {
    866       *SpillAreaSizeBytes += Increment;
    867     }
    868   }
    869   // For testing legalization of large stack offsets on targets with limited
    870   // offset bits in instruction encodings, add some padding.
    871   *SpillAreaSizeBytes += getFlags().getTestStackExtra();
    872 }
    873 
    874 void TargetLowering::alignStackSpillAreas(uint32_t SpillAreaStartOffset,
    875                                           uint32_t SpillAreaAlignmentBytes,
    876                                           size_t GlobalsSize,
    877                                           uint32_t LocalsSlotsAlignmentBytes,
    878                                           uint32_t *SpillAreaPaddingBytes,
    879                                           uint32_t *LocalsSlotsPaddingBytes) {
    880   if (SpillAreaAlignmentBytes) {
    881     uint32_t PaddingStart = SpillAreaStartOffset;
    882     uint32_t SpillAreaStart =
    883         Utils::applyAlignment(PaddingStart, SpillAreaAlignmentBytes);
    884     *SpillAreaPaddingBytes = SpillAreaStart - PaddingStart;
    885   }
    886 
    887   // If there are separate globals and locals areas, make sure the locals area
    888   // is aligned by padding the end of the globals area.
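  // For example (illustrative numbers): with GlobalsSize == 20 and
  // LocalsSlotsAlignmentBytes == 16, the globals area is padded up to 32, so
  // *LocalsSlotsPaddingBytes is set to 12.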
    889   if (LocalsSlotsAlignmentBytes) {
    890     uint32_t GlobalsAndSubsequentPaddingSize = GlobalsSize;
    891     GlobalsAndSubsequentPaddingSize =
    892         Utils::applyAlignment(GlobalsSize, LocalsSlotsAlignmentBytes);
    893     *LocalsSlotsPaddingBytes = GlobalsAndSubsequentPaddingSize - GlobalsSize;
    894   }
    895 }
    896 
    897 void TargetLowering::assignVarStackSlots(VarList &SortedSpilledVariables,
    898                                          size_t SpillAreaPaddingBytes,
    899                                          size_t SpillAreaSizeBytes,
    900                                          size_t GlobalsAndSubsequentPaddingSize,
    901                                          bool UsesFramePointer) {
    902   const VariablesMetadata *VMetadata = Func->getVMetadata();
    903   // For testing legalization of large stack offsets on targets with limited
    904   // offset bits in instruction encodings, add some padding. This assumes that
    905   // SpillAreaSizeBytes has accounted for the extra test padding. When
     906   // UsesFramePointer is true, the offset depends on the padding, not just the
     907   // SpillAreaSizeBytes. On the other hand, when UsesFramePointer is false, the
    908   // offsets depend on the gap between SpillAreaSizeBytes and
    909   // SpillAreaPaddingBytes, so we don't increment that.
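  // Worked example (illustrative numbers, ignoring the test padding): with
  // SpillAreaPaddingBytes == 4, GlobalsAndSubsequentPaddingSize == 8, and a
  // 4-byte single-block variable, NextStackOffset becomes 4 + 8 + 4 == 16, so
  // the variable is placed at offset -16 when UsesFramePointer is true, and at
  // SpillAreaSizeBytes - 16 otherwise.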
    910   size_t TestPadding = getFlags().getTestStackExtra();
    911   if (UsesFramePointer)
    912     SpillAreaPaddingBytes += TestPadding;
    913   size_t GlobalsSpaceUsed = SpillAreaPaddingBytes;
    914   size_t NextStackOffset = SpillAreaPaddingBytes;
    915   CfgVector<size_t> LocalsSize(Func->getNumNodes());
    916   const bool SimpleCoalescing = !callsReturnsTwice();
    917 
    918   for (Variable *Var : SortedSpilledVariables) {
    919     size_t Increment = typeWidthInBytesOnStack(Var->getType());
    920     if (SimpleCoalescing && VMetadata->isTracked(Var)) {
    921       if (VMetadata->isMultiBlock(Var)) {
    922         GlobalsSpaceUsed += Increment;
    923         NextStackOffset = GlobalsSpaceUsed;
    924       } else {
    925         SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
    926         LocalsSize[NodeIndex] += Increment;
    927         NextStackOffset = SpillAreaPaddingBytes +
    928                           GlobalsAndSubsequentPaddingSize +
    929                           LocalsSize[NodeIndex];
    930       }
    931     } else {
    932       NextStackOffset += Increment;
    933     }
    934     if (UsesFramePointer)
    935       Var->setStackOffset(-NextStackOffset);
    936     else
    937       Var->setStackOffset(SpillAreaSizeBytes - NextStackOffset);
    938   }
    939 }
    940 
    941 InstCall *TargetLowering::makeHelperCall(RuntimeHelper FuncID, Variable *Dest,
    942                                          SizeT MaxSrcs) {
    943   constexpr bool HasTailCall = false;
    944   Constant *CallTarget = Ctx->getRuntimeHelperFunc(FuncID);
    945   InstCall *Call =
    946       InstCall::create(Func, MaxSrcs, Dest, CallTarget, HasTailCall);
    947   return Call;
    948 }
    949 
    950 bool TargetLowering::shouldOptimizeMemIntrins() {
    951   return Func->getOptLevel() >= Opt_1 || getFlags().getForceMemIntrinOpt();
    952 }
    953 
    954 void TargetLowering::scalarizeArithmetic(InstArithmetic::OpKind Kind,
    955                                          Variable *Dest, Operand *Src0,
    956                                          Operand *Src1) {
    957   scalarizeInstruction(
    958       Dest, [this, Kind](Variable *Dest, Operand *Src0, Operand *Src1) {
    959         return Context.insert<InstArithmetic>(Kind, Dest, Src0, Src1);
    960       }, Src0, Src1);
    961 }
    962 
    963 void TargetLowering::emitWithoutPrefix(const ConstantRelocatable *C,
    964                                        const char *Suffix) const {
    965   if (!BuildDefs::dump())
    966     return;
    967   Ostream &Str = Ctx->getStrEmit();
    968   const std::string &EmitStr = C->getEmitString();
    969   if (!EmitStr.empty()) {
    970     // C has a custom emit string, so we use it instead of the canonical
    971     // Name + Offset form.
    972     Str << EmitStr;
    973     return;
    974   }
    975   Str << C->getName() << Suffix;
    976   RelocOffsetT Offset = C->getOffset();
    977   if (Offset) {
    978     if (Offset > 0)
    979       Str << "+";
    980     Str << Offset;
    981   }
    982 }
    983 
    984 std::unique_ptr<TargetDataLowering>
    985 TargetDataLowering::createLowering(GlobalContext *Ctx) {
    986   TargetArch Target = getFlags().getTargetArch();
    987   switch (Target) {
    988   default:
    989     badTargetFatalError(Target);
    990 #define SUBZERO_TARGET(X)                                                      \
    991   case TARGET_LOWERING_CLASS_FOR(X):                                           \
    992     return ::X::createTargetDataLowering(Ctx);
    993 #include "SZTargets.def"
    994 #undef SUBZERO_TARGET
    995   }
    996 }
    997 
    998 TargetDataLowering::~TargetDataLowering() = default;
    999 
   1000 namespace {
   1001 
   1002 // dataSectionSuffix decides whether to use SectionSuffix or VarName as data
   1003 // section suffix. Essentially, when using separate data sections for globals
   1004 // SectionSuffix is not necessary.
   1005 std::string dataSectionSuffix(const std::string &SectionSuffix,
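// Examples (illustrative arguments):
//   dataSectionSuffix("foo", "g", /*DataSections=*/true)  == ".g"
//   dataSectionSuffix("foo", "g", /*DataSections=*/false) == ".foo"
//   dataSectionSuffix("",    "g", /*DataSections=*/false) == ""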
   1006                               const std::string &VarName,
   1007                               const bool DataSections) {
   1008   if (SectionSuffix.empty() && !DataSections) {
   1009     return "";
   1010   }
   1011 
   1012   if (DataSections) {
   1013     // With data sections we don't need to use the SectionSuffix.
   1014     return "." + VarName;
   1015   }
   1016 
   1017   assert(!SectionSuffix.empty());
   1018   return "." + SectionSuffix;
   1019 }
   1020 
   1021 } // end of anonymous namespace
   1022 
   1023 void TargetDataLowering::emitGlobal(const VariableDeclaration &Var,
   1024                                     const std::string &SectionSuffix) {
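  // Illustrative output (hypothetical internal, zero-initialized, 4-byte
  // global "g" with 4-byte alignment, data sections enabled):
  //   .type     g,%object
  //   .section  .bss.g,"aw",%nobits
  //   .p2align  2
  //   g:
  //   .zero     4
  //   .size     g, 4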
   1025   if (!BuildDefs::dump())
   1026     return;
   1027 
   1028   // If external and not initialized, this must be a cross test. Don't generate
   1029   // a declaration for such cases.
   1030   const bool IsExternal = Var.isExternal() || getFlags().getDisableInternal();
   1031   if (IsExternal && !Var.hasInitializer())
   1032     return;
   1033 
   1034   Ostream &Str = Ctx->getStrEmit();
   1035   const bool HasNonzeroInitializer = Var.hasNonzeroInitializer();
   1036   const bool IsConstant = Var.getIsConstant();
   1037   const SizeT Size = Var.getNumBytes();
   1038   const std::string Name = Var.getName().toString();
   1039 
   1040   Str << "\t.type\t" << Name << ",%object\n";
   1041 
   1042   const bool UseDataSections = getFlags().getDataSections();
   1043   const bool UseNonsfi = getFlags().getUseNonsfi();
   1044   const std::string Suffix =
   1045       dataSectionSuffix(SectionSuffix, Name, UseDataSections);
   1046   if (IsConstant && UseNonsfi)
   1047     Str << "\t.section\t.data.rel.ro" << Suffix << ",\"aw\",%progbits\n";
   1048   else if (IsConstant)
   1049     Str << "\t.section\t.rodata" << Suffix << ",\"a\",%progbits\n";
   1050   else if (HasNonzeroInitializer)
   1051     Str << "\t.section\t.data" << Suffix << ",\"aw\",%progbits\n";
   1052   else
   1053     Str << "\t.section\t.bss" << Suffix << ",\"aw\",%nobits\n";
   1054 
   1055   if (IsExternal)
   1056     Str << "\t.globl\t" << Name << "\n";
   1057 
   1058   const uint32_t Align = Var.getAlignment();
   1059   if (Align > 1) {
   1060     assert(llvm::isPowerOf2_32(Align));
    1061     // Use the .p2align directive, since the .align N directive can interpret N
    1062     // either as a byte count or as a power of two, depending on the target.
   1063     Str << "\t.p2align\t" << llvm::Log2_32(Align) << "\n";
   1064   }
   1065 
   1066   Str << Name << ":\n";
   1067 
   1068   if (HasNonzeroInitializer) {
   1069     for (const auto *Init : Var.getInitializers()) {
   1070       switch (Init->getKind()) {
   1071       case VariableDeclaration::Initializer::DataInitializerKind: {
   1072         const auto &Data =
   1073             llvm::cast<VariableDeclaration::DataInitializer>(Init)
   1074                 ->getContents();
   1075         for (SizeT i = 0; i < Init->getNumBytes(); ++i) {
   1076           Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
   1077         }
   1078         break;
   1079       }
   1080       case VariableDeclaration::Initializer::ZeroInitializerKind:
   1081         Str << "\t.zero\t" << Init->getNumBytes() << "\n";
   1082         break;
   1083       case VariableDeclaration::Initializer::RelocInitializerKind: {
   1084         const auto *Reloc =
   1085             llvm::cast<VariableDeclaration::RelocInitializer>(Init);
   1086         Str << "\t" << getEmit32Directive() << "\t";
   1087         Str << Reloc->getDeclaration()->getName();
   1088         if (Reloc->hasFixup()) {
   1089           // TODO(jpp): this is ARM32 specific.
   1090           Str << "(GOTOFF)";
   1091         }
   1092         if (RelocOffsetT Offset = Reloc->getOffset()) {
   1093           if (Offset >= 0 || (Offset == INT32_MIN))
   1094             Str << " + " << Offset;
   1095           else
   1096             Str << " - " << -Offset;
   1097         }
   1098         Str << "\n";
   1099         break;
   1100       }
   1101       }
   1102     }
   1103   } else {
   1104     // NOTE: for non-constant zero initializers, this is BSS (no bits), so an
   1105     // ELF writer would not write to the file, and only track virtual offsets,
   1106     // but the .s writer still needs this .zero and cannot simply use the .size
   1107     // to advance offsets.
   1108     Str << "\t.zero\t" << Size << "\n";
   1109   }
   1110 
   1111   Str << "\t.size\t" << Name << ", " << Size << "\n";
   1112 }
   1113 
   1114 std::unique_ptr<TargetHeaderLowering>
   1115 TargetHeaderLowering::createLowering(GlobalContext *Ctx) {
   1116   TargetArch Target = getFlags().getTargetArch();
   1117   switch (Target) {
   1118   default:
   1119     badTargetFatalError(Target);
   1120 #define SUBZERO_TARGET(X)                                                      \
   1121   case TARGET_LOWERING_CLASS_FOR(X):                                           \
   1122     return ::X::createTargetHeaderLowering(Ctx);
   1123 #include "SZTargets.def"
   1124 #undef SUBZERO_TARGET
   1125   }
   1126 }
   1127 
   1128 TargetHeaderLowering::~TargetHeaderLowering() = default;
   1129 
   1130 } // end of namespace Ice
   1131