Home | History | Annotate | Download | only in PowerPC
      1 //===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines a pattern matching instruction selector for PowerPC,
     11 // converting from a legalized dag to a PPC dag.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "PPC.h"
     16 #include "MCTargetDesc/PPCPredicates.h"
     17 #include "PPCMachineFunctionInfo.h"
     18 #include "PPCTargetMachine.h"
     19 #include "llvm/Analysis/BranchProbabilityInfo.h"
     20 #include "llvm/CodeGen/FunctionLoweringInfo.h"
     21 #include "llvm/CodeGen/MachineFunction.h"
     22 #include "llvm/CodeGen/MachineInstrBuilder.h"
     23 #include "llvm/CodeGen/MachineRegisterInfo.h"
     24 #include "llvm/CodeGen/SelectionDAG.h"
     25 #include "llvm/CodeGen/SelectionDAGISel.h"
     26 #include "llvm/IR/Constants.h"
     27 #include "llvm/IR/Function.h"
     28 #include "llvm/IR/GlobalAlias.h"
     29 #include "llvm/IR/GlobalValue.h"
     30 #include "llvm/IR/GlobalVariable.h"
     31 #include "llvm/IR/Intrinsics.h"
     32 #include "llvm/IR/Module.h"
     33 #include "llvm/Support/CommandLine.h"
     34 #include "llvm/Support/Debug.h"
     35 #include "llvm/Support/ErrorHandling.h"
     36 #include "llvm/Support/MathExtras.h"
     37 #include "llvm/Support/raw_ostream.h"
     38 #include "llvm/Target/TargetOptions.h"
     39 using namespace llvm;
     40 
     41 #define DEBUG_TYPE "ppc-codegen"
     42 
     43 // FIXME: Remove this once the bug has been fixed!
     44 cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
     45 cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
     46 
     47 static cl::opt<bool>
     48     UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
     49                        cl::desc("use aggressive ppc isel for bit permutations"),
     50                        cl::Hidden);
     51 static cl::opt<bool> BPermRewriterNoMasking(
     52     "ppc-bit-perm-rewriter-stress-rotates",
     53     cl::desc("stress rotate selection in aggressive ppc isel for "
     54              "bit permutations"),
     55     cl::Hidden);
     56 
     57 static cl::opt<bool> EnableBranchHint(
     58   "ppc-use-branch-hint", cl::init(true),
     59     cl::desc("Enable static hinting of branches on ppc"),
     60     cl::Hidden);
     61 
     62 namespace {
     63   //===--------------------------------------------------------------------===//
     64   /// PPCDAGToDAGISel - PPC specific code to select PPC machine
     65   /// instructions for SelectionDAG operations.
     66   ///
     67   class PPCDAGToDAGISel : public SelectionDAGISel {
     68     const PPCTargetMachine &TM;
     69     const PPCSubtarget *PPCSubTarget;
     70     const PPCTargetLowering *PPCLowering;
     71     unsigned GlobalBaseReg;
     72   public:
     73     explicit PPCDAGToDAGISel(PPCTargetMachine &tm)
     74         : SelectionDAGISel(tm), TM(tm) {}
     75 
     76     bool runOnMachineFunction(MachineFunction &MF) override {
     77       // Make sure we re-emit a set of the global base reg if necessary
     78       GlobalBaseReg = 0;
     79       PPCSubTarget = &MF.getSubtarget<PPCSubtarget>();
     80       PPCLowering = PPCSubTarget->getTargetLowering();
     81       SelectionDAGISel::runOnMachineFunction(MF);
     82 
     83       if (!PPCSubTarget->isSVR4ABI())
     84         InsertVRSaveCode(MF);
     85 
     86       return true;
     87     }
     88 
     89     void PreprocessISelDAG() override;
     90     void PostprocessISelDAG() override;
     91 
     92     /// getI32Imm - Return a target constant with the specified value, of type
     93     /// i32.
     94     inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
     95       return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
     96     }
     97 
     98     /// getI64Imm - Return a target constant with the specified value, of type
     99     /// i64.
    100     inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
    101       return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
    102     }
    103 
    104     /// getSmallIPtrImm - Return a target constant of pointer type.
    105     inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) {
    106       return CurDAG->getTargetConstant(
    107           Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
    108     }
    109 
    110     /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
    111     /// rotate and mask opcode and mask operation.
    112     static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
    113                                 unsigned &SH, unsigned &MB, unsigned &ME);
    114 
    115     /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
    116     /// base register.  Return the virtual register that holds this value.
    117     SDNode *getGlobalBaseReg();
    118 
    119     void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0);
    120 
    121     // Select - Convert the specified operand from a target-independent to a
    122     // target-specific node if it hasn't already been changed.
    123     void Select(SDNode *N) override;
    124 
    125     bool tryBitfieldInsert(SDNode *N);
    126     bool tryBitPermutation(SDNode *N);
    127 
    128     /// SelectCC - Select a comparison of the specified values with the
    129     /// specified condition code, returning the CR# of the expression.
    130     SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
    131                      const SDLoc &dl);
    132 
    133     /// SelectAddrImm - Returns true if the address N can be represented by
    134     /// a base register plus a signed 16-bit displacement [r+imm].
    135     bool SelectAddrImm(SDValue N, SDValue &Disp,
    136                        SDValue &Base) {
    137       return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, false);
    138     }
    139 
    140     /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
    141     /// immediate field.  Note that the operand at this point is already the
    142     /// result of a prior SelectAddressRegImm call.
    143     bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
    144       if (N.getOpcode() == ISD::TargetConstant ||
    145           N.getOpcode() == ISD::TargetGlobalAddress) {
    146         Out = N;
    147         return true;
    148       }
    149 
    150       return false;
    151     }
    152 
    153     /// SelectAddrIdx - Given the specified addressed, check to see if it can be
    154     /// represented as an indexed [r+r] operation.  Returns false if it can
    155     /// be represented by [r+imm], which are preferred.
    156     bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
    157       return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG);
    158     }
    159 
    160     /// SelectAddrIdxOnly - Given the specified addressed, force it to be
    161     /// represented as an indexed [r+r] operation.
    162     bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
    163       return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
    164     }
    165 
    166     /// SelectAddrImmX4 - Returns true if the address N can be represented by
    167     /// a base register plus a signed 16-bit displacement that is a multiple of 4.
    168     /// Suitable for use by STD and friends.
    169     bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
    170       return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, true);
    171     }
    172 
    173     // Select an address into a single register.
    174     bool SelectAddr(SDValue N, SDValue &Base) {
    175       Base = N;
    176       return true;
    177     }
    178 
    179     /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
    180     /// inline asm expressions.  It is always correct to compute the value into
    181     /// a register.  The case of adding a (possibly relocatable) constant to a
    182     /// register can be improved, but it is wrong to substitute Reg+Reg for
    183     /// Reg in an asm, because the load or store opcode would have to change.
    184     bool SelectInlineAsmMemoryOperand(const SDValue &Op,
    185                                       unsigned ConstraintID,
    186                                       std::vector<SDValue> &OutOps) override {
    187 
    188       switch(ConstraintID) {
    189       default:
    190         errs() << "ConstraintID: " << ConstraintID << "\n";
    191         llvm_unreachable("Unexpected asm memory constraint");
    192       case InlineAsm::Constraint_es:
    193       case InlineAsm::Constraint_i:
    194       case InlineAsm::Constraint_m:
    195       case InlineAsm::Constraint_o:
    196       case InlineAsm::Constraint_Q:
    197       case InlineAsm::Constraint_Z:
    198       case InlineAsm::Constraint_Zy:
    199         // We need to make sure that this one operand does not end up in r0
    200         // (because we might end up lowering this as 0(%op)).
    201         const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo();
    202         const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
    203         SDLoc dl(Op);
    204         SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
    205         SDValue NewOp =
    206           SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
    207                                          dl, Op.getValueType(),
    208                                          Op, RC), 0);
    209 
    210         OutOps.push_back(NewOp);
    211         return false;
    212       }
    213       return true;
    214     }
    215 
    216     void InsertVRSaveCode(MachineFunction &MF);
    217 
    218     const char *getPassName() const override {
    219       return "PowerPC DAG->DAG Pattern Instruction Selection";
    220     }
    221 
    222 // Include the pieces autogenerated from the target description.
    223 #include "PPCGenDAGISel.inc"
    224 
    225 private:
    226     bool trySETCC(SDNode *N);
    227 
    228     void PeepholePPC64();
    229     void PeepholePPC64ZExt();
    230     void PeepholeCROps();
    231 
    232     SDValue combineToCMPB(SDNode *N);
    233     void foldBoolExts(SDValue &Res, SDNode *&N);
    234 
    235     bool AllUsersSelectZero(SDNode *N);
    236     void SwapAllSelectUsers(SDNode *N);
    237 
    238     void transferMemOperands(SDNode *N, SDNode *Result);
    239   };
    240 }
    241 
    242 /// InsertVRSaveCode - Once the entire function has been instruction selected,
    243 /// all virtual registers are created and all machine instructions are built,
    244 /// check to see if we need to save/restore VRSAVE.  If so, do it.
    245 void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
    246   // Check to see if this function uses vector registers, which means we have to
    247   // save and restore the VRSAVE register and update it with the regs we use.
    248   //
    249   // In this case, there will be virtual registers of vector type created
    250   // by the scheduler.  Detect them now.
    251   bool HasVectorVReg = false;
    252   for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) {
    253     unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
    254     if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) {
    255       HasVectorVReg = true;
    256       break;
    257     }
    258   }
    259   if (!HasVectorVReg) return;  // nothing to do.
    260 
    261   // If we have a vector register, we want to emit code into the entry and exit
    262   // blocks to save and restore the VRSAVE register.  We do this here (instead
    263   // of marking all vector instructions as clobbering VRSAVE) for two reasons:
    264   //
    265   // 1. This (trivially) reduces the load on the register allocator, by not
    266   //    having to represent the live range of the VRSAVE register.
    267   // 2. This (more significantly) allows us to create a temporary virtual
    268   //    register to hold the saved VRSAVE value, allowing this temporary to be
    269   //    register allocated, instead of forcing it to be spilled to the stack.
    270 
    271   // Create two vregs - one to hold the VRSAVE register that is live-in to the
    272   // function and one for the value after having bits or'd into it.
    273   unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
    274   unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
    275 
    276   const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
    277   MachineBasicBlock &EntryBB = *Fn.begin();
    278   DebugLoc dl;
    279   // Emit the following code into the entry block:
    280   // InVRSAVE = MFVRSAVE
    281   // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE
    282   // MTVRSAVE UpdatedVRSAVE
    283   MachineBasicBlock::iterator IP = EntryBB.begin();  // Insert Point
    284   BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE);
    285   BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE),
    286           UpdatedVRSAVE).addReg(InVRSAVE);
    287   BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE);
    288 
    289   // Find all return blocks, outputting a restore in each epilog.
    290   for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
    291     if (BB->isReturnBlock()) {
    292       IP = BB->end(); --IP;
    293 
    294       // Skip over all terminator instructions, which are part of the return
    295       // sequence.
    296       MachineBasicBlock::iterator I2 = IP;
    297       while (I2 != BB->begin() && (--I2)->isTerminator())
    298         IP = I2;
    299 
    300       // Emit: MTVRSAVE InVRSave
    301       BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE);
    302     }
    303   }
    304 }
    305 
    306 
    307 /// getGlobalBaseReg - Output the instructions required to put the
    308 /// base address to use for accessing globals into a register.
    309 ///
    310 SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
    311   if (!GlobalBaseReg) {
    312     const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
    313     // Insert the set of GlobalBaseReg into the first MBB of the function
    314     MachineBasicBlock &FirstMBB = MF->front();
    315     MachineBasicBlock::iterator MBBI = FirstMBB.begin();
    316     const Module *M = MF->getFunction()->getParent();
    317     DebugLoc dl;
    318 
    319     if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
    320       if (PPCSubTarget->isTargetELF()) {
    321         GlobalBaseReg = PPC::R30;
    322         if (M->getPICLevel() == PICLevel::SmallPIC) {
    323           BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
    324           BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
    325           MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
    326         } else {
    327           BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
    328           BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
    329           unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
    330           BuildMI(FirstMBB, MBBI, dl,
    331                   TII.get(PPC::UpdateGBR), GlobalBaseReg)
    332                   .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
    333           MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
    334         }
    335       } else {
    336         GlobalBaseReg =
    337           RegInfo->createVirtualRegister(&PPC::GPRC_NOR0RegClass);
    338         BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
    339         BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
    340       }
    341     } else {
    342       GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_NOX0RegClass);
    343       BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
    344       BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
    345     }
    346   }
    347   return CurDAG->getRegister(GlobalBaseReg,
    348                              PPCLowering->getPointerTy(CurDAG->getDataLayout()))
    349       .getNode();
    350 }
    351 
    352 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
    353 /// or 64-bit immediate, and if the value can be accurately represented as a
    354 /// sign extension from a 16-bit value.  If so, this returns true and the
    355 /// immediate.
    356 static bool isIntS16Immediate(SDNode *N, short &Imm) {
    357   if (N->getOpcode() != ISD::Constant)
    358     return false;
    359 
    360   Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
    361   if (N->getValueType(0) == MVT::i32)
    362     return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
    363   else
    364     return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
    365 }
    366 
    367 static bool isIntS16Immediate(SDValue Op, short &Imm) {
    368   return isIntS16Immediate(Op.getNode(), Imm);
    369 }
    370 
    371 
    372 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
    373 /// operand. If so Imm will receive the 32-bit value.
    374 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
    375   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    376     Imm = cast<ConstantSDNode>(N)->getZExtValue();
    377     return true;
    378   }
    379   return false;
    380 }
    381 
    382 /// isInt64Immediate - This method tests to see if the node is a 64-bit constant
    383 /// operand.  If so Imm will receive the 64-bit value.
    384 static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
    385   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
    386     Imm = cast<ConstantSDNode>(N)->getZExtValue();
    387     return true;
    388   }
    389   return false;
    390 }
    391 
    392 // isInt32Immediate - This method tests to see if a constant operand.
    393 // If so Imm will receive the 32 bit value.
    394 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
    395   return isInt32Immediate(N.getNode(), Imm);
    396 }
    397 
    398 static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo,
    399                               const SDValue &DestMBB) {
    400   assert(isa<BasicBlockSDNode>(DestMBB));
    401 
    402   if (!FuncInfo->BPI) return PPC::BR_NO_HINT;
    403 
    404   const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
    405   const TerminatorInst *BBTerm = BB->getTerminator();
    406 
    407   if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
    408 
    409   const BasicBlock *TBB = BBTerm->getSuccessor(0);
    410   const BasicBlock *FBB = BBTerm->getSuccessor(1);
    411 
    412   auto TProb = FuncInfo->BPI->getEdgeProbability(BB, TBB);
    413   auto FProb = FuncInfo->BPI->getEdgeProbability(BB, FBB);
    414 
    415   // We only want to handle cases which are easy to predict at static time, e.g.
    416   // C++ throw statement, that is very likely not taken, or calling never
    417   // returned function, e.g. stdlib exit(). So we set Threshold to filter
    418   // unwanted cases.
    419   //
    420   // Below is LLVM branch weight table, we only want to handle case 1, 2
    421   //
    422   // Case                  Taken:Nontaken  Example
    423   // 1. Unreachable        1048575:1       C++ throw, stdlib exit(),
    424   // 2. Invoke-terminating 1:1048575
    425   // 3. Coldblock          4:64            __builtin_expect
    426   // 4. Loop Branch        124:4           For loop
    427   // 5. PH/ZH/FPH          20:12
    428   const uint32_t Threshold = 10000;
    429 
    430   if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
    431     return PPC::BR_NO_HINT;
    432 
    433   DEBUG(dbgs() << "Use branch hint for '" << FuncInfo->Fn->getName() << "::"
    434                << BB->getName() << "'\n"
    435                << " -> " << TBB->getName() << ": " << TProb << "\n"
    436                << " -> " << FBB->getName() << ": " << FProb << "\n");
    437 
    438   const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);
    439 
    440   // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,
    441   // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock
    442   if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
    443     std::swap(TProb, FProb);
    444 
    445   return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
    446 }
    447 
    448 // isOpcWithIntImmediate - This method tests to see if the node is a specific
    449 // opcode and that it has a immediate integer right operand.
    450 // If so Imm will receive the 32 bit value.
    451 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
    452   return N->getOpcode() == Opc
    453          && isInt32Immediate(N->getOperand(1).getNode(), Imm);
    454 }
    455 
    456 void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) {
    457   SDLoc dl(SN);
    458   int FI = cast<FrameIndexSDNode>(N)->getIndex();
    459   SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
    460   unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
    461   if (SN->hasOneUse())
    462     CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
    463                          getSmallIPtrImm(Offset, dl));
    464   else
    465     ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
    466                                            getSmallIPtrImm(Offset, dl)));
    467 }
    468 
    469 bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
    470                                       bool isShiftMask, unsigned &SH,
    471                                       unsigned &MB, unsigned &ME) {
    472   // Don't even go down this path for i64, since different logic will be
    473   // necessary for rldicl/rldicr/rldimi.
    474   if (N->getValueType(0) != MVT::i32)
    475     return false;
    476 
    477   unsigned Shift  = 32;
    478   unsigned Indeterminant = ~0;  // bit mask marking indeterminant results
    479   unsigned Opcode = N->getOpcode();
    480   if (N->getNumOperands() != 2 ||
    481       !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
    482     return false;
    483 
    484   if (Opcode == ISD::SHL) {
    485     // apply shift left to mask if it comes first
    486     if (isShiftMask) Mask = Mask << Shift;
    487     // determine which bits are made indeterminant by shift
    488     Indeterminant = ~(0xFFFFFFFFu << Shift);
    489   } else if (Opcode == ISD::SRL) {
    490     // apply shift right to mask if it comes first
    491     if (isShiftMask) Mask = Mask >> Shift;
    492     // determine which bits are made indeterminant by shift
    493     Indeterminant = ~(0xFFFFFFFFu >> Shift);
    494     // adjust for the left rotate
    495     Shift = 32 - Shift;
    496   } else if (Opcode == ISD::ROTL) {
    497     Indeterminant = 0;
    498   } else {
    499     return false;
    500   }
    501 
    502   // if the mask doesn't intersect any Indeterminant bits
    503   if (Mask && !(Mask & Indeterminant)) {
    504     SH = Shift & 31;
    505     // make sure the mask is still a mask (wrap arounds may not be)
    506     return isRunOfOnes(Mask, MB, ME);
    507   }
    508   return false;
    509 }
    510 
    511 /// Turn an or of two masked values into the rotate left word immediate then
    512 /// mask insert (rlwimi) instruction.
    513 bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
    514   SDValue Op0 = N->getOperand(0);
    515   SDValue Op1 = N->getOperand(1);
    516   SDLoc dl(N);
    517 
    518   APInt LKZ, LKO, RKZ, RKO;
    519   CurDAG->computeKnownBits(Op0, LKZ, LKO);
    520   CurDAG->computeKnownBits(Op1, RKZ, RKO);
    521 
    522   unsigned TargetMask = LKZ.getZExtValue();
    523   unsigned InsertMask = RKZ.getZExtValue();
    524 
    525   if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
    526     unsigned Op0Opc = Op0.getOpcode();
    527     unsigned Op1Opc = Op1.getOpcode();
    528     unsigned Value, SH = 0;
    529     TargetMask = ~TargetMask;
    530     InsertMask = ~InsertMask;
    531 
    532     // If the LHS has a foldable shift and the RHS does not, then swap it to the
    533     // RHS so that we can fold the shift into the insert.
    534     if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
    535       if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
    536           Op0.getOperand(0).getOpcode() == ISD::SRL) {
    537         if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
    538             Op1.getOperand(0).getOpcode() != ISD::SRL) {
    539           std::swap(Op0, Op1);
    540           std::swap(Op0Opc, Op1Opc);
    541           std::swap(TargetMask, InsertMask);
    542         }
    543       }
    544     } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
    545       if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
    546           Op1.getOperand(0).getOpcode() != ISD::SRL) {
    547         std::swap(Op0, Op1);
    548         std::swap(Op0Opc, Op1Opc);
    549         std::swap(TargetMask, InsertMask);
    550       }
    551     }
    552 
    553     unsigned MB, ME;
    554     if (isRunOfOnes(InsertMask, MB, ME)) {
    555       SDValue Tmp1, Tmp2;
    556 
    557       if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
    558           isInt32Immediate(Op1.getOperand(1), Value)) {
    559         Op1 = Op1.getOperand(0);
    560         SH  = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
    561       }
    562       if (Op1Opc == ISD::AND) {
    563        // The AND mask might not be a constant, and we need to make sure that
    564        // if we're going to fold the masking with the insert, all bits not
    565        // know to be zero in the mask are known to be one.
    566         APInt MKZ, MKO;
    567         CurDAG->computeKnownBits(Op1.getOperand(1), MKZ, MKO);
    568         bool CanFoldMask = InsertMask == MKO.getZExtValue();
    569 
    570         unsigned SHOpc = Op1.getOperand(0).getOpcode();
    571         if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
    572             isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
    573           // Note that Value must be in range here (less than 32) because
    574           // otherwise there would not be any bits set in InsertMask.
    575           Op1 = Op1.getOperand(0).getOperand(0);
    576           SH  = (SHOpc == ISD::SHL) ? Value : 32 - Value;
    577         }
    578       }
    579 
    580       SH &= 31;
    581       SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
    582                           getI32Imm(ME, dl) };
    583       ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
    584       return true;
    585     }
    586   }
    587   return false;
    588 }
    589 
    590 // Predict the number of instructions that would be generated by calling
    591 // getInt64(N).
    592 static unsigned getInt64CountDirect(int64_t Imm) {
    593   // Assume no remaining bits.
    594   unsigned Remainder = 0;
    595   // Assume no shift required.
    596   unsigned Shift = 0;
    597 
    598   // If it can't be represented as a 32 bit value.
    599   if (!isInt<32>(Imm)) {
    600     Shift = countTrailingZeros<uint64_t>(Imm);
    601     int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
    602 
    603     // If the shifted value fits 32 bits.
    604     if (isInt<32>(ImmSh)) {
    605       // Go with the shifted value.
    606       Imm = ImmSh;
    607     } else {
    608       // Still stuck with a 64 bit value.
    609       Remainder = Imm;
    610       Shift = 32;
    611       Imm >>= 32;
    612     }
    613   }
    614 
    615   // Intermediate operand.
    616   unsigned Result = 0;
    617 
    618   // Handle first 32 bits.
    619   unsigned Lo = Imm & 0xFFFF;
    620 
    621   // Simple value.
    622   if (isInt<16>(Imm)) {
    623     // Just the Lo bits.
    624     ++Result;
    625   } else if (Lo) {
    626     // Handle the Hi bits and Lo bits.
    627     Result += 2;
    628   } else {
    629     // Just the Hi bits.
    630     ++Result;
    631   }
    632 
    633   // If no shift, we're done.
    634   if (!Shift) return Result;
    635 
    636   // Shift for next step if the upper 32-bits were not zero.
    637   if (Imm)
    638     ++Result;
    639 
    640   // Add in the last bits as required.
    641   if ((Remainder >> 16) & 0xFFFF)
    642     ++Result;
    643   if (Remainder & 0xFFFF)
    644     ++Result;
    645 
    646   return Result;
    647 }
    648 
    649 static uint64_t Rot64(uint64_t Imm, unsigned R) {
    650   return (Imm << R) | (Imm >> (64 - R));
    651 }
    652 
    653 static unsigned getInt64Count(int64_t Imm) {
    654   unsigned Count = getInt64CountDirect(Imm);
    655   if (Count == 1)
    656     return Count;
    657 
    658   for (unsigned r = 1; r < 63; ++r) {
    659     uint64_t RImm = Rot64(Imm, r);
    660     unsigned RCount = getInt64CountDirect(RImm) + 1;
    661     Count = std::min(Count, RCount);
    662 
    663     // See comments in getInt64 for an explanation of the logic below.
    664     unsigned LS = findLastSet(RImm);
    665     if (LS != r-1)
    666       continue;
    667 
    668     uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
    669     uint64_t RImmWithOnes = RImm | OnesMask;
    670 
    671     RCount = getInt64CountDirect(RImmWithOnes) + 1;
    672     Count = std::min(Count, RCount);
    673   }
    674 
    675   return Count;
    676 }
    677 
    678 // Select a 64-bit constant. For cost-modeling purposes, getInt64Count
    679 // (above) needs to be kept in sync with this function.
    680 static SDNode *getInt64Direct(SelectionDAG *CurDAG, const SDLoc &dl,
    681                               int64_t Imm) {
    682   // Assume no remaining bits.
    683   unsigned Remainder = 0;
    684   // Assume no shift required.
    685   unsigned Shift = 0;
    686 
    687   // If it can't be represented as a 32 bit value.
    688   if (!isInt<32>(Imm)) {
    689     Shift = countTrailingZeros<uint64_t>(Imm);
    690     int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
    691 
    692     // If the shifted value fits 32 bits.
    693     if (isInt<32>(ImmSh)) {
    694       // Go with the shifted value.
    695       Imm = ImmSh;
    696     } else {
    697       // Still stuck with a 64 bit value.
    698       Remainder = Imm;
    699       Shift = 32;
    700       Imm >>= 32;
    701     }
    702   }
    703 
    704   // Intermediate operand.
    705   SDNode *Result;
    706 
    707   // Handle first 32 bits.
    708   unsigned Lo = Imm & 0xFFFF;
    709   unsigned Hi = (Imm >> 16) & 0xFFFF;
    710 
    711   auto getI32Imm = [CurDAG, dl](unsigned Imm) {
    712       return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
    713   };
    714 
    715   // Simple value.
    716   if (isInt<16>(Imm)) {
    717     // Just the Lo bits.
    718     Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo));
    719   } else if (Lo) {
    720     // Handle the Hi bits.
    721     unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8;
    722     Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi));
    723     // And Lo bits.
    724     Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
    725                                     SDValue(Result, 0), getI32Imm(Lo));
    726   } else {
    727     // Just the Hi bits.
    728     Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi));
    729   }
    730 
    731   // If no shift, we're done.
    732   if (!Shift) return Result;
    733 
    734   // Shift for next step if the upper 32-bits were not zero.
    735   if (Imm) {
    736     Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64,
    737                                     SDValue(Result, 0),
    738                                     getI32Imm(Shift),
    739                                     getI32Imm(63 - Shift));
    740   }
    741 
    742   // Add in the last bits as required.
    743   if ((Hi = (Remainder >> 16) & 0xFFFF)) {
    744     Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
    745                                     SDValue(Result, 0), getI32Imm(Hi));
    746   }
    747   if ((Lo = Remainder & 0xFFFF)) {
    748     Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
    749                                     SDValue(Result, 0), getI32Imm(Lo));
    750   }
    751 
    752   return Result;
    753 }
    754 
    755 static SDNode *getInt64(SelectionDAG *CurDAG, const SDLoc &dl, int64_t Imm) {
    756   unsigned Count = getInt64CountDirect(Imm);
    757   if (Count == 1)
    758     return getInt64Direct(CurDAG, dl, Imm);
    759 
    760   unsigned RMin = 0;
    761 
    762   int64_t MatImm;
    763   unsigned MaskEnd;
    764 
    765   for (unsigned r = 1; r < 63; ++r) {
    766     uint64_t RImm = Rot64(Imm, r);
    767     unsigned RCount = getInt64CountDirect(RImm) + 1;
    768     if (RCount < Count) {
    769       Count = RCount;
    770       RMin = r;
    771       MatImm = RImm;
    772       MaskEnd = 63;
    773     }
    774 
    775     // If the immediate to generate has many trailing zeros, it might be
    776     // worthwhile to generate a rotated value with too many leading ones
    777     // (because that's free with li/lis's sign-extension semantics), and then
    778     // mask them off after rotation.
    779 
    780     unsigned LS = findLastSet(RImm);
    781     // We're adding (63-LS) higher-order ones, and we expect to mask them off
    782     // after performing the inverse rotation by (64-r). So we need that:
    783     //   63-LS == 64-r => LS == r-1
    784     if (LS != r-1)
    785       continue;
    786 
    787     uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
    788     uint64_t RImmWithOnes = RImm | OnesMask;
    789 
    790     RCount = getInt64CountDirect(RImmWithOnes) + 1;
    791     if (RCount < Count) {
    792       Count = RCount;
    793       RMin = r;
    794       MatImm = RImmWithOnes;
    795       MaskEnd = LS;
    796     }
    797   }
    798 
    799   if (!RMin)
    800     return getInt64Direct(CurDAG, dl, Imm);
    801 
    802   auto getI32Imm = [CurDAG, dl](unsigned Imm) {
    803       return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
    804   };
    805 
    806   SDValue Val = SDValue(getInt64Direct(CurDAG, dl, MatImm), 0);
    807   return CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Val,
    808                                 getI32Imm(64 - RMin), getI32Imm(MaskEnd));
    809 }
    810 
    811 // Select a 64-bit constant.
    812 static SDNode *getInt64(SelectionDAG *CurDAG, SDNode *N) {
    813   SDLoc dl(N);
    814 
    815   // Get 64 bit value.
    816   int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
    817   return getInt64(CurDAG, dl, Imm);
    818 }
    819 
    820 namespace {
    821 class BitPermutationSelector {
    822   struct ValueBit {
    823     SDValue V;
    824 
    825     // The bit number in the value, using a convention where bit 0 is the
    826     // lowest-order bit.
    827     unsigned Idx;
    828 
    829     enum Kind {
    830       ConstZero,
    831       Variable
    832     } K;
    833 
    834     ValueBit(SDValue V, unsigned I, Kind K = Variable)
    835       : V(V), Idx(I), K(K) {}
    836     ValueBit(Kind K = Variable)
    837       : V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {}
    838 
    839     bool isZero() const {
    840       return K == ConstZero;
    841     }
    842 
    843     bool hasValue() const {
    844       return K == Variable;
    845     }
    846 
    847     SDValue getValue() const {
    848       assert(hasValue() && "Cannot get the value of a constant bit");
    849       return V;
    850     }
    851 
    852     unsigned getValueBitIndex() const {
    853       assert(hasValue() && "Cannot get the value bit index of a constant bit");
    854       return Idx;
    855     }
    856   };
    857 
    858   // A bit group has the same underlying value and the same rotate factor.
    859   struct BitGroup {
    860     SDValue V;
    861     unsigned RLAmt;
    862     unsigned StartIdx, EndIdx;
    863 
    864     // This rotation amount assumes that the lower 32 bits of the quantity are
    865     // replicated in the high 32 bits by the rotation operator (which is done
    866     // by rlwinm and friends in 64-bit mode).
    867     bool Repl32;
    868     // Did converting to Repl32 == true change the rotation factor? If it did,
    869     // it decreased it by 32.
    870     bool Repl32CR;
    871     // Was this group coalesced after setting Repl32 to true?
    872     bool Repl32Coalesced;
    873 
    874     BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
    875       : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
    876         Repl32Coalesced(false) {
    877       DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R <<
    878                       " [" << S << ", " << E << "]\n");
    879     }
    880   };
    881 
    882   // Information on each (Value, RLAmt) pair (like the number of groups
    883   // associated with each) used to choose the lowering method.
    884   struct ValueRotInfo {
    885     SDValue V;
    886     unsigned RLAmt;
    887     unsigned NumGroups;
    888     unsigned FirstGroupStartIdx;
    889     bool Repl32;
    890 
    891     ValueRotInfo()
    892       : RLAmt(UINT32_MAX), NumGroups(0), FirstGroupStartIdx(UINT32_MAX),
    893         Repl32(false) {}
    894 
    895     // For sorting (in reverse order) by NumGroups, and then by
    896     // FirstGroupStartIdx.
    897     bool operator < (const ValueRotInfo &Other) const {
    898       // We need to sort so that the non-Repl32 come first because, when we're
    899       // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
    900       // masking operation.
    901       if (Repl32 < Other.Repl32)
    902         return true;
    903       else if (Repl32 > Other.Repl32)
    904         return false;
    905       else if (NumGroups > Other.NumGroups)
    906         return true;
    907       else if (NumGroups < Other.NumGroups)
    908         return false;
    909       else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
    910         return true;
    911       return false;
    912     }
    913   };
    914 
    915   // Return true if something interesting was deduced, return false if we're
    916   // providing only a generic representation of V (or something else likewise
    917   // uninteresting for instruction selection).
    918   bool getValueBits(SDValue V, SmallVector<ValueBit, 64> &Bits) {
    919     switch (V.getOpcode()) {
    920     default: break;
    921     case ISD::ROTL:
    922       if (isa<ConstantSDNode>(V.getOperand(1))) {
    923         unsigned RotAmt = V.getConstantOperandVal(1);
    924 
    925         SmallVector<ValueBit, 64> LHSBits(Bits.size());
    926         getValueBits(V.getOperand(0), LHSBits);
    927 
    928         for (unsigned i = 0; i < Bits.size(); ++i)
    929           Bits[i] = LHSBits[i < RotAmt ? i + (Bits.size() - RotAmt) : i - RotAmt];
    930 
    931         return true;
    932       }
    933       break;
    934     case ISD::SHL:
    935       if (isa<ConstantSDNode>(V.getOperand(1))) {
    936         unsigned ShiftAmt = V.getConstantOperandVal(1);
    937 
    938         SmallVector<ValueBit, 64> LHSBits(Bits.size());
    939         getValueBits(V.getOperand(0), LHSBits);
    940 
    941         for (unsigned i = ShiftAmt; i < Bits.size(); ++i)
    942           Bits[i] = LHSBits[i - ShiftAmt];
    943 
    944         for (unsigned i = 0; i < ShiftAmt; ++i)
    945           Bits[i] = ValueBit(ValueBit::ConstZero);
    946 
    947         return true;
    948       }
    949       break;
    950     case ISD::SRL:
    951       if (isa<ConstantSDNode>(V.getOperand(1))) {
    952         unsigned ShiftAmt = V.getConstantOperandVal(1);
    953 
    954         SmallVector<ValueBit, 64> LHSBits(Bits.size());
    955         getValueBits(V.getOperand(0), LHSBits);
    956 
    957         for (unsigned i = 0; i < Bits.size() - ShiftAmt; ++i)
    958           Bits[i] = LHSBits[i + ShiftAmt];
    959 
    960         for (unsigned i = Bits.size() - ShiftAmt; i < Bits.size(); ++i)
    961           Bits[i] = ValueBit(ValueBit::ConstZero);
    962 
    963         return true;
    964       }
    965       break;
    966     case ISD::AND:
    967       if (isa<ConstantSDNode>(V.getOperand(1))) {
    968         uint64_t Mask = V.getConstantOperandVal(1);
    969 
    970         SmallVector<ValueBit, 64> LHSBits(Bits.size());
    971         bool LHSTrivial = getValueBits(V.getOperand(0), LHSBits);
    972 
    973         for (unsigned i = 0; i < Bits.size(); ++i)
    974           if (((Mask >> i) & 1) == 1)
    975             Bits[i] = LHSBits[i];
    976           else
    977             Bits[i] = ValueBit(ValueBit::ConstZero);
    978 
    979         // Mark this as interesting, only if the LHS was also interesting. This
    980         // prevents the overall procedure from matching a single immediate 'and'
    981         // (which is non-optimal because such an and might be folded with other
    982         // things if we don't select it here).
    983         return LHSTrivial;
    984       }
    985       break;
    986     case ISD::OR: {
    987       SmallVector<ValueBit, 64> LHSBits(Bits.size()), RHSBits(Bits.size());
    988       getValueBits(V.getOperand(0), LHSBits);
    989       getValueBits(V.getOperand(1), RHSBits);
    990 
    991       bool AllDisjoint = true;
    992       for (unsigned i = 0; i < Bits.size(); ++i)
    993         if (LHSBits[i].isZero())
    994           Bits[i] = RHSBits[i];
    995         else if (RHSBits[i].isZero())
    996           Bits[i] = LHSBits[i];
    997         else {
    998           AllDisjoint = false;
    999           break;
   1000         }
   1001 
   1002       if (!AllDisjoint)
   1003         break;
   1004 
   1005       return true;
   1006     }
   1007     }
   1008 
   1009     for (unsigned i = 0; i < Bits.size(); ++i)
   1010       Bits[i] = ValueBit(V, i);
   1011 
   1012     return false;
   1013   }
   1014 
   1015   // For each value (except the constant ones), compute the left-rotate amount
   1016   // to get it from its original to final position.
   1017   void computeRotationAmounts() {
   1018     HasZeros = false;
   1019     RLAmt.resize(Bits.size());
   1020     for (unsigned i = 0; i < Bits.size(); ++i)
   1021       if (Bits[i].hasValue()) {
   1022         unsigned VBI = Bits[i].getValueBitIndex();
   1023         if (i >= VBI)
   1024           RLAmt[i] = i - VBI;
   1025         else
   1026           RLAmt[i] = Bits.size() - (VBI - i);
   1027       } else if (Bits[i].isZero()) {
   1028         HasZeros = true;
   1029         RLAmt[i] = UINT32_MAX;
   1030       } else {
   1031         llvm_unreachable("Unknown value bit type");
   1032       }
   1033   }
   1034 
   1035   // Collect groups of consecutive bits with the same underlying value and
   1036   // rotation factor. If we're doing late masking, we ignore zeros, otherwise
   1037   // they break up groups.
   1038   void collectBitGroups(bool LateMask) {
   1039     BitGroups.clear();
   1040 
   1041     unsigned LastRLAmt = RLAmt[0];
   1042     SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
   1043     unsigned LastGroupStartIdx = 0;
   1044     for (unsigned i = 1; i < Bits.size(); ++i) {
   1045       unsigned ThisRLAmt = RLAmt[i];
   1046       SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
   1047       if (LateMask && !ThisValue) {
   1048         ThisValue = LastValue;
   1049         ThisRLAmt = LastRLAmt;
   1050         // If we're doing late masking, then the first bit group always starts
   1051         // at zero (even if the first bits were zero).
   1052         if (BitGroups.empty())
   1053           LastGroupStartIdx = 0;
   1054       }
   1055 
   1056       // If this bit has the same underlying value and the same rotate factor as
   1057       // the last one, then they're part of the same group.
   1058       if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
   1059         continue;
   1060 
   1061       if (LastValue.getNode())
   1062         BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
   1063                                      i-1));
   1064       LastRLAmt = ThisRLAmt;
   1065       LastValue = ThisValue;
   1066       LastGroupStartIdx = i;
   1067     }
   1068     if (LastValue.getNode())
   1069       BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
   1070                                    Bits.size()-1));
   1071 
   1072     if (BitGroups.empty())
   1073       return;
   1074 
   1075     // We might be able to combine the first and last groups.
   1076     if (BitGroups.size() > 1) {
   1077       // If the first and last groups are the same, then remove the first group
   1078       // in favor of the last group, making the ending index of the last group
   1079       // equal to the ending index of the to-be-removed first group.
   1080       if (BitGroups[0].StartIdx == 0 &&
   1081           BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
   1082           BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
   1083           BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
   1084         DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
   1085         BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
   1086         BitGroups.erase(BitGroups.begin());
   1087       }
   1088     }
   1089   }
   1090 
   1091   // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
   1092   // associated with each. If there is a degeneracy, pick the one that occurs
   1093   // first (in the final value).
   1094   void collectValueRotInfo() {
   1095     ValueRots.clear();
   1096 
   1097     for (auto &BG : BitGroups) {
   1098       unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
   1099       ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
   1100       VRI.V = BG.V;
   1101       VRI.RLAmt = BG.RLAmt;
   1102       VRI.Repl32 = BG.Repl32;
   1103       VRI.NumGroups += 1;
   1104       VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
   1105     }
   1106 
   1107     // Now that we've collected the various ValueRotInfo instances, we need to
   1108     // sort them.
   1109     ValueRotsVec.clear();
   1110     for (auto &I : ValueRots) {
   1111       ValueRotsVec.push_back(I.second);
   1112     }
   1113     std::sort(ValueRotsVec.begin(), ValueRotsVec.end());
   1114   }
   1115 
   1116   // In 64-bit mode, rlwinm and friends have a rotation operator that
   1117   // replicates the low-order 32 bits into the high-order 32-bits. The mask
   1118   // indices of these instructions can only be in the lower 32 bits, so they
   1119   // can only represent some 64-bit bit groups. However, when they can be used,
   1120   // the 32-bit replication can be used to represent, as a single bit group,
   1121   // otherwise separate bit groups. We'll convert to replicated-32-bit bit
   1122   // groups when possible. Returns true if any of the bit groups were
   1123   // converted.
   1124   void assignRepl32BitGroups() {
   1125     // If we have bits like this:
   1126     //
   1127     // Indices:    15 14 13 12 11 10 9 8  7  6  5  4  3  2  1  0
   1128     // V bits: ... 7  6  5  4  3  2  1 0 31 30 29 28 27 26 25 24
   1129     // Groups:    |      RLAmt = 8      |      RLAmt = 40       |
   1130     //
   1131     // But, making use of a 32-bit operation that replicates the low-order 32
   1132     // bits into the high-order 32 bits, this can be one bit group with a RLAmt
   1133     // of 8.
   1134 
   1135     auto IsAllLow32 = [this](BitGroup & BG) {
   1136       if (BG.StartIdx <= BG.EndIdx) {
   1137         for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
   1138           if (!Bits[i].hasValue())
   1139             continue;
   1140           if (Bits[i].getValueBitIndex() >= 32)
   1141             return false;
   1142         }
   1143       } else {
   1144         for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
   1145           if (!Bits[i].hasValue())
   1146             continue;
   1147           if (Bits[i].getValueBitIndex() >= 32)
   1148             return false;
   1149         }
   1150         for (unsigned i = 0; i <= BG.EndIdx; ++i) {
   1151           if (!Bits[i].hasValue())
   1152             continue;
   1153           if (Bits[i].getValueBitIndex() >= 32)
   1154             return false;
   1155         }
   1156       }
   1157 
   1158       return true;
   1159     };
   1160 
   1161     for (auto &BG : BitGroups) {
   1162       if (BG.StartIdx < 32 && BG.EndIdx < 32) {
   1163         if (IsAllLow32(BG)) {
   1164           if (BG.RLAmt >= 32) {
   1165             BG.RLAmt -= 32;
   1166             BG.Repl32CR = true;
   1167           }
   1168 
   1169           BG.Repl32 = true;
   1170 
   1171           DEBUG(dbgs() << "\t32-bit replicated bit group for " <<
   1172                           BG.V.getNode() << " RLAmt = " << BG.RLAmt <<
   1173                           " [" << BG.StartIdx << ", " << BG.EndIdx << "]\n");
   1174         }
   1175       }
   1176     }
   1177 
   1178     // Now walk through the bit groups, consolidating where possible.
   1179     for (auto I = BitGroups.begin(); I != BitGroups.end();) {
   1180       // We might want to remove this bit group by merging it with the previous
   1181       // group (which might be the ending group).
   1182       auto IP = (I == BitGroups.begin()) ?
   1183                 std::prev(BitGroups.end()) : std::prev(I);
   1184       if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&
   1185           I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {
   1186 
   1187         DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for " <<
   1188                         I->V.getNode() << " RLAmt = " << I->RLAmt <<
   1189                         " [" << I->StartIdx << ", " << I->EndIdx <<
   1190                         "] with group with range [" <<
   1191                         IP->StartIdx << ", " << IP->EndIdx << "]\n");
   1192 
   1193         IP->EndIdx = I->EndIdx;
   1194         IP->Repl32CR = IP->Repl32CR || I->Repl32CR;
   1195         IP->Repl32Coalesced = true;
   1196         I = BitGroups.erase(I);
   1197         continue;
   1198       } else {
   1199         // There is a special case worth handling: If there is a single group
   1200         // covering the entire upper 32 bits, and it can be merged with both
   1201         // the next and previous groups (which might be the same group), then
   1202         // do so. If it is the same group (so there will be only one group in
   1203         // total), then we need to reverse the order of the range so that it
   1204         // covers the entire 64 bits.
   1205         if (I->StartIdx == 32 && I->EndIdx == 63) {
   1206           assert(std::next(I) == BitGroups.end() &&
   1207                  "bit group ends at index 63 but there is another?");
   1208           auto IN = BitGroups.begin();
   1209 
   1210           if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&
   1211               (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&
   1212               IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&
   1213               IsAllLow32(*I)) {
   1214 
   1215             DEBUG(dbgs() << "\tcombining bit group for " <<
   1216                             I->V.getNode() << " RLAmt = " << I->RLAmt <<
   1217                             " [" << I->StartIdx << ", " << I->EndIdx <<
   1218                             "] with 32-bit replicated groups with ranges [" <<
   1219                             IP->StartIdx << ", " << IP->EndIdx << "] and [" <<
   1220                             IN->StartIdx << ", " << IN->EndIdx << "]\n");
   1221 
   1222             if (IP == IN) {
   1223               // There is only one other group; change it to cover the whole
   1224               // range (backward, so that it can still be Repl32 but cover the
   1225               // whole 64-bit range).
   1226               IP->StartIdx = 31;
   1227               IP->EndIdx = 30;
   1228               IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;
   1229               IP->Repl32Coalesced = true;
   1230               I = BitGroups.erase(I);
   1231             } else {
   1232               // There are two separate groups, one before this group and one
   1233               // after us (at the beginning). We're going to remove this group,
   1234               // but also the group at the very beginning.
   1235               IP->EndIdx = IN->EndIdx;
   1236               IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;
   1237               IP->Repl32Coalesced = true;
   1238               I = BitGroups.erase(I);
   1239               BitGroups.erase(BitGroups.begin());
   1240             }
   1241 
   1242             // This must be the last group in the vector (and we might have
   1243             // just invalidated the iterator above), so break here.
   1244             break;
   1245           }
   1246         }
   1247       }
   1248 
   1249       ++I;
   1250     }
   1251   }
   1252 
   1253   SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
   1254     return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
   1255   }
   1256 
   1257   uint64_t getZerosMask() {
   1258     uint64_t Mask = 0;
   1259     for (unsigned i = 0; i < Bits.size(); ++i) {
   1260       if (Bits[i].hasValue())
   1261         continue;
   1262       Mask |= (UINT64_C(1) << i);
   1263     }
   1264 
   1265     return ~Mask;
   1266   }
   1267 
   1268   // Depending on the number of groups for a particular value, it might be
   1269   // better to rotate, mask explicitly (using andi/andis), and then or the
   1270   // result. Select this part of the result first.
   1271   void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
   1272     if (BPermRewriterNoMasking)
   1273       return;
   1274 
   1275     for (ValueRotInfo &VRI : ValueRotsVec) {
   1276       unsigned Mask = 0;
   1277       for (unsigned i = 0; i < Bits.size(); ++i) {
   1278         if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
   1279           continue;
   1280         if (RLAmt[i] != VRI.RLAmt)
   1281           continue;
   1282         Mask |= (1u << i);
   1283       }
   1284 
   1285       // Compute the masks for andi/andis that would be necessary.
   1286       unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
   1287       assert((ANDIMask != 0 || ANDISMask != 0) &&
   1288              "No set bits in mask for value bit groups");
   1289       bool NeedsRotate = VRI.RLAmt != 0;
   1290 
   1291       // We're trying to minimize the number of instructions. If we have one
   1292       // group, using one of andi/andis can break even.  If we have three
   1293       // groups, we can use both andi and andis and break even (to use both
   1294       // andi and andis we also need to or the results together). We need four
   1295       // groups if we also need to rotate. To use andi/andis we need to do more
   1296       // than break even because rotate-and-mask instructions tend to be easier
   1297       // to schedule.
   1298 
   1299       // FIXME: We've biased here against using andi/andis, which is right for
   1300       // POWER cores, but not optimal everywhere. For example, on the A2,
   1301       // andi/andis have single-cycle latency whereas the rotate-and-mask
   1302       // instructions take two cycles, and it would be better to bias toward
   1303       // andi/andis in break-even cases.
   1304 
   1305       unsigned NumAndInsts = (unsigned) NeedsRotate +
   1306                              (unsigned) (ANDIMask != 0) +
   1307                              (unsigned) (ANDISMask != 0) +
   1308                              (unsigned) (ANDIMask != 0 && ANDISMask != 0) +
   1309                              (unsigned) (bool) Res;
   1310 
   1311       DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() <<
   1312                       " RL: " << VRI.RLAmt << ":" <<
   1313                       "\n\t\t\tisel using masking: " << NumAndInsts <<
   1314                       " using rotates: " << VRI.NumGroups << "\n");
   1315 
   1316       if (NumAndInsts >= VRI.NumGroups)
   1317         continue;
   1318 
   1319       DEBUG(dbgs() << "\t\t\t\tusing masking\n");
   1320 
   1321       if (InstCnt) *InstCnt += NumAndInsts;
   1322 
   1323       SDValue VRot;
   1324       if (VRI.RLAmt) {
   1325         SDValue Ops[] =
   1326           { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
   1327             getI32Imm(31, dl) };
   1328         VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
   1329                                               Ops), 0);
   1330       } else {
   1331         VRot = VRI.V;
   1332       }
   1333 
   1334       SDValue ANDIVal, ANDISVal;
   1335       if (ANDIMask != 0)
   1336         ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
   1337                             VRot, getI32Imm(ANDIMask, dl)), 0);
   1338       if (ANDISMask != 0)
   1339         ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
   1340                              VRot, getI32Imm(ANDISMask, dl)), 0);
   1341 
   1342       SDValue TotalVal;
   1343       if (!ANDIVal)
   1344         TotalVal = ANDISVal;
   1345       else if (!ANDISVal)
   1346         TotalVal = ANDIVal;
   1347       else
   1348         TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
   1349                              ANDIVal, ANDISVal), 0);
   1350 
   1351       if (!Res)
   1352         Res = TotalVal;
   1353       else
   1354         Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
   1355                         Res, TotalVal), 0);
   1356 
   1357       // Now, remove all groups with this underlying value and rotation
   1358       // factor.
   1359       eraseMatchingBitGroups([VRI](const BitGroup &BG) {
   1360         return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
   1361       });
   1362     }
   1363   }
   1364 
   1365   // Instruction selection for the 32-bit case.
   1366   SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
   1367     SDLoc dl(N);
   1368     SDValue Res;
   1369 
   1370     if (InstCnt) *InstCnt = 0;
   1371 
   1372     // Take care of cases that should use andi/andis first.
   1373     SelectAndParts32(dl, Res, InstCnt);
   1374 
   1375     // If we've not yet selected a 'starting' instruction, and we have no zeros
   1376     // to fill in, select the (Value, RLAmt) with the highest priority (largest
   1377     // number of groups), and start with this rotated value.
   1378     if ((!HasZeros || LateMask) && !Res) {
   1379       ValueRotInfo &VRI = ValueRotsVec[0];
   1380       if (VRI.RLAmt) {
   1381         if (InstCnt) *InstCnt += 1;
   1382         SDValue Ops[] =
   1383           { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
   1384             getI32Imm(31, dl) };
   1385         Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
   1386                       0);
   1387       } else {
   1388         Res = VRI.V;
   1389       }
   1390 
   1391       // Now, remove all groups with this underlying value and rotation factor.
   1392       eraseMatchingBitGroups([VRI](const BitGroup &BG) {
   1393         return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
   1394       });
   1395     }
   1396 
   1397     if (InstCnt) *InstCnt += BitGroups.size();
   1398 
   1399     // Insert the other groups (one at a time).
   1400     for (auto &BG : BitGroups) {
   1401       if (!Res) {
   1402         SDValue Ops[] =
   1403           { BG.V, getI32Imm(BG.RLAmt, dl),
   1404             getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
   1405             getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
   1406         Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
   1407       } else {
   1408         SDValue Ops[] =
   1409           { Res, BG.V, getI32Imm(BG.RLAmt, dl),
   1410               getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
   1411             getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
   1412         Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
   1413       }
   1414     }
   1415 
   1416     if (LateMask) {
   1417       unsigned Mask = (unsigned) getZerosMask();
   1418 
   1419       unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
   1420       assert((ANDIMask != 0 || ANDISMask != 0) &&
   1421              "No set bits in zeros mask?");
   1422 
   1423       if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
   1424                                (unsigned) (ANDISMask != 0) +
   1425                                (unsigned) (ANDIMask != 0 && ANDISMask != 0);
   1426 
   1427       SDValue ANDIVal, ANDISVal;
   1428       if (ANDIMask != 0)
   1429         ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
   1430                             Res, getI32Imm(ANDIMask, dl)), 0);
   1431       if (ANDISMask != 0)
   1432         ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
   1433                              Res, getI32Imm(ANDISMask, dl)), 0);
   1434 
   1435       if (!ANDIVal)
   1436         Res = ANDISVal;
   1437       else if (!ANDISVal)
   1438         Res = ANDIVal;
   1439       else
   1440         Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
   1441                         ANDIVal, ANDISVal), 0);
   1442     }
   1443 
   1444     return Res.getNode();
   1445   }
   1446 
   1447   unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
   1448                                 unsigned MaskStart, unsigned MaskEnd,
   1449                                 bool IsIns) {
   1450     // In the notation used by the instructions, 'start' and 'end' are reversed
   1451     // because bits are counted from high to low order.
   1452     unsigned InstMaskStart = 64 - MaskEnd - 1,
   1453              InstMaskEnd   = 64 - MaskStart - 1;
   1454 
   1455     if (Repl32)
   1456       return 1;
   1457 
   1458     if ((!IsIns && (InstMaskEnd == 63 || InstMaskStart == 0)) ||
   1459         InstMaskEnd == 63 - RLAmt)
   1460       return 1;
   1461 
   1462     return 2;
   1463   }
   1464 
   1465   // For 64-bit values, not all combinations of rotates and masks are
   1466   // available. Produce one if it is available.
   1467   SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt,
   1468                           bool Repl32, unsigned MaskStart, unsigned MaskEnd,
   1469                           unsigned *InstCnt = nullptr) {
   1470     // In the notation used by the instructions, 'start' and 'end' are reversed
   1471     // because bits are counted from high to low order.
   1472     unsigned InstMaskStart = 64 - MaskEnd - 1,
   1473              InstMaskEnd   = 64 - MaskStart - 1;
   1474 
   1475     if (InstCnt) *InstCnt += 1;
   1476 
   1477     if (Repl32) {
   1478       // This rotation amount assumes that the lower 32 bits of the quantity
   1479       // are replicated in the high 32 bits by the rotation operator (which is
   1480       // done by rlwinm and friends).
   1481       assert(InstMaskStart >= 32 && "Mask cannot start out of range");
   1482       assert(InstMaskEnd   >= 32 && "Mask cannot end out of range");
   1483       SDValue Ops[] =
   1484         { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart - 32, dl),
   1485           getI32Imm(InstMaskEnd - 32, dl) };
   1486       return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
   1487                                             Ops), 0);
   1488     }
   1489 
   1490     if (InstMaskEnd == 63) {
   1491       SDValue Ops[] =
   1492         { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) };
   1493       return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
   1494     }
   1495 
   1496     if (InstMaskStart == 0) {
   1497       SDValue Ops[] =
   1498         { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskEnd, dl) };
   1499       return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
   1500     }
   1501 
   1502     if (InstMaskEnd == 63 - RLAmt) {
   1503       SDValue Ops[] =
   1504         { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) };
   1505       return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
   1506     }
   1507 
   1508     // We cannot do this with a single instruction, so we'll use two. The
   1509     // problem is that we're not free to choose both a rotation amount and mask
   1510     // start and end independently. We can choose an arbitrary mask start and
   1511     // end, but then the rotation amount is fixed. Rotation, however, can be
   1512     // inverted, and so by applying an "inverse" rotation first, we can get the
   1513     // desired result.
   1514     if (InstCnt) *InstCnt += 1;
   1515 
   1516     // The rotation mask for the second instruction must be MaskStart.
   1517     unsigned RLAmt2 = MaskStart;
   1518     // The first instruction must rotate V so that the overall rotation amount
   1519     // is RLAmt.
   1520     unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
   1521     if (RLAmt1)
   1522       V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
   1523     return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd);
   1524   }
   1525 
   1526   // For 64-bit values, not all combinations of rotates and masks are
   1527   // available. Produce a rotate-mask-and-insert if one is available.
   1528   SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl,
   1529                              unsigned RLAmt, bool Repl32, unsigned MaskStart,
   1530                              unsigned MaskEnd, unsigned *InstCnt = nullptr) {
   1531     // In the notation used by the instructions, 'start' and 'end' are reversed
   1532     // because bits are counted from high to low order.
   1533     unsigned InstMaskStart = 64 - MaskEnd - 1,
   1534              InstMaskEnd   = 64 - MaskStart - 1;
   1535 
   1536     if (InstCnt) *InstCnt += 1;
   1537 
   1538     if (Repl32) {
   1539       // This rotation amount assumes that the lower 32 bits of the quantity
   1540       // are replicated in the high 32 bits by the rotation operator (which is
   1541       // done by rlwinm and friends).
   1542       assert(InstMaskStart >= 32 && "Mask cannot start out of range");
   1543       assert(InstMaskEnd   >= 32 && "Mask cannot end out of range");
   1544       SDValue Ops[] =
   1545         { Base, V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart - 32, dl),
   1546           getI32Imm(InstMaskEnd - 32, dl) };
   1547       return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
   1548                                             Ops), 0);
   1549     }
   1550 
   1551     if (InstMaskEnd == 63 - RLAmt) {
   1552       SDValue Ops[] =
   1553         { Base, V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) };
   1554       return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
   1555     }
   1556 
   1557     // We cannot do this with a single instruction, so we'll use two. The
   1558     // problem is that we're not free to choose both a rotation amount and mask
   1559     // start and end independently. We can choose an arbitrary mask start and
   1560     // end, but then the rotation amount is fixed. Rotation, however, can be
   1561     // inverted, and so by applying an "inverse" rotation first, we can get the
   1562     // desired result.
   1563     if (InstCnt) *InstCnt += 1;
   1564 
   1565     // The rotation mask for the second instruction must be MaskStart.
   1566     unsigned RLAmt2 = MaskStart;
   1567     // The first instruction must rotate V so that the overall rotation amount
   1568     // is RLAmt.
   1569     unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
   1570     if (RLAmt1)
   1571       V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
   1572     return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);
   1573   }
   1574 
   1575   void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
   1576     if (BPermRewriterNoMasking)
   1577       return;
   1578 
   1579     // The idea here is the same as in the 32-bit version, but with additional
   1580     // complications from the fact that Repl32 might be true. Because we
   1581     // aggressively convert bit groups to Repl32 form (which, for small
   1582     // rotation factors, involves no other change), and then coalesce, it might
   1583     // be the case that a single 64-bit masking operation could handle both
   1584     // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
   1585     // form allowed coalescing, then we must use a 32-bit rotaton in order to
   1586     // completely capture the new combined bit group.
   1587 
   1588     for (ValueRotInfo &VRI : ValueRotsVec) {
   1589       uint64_t Mask = 0;
   1590 
   1591       // We need to add to the mask all bits from the associated bit groups.
   1592       // If Repl32 is false, we need to add bits from bit groups that have
   1593       // Repl32 true, but are trivially convertable to Repl32 false. Such a
   1594       // group is trivially convertable if it overlaps only with the lower 32
   1595       // bits, and the group has not been coalesced.
   1596       auto MatchingBG = [VRI](const BitGroup &BG) {
   1597         if (VRI.V != BG.V)
   1598           return false;
   1599 
   1600         unsigned EffRLAmt = BG.RLAmt;
   1601         if (!VRI.Repl32 && BG.Repl32) {
   1602           if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
   1603               !BG.Repl32Coalesced) {
   1604             if (BG.Repl32CR)
   1605               EffRLAmt += 32;
   1606           } else {
   1607             return false;
   1608           }
   1609         } else if (VRI.Repl32 != BG.Repl32) {
   1610           return false;
   1611         }
   1612 
   1613         return VRI.RLAmt == EffRLAmt;
   1614       };
   1615 
   1616       for (auto &BG : BitGroups) {
   1617         if (!MatchingBG(BG))
   1618           continue;
   1619 
   1620         if (BG.StartIdx <= BG.EndIdx) {
   1621           for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
   1622             Mask |= (UINT64_C(1) << i);
   1623         } else {
   1624           for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)
   1625             Mask |= (UINT64_C(1) << i);
   1626           for (unsigned i = 0; i <= BG.EndIdx; ++i)
   1627             Mask |= (UINT64_C(1) << i);
   1628         }
   1629       }
   1630 
   1631       // We can use the 32-bit andi/andis technique if the mask does not
   1632       // require any higher-order bits. This can save an instruction compared
   1633       // to always using the general 64-bit technique.
   1634       bool Use32BitInsts = isUInt<32>(Mask);
   1635       // Compute the masks for andi/andis that would be necessary.
   1636       unsigned ANDIMask = (Mask & UINT16_MAX),
   1637                ANDISMask = (Mask >> 16) & UINT16_MAX;
   1638 
   1639       bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));
   1640 
   1641       unsigned NumAndInsts = (unsigned) NeedsRotate +
   1642                              (unsigned) (bool) Res;
   1643       if (Use32BitInsts)
   1644         NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
   1645                        (unsigned) (ANDIMask != 0 && ANDISMask != 0);
   1646       else
   1647         NumAndInsts += getInt64Count(Mask) + /* and */ 1;
   1648 
   1649       unsigned NumRLInsts = 0;
   1650       bool FirstBG = true;
   1651       for (auto &BG : BitGroups) {
   1652         if (!MatchingBG(BG))
   1653           continue;
   1654         NumRLInsts +=
   1655           SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
   1656                                !FirstBG);
   1657         FirstBG = false;
   1658       }
   1659 
   1660       DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() <<
   1661                       " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":") <<
   1662                       "\n\t\t\tisel using masking: " << NumAndInsts <<
   1663                       " using rotates: " << NumRLInsts << "\n");
   1664 
   1665       // When we'd use andi/andis, we bias toward using the rotates (andi only
   1666       // has a record form, and is cracked on POWER cores). However, when using
   1667       // general 64-bit constant formation, bias toward the constant form,
   1668       // because that exposes more opportunities for CSE.
   1669       if (NumAndInsts > NumRLInsts)
   1670         continue;
   1671       if (Use32BitInsts && NumAndInsts == NumRLInsts)
   1672         continue;
   1673 
   1674       DEBUG(dbgs() << "\t\t\t\tusing masking\n");
   1675 
   1676       if (InstCnt) *InstCnt += NumAndInsts;
   1677 
   1678       SDValue VRot;
   1679       // We actually need to generate a rotation if we have a non-zero rotation
   1680       // factor or, in the Repl32 case, if we care about any of the
   1681       // higher-order replicated bits. In the latter case, we generate a mask
   1682       // backward so that it actually includes the entire 64 bits.
   1683       if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))
   1684         VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
   1685                                VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);
   1686       else
   1687         VRot = VRI.V;
   1688 
   1689       SDValue TotalVal;
   1690       if (Use32BitInsts) {
   1691         assert((ANDIMask != 0 || ANDISMask != 0) &&
   1692                "No set bits in mask when using 32-bit ands for 64-bit value");
   1693 
   1694         SDValue ANDIVal, ANDISVal;
   1695         if (ANDIMask != 0)
   1696           ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
   1697                               VRot, getI32Imm(ANDIMask, dl)), 0);
   1698         if (ANDISMask != 0)
   1699           ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
   1700                                VRot, getI32Imm(ANDISMask, dl)), 0);
   1701 
   1702         if (!ANDIVal)
   1703           TotalVal = ANDISVal;
   1704         else if (!ANDISVal)
   1705           TotalVal = ANDIVal;
   1706         else
   1707           TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
   1708                                ANDIVal, ANDISVal), 0);
   1709       } else {
   1710         TotalVal = SDValue(getInt64(CurDAG, dl, Mask), 0);
   1711         TotalVal =
   1712           SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
   1713                                          VRot, TotalVal), 0);
   1714      }
   1715 
   1716       if (!Res)
   1717         Res = TotalVal;
   1718       else
   1719         Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
   1720                                              Res, TotalVal), 0);
   1721 
   1722       // Now, remove all groups with this underlying value and rotation
   1723       // factor.
   1724       eraseMatchingBitGroups(MatchingBG);
   1725     }
   1726   }
   1727 
   1728   // Instruction selection for the 64-bit case.
   1729   SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
   1730     SDLoc dl(N);
   1731     SDValue Res;
   1732 
   1733     if (InstCnt) *InstCnt = 0;
   1734 
   1735     // Take care of cases that should use andi/andis first.
   1736     SelectAndParts64(dl, Res, InstCnt);
   1737 
   1738     // If we've not yet selected a 'starting' instruction, and we have no zeros
   1739     // to fill in, select the (Value, RLAmt) with the highest priority (largest
   1740     // number of groups), and start with this rotated value.
   1741     if ((!HasZeros || LateMask) && !Res) {
   1742       // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
   1743       // groups will come first, and so the VRI representing the largest number
   1744       // of groups might not be first (it might be the first Repl32 groups).
   1745       unsigned MaxGroupsIdx = 0;
   1746       if (!ValueRotsVec[0].Repl32) {
   1747         for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)
   1748           if (ValueRotsVec[i].Repl32) {
   1749             if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
   1750               MaxGroupsIdx = i;
   1751             break;
   1752           }
   1753       }
   1754 
   1755       ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
   1756       bool NeedsRotate = false;
   1757       if (VRI.RLAmt) {
   1758         NeedsRotate = true;
   1759       } else if (VRI.Repl32) {
   1760         for (auto &BG : BitGroups) {
   1761           if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
   1762               BG.Repl32 != VRI.Repl32)
   1763             continue;
   1764 
   1765           // We don't need a rotate if the bit group is confined to the lower
   1766           // 32 bits.
   1767           if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
   1768             continue;
   1769 
   1770           NeedsRotate = true;
   1771           break;
   1772         }
   1773       }
   1774 
   1775       if (NeedsRotate)
   1776         Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
   1777                               VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
   1778                               InstCnt);
   1779       else
   1780         Res = VRI.V;
   1781 
   1782       // Now, remove all groups with this underlying value and rotation factor.
   1783       if (Res)
   1784         eraseMatchingBitGroups([VRI](const BitGroup &BG) {
   1785           return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
   1786                  BG.Repl32 == VRI.Repl32;
   1787         });
   1788     }
   1789 
   1790     // Because 64-bit rotates are more flexible than inserts, we might have a
   1791     // preference regarding which one we do first (to save one instruction).
   1792     if (!Res)
   1793       for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {
   1794         if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
   1795                                 false) <
   1796             SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
   1797                                 true)) {
   1798           if (I != BitGroups.begin()) {
   1799             BitGroup BG = *I;
   1800             BitGroups.erase(I);
   1801             BitGroups.insert(BitGroups.begin(), BG);
   1802           }
   1803 
   1804           break;
   1805         }
   1806       }
   1807 
   1808     // Insert the other groups (one at a time).
   1809     for (auto &BG : BitGroups) {
   1810       if (!Res)
   1811         Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
   1812                               BG.EndIdx, InstCnt);
   1813       else
   1814         Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
   1815                                  BG.StartIdx, BG.EndIdx, InstCnt);
   1816     }
   1817 
   1818     if (LateMask) {
   1819       uint64_t Mask = getZerosMask();
   1820 
   1821       // We can use the 32-bit andi/andis technique if the mask does not
   1822       // require any higher-order bits. This can save an instruction compared
   1823       // to always using the general 64-bit technique.
   1824       bool Use32BitInsts = isUInt<32>(Mask);
   1825       // Compute the masks for andi/andis that would be necessary.
   1826       unsigned ANDIMask = (Mask & UINT16_MAX),
   1827                ANDISMask = (Mask >> 16) & UINT16_MAX;
   1828 
   1829       if (Use32BitInsts) {
   1830         assert((ANDIMask != 0 || ANDISMask != 0) &&
   1831                "No set bits in mask when using 32-bit ands for 64-bit value");
   1832 
   1833         if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
   1834                                  (unsigned) (ANDISMask != 0) +
   1835                                  (unsigned) (ANDIMask != 0 && ANDISMask != 0);
   1836 
   1837         SDValue ANDIVal, ANDISVal;
   1838         if (ANDIMask != 0)
   1839           ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
   1840                               Res, getI32Imm(ANDIMask, dl)), 0);
   1841         if (ANDISMask != 0)
   1842           ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
   1843                                Res, getI32Imm(ANDISMask, dl)), 0);
   1844 
   1845         if (!ANDIVal)
   1846           Res = ANDISVal;
   1847         else if (!ANDISVal)
   1848           Res = ANDIVal;
   1849         else
   1850           Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
   1851                           ANDIVal, ANDISVal), 0);
   1852       } else {
   1853         if (InstCnt) *InstCnt += getInt64Count(Mask) + /* and */ 1;
   1854 
   1855         SDValue MaskVal = SDValue(getInt64(CurDAG, dl, Mask), 0);
   1856         Res =
   1857           SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
   1858                                          Res, MaskVal), 0);
   1859       }
   1860     }
   1861 
   1862     return Res.getNode();
   1863   }
   1864 
   1865   SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
   1866     // Fill in BitGroups.
   1867     collectBitGroups(LateMask);
   1868     if (BitGroups.empty())
   1869       return nullptr;
   1870 
   1871     // For 64-bit values, figure out when we can use 32-bit instructions.
   1872     if (Bits.size() == 64)
   1873       assignRepl32BitGroups();
   1874 
   1875     // Fill in ValueRotsVec.
   1876     collectValueRotInfo();
   1877 
   1878     if (Bits.size() == 32) {
   1879       return Select32(N, LateMask, InstCnt);
   1880     } else {
   1881       assert(Bits.size() == 64 && "Not 64 bits here?");
   1882       return Select64(N, LateMask, InstCnt);
   1883     }
   1884 
   1885     return nullptr;
   1886   }
   1887 
   1888   void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
   1889     BitGroups.erase(std::remove_if(BitGroups.begin(), BitGroups.end(), F),
   1890                     BitGroups.end());
   1891   }
   1892 
   1893   SmallVector<ValueBit, 64> Bits;
   1894 
   1895   bool HasZeros;
   1896   SmallVector<unsigned, 64> RLAmt;
   1897 
   1898   SmallVector<BitGroup, 16> BitGroups;
   1899 
   1900   DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
   1901   SmallVector<ValueRotInfo, 16> ValueRotsVec;
   1902 
   1903   SelectionDAG *CurDAG;
   1904 
   1905 public:
   1906   BitPermutationSelector(SelectionDAG *DAG)
   1907     : CurDAG(DAG) {}
   1908 
   1909   // Here we try to match complex bit permutations into a set of
   1910   // rotate-and-shift/shift/and/or instructions, using a set of heuristics
   1911   // known to produce optimial code for common cases (like i32 byte swapping).
   1912   SDNode *Select(SDNode *N) {
   1913     Bits.resize(N->getValueType(0).getSizeInBits());
   1914     if (!getValueBits(SDValue(N, 0), Bits))
   1915       return nullptr;
   1916 
   1917     DEBUG(dbgs() << "Considering bit-permutation-based instruction"
   1918                     " selection for:    ");
   1919     DEBUG(N->dump(CurDAG));
   1920 
   1921     // Fill it RLAmt and set HasZeros.
   1922     computeRotationAmounts();
   1923 
   1924     if (!HasZeros)
   1925       return Select(N, false);
   1926 
   1927     // We currently have two techniques for handling results with zeros: early
   1928     // masking (the default) and late masking. Late masking is sometimes more
   1929     // efficient, but because the structure of the bit groups is different, it
   1930     // is hard to tell without generating both and comparing the results. With
   1931     // late masking, we ignore zeros in the resulting value when inserting each
   1932     // set of bit groups, and then mask in the zeros at the end. With early
   1933     // masking, we only insert the non-zero parts of the result at every step.
   1934 
   1935     unsigned InstCnt, InstCntLateMask;
   1936     DEBUG(dbgs() << "\tEarly masking:\n");
   1937     SDNode *RN = Select(N, false, &InstCnt);
   1938     DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");
   1939 
   1940     DEBUG(dbgs() << "\tLate masking:\n");
   1941     SDNode *RNLM = Select(N, true, &InstCntLateMask);
   1942     DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask <<
   1943                     " instructions\n");
   1944 
   1945     if (InstCnt <= InstCntLateMask) {
   1946       DEBUG(dbgs() << "\tUsing early-masking for isel\n");
   1947       return RN;
   1948     }
   1949 
   1950     DEBUG(dbgs() << "\tUsing late-masking for isel\n");
   1951     return RNLM;
   1952   }
   1953 };
   1954 } // anonymous namespace
   1955 
   1956 bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
   1957   if (N->getValueType(0) != MVT::i32 &&
   1958       N->getValueType(0) != MVT::i64)
   1959     return false;
   1960 
   1961   if (!UseBitPermRewriter)
   1962     return false;
   1963 
   1964   switch (N->getOpcode()) {
   1965   default: break;
   1966   case ISD::ROTL:
   1967   case ISD::SHL:
   1968   case ISD::SRL:
   1969   case ISD::AND:
   1970   case ISD::OR: {
   1971     BitPermutationSelector BPS(CurDAG);
   1972     if (SDNode *New = BPS.Select(N)) {
   1973       ReplaceNode(N, New);
   1974       return true;
   1975     }
   1976     return false;
   1977   }
   1978   }
   1979 
   1980   return false;
   1981 }
   1982 
   1983 /// SelectCC - Select a comparison of the specified values with the specified
   1984 /// condition code, returning the CR# of the expression.
   1985 SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
   1986                                   const SDLoc &dl) {
   1987   // Always select the LHS.
   1988   unsigned Opc;
   1989 
   1990   if (LHS.getValueType() == MVT::i32) {
   1991     unsigned Imm;
   1992     if (CC == ISD::SETEQ || CC == ISD::SETNE) {
   1993       if (isInt32Immediate(RHS, Imm)) {
   1994         // SETEQ/SETNE comparison with 16-bit immediate, fold it.
   1995         if (isUInt<16>(Imm))
   1996           return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
   1997                                                 getI32Imm(Imm & 0xFFFF, dl)),
   1998                          0);
   1999         // If this is a 16-bit signed immediate, fold it.
   2000         if (isInt<16>((int)Imm))
   2001           return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
   2002                                                 getI32Imm(Imm & 0xFFFF, dl)),
   2003                          0);
   2004 
   2005         // For non-equality comparisons, the default code would materialize the
   2006         // constant, then compare against it, like this:
   2007         //   lis r2, 4660
   2008         //   ori r2, r2, 22136
   2009         //   cmpw cr0, r3, r2
   2010         // Since we are just comparing for equality, we can emit this instead:
   2011         //   xoris r0,r3,0x1234
   2012         //   cmplwi cr0,r0,0x5678
   2013         //   beq cr0,L6
   2014         SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
   2015                                            getI32Imm(Imm >> 16, dl)), 0);
   2016         return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
   2017                                               getI32Imm(Imm & 0xFFFF, dl)), 0);
   2018       }
   2019       Opc = PPC::CMPLW;
   2020     } else if (ISD::isUnsignedIntSetCC(CC)) {
   2021       if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
   2022         return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
   2023                                               getI32Imm(Imm & 0xFFFF, dl)), 0);
   2024       Opc = PPC::CMPLW;
   2025     } else {
   2026       short SImm;
   2027       if (isIntS16Immediate(RHS, SImm))
   2028         return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
   2029                                               getI32Imm((int)SImm & 0xFFFF,
   2030                                                         dl)),
   2031                          0);
   2032       Opc = PPC::CMPW;
   2033     }
   2034   } else if (LHS.getValueType() == MVT::i64) {
   2035     uint64_t Imm;
   2036     if (CC == ISD::SETEQ || CC == ISD::SETNE) {
   2037       if (isInt64Immediate(RHS.getNode(), Imm)) {
   2038         // SETEQ/SETNE comparison with 16-bit immediate, fold it.
   2039         if (isUInt<16>(Imm))
   2040           return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
   2041                                                 getI32Imm(Imm & 0xFFFF, dl)),
   2042                          0);
   2043         // If this is a 16-bit signed immediate, fold it.
   2044         if (isInt<16>(Imm))
   2045           return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
   2046                                                 getI32Imm(Imm & 0xFFFF, dl)),
   2047                          0);
   2048 
   2049         // For non-equality comparisons, the default code would materialize the
   2050         // constant, then compare against it, like this:
   2051         //   lis r2, 4660
   2052         //   ori r2, r2, 22136
   2053         //   cmpd cr0, r3, r2
   2054         // Since we are just comparing for equality, we can emit this instead:
   2055         //   xoris r0,r3,0x1234
   2056         //   cmpldi cr0,r0,0x5678
   2057         //   beq cr0,L6
   2058         if (isUInt<32>(Imm)) {
   2059           SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
   2060                                              getI64Imm(Imm >> 16, dl)), 0);
   2061           return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
   2062                                                 getI64Imm(Imm & 0xFFFF, dl)),
   2063                          0);
   2064         }
   2065       }
   2066       Opc = PPC::CMPLD;
   2067     } else if (ISD::isUnsignedIntSetCC(CC)) {
   2068       if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
   2069         return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
   2070                                               getI64Imm(Imm & 0xFFFF, dl)), 0);
   2071       Opc = PPC::CMPLD;
   2072     } else {
   2073       short SImm;
   2074       if (isIntS16Immediate(RHS, SImm))
   2075         return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
   2076                                               getI64Imm(SImm & 0xFFFF, dl)),
   2077                          0);
   2078       Opc = PPC::CMPD;
   2079     }
   2080   } else if (LHS.getValueType() == MVT::f32) {
   2081     Opc = PPC::FCMPUS;
   2082   } else {
   2083     assert(LHS.getValueType() == MVT::f64 && "Unknown vt!");
   2084     Opc = PPCSubTarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
   2085   }
   2086   return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
   2087 }
   2088 
   2089 static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) {
   2090   switch (CC) {
   2091   case ISD::SETUEQ:
   2092   case ISD::SETONE:
   2093   case ISD::SETOLE:
   2094   case ISD::SETOGE:
   2095     llvm_unreachable("Should be lowered by legalize!");
   2096   default: llvm_unreachable("Unknown condition!");
   2097   case ISD::SETOEQ:
   2098   case ISD::SETEQ:  return PPC::PRED_EQ;
   2099   case ISD::SETUNE:
   2100   case ISD::SETNE:  return PPC::PRED_NE;
   2101   case ISD::SETOLT:
   2102   case ISD::SETLT:  return PPC::PRED_LT;
   2103   case ISD::SETULE:
   2104   case ISD::SETLE:  return PPC::PRED_LE;
   2105   case ISD::SETOGT:
   2106   case ISD::SETGT:  return PPC::PRED_GT;
   2107   case ISD::SETUGE:
   2108   case ISD::SETGE:  return PPC::PRED_GE;
   2109   case ISD::SETO:   return PPC::PRED_NU;
   2110   case ISD::SETUO:  return PPC::PRED_UN;
   2111     // These two are invalid for floating point.  Assume we have int.
   2112   case ISD::SETULT: return PPC::PRED_LT;
   2113   case ISD::SETUGT: return PPC::PRED_GT;
   2114   }
   2115 }
   2116 
   2117 /// getCRIdxForSetCC - Return the index of the condition register field
   2118 /// associated with the SetCC condition, and whether or not the field is
   2119 /// treated as inverted.  That is, lt = 0; ge = 0 inverted.
   2120 static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
   2121   Invert = false;
   2122   switch (CC) {
   2123   default: llvm_unreachable("Unknown condition!");
   2124   case ISD::SETOLT:
   2125   case ISD::SETLT:  return 0;                  // Bit #0 = SETOLT
   2126   case ISD::SETOGT:
   2127   case ISD::SETGT:  return 1;                  // Bit #1 = SETOGT
   2128   case ISD::SETOEQ:
   2129   case ISD::SETEQ:  return 2;                  // Bit #2 = SETOEQ
   2130   case ISD::SETUO:  return 3;                  // Bit #3 = SETUO
   2131   case ISD::SETUGE:
   2132   case ISD::SETGE:  Invert = true; return 0;   // !Bit #0 = SETUGE
   2133   case ISD::SETULE:
   2134   case ISD::SETLE:  Invert = true; return 1;   // !Bit #1 = SETULE
   2135   case ISD::SETUNE:
   2136   case ISD::SETNE:  Invert = true; return 2;   // !Bit #2 = SETUNE
   2137   case ISD::SETO:   Invert = true; return 3;   // !Bit #3 = SETO
   2138   case ISD::SETUEQ:
   2139   case ISD::SETOGE:
   2140   case ISD::SETOLE:
   2141   case ISD::SETONE:
   2142     llvm_unreachable("Invalid branch code: should be expanded by legalize");
   2143   // These are invalid for floating point.  Assume integer.
   2144   case ISD::SETULT: return 0;
   2145   case ISD::SETUGT: return 1;
   2146   }
   2147 }
   2148 
   2149 // getVCmpInst: return the vector compare instruction for the specified
   2150 // vector type and condition code. Since this is for altivec specific code,
   2151 // only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32).
   2152 static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
   2153                                 bool HasVSX, bool &Swap, bool &Negate) {
   2154   Swap = false;
   2155   Negate = false;
   2156 
   2157   if (VecVT.isFloatingPoint()) {
   2158     /* Handle some cases by swapping input operands.  */
   2159     switch (CC) {
   2160       case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
   2161       case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
   2162       case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
   2163       case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
   2164       case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
   2165       case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
   2166       default: break;
   2167     }
   2168     /* Handle some cases by negating the result.  */
   2169     switch (CC) {
   2170       case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
   2171       case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
   2172       case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
   2173       case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
   2174       default: break;
   2175     }
   2176     /* We have instructions implementing the remaining cases.  */
   2177     switch (CC) {
   2178       case ISD::SETEQ:
   2179       case ISD::SETOEQ:
   2180         if (VecVT == MVT::v4f32)
   2181           return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
   2182         else if (VecVT == MVT::v2f64)
   2183           return PPC::XVCMPEQDP;
   2184         break;
   2185       case ISD::SETGT:
   2186       case ISD::SETOGT:
   2187         if (VecVT == MVT::v4f32)
   2188           return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
   2189         else if (VecVT == MVT::v2f64)
   2190           return PPC::XVCMPGTDP;
   2191         break;
   2192       case ISD::SETGE:
   2193       case ISD::SETOGE:
   2194         if (VecVT == MVT::v4f32)
   2195           return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
   2196         else if (VecVT == MVT::v2f64)
   2197           return PPC::XVCMPGEDP;
   2198         break;
   2199       default:
   2200         break;
   2201     }
   2202     llvm_unreachable("Invalid floating-point vector compare condition");
   2203   } else {
   2204     /* Handle some cases by swapping input operands.  */
   2205     switch (CC) {
   2206       case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
   2207       case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
   2208       case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
   2209       case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
   2210       default: break;
   2211     }
   2212     /* Handle some cases by negating the result.  */
   2213     switch (CC) {
   2214       case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
   2215       case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
   2216       case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
   2217       case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
   2218       default: break;
   2219     }
   2220     /* We have instructions implementing the remaining cases.  */
   2221     switch (CC) {
   2222       case ISD::SETEQ:
   2223       case ISD::SETUEQ:
   2224         if (VecVT == MVT::v16i8)
   2225           return PPC::VCMPEQUB;
   2226         else if (VecVT == MVT::v8i16)
   2227           return PPC::VCMPEQUH;
   2228         else if (VecVT == MVT::v4i32)
   2229           return PPC::VCMPEQUW;
   2230         else if (VecVT == MVT::v2i64)
   2231           return PPC::VCMPEQUD;
   2232         break;
   2233       case ISD::SETGT:
   2234         if (VecVT == MVT::v16i8)
   2235           return PPC::VCMPGTSB;
   2236         else if (VecVT == MVT::v8i16)
   2237           return PPC::VCMPGTSH;
   2238         else if (VecVT == MVT::v4i32)
   2239           return PPC::VCMPGTSW;
   2240         else if (VecVT == MVT::v2i64)
   2241           return PPC::VCMPGTSD;
   2242         break;
   2243       case ISD::SETUGT:
   2244         if (VecVT == MVT::v16i8)
   2245           return PPC::VCMPGTUB;
   2246         else if (VecVT == MVT::v8i16)
   2247           return PPC::VCMPGTUH;
   2248         else if (VecVT == MVT::v4i32)
   2249           return PPC::VCMPGTUW;
   2250         else if (VecVT == MVT::v2i64)
   2251           return PPC::VCMPGTUD;
   2252         break;
   2253       default:
   2254         break;
   2255     }
   2256     llvm_unreachable("Invalid integer vector compare condition");
   2257   }
   2258 }
   2259 
   2260 bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
   2261   SDLoc dl(N);
   2262   unsigned Imm;
   2263   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
   2264   EVT PtrVT =
   2265       CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
   2266   bool isPPC64 = (PtrVT == MVT::i64);
   2267 
   2268   if (!PPCSubTarget->useCRBits() &&
   2269       isInt32Immediate(N->getOperand(1), Imm)) {
   2270     // We can codegen setcc op, imm very efficiently compared to a brcond.
   2271     // Check for those cases here.
   2272     // setcc op, 0
   2273     if (Imm == 0) {
   2274       SDValue Op = N->getOperand(0);
   2275       switch (CC) {
   2276       default: break;
   2277       case ISD::SETEQ: {
   2278         Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
   2279         SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl),
   2280                           getI32Imm(31, dl) };
   2281         CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
   2282         return true;
   2283       }
   2284       case ISD::SETNE: {
   2285         if (isPPC64) break;
   2286         SDValue AD =
   2287           SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
   2288                                          Op, getI32Imm(~0U, dl)), 0);
   2289         CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1));
   2290         return true;
   2291       }
   2292       case ISD::SETLT: {
   2293         SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
   2294                           getI32Imm(31, dl) };
   2295         CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
   2296         return true;
   2297       }
   2298       case ISD::SETGT: {
   2299         SDValue T =
   2300           SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
   2301         T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
   2302         SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl),
   2303                           getI32Imm(31, dl) };
   2304         CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
   2305         return true;
   2306       }
   2307       }
   2308     } else if (Imm == ~0U) {        // setcc op, -1
   2309       SDValue Op = N->getOperand(0);
   2310       switch (CC) {
   2311       default: break;
   2312       case ISD::SETEQ:
   2313         if (isPPC64) break;
   2314         Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
   2315                                             Op, getI32Imm(1, dl)), 0);
   2316         CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
   2317                              SDValue(CurDAG->getMachineNode(PPC::LI, dl,
   2318                                                             MVT::i32,
   2319                                                             getI32Imm(0, dl)),
   2320                                      0), Op.getValue(1));
   2321         return true;
   2322       case ISD::SETNE: {
   2323         if (isPPC64) break;
   2324         Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
   2325         SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
   2326                                             Op, getI32Imm(~0U, dl));
   2327         CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op,
   2328                              SDValue(AD, 1));
   2329         return true;
   2330       }
   2331       case ISD::SETLT: {
   2332         SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
   2333                                                     getI32Imm(1, dl)), 0);
   2334         SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
   2335                                                     Op), 0);
   2336         SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
   2337                           getI32Imm(31, dl) };
   2338         CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
   2339         return true;
   2340       }
   2341       case ISD::SETGT: {
   2342         SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
   2343                           getI32Imm(31, dl) };
   2344         Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
   2345         CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl));
   2346         return true;
   2347       }
   2348       }
   2349     }
   2350   }
   2351 
   2352   SDValue LHS = N->getOperand(0);
   2353   SDValue RHS = N->getOperand(1);
   2354 
   2355   // Altivec Vector compare instructions do not set any CR register by default and
   2356   // vector compare operations return the same type as the operands.
   2357   if (LHS.getValueType().isVector()) {
   2358     if (PPCSubTarget->hasQPX())
   2359       return false;
   2360 
   2361     EVT VecVT = LHS.getValueType();
   2362     bool Swap, Negate;
   2363     unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC,
   2364                                         PPCSubTarget->hasVSX(), Swap, Negate);
   2365     if (Swap)
   2366       std::swap(LHS, RHS);
   2367 
   2368     EVT ResVT = VecVT.changeVectorElementTypeToInteger();
   2369     if (Negate) {
   2370       SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
   2371       CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
   2372                            ResVT, VCmp, VCmp);
   2373       return true;
   2374     }
   2375 
   2376     CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
   2377     return true;
   2378   }
   2379 
   2380   if (PPCSubTarget->useCRBits())
   2381     return false;
   2382 
   2383   bool Inv;
   2384   unsigned Idx = getCRIdxForSetCC(CC, Inv);
   2385   SDValue CCReg = SelectCC(LHS, RHS, CC, dl);
   2386   SDValue IntCR;
   2387 
   2388   // Force the ccreg into CR7.
   2389   SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
   2390 
   2391   SDValue InFlag(nullptr, 0);  // Null incoming flag value.
   2392   CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
   2393                                InFlag).getValue(1);
   2394 
   2395   IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
   2396                                          CCReg), 0);
   2397 
   2398   SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
   2399                       getI32Imm(31, dl), getI32Imm(31, dl) };
   2400   if (!Inv) {
   2401     CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
   2402     return true;
   2403   }
   2404 
   2405   // Get the specified bit.
   2406   SDValue Tmp =
   2407     SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
   2408   CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl));
   2409   return true;
   2410 }
   2411 
   2412 void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
   2413   // Transfer memoperands.
   2414   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
   2415   MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
   2416   cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
   2417 }
   2418 
   2419 
   2420 // Select - Convert the specified operand from a target-independent to a
   2421 // target-specific node if it hasn't already been changed.
   2422 void PPCDAGToDAGISel::Select(SDNode *N) {
   2423   SDLoc dl(N);
   2424   if (N->isMachineOpcode()) {
   2425     N->setNodeId(-1);
   2426     return;   // Already selected.
   2427   }
   2428 
   2429   // In case any misguided DAG-level optimizations form an ADD with a
   2430   // TargetConstant operand, crash here instead of miscompiling (by selecting
   2431   // an r+r add instead of some kind of r+i add).
   2432   if (N->getOpcode() == ISD::ADD &&
   2433       N->getOperand(1).getOpcode() == ISD::TargetConstant)
   2434     llvm_unreachable("Invalid ADD with TargetConstant operand");
   2435 
   2436   // Try matching complex bit permutations before doing anything else.
   2437   if (tryBitPermutation(N))
   2438     return;
   2439 
   2440   switch (N->getOpcode()) {
   2441   default: break;
   2442 
   2443   case ISD::Constant: {
   2444     if (N->getValueType(0) == MVT::i64) {
   2445       ReplaceNode(N, getInt64(CurDAG, N));
   2446       return;
   2447     }
   2448     break;
   2449   }
   2450 
   2451   case ISD::SETCC: {
   2452     if (trySETCC(N))
   2453       return;
   2454     break;
   2455   }
   2456   case PPCISD::GlobalBaseReg:
   2457     ReplaceNode(N, getGlobalBaseReg());
   2458     return;
   2459 
   2460   case ISD::FrameIndex:
   2461     selectFrameIndex(N, N);
   2462     return;
   2463 
   2464   case PPCISD::MFOCRF: {
   2465     SDValue InFlag = N->getOperand(1);
   2466     ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
   2467                                           N->getOperand(0), InFlag));
   2468     return;
   2469   }
   2470 
   2471   case PPCISD::READ_TIME_BASE: {
   2472     ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
   2473                                           MVT::Other, N->getOperand(0)));
   2474     return;
   2475   }
   2476 
   2477   case PPCISD::SRA_ADDZE: {
   2478     SDValue N0 = N->getOperand(0);
   2479     SDValue ShiftAmt =
   2480       CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
   2481                                   getConstantIntValue(), dl,
   2482                                   N->getValueType(0));
   2483     if (N->getValueType(0) == MVT::i64) {
   2484       SDNode *Op =
   2485         CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
   2486                                N0, ShiftAmt);
   2487       CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0),
   2488                            SDValue(Op, 1));
   2489       return;
   2490     } else {
   2491       assert(N->getValueType(0) == MVT::i32 &&
   2492              "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
   2493       SDNode *Op =
   2494         CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
   2495                                N0, ShiftAmt);
   2496       CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0),
   2497                            SDValue(Op, 1));
   2498       return;
   2499     }
   2500   }
   2501 
   2502   case ISD::LOAD: {
   2503     // Handle preincrement loads.
   2504     LoadSDNode *LD = cast<LoadSDNode>(N);
   2505     EVT LoadedVT = LD->getMemoryVT();
   2506 
   2507     // Normal loads are handled by code generated from the .td file.
   2508     if (LD->getAddressingMode() != ISD::PRE_INC)
   2509       break;
   2510 
   2511     SDValue Offset = LD->getOffset();
   2512     if (Offset.getOpcode() == ISD::TargetConstant ||
   2513         Offset.getOpcode() == ISD::TargetGlobalAddress) {
   2514 
   2515       unsigned Opcode;
   2516       bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
   2517       if (LD->getValueType(0) != MVT::i64) {
   2518         // Handle PPC32 integer and normal FP loads.
   2519         assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
   2520         switch (LoadedVT.getSimpleVT().SimpleTy) {
   2521           default: llvm_unreachable("Invalid PPC load type!");
   2522           case MVT::f64: Opcode = PPC::LFDU; break;
   2523           case MVT::f32: Opcode = PPC::LFSU; break;
   2524           case MVT::i32: Opcode = PPC::LWZU; break;
   2525           case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
   2526           case MVT::i1:
   2527           case MVT::i8:  Opcode = PPC::LBZU; break;
   2528         }
   2529       } else {
   2530         assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
   2531         assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
   2532         switch (LoadedVT.getSimpleVT().SimpleTy) {
   2533           default: llvm_unreachable("Invalid PPC load type!");
   2534           case MVT::i64: Opcode = PPC::LDU; break;
   2535           case MVT::i32: Opcode = PPC::LWZU8; break;
   2536           case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
   2537           case MVT::i1:
   2538           case MVT::i8:  Opcode = PPC::LBZU8; break;
   2539         }
   2540       }
   2541 
   2542       SDValue Chain = LD->getChain();
   2543       SDValue Base = LD->getBasePtr();
   2544       SDValue Ops[] = { Offset, Base, Chain };
   2545       SDNode *MN = CurDAG->getMachineNode(
   2546           Opcode, dl, LD->getValueType(0),
   2547           PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
   2548       transferMemOperands(N, MN);
   2549       ReplaceNode(N, MN);
   2550       return;
   2551     } else {
   2552       unsigned Opcode;
   2553       bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
   2554       if (LD->getValueType(0) != MVT::i64) {
   2555         // Handle PPC32 integer and normal FP loads.
   2556         assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
   2557         switch (LoadedVT.getSimpleVT().SimpleTy) {
   2558           default: llvm_unreachable("Invalid PPC load type!");
   2559           case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX
   2560           case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX
   2561           case MVT::f64: Opcode = PPC::LFDUX; break;
   2562           case MVT::f32: Opcode = PPC::LFSUX; break;
   2563           case MVT::i32: Opcode = PPC::LWZUX; break;
   2564           case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
   2565           case MVT::i1:
   2566           case MVT::i8:  Opcode = PPC::LBZUX; break;
   2567         }
   2568       } else {
   2569         assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
   2570         assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
   2571                "Invalid sext update load");
   2572         switch (LoadedVT.getSimpleVT().SimpleTy) {
   2573           default: llvm_unreachable("Invalid PPC load type!");
   2574           case MVT::i64: Opcode = PPC::LDUX; break;
   2575           case MVT::i32: Opcode = isSExt ? PPC::LWAUX  : PPC::LWZUX8; break;
   2576           case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
   2577           case MVT::i1:
   2578           case MVT::i8:  Opcode = PPC::LBZUX8; break;
   2579         }
   2580       }
   2581 
   2582       SDValue Chain = LD->getChain();
   2583       SDValue Base = LD->getBasePtr();
   2584       SDValue Ops[] = { Base, Offset, Chain };
   2585       SDNode *MN = CurDAG->getMachineNode(
   2586           Opcode, dl, LD->getValueType(0),
   2587           PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
   2588       transferMemOperands(N, MN);
   2589       ReplaceNode(N, MN);
   2590       return;
   2591     }
   2592   }
   2593 
   2594   case ISD::AND: {
   2595     unsigned Imm, Imm2, SH, MB, ME;
   2596     uint64_t Imm64;
   2597 
   2598     // If this is an and of a value rotated between 0 and 31 bits and then and'd
   2599     // with a mask, emit rlwinm
   2600     if (isInt32Immediate(N->getOperand(1), Imm) &&
   2601         isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
   2602       SDValue Val = N->getOperand(0).getOperand(0);
   2603       SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
   2604                         getI32Imm(ME, dl) };
   2605       CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
   2606       return;
   2607     }
   2608     // If this is just a masked value where the input is not handled above, and
   2609     // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
   2610     if (isInt32Immediate(N->getOperand(1), Imm) &&
   2611         isRunOfOnes(Imm, MB, ME) &&
   2612         N->getOperand(0).getOpcode() != ISD::ROTL) {
   2613       SDValue Val = N->getOperand(0);
   2614       SDValue Ops[] = { Val, getI32Imm(0, dl), getI32Imm(MB, dl),
   2615                         getI32Imm(ME, dl) };
   2616       CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
   2617       return;
   2618     }
   2619     // If this is a 64-bit zero-extension mask, emit rldicl.
   2620     if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
   2621         isMask_64(Imm64)) {
   2622       SDValue Val = N->getOperand(0);
   2623       MB = 64 - countTrailingOnes(Imm64);
   2624       SH = 0;
   2625 
   2626       // If the operand is a logical right shift, we can fold it into this
   2627       // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
   2628       // for n <= mb. The right shift is really a left rotate followed by a
   2629       // mask, and this mask is a more-restrictive sub-mask of the mask implied
   2630       // by the shift.
   2631       if (Val.getOpcode() == ISD::SRL &&
   2632           isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
   2633         assert(Imm < 64 && "Illegal shift amount");
   2634         Val = Val.getOperand(0);
   2635         SH = 64 - Imm;
   2636       }
   2637 
   2638       SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
   2639       CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
   2640       return;
   2641     }
   2642     // AND X, 0 -> 0, not "rlwinm 32".
   2643     if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
   2644       ReplaceUses(SDValue(N, 0), N->getOperand(1));
   2645       return;
   2646     }
   2647     // ISD::OR doesn't get all the bitfield insertion fun.
   2648     // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
   2649     // bitfield insert.
   2650     if (isInt32Immediate(N->getOperand(1), Imm) &&
   2651         N->getOperand(0).getOpcode() == ISD::OR &&
   2652         isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
   2653       // The idea here is to check whether this is equivalent to:
   2654       //   (c1 & m) | (x & ~m)
   2655       // where m is a run-of-ones mask. The logic here is that, for each bit in
   2656       // c1 and c2:
   2657       //  - if both are 1, then the output will be 1.
   2658       //  - if both are 0, then the output will be 0.
   2659       //  - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
   2660       //    come from x.
   2661       //  - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
   2662       //    be 0.
   2663       //  If that last condition is never the case, then we can form m from the
   2664       //  bits that are the same between c1 and c2.
   2665       unsigned MB, ME;
   2666       if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) {
   2667         SDValue Ops[] = { N->getOperand(0).getOperand(0),
   2668                             N->getOperand(0).getOperand(1),
   2669                             getI32Imm(0, dl), getI32Imm(MB, dl),
   2670                             getI32Imm(ME, dl) };
   2671         ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
   2672         return;
   2673       }
   2674     }
   2675 
   2676     // Other cases are autogenerated.
   2677     break;
   2678   }
   2679   case ISD::OR: {
   2680     if (N->getValueType(0) == MVT::i32)
   2681       if (tryBitfieldInsert(N))
   2682         return;
   2683 
   2684     short Imm;
   2685     if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
   2686         isIntS16Immediate(N->getOperand(1), Imm)) {
   2687       APInt LHSKnownZero, LHSKnownOne;
   2688       CurDAG->computeKnownBits(N->getOperand(0), LHSKnownZero, LHSKnownOne);
   2689 
   2690       // If this is equivalent to an add, then we can fold it with the
   2691       // FrameIndex calculation.
   2692       if ((LHSKnownZero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
   2693         selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
   2694         return;
   2695       }
   2696     }
   2697 
   2698     // Other cases are autogenerated.
   2699     break;
   2700   }
   2701   case ISD::ADD: {
   2702     short Imm;
   2703     if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
   2704         isIntS16Immediate(N->getOperand(1), Imm)) {
   2705       selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
   2706       return;
   2707     }
   2708 
   2709     break;
   2710   }
   2711   case ISD::SHL: {
   2712     unsigned Imm, SH, MB, ME;
   2713     if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
   2714         isRotateAndMask(N, Imm, true, SH, MB, ME)) {
   2715       SDValue Ops[] = { N->getOperand(0).getOperand(0),
   2716                           getI32Imm(SH, dl), getI32Imm(MB, dl),
   2717                           getI32Imm(ME, dl) };
   2718       CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
   2719       return;
   2720     }
   2721 
   2722     // Other cases are autogenerated.
   2723     break;
   2724   }
   2725   case ISD::SRL: {
   2726     unsigned Imm, SH, MB, ME;
   2727     if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
   2728         isRotateAndMask(N, Imm, true, SH, MB, ME)) {
   2729       SDValue Ops[] = { N->getOperand(0).getOperand(0),
   2730                           getI32Imm(SH, dl), getI32Imm(MB, dl),
   2731                           getI32Imm(ME, dl) };
   2732       CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
   2733       return;
   2734     }
   2735 
   2736     // Other cases are autogenerated.
   2737     break;
   2738   }
   2739   // FIXME: Remove this once the ANDI glue bug is fixed:
   2740   case PPCISD::ANDIo_1_EQ_BIT:
   2741   case PPCISD::ANDIo_1_GT_BIT: {
   2742     if (!ANDIGlueBug)
   2743       break;
   2744 
   2745     EVT InVT = N->getOperand(0).getValueType();
   2746     assert((InVT == MVT::i64 || InVT == MVT::i32) &&
   2747            "Invalid input type for ANDIo_1_EQ_BIT");
   2748 
   2749     unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDIo8 : PPC::ANDIo;
   2750     SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
   2751                                         N->getOperand(0),
   2752                                         CurDAG->getTargetConstant(1, dl, InVT)),
   2753                  0);
   2754     SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
   2755     SDValue SRIdxVal =
   2756       CurDAG->getTargetConstant(N->getOpcode() == PPCISD::ANDIo_1_EQ_BIT ?
   2757                                 PPC::sub_eq : PPC::sub_gt, dl, MVT::i32);
   2758 
   2759     CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg,
   2760                          SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */);
   2761     return;
   2762   }
   2763   case ISD::SELECT_CC: {
   2764     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
   2765     EVT PtrVT =
   2766         CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
   2767     bool isPPC64 = (PtrVT == MVT::i64);
   2768 
   2769     // If this is a select of i1 operands, we'll pattern match it.
   2770     if (PPCSubTarget->useCRBits() &&
   2771         N->getOperand(0).getValueType() == MVT::i1)
   2772       break;
   2773 
   2774     // Handle the setcc cases here.  select_cc lhs, 0, 1, 0, cc
   2775     if (!isPPC64)
   2776       if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
   2777         if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
   2778           if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
   2779             if (N1C->isNullValue() && N3C->isNullValue() &&
   2780                 N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
   2781                 // FIXME: Implement this optzn for PPC64.
   2782                 N->getValueType(0) == MVT::i32) {
   2783               SDNode *Tmp =
   2784                 CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
   2785                                        N->getOperand(0), getI32Imm(~0U, dl));
   2786               CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0),
   2787                                    N->getOperand(0), SDValue(Tmp, 1));
   2788               return;
   2789             }
   2790 
   2791     SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
   2792 
   2793     if (N->getValueType(0) == MVT::i1) {
   2794       // An i1 select is: (c & t) | (!c & f).
   2795       bool Inv;
   2796       unsigned Idx = getCRIdxForSetCC(CC, Inv);
   2797 
   2798       unsigned SRI;
   2799       switch (Idx) {
   2800       default: llvm_unreachable("Invalid CC index");
   2801       case 0: SRI = PPC::sub_lt; break;
   2802       case 1: SRI = PPC::sub_gt; break;
   2803       case 2: SRI = PPC::sub_eq; break;
   2804       case 3: SRI = PPC::sub_un; break;
   2805       }
   2806 
   2807       SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);
   2808 
   2809       SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
   2810                                               CCBit, CCBit), 0);
   2811       SDValue C =    Inv ? NotCCBit : CCBit,
   2812               NotC = Inv ? CCBit    : NotCCBit;
   2813 
   2814       SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
   2815                                            C, N->getOperand(2)), 0);
   2816       SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
   2817                                               NotC, N->getOperand(3)), 0);
   2818 
   2819       CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
   2820       return;
   2821     }
   2822 
   2823     unsigned BROpc = getPredicateForSetCC(CC);
   2824 
   2825     unsigned SelectCCOp;
   2826     if (N->getValueType(0) == MVT::i32)
   2827       SelectCCOp = PPC::SELECT_CC_I4;
   2828     else if (N->getValueType(0) == MVT::i64)
   2829       SelectCCOp = PPC::SELECT_CC_I8;
   2830     else if (N->getValueType(0) == MVT::f32)
   2831       if (PPCSubTarget->hasP8Vector())
   2832         SelectCCOp = PPC::SELECT_CC_VSSRC;
   2833       else
   2834         SelectCCOp = PPC::SELECT_CC_F4;
   2835     else if (N->getValueType(0) == MVT::f64)
   2836       if (PPCSubTarget->hasVSX())
   2837         SelectCCOp = PPC::SELECT_CC_VSFRC;
   2838       else
   2839         SelectCCOp = PPC::SELECT_CC_F8;
   2840     else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64)
   2841       SelectCCOp = PPC::SELECT_CC_QFRC;
   2842     else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32)
   2843       SelectCCOp = PPC::SELECT_CC_QSRC;
   2844     else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1)
   2845       SelectCCOp = PPC::SELECT_CC_QBRC;
   2846     else if (N->getValueType(0) == MVT::v2f64 ||
   2847              N->getValueType(0) == MVT::v2i64)
   2848       SelectCCOp = PPC::SELECT_CC_VSRC;
   2849     else
   2850       SelectCCOp = PPC::SELECT_CC_VRRC;
   2851 
   2852     SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
   2853                         getI32Imm(BROpc, dl) };
   2854     CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
   2855     return;
   2856   }
   2857   case ISD::VSELECT:
   2858     if (PPCSubTarget->hasVSX()) {
   2859       SDValue Ops[] = { N->getOperand(2), N->getOperand(1), N->getOperand(0) };
   2860       CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops);
   2861       return;
   2862     }
   2863 
   2864     break;
   2865   case ISD::VECTOR_SHUFFLE:
   2866     if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
   2867                                   N->getValueType(0) == MVT::v2i64)) {
   2868       ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
   2869 
   2870       SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
   2871               Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
   2872       unsigned DM[2];
   2873 
   2874       for (int i = 0; i < 2; ++i)
   2875         if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)
   2876           DM[i] = 0;
   2877         else
   2878           DM[i] = 1;
   2879 
   2880       if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
   2881           Op1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
   2882           isa<LoadSDNode>(Op1.getOperand(0))) {
   2883         LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
   2884         SDValue Base, Offset;
   2885 
   2886         if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&
   2887             (LD->getMemoryVT() == MVT::f64 ||
   2888              LD->getMemoryVT() == MVT::i64) &&
   2889             SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
   2890           SDValue Chain = LD->getChain();
   2891           SDValue Ops[] = { Base, Offset, Chain };
   2892           CurDAG->SelectNodeTo(N, PPC::LXVDSX, N->getValueType(0), Ops);
   2893           return;
   2894         }
   2895       }
   2896 
   2897       // For little endian, we must swap the input operands and adjust
   2898       // the mask elements (reverse and invert them).
   2899       if (PPCSubTarget->isLittleEndian()) {
   2900         std::swap(Op1, Op2);
   2901         unsigned tmp = DM[0];
   2902         DM[0] = 1 - DM[1];
   2903         DM[1] = 1 - tmp;
   2904       }
   2905 
   2906       SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
   2907                                               MVT::i32);
   2908       SDValue Ops[] = { Op1, Op2, DMV };
   2909       CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
   2910       return;
   2911     }
   2912 
   2913     break;
   2914   case PPCISD::BDNZ:
   2915   case PPCISD::BDZ: {
   2916     bool IsPPC64 = PPCSubTarget->isPPC64();
   2917     SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
   2918     CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ
   2919                                 ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
   2920                                 : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
   2921                          MVT::Other, Ops);
   2922     return;
   2923   }
   2924   case PPCISD::COND_BRANCH: {
   2925     // Op #0 is the Chain.
   2926     // Op #1 is the PPC::PRED_* number.
   2927     // Op #2 is the CR#
   2928     // Op #3 is the Dest MBB
   2929     // Op #4 is the Flag.
   2930     // Prevent PPC::PRED_* from being selected into LI.
   2931     unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
   2932     if (EnableBranchHint)
   2933       PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(3));
   2934 
   2935     SDValue Pred = getI32Imm(PCC, dl);
   2936     SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
   2937       N->getOperand(0), N->getOperand(4) };
   2938     CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
   2939     return;
   2940   }
   2941   case ISD::BR_CC: {
   2942     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
   2943     unsigned PCC = getPredicateForSetCC(CC);
   2944 
   2945     if (N->getOperand(2).getValueType() == MVT::i1) {
   2946       unsigned Opc;
   2947       bool Swap;
   2948       switch (PCC) {
   2949       default: llvm_unreachable("Unexpected Boolean-operand predicate");
   2950       case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true;  break;
   2951       case PPC::PRED_LE: Opc = PPC::CRORC;  Swap = true;  break;
   2952       case PPC::PRED_EQ: Opc = PPC::CREQV;  Swap = false; break;
   2953       case PPC::PRED_GE: Opc = PPC::CRORC;  Swap = false; break;
   2954       case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;
   2955       case PPC::PRED_NE: Opc = PPC::CRXOR;  Swap = false; break;
   2956       }
   2957 
   2958       SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
   2959                                              N->getOperand(Swap ? 3 : 2),
   2960                                              N->getOperand(Swap ? 2 : 3)), 0);
   2961       CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4),
   2962                            N->getOperand(0));
   2963       return;
   2964     }
   2965 
   2966     if (EnableBranchHint)
   2967       PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(4));
   2968 
   2969     SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
   2970     SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
   2971                         N->getOperand(4), N->getOperand(0) };
   2972     CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
   2973     return;
   2974   }
   2975   case ISD::BRIND: {
   2976     // FIXME: Should custom lower this.
   2977     SDValue Chain = N->getOperand(0);
   2978     SDValue Target = N->getOperand(1);
   2979     unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
   2980     unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
   2981     Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
   2982                                            Chain), 0);
   2983     CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
   2984     return;
   2985   }
   2986   case PPCISD::TOC_ENTRY: {
   2987     assert ((PPCSubTarget->isPPC64() || PPCSubTarget->isSVR4ABI()) &&
   2988             "Only supported for 64-bit ABI and 32-bit SVR4");
   2989     if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) {
   2990       SDValue GA = N->getOperand(0);
   2991       SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA,
   2992                                           N->getOperand(1));
   2993       transferMemOperands(N, MN);
   2994       ReplaceNode(N, MN);
   2995       return;
   2996     }
   2997 
   2998     // For medium and large code model, we generate two instructions as
   2999     // described below.  Otherwise we allow SelectCodeCommon to handle this,
   3000     // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA.
   3001     CodeModel::Model CModel = TM.getCodeModel();
   3002     if (CModel != CodeModel::Medium && CModel != CodeModel::Large)
   3003       break;
   3004 
   3005     // The first source operand is a TargetGlobalAddress or a TargetJumpTable.
   3006     // If it must be toc-referenced according to PPCSubTarget, we generate:
   3007     //   LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>))
   3008     // Otherwise we generate:
   3009     //   ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>)
   3010     SDValue GA = N->getOperand(0);
   3011     SDValue TOCbase = N->getOperand(1);
   3012     SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
   3013                                          TOCbase, GA);
   3014 
   3015     if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) ||
   3016         CModel == CodeModel::Large) {
   3017       SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
   3018                                           SDValue(Tmp, 0));
   3019       transferMemOperands(N, MN);
   3020       ReplaceNode(N, MN);
   3021       return;
   3022     }
   3023 
   3024     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
   3025       const GlobalValue *GV = G->getGlobal();
   3026       unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV);
   3027       if (GVFlags & PPCII::MO_NLP_FLAG) {
   3028         SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
   3029                                             SDValue(Tmp, 0));
   3030         transferMemOperands(N, MN);
   3031         ReplaceNode(N, MN);
   3032         return;
   3033       }
   3034     }
   3035 
   3036     ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
   3037                                           SDValue(Tmp, 0), GA));
   3038     return;
   3039   }
   3040   case PPCISD::PPC32_PICGOT: {
   3041     // Generate a PIC-safe GOT reference.
   3042     assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() &&
   3043       "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
   3044     CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT,
   3045                          PPCLowering->getPointerTy(CurDAG->getDataLayout()),
   3046                          MVT::i32);
   3047     return;
   3048   }
   3049   case PPCISD::VADD_SPLAT: {
   3050     // This expands into one of three sequences, depending on whether
   3051     // the first operand is odd or even, positive or negative.
   3052     assert(isa<ConstantSDNode>(N->getOperand(0)) &&
   3053            isa<ConstantSDNode>(N->getOperand(1)) &&
   3054            "Invalid operand on VADD_SPLAT!");
   3055 
   3056     int Elt     = N->getConstantOperandVal(0);
   3057     int EltSize = N->getConstantOperandVal(1);
   3058     unsigned Opc1, Opc2, Opc3;
   3059     EVT VT;
   3060 
   3061     if (EltSize == 1) {
   3062       Opc1 = PPC::VSPLTISB;
   3063       Opc2 = PPC::VADDUBM;
   3064       Opc3 = PPC::VSUBUBM;
   3065       VT = MVT::v16i8;
   3066     } else if (EltSize == 2) {
   3067       Opc1 = PPC::VSPLTISH;
   3068       Opc2 = PPC::VADDUHM;
   3069       Opc3 = PPC::VSUBUHM;
   3070       VT = MVT::v8i16;
   3071     } else {
   3072       assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
   3073       Opc1 = PPC::VSPLTISW;
   3074       Opc2 = PPC::VADDUWM;
   3075       Opc3 = PPC::VSUBUWM;
   3076       VT = MVT::v4i32;
   3077     }
   3078 
   3079     if ((Elt & 1) == 0) {
   3080       // Elt is even, in the range [-32,-18] + [16,30].
   3081       //
   3082       // Convert: VADD_SPLAT elt, size
   3083       // Into:    tmp = VSPLTIS[BHW] elt
   3084       //          VADDU[BHW]M tmp, tmp
   3085       // Where:   [BHW] = B for size = 1, H for size = 2, W for size = 4
   3086       SDValue EltVal = getI32Imm(Elt >> 1, dl);
   3087       SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
   3088       SDValue TmpVal = SDValue(Tmp, 0);
   3089       ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal));
   3090       return;
   3091 
   3092     } else if (Elt > 0) {
   3093       // Elt is odd and positive, in the range [17,31].
   3094       //
   3095       // Convert: VADD_SPLAT elt, size
   3096       // Into:    tmp1 = VSPLTIS[BHW] elt-16
   3097       //          tmp2 = VSPLTIS[BHW] -16
   3098       //          VSUBU[BHW]M tmp1, tmp2
   3099       SDValue EltVal = getI32Imm(Elt - 16, dl);
   3100       SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
   3101       EltVal = getI32Imm(-16, dl);
   3102       SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
   3103       ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
   3104                                             SDValue(Tmp2, 0)));
   3105       return;
   3106 
   3107     } else {
   3108       // Elt is odd and negative, in the range [-31,-17].
   3109       //
   3110       // Convert: VADD_SPLAT elt, size
   3111       // Into:    tmp1 = VSPLTIS[BHW] elt+16
   3112       //          tmp2 = VSPLTIS[BHW] -16
   3113       //          VADDU[BHW]M tmp1, tmp2
   3114       SDValue EltVal = getI32Imm(Elt + 16, dl);
   3115       SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
   3116       EltVal = getI32Imm(-16, dl);
   3117       SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
   3118       ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
   3119                                             SDValue(Tmp2, 0)));
   3120       return;
   3121     }
   3122   }
   3123   }
   3124 
   3125   SelectCode(N);
   3126 }
   3127 
   3128 // If the target supports the cmpb instruction, do the idiom recognition here.
   3129 // We don't do this as a DAG combine because we don't want to do it as nodes
   3130 // are being combined (because we might miss part of the eventual idiom). We
   3131 // don't want to do it during instruction selection because we want to reuse
   3132 // the logic for lowering the masking operations already part of the
   3133 // instruction selector.
   3134 SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
   3135   SDLoc dl(N);
   3136 
   3137   assert(N->getOpcode() == ISD::OR &&
   3138          "Only OR nodes are supported for CMPB");
   3139 
   3140   SDValue Res;
   3141   if (!PPCSubTarget->hasCMPB())
   3142     return Res;
   3143 
   3144   if (N->getValueType(0) != MVT::i32 &&
   3145       N->getValueType(0) != MVT::i64)
   3146     return Res;
   3147 
   3148   EVT VT = N->getValueType(0);
   3149 
   3150   SDValue RHS, LHS;
   3151   bool BytesFound[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
   3152   uint64_t Mask = 0, Alt = 0;
   3153 
   3154   auto IsByteSelectCC = [this](SDValue O, unsigned &b,
   3155                                uint64_t &Mask, uint64_t &Alt,
   3156                                SDValue &LHS, SDValue &RHS) {
   3157     if (O.getOpcode() != ISD::SELECT_CC)
   3158       return false;
   3159     ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get();
   3160 
   3161     if (!isa<ConstantSDNode>(O.getOperand(2)) ||
   3162         !isa<ConstantSDNode>(O.getOperand(3)))
   3163       return false;
   3164 
   3165     uint64_t PM = O.getConstantOperandVal(2);
   3166     uint64_t PAlt = O.getConstantOperandVal(3);
   3167     for (b = 0; b < 8; ++b) {
   3168       uint64_t Mask = UINT64_C(0xFF) << (8*b);
   3169       if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt)
   3170         break;
   3171     }
   3172 
   3173     if (b == 8)
   3174       return false;
   3175     Mask |= PM;
   3176     Alt  |= PAlt;
   3177 
   3178     if (!isa<ConstantSDNode>(O.getOperand(1)) ||
   3179         O.getConstantOperandVal(1) != 0) {
   3180       SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1);
   3181       if (Op0.getOpcode() == ISD::TRUNCATE)
   3182         Op0 = Op0.getOperand(0);
   3183       if (Op1.getOpcode() == ISD::TRUNCATE)
   3184         Op1 = Op1.getOperand(0);
   3185 
   3186       if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL &&
   3187           Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ &&
   3188           isa<ConstantSDNode>(Op0.getOperand(1))) {
   3189 
   3190         unsigned Bits = Op0.getValueType().getSizeInBits();
   3191         if (b != Bits/8-1)
   3192           return false;
   3193         if (Op0.getConstantOperandVal(1) != Bits-8)
   3194           return false;
   3195 
   3196         LHS = Op0.getOperand(0);
   3197         RHS = Op1.getOperand(0);
   3198         return true;
   3199       }
   3200 
   3201       // When we have small integers (i16 to be specific), the form present
   3202       // post-legalization uses SETULT in the SELECT_CC for the
   3203       // higher-order byte, depending on the fact that the
   3204       // even-higher-order bytes are known to all be zero, for example:
   3205       //   select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
   3206       // (so when the second byte is the same, because all higher-order
   3207       // bits from bytes 3 and 4 are known to be zero, the result of the
   3208       // xor can be at most 255)
   3209       if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT &&
   3210           isa<ConstantSDNode>(O.getOperand(1))) {
   3211 
   3212         uint64_t ULim = O.getConstantOperandVal(1);
   3213         if (ULim != (UINT64_C(1) << b*8))
   3214           return false;
   3215 
   3216         // Now we need to make sure that the upper bytes are known to be
   3217         // zero.
   3218         unsigned Bits = Op0.getValueType().getSizeInBits();
   3219         if (!CurDAG->MaskedValueIsZero(Op0,
   3220               APInt::getHighBitsSet(Bits, Bits - (b+1)*8)))
   3221           return false;
   3222 
   3223         LHS = Op0.getOperand(0);
   3224         RHS = Op0.getOperand(1);
   3225         return true;
   3226       }
   3227 
   3228       return false;
   3229     }
   3230 
   3231     if (CC != ISD::SETEQ)
   3232       return false;
   3233 
   3234     SDValue Op = O.getOperand(0);
   3235     if (Op.getOpcode() == ISD::AND) {
   3236       if (!isa<ConstantSDNode>(Op.getOperand(1)))
   3237         return false;
   3238       if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b)))
   3239         return false;
   3240 
   3241       SDValue XOR = Op.getOperand(0);
   3242       if (XOR.getOpcode() == ISD::TRUNCATE)
   3243         XOR = XOR.getOperand(0);
   3244       if (XOR.getOpcode() != ISD::XOR)
   3245         return false;
   3246 
   3247       LHS = XOR.getOperand(0);
   3248       RHS = XOR.getOperand(1);
   3249       return true;
   3250     } else if (Op.getOpcode() == ISD::SRL) {
   3251       if (!isa<ConstantSDNode>(Op.getOperand(1)))
   3252         return false;
   3253       unsigned Bits = Op.getValueType().getSizeInBits();
   3254       if (b != Bits/8-1)
   3255         return false;
   3256       if (Op.getConstantOperandVal(1) != Bits-8)
   3257         return false;
   3258 
   3259       SDValue XOR = Op.getOperand(0);
   3260       if (XOR.getOpcode() == ISD::TRUNCATE)
   3261         XOR = XOR.getOperand(0);
   3262       if (XOR.getOpcode() != ISD::XOR)
   3263         return false;
   3264 
   3265       LHS = XOR.getOperand(0);
   3266       RHS = XOR.getOperand(1);
   3267       return true;
   3268     }
   3269 
   3270     return false;
   3271   };
   3272 
   3273   SmallVector<SDValue, 8> Queue(1, SDValue(N, 0));
   3274   while (!Queue.empty()) {
   3275     SDValue V = Queue.pop_back_val();
   3276 
   3277     for (const SDValue &O : V.getNode()->ops()) {
   3278       unsigned b;
   3279       uint64_t M = 0, A = 0;
   3280       SDValue OLHS, ORHS;
   3281       if (O.getOpcode() == ISD::OR) {
   3282         Queue.push_back(O);
   3283       } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) {
   3284         if (!LHS) {
   3285           LHS = OLHS;
   3286           RHS = ORHS;
   3287           BytesFound[b] = true;
   3288           Mask |= M;
   3289           Alt  |= A;
   3290         } else if ((LHS == ORHS && RHS == OLHS) ||
   3291                    (RHS == ORHS && LHS == OLHS)) {
   3292           BytesFound[b] = true;
   3293           Mask |= M;
   3294           Alt  |= A;
   3295         } else {
   3296           return Res;
   3297         }
   3298       } else {
   3299         return Res;
   3300       }
   3301     }
   3302   }
   3303 
   3304   unsigned LastB = 0, BCnt = 0;
   3305   for (unsigned i = 0; i < 8; ++i)
   3306     if (BytesFound[LastB]) {
   3307       ++BCnt;
   3308       LastB = i;
   3309     }
   3310 
   3311   if (!LastB || BCnt < 2)
   3312     return Res;
   3313 
   3314   // Because we'll be zero-extending the output anyway if don't have a specific
   3315   // value for each input byte (via the Mask), we can 'anyext' the inputs.
   3316   if (LHS.getValueType() != VT) {
   3317     LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT);
   3318     RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT);
   3319   }
   3320 
   3321   Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS);
   3322 
   3323   bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1);
   3324   if (NonTrivialMask && !Alt) {
   3325     // Res = Mask & CMPB
   3326     Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
   3327                           CurDAG->getConstant(Mask, dl, VT));
   3328   } else if (Alt) {
   3329     // Res = (CMPB & Mask) | (~CMPB & Alt)
   3330     // Which, as suggested here:
   3331     //   https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
   3332     // can be written as:
   3333     // Res = Alt ^ ((Alt ^ Mask) & CMPB)
   3334     // useful because the (Alt ^ Mask) can be pre-computed.
   3335     Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
   3336                           CurDAG->getConstant(Mask ^ Alt, dl, VT));
   3337     Res = CurDAG->getNode(ISD::XOR, dl, VT, Res,
   3338                           CurDAG->getConstant(Alt, dl, VT));
   3339   }
   3340 
   3341   return Res;
   3342 }
   3343 
   3344 // When CR bit registers are enabled, an extension of an i1 variable to a i32
   3345 // or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
   3346 // involves constant materialization of a 0 or a 1 or both. If the result of
   3347 // the extension is then operated upon by some operator that can be constant
   3348 // folded with a constant 0 or 1, and that constant can be materialized using
   3349 // only one instruction (like a zero or one), then we should fold in those
   3350 // operations with the select.
   3351 void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
   3352   if (!PPCSubTarget->useCRBits())
   3353     return;
   3354 
   3355   if (N->getOpcode() != ISD::ZERO_EXTEND &&
   3356       N->getOpcode() != ISD::SIGN_EXTEND &&
   3357       N->getOpcode() != ISD::ANY_EXTEND)
   3358     return;
   3359 
   3360   if (N->getOperand(0).getValueType() != MVT::i1)
   3361     return;
   3362 
   3363   if (!N->hasOneUse())
   3364     return;
   3365 
   3366   SDLoc dl(N);
   3367   EVT VT = N->getValueType(0);
   3368   SDValue Cond = N->getOperand(0);
   3369   SDValue ConstTrue =
   3370     CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT);
   3371   SDValue ConstFalse = CurDAG->getConstant(0, dl, VT);
   3372 
   3373   do {
   3374     SDNode *User = *N->use_begin();
   3375     if (User->getNumOperands() != 2)
   3376       break;
   3377 
   3378     auto TryFold = [this, N, User, dl](SDValue Val) {
   3379       SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
   3380       SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
   3381       SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
   3382 
   3383       return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl,
   3384                                             User->getValueType(0),
   3385                                             O0.getNode(), O1.getNode());
   3386     };
   3387 
   3388     SDValue TrueRes = TryFold(ConstTrue);
   3389     if (!TrueRes)
   3390       break;
   3391     SDValue FalseRes = TryFold(ConstFalse);
   3392     if (!FalseRes)
   3393       break;
   3394 
   3395     // For us to materialize these using one instruction, we must be able to
   3396     // represent them as signed 16-bit integers.
   3397     uint64_t True  = cast<ConstantSDNode>(TrueRes)->getZExtValue(),
   3398              False = cast<ConstantSDNode>(FalseRes)->getZExtValue();
   3399     if (!isInt<16>(True) || !isInt<16>(False))
   3400       break;
   3401 
   3402     // We can replace User with a new SELECT node, and try again to see if we
   3403     // can fold the select with its user.
   3404     Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
   3405     N = User;
   3406     ConstTrue = TrueRes;
   3407     ConstFalse = FalseRes;
   3408   } while (N->hasOneUse());
   3409 }
   3410 
   3411 void PPCDAGToDAGISel::PreprocessISelDAG() {
   3412   SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
   3413   ++Position;
   3414 
   3415   bool MadeChange = false;
   3416   while (Position != CurDAG->allnodes_begin()) {
   3417     SDNode *N = &*--Position;
   3418     if (N->use_empty())
   3419       continue;
   3420 
   3421     SDValue Res;
   3422     switch (N->getOpcode()) {
   3423     default: break;
   3424     case ISD::OR:
   3425       Res = combineToCMPB(N);
   3426       break;
   3427     }
   3428 
   3429     if (!Res)
   3430       foldBoolExts(Res, N);
   3431 
   3432     if (Res) {
   3433       DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld:    ");
   3434       DEBUG(N->dump(CurDAG));
   3435       DEBUG(dbgs() << "\nNew: ");
   3436       DEBUG(Res.getNode()->dump(CurDAG));
   3437       DEBUG(dbgs() << "\n");
   3438 
   3439       CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
   3440       MadeChange = true;
   3441     }
   3442   }
   3443 
   3444   if (MadeChange)
   3445     CurDAG->RemoveDeadNodes();
   3446 }
   3447 
   3448 /// PostprocessISelDAG - Perform some late peephole optimizations
   3449 /// on the DAG representation.
   3450 void PPCDAGToDAGISel::PostprocessISelDAG() {
   3451 
   3452   // Skip peepholes at -O0.
   3453   if (TM.getOptLevel() == CodeGenOpt::None)
   3454     return;
   3455 
   3456   PeepholePPC64();
   3457   PeepholeCROps();
   3458   PeepholePPC64ZExt();
   3459 }
   3460 
   3461 // Check if all users of this node will become isel where the second operand
   3462 // is the constant zero. If this is so, and if we can negate the condition,
   3463 // then we can flip the true and false operands. This will allow the zero to
   3464 // be folded with the isel so that we don't need to materialize a register
   3465 // containing zero.
   3466 bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
   3467   // If we're not using isel, then this does not matter.
   3468   if (!PPCSubTarget->hasISEL())
   3469     return false;
   3470 
   3471   for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
   3472        UI != UE; ++UI) {
   3473     SDNode *User = *UI;
   3474     if (!User->isMachineOpcode())
   3475       return false;
   3476     if (User->getMachineOpcode() != PPC::SELECT_I4 &&
   3477         User->getMachineOpcode() != PPC::SELECT_I8)
   3478       return false;
   3479 
   3480     SDNode *Op2 = User->getOperand(2).getNode();
   3481     if (!Op2->isMachineOpcode())
   3482       return false;
   3483 
   3484     if (Op2->getMachineOpcode() != PPC::LI &&
   3485         Op2->getMachineOpcode() != PPC::LI8)
   3486       return false;
   3487 
   3488     ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2->getOperand(0));
   3489     if (!C)
   3490       return false;
   3491 
   3492     if (!C->isNullValue())
   3493       return false;
   3494   }
   3495 
   3496   return true;
   3497 }
   3498 
   3499 void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
   3500   SmallVector<SDNode *, 4> ToReplace;
   3501   for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
   3502        UI != UE; ++UI) {
   3503     SDNode *User = *UI;
   3504     assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
   3505             User->getMachineOpcode() == PPC::SELECT_I8) &&
   3506            "Must have all select users");
   3507     ToReplace.push_back(User);
   3508   }
   3509 
   3510   for (SmallVector<SDNode *, 4>::iterator UI = ToReplace.begin(),
   3511        UE = ToReplace.end(); UI != UE; ++UI) {
   3512     SDNode *User = *UI;
   3513     SDNode *ResNode =
   3514       CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User),
   3515                              User->getValueType(0), User->getOperand(0),
   3516                              User->getOperand(2),
   3517                              User->getOperand(1));
   3518 
   3519       DEBUG(dbgs() << "CR Peephole replacing:\nOld:    ");
   3520       DEBUG(User->dump(CurDAG));
   3521       DEBUG(dbgs() << "\nNew: ");
   3522       DEBUG(ResNode->dump(CurDAG));
   3523       DEBUG(dbgs() << "\n");
   3524 
   3525       ReplaceUses(User, ResNode);
   3526   }
   3527 }
   3528 
   3529 void PPCDAGToDAGISel::PeepholeCROps() {
   3530   bool IsModified;
   3531   do {
   3532     IsModified = false;
   3533     for (SDNode &Node : CurDAG->allnodes()) {
   3534       MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
   3535       if (!MachineNode || MachineNode->use_empty())
   3536         continue;
   3537       SDNode *ResNode = MachineNode;
   3538 
   3539       bool Op1Set   = false, Op1Unset = false,
   3540            Op1Not   = false,
   3541            Op2Set   = false, Op2Unset = false,
   3542            Op2Not   = false;
   3543 
   3544       unsigned Opcode = MachineNode->getMachineOpcode();
   3545       switch (Opcode) {
   3546       default: break;
   3547       case PPC::CRAND:
   3548       case PPC::CRNAND:
   3549       case PPC::CROR:
   3550       case PPC::CRXOR:
   3551       case PPC::CRNOR:
   3552       case PPC::CREQV:
   3553       case PPC::CRANDC:
   3554       case PPC::CRORC: {
   3555         SDValue Op = MachineNode->getOperand(1);
   3556         if (Op.isMachineOpcode()) {
   3557           if (Op.getMachineOpcode() == PPC::CRSET)
   3558             Op2Set = true;
   3559           else if (Op.getMachineOpcode() == PPC::CRUNSET)
   3560             Op2Unset = true;
   3561           else if (Op.getMachineOpcode() == PPC::CRNOR &&
   3562                    Op.getOperand(0) == Op.getOperand(1))
   3563             Op2Not = true;
   3564         }
   3565         }  // fallthrough
   3566       case PPC::BC:
   3567       case PPC::BCn:
   3568       case PPC::SELECT_I4:
   3569       case PPC::SELECT_I8:
   3570       case PPC::SELECT_F4:
   3571       case PPC::SELECT_F8:
   3572       case PPC::SELECT_QFRC:
   3573       case PPC::SELECT_QSRC:
   3574       case PPC::SELECT_QBRC:
   3575       case PPC::SELECT_VRRC:
   3576       case PPC::SELECT_VSFRC:
   3577       case PPC::SELECT_VSSRC:
   3578       case PPC::SELECT_VSRC: {
   3579         SDValue Op = MachineNode->getOperand(0);
   3580         if (Op.isMachineOpcode()) {
   3581           if (Op.getMachineOpcode() == PPC::CRSET)
   3582             Op1Set = true;
   3583           else if (Op.getMachineOpcode() == PPC::CRUNSET)
   3584             Op1Unset = true;
   3585           else if (Op.getMachineOpcode() == PPC::CRNOR &&
   3586                    Op.getOperand(0) == Op.getOperand(1))
   3587             Op1Not = true;
   3588         }
   3589         }
   3590         break;
   3591       }
   3592 
   3593       bool SelectSwap = false;
   3594       switch (Opcode) {
   3595       default: break;
   3596       case PPC::CRAND:
   3597         if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
   3598           // x & x = x
   3599           ResNode = MachineNode->getOperand(0).getNode();
   3600         else if (Op1Set)
   3601           // 1 & y = y
   3602           ResNode = MachineNode->getOperand(1).getNode();
   3603         else if (Op2Set)
   3604           // x & 1 = x
   3605           ResNode = MachineNode->getOperand(0).getNode();
   3606         else if (Op1Unset || Op2Unset)
   3607           // x & 0 = 0 & y = 0
   3608           ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
   3609                                            MVT::i1);
   3610         else if (Op1Not)
   3611           // ~x & y = andc(y, x)
   3612           ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
   3613                                            MVT::i1, MachineNode->getOperand(1),
   3614                                            MachineNode->getOperand(0).
   3615                                              getOperand(0));
   3616         else if (Op2Not)
   3617           // x & ~y = andc(x, y)
   3618           ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
   3619                                            MVT::i1, MachineNode->getOperand(0),
   3620                                            MachineNode->getOperand(1).
   3621                                              getOperand(0));
   3622         else if (AllUsersSelectZero(MachineNode)) {
   3623           ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
   3624                                            MVT::i1, MachineNode->getOperand(0),
   3625                                            MachineNode->getOperand(1));
   3626           SelectSwap = true;
   3627         }
   3628         break;
   3629       case PPC::CRNAND:
   3630         if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
   3631           // nand(x, x) -> nor(x, x)
   3632           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
   3633                                            MVT::i1, MachineNode->getOperand(0),
   3634                                            MachineNode->getOperand(0));
   3635         else if (Op1Set)
   3636           // nand(1, y) -> nor(y, y)
   3637           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
   3638                                            MVT::i1, MachineNode->getOperand(1),
   3639                                            MachineNode->getOperand(1));
   3640         else if (Op2Set)
   3641           // nand(x, 1) -> nor(x, x)
   3642           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
   3643                                            MVT::i1, MachineNode->getOperand(0),
   3644                                            MachineNode->getOperand(0));
   3645         else if (Op1Unset || Op2Unset)
   3646           // nand(x, 0) = nand(0, y) = 1
   3647           ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
   3648                                            MVT::i1);
   3649         else if (Op1Not)
   3650           // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
   3651           ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
   3652                                            MVT::i1, MachineNode->getOperand(0).
   3653                                                       getOperand(0),
   3654                                            MachineNode->getOperand(1));
   3655         else if (Op2Not)
   3656           // nand(x, ~y) = ~x | y = orc(y, x)
   3657           ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
   3658                                            MVT::i1, MachineNode->getOperand(1).
   3659                                                       getOperand(0),
   3660                                            MachineNode->getOperand(0));
   3661         else if (AllUsersSelectZero(MachineNode)) {
   3662           ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
   3663                                            MVT::i1, MachineNode->getOperand(0),
   3664                                            MachineNode->getOperand(1));
   3665           SelectSwap = true;
   3666         }
   3667         break;
   3668       case PPC::CROR:
   3669         if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
   3670           // x | x = x
   3671           ResNode = MachineNode->getOperand(0).getNode();
   3672         else if (Op1Set || Op2Set)
   3673           // x | 1 = 1 | y = 1
   3674           ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
   3675                                            MVT::i1);
   3676         else if (Op1Unset)
   3677           // 0 | y = y
   3678           ResNode = MachineNode->getOperand(1).getNode();
   3679         else if (Op2Unset)
   3680           // x | 0 = x
   3681           ResNode = MachineNode->getOperand(0).getNode();
   3682         else if (Op1Not)
   3683           // ~x | y = orc(y, x)
   3684           ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
   3685                                            MVT::i1, MachineNode->getOperand(1),
   3686                                            MachineNode->getOperand(0).
   3687                                              getOperand(0));
   3688         else if (Op2Not)
   3689           // x | ~y = orc(x, y)
   3690           ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
   3691                                            MVT::i1, MachineNode->getOperand(0),
   3692                                            MachineNode->getOperand(1).
   3693                                              getOperand(0));
   3694         else if (AllUsersSelectZero(MachineNode)) {
   3695           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
   3696                                            MVT::i1, MachineNode->getOperand(0),
   3697                                            MachineNode->getOperand(1));
   3698           SelectSwap = true;
   3699         }
   3700         break;
   3701       case PPC::CRXOR:
   3702         if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
   3703           // xor(x, x) = 0
   3704           ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
   3705                                            MVT::i1);
   3706         else if (Op1Set)
   3707           // xor(1, y) -> nor(y, y)
   3708           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
   3709                                            MVT::i1, MachineNode->getOperand(1),
   3710                                            MachineNode->getOperand(1));
   3711         else if (Op2Set)
   3712           // xor(x, 1) -> nor(x, x)
   3713           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
   3714                                            MVT::i1, MachineNode->getOperand(0),
   3715                                            MachineNode->getOperand(0));
   3716         else if (Op1Unset)
   3717           // xor(0, y) = y
   3718           ResNode = MachineNode->getOperand(1).getNode();
   3719         else if (Op2Unset)
   3720           // xor(x, 0) = x
   3721           ResNode = MachineNode->getOperand(0).getNode();
   3722         else if (Op1Not)
   3723           // xor(~x, y) = eqv(x, y)
   3724           ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
   3725                                            MVT::i1, MachineNode->getOperand(0).
   3726                                                       getOperand(0),
   3727                                            MachineNode->getOperand(1));
   3728         else if (Op2Not)
   3729           // xor(x, ~y) = eqv(x, y)
   3730           ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
   3731                                            MVT::i1, MachineNode->getOperand(0),
   3732                                            MachineNode->getOperand(1).
   3733                                              getOperand(0));
   3734         else if (AllUsersSelectZero(MachineNode)) {
   3735           ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
   3736                                            MVT::i1, MachineNode->getOperand(0),
   3737                                            MachineNode->getOperand(1));
   3738           SelectSwap = true;
   3739         }
   3740         break;
   3741       case PPC::CRNOR:
   3742         if (Op1Set || Op2Set)
   3743           // nor(1, y) -> 0
   3744           ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
   3745                                            MVT::i1);
   3746         else if (Op1Unset)
   3747           // nor(0, y) = ~y -> nor(y, y)
   3748           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
   3749                                            MVT::i1, MachineNode->getOperand(1),
   3750                                            MachineNode->getOperand(1));
   3751         else if (Op2Unset)
   3752           // nor(x, 0) = ~x
   3753           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
   3754                                            MVT::i1, MachineNode->getOperand(0),
   3755                                            MachineNode->getOperand(0));
   3756         else if (Op1Not)
   3757           // nor(~x, y) = andc(x, y)
   3758           ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
   3759                                            MVT::i1, MachineNode->getOperand(0).
   3760                                                       getOperand(0),
   3761                                            MachineNode->getOperand(1));
   3762         else if (Op2Not)
   3763           // nor(x, ~y) = andc(y, x)
   3764           ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
   3765                                            MVT::i1, MachineNode->getOperand(1).
   3766                                                       getOperand(0),
   3767                                            MachineNode->getOperand(0));
   3768         else if (AllUsersSelectZero(MachineNode)) {
   3769           ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
   3770                                            MVT::i1, MachineNode->getOperand(0),
   3771                                            MachineNode->getOperand(1));
   3772           SelectSwap = true;
   3773         }
   3774         break;
   3775       case PPC::CREQV:
   3776         if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
   3777           // eqv(x, x) = 1
   3778           ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
   3779                                            MVT::i1);
   3780         else if (Op1Set)
   3781           // eqv(1, y) = y
   3782           ResNode = MachineNode->getOperand(1).getNode();
   3783         else if (Op2Set)
   3784           // eqv(x, 1) = x
   3785           ResNode = MachineNode->getOperand(0).getNode();
   3786         else if (Op1Unset)
   3787           // eqv(0, y) = ~y -> nor(y, y)
   3788           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
   3789                                            MVT::i1, MachineNode->getOperand(1),
   3790                                            MachineNode->getOperand(1));
   3791         else if (Op2Unset)
   3792           // eqv(x, 0) = ~x
   3793           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
   3794                                            MVT::i1, MachineNode->getOperand(0),
   3795                                            MachineNode->getOperand(0));
   3796         else if (Op1Not)
   3797           // eqv(~x, y) = xor(x, y)
   3798           ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
   3799                                            MVT::i1, MachineNode->getOperand(0).
   3800                                                       getOperand(0),
   3801                                            MachineNode->getOperand(1));
   3802         else if (Op2Not)
   3803           // eqv(x, ~y) = xor(x, y)
   3804           ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
   3805                                            MVT::i1, MachineNode->getOperand(0),
   3806                                            MachineNode->getOperand(1).
   3807                                              getOperand(0));
   3808         else if (AllUsersSelectZero(MachineNode)) {
   3809           ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
   3810                                            MVT::i1, MachineNode->getOperand(0),
   3811                                            MachineNode->getOperand(1));
   3812           SelectSwap = true;
   3813         }
   3814         break;
   3815       case PPC::CRANDC:
   3816         if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
   3817           // andc(x, x) = 0
   3818           ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
   3819                                            MVT::i1);
   3820         else if (Op1Set)
   3821           // andc(1, y) = ~y
   3822           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
   3823                                            MVT::i1, MachineNode->getOperand(1),
   3824                                            MachineNode->getOperand(1));
   3825         else if (Op1Unset || Op2Set)
   3826           // andc(0, y) = andc(x, 1) = 0
   3827           ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
   3828                                            MVT::i1);
   3829         else if (Op2Unset)
   3830           // andc(x, 0) = x
   3831           ResNode = MachineNode->getOperand(0).getNode();
   3832         else if (Op1Not)
   3833           // andc(~x, y) = ~(x | y) = nor(x, y)
   3834           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
   3835                                            MVT::i1, MachineNode->getOperand(0).
   3836                                                       getOperand(0),
   3837                                            MachineNode->getOperand(1));
   3838         else if (Op2Not)
   3839           // andc(x, ~y) = x & y
   3840           ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
   3841                                            MVT::i1, MachineNode->getOperand(0),
   3842                                            MachineNode->getOperand(1).
   3843                                              getOperand(0));
   3844         else if (AllUsersSelectZero(MachineNode)) {
   3845           ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
   3846                                            MVT::i1, MachineNode->getOperand(1),
   3847                                            MachineNode->getOperand(0));
   3848           SelectSwap = true;
   3849         }
   3850         break;
   3851       case PPC::CRORC:
   3852         if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
   3853           // orc(x, x) = 1
   3854           ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
   3855                                            MVT::i1);
   3856         else if (Op1Set || Op2Unset)
   3857           // orc(1, y) = orc(x, 0) = 1
   3858           ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
   3859                                            MVT::i1);
   3860         else if (Op2Set)
   3861           // orc(x, 1) = x
   3862           ResNode = MachineNode->getOperand(0).getNode();
   3863         else if (Op1Unset)
   3864           // orc(0, y) = ~y
   3865           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
   3866                                            MVT::i1, MachineNode->getOperand(1),
   3867                                            MachineNode->getOperand(1));
   3868         else if (Op1Not)
   3869           // orc(~x, y) = ~(x & y) = nand(x, y)
   3870           ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
   3871                                            MVT::i1, MachineNode->getOperand(0).
   3872                                                       getOperand(0),
   3873                                            MachineNode->getOperand(1));
   3874         else if (Op2Not)
   3875           // orc(x, ~y) = x | y
   3876           ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
   3877                                            MVT::i1, MachineNode->getOperand(0),
   3878                                            MachineNode->getOperand(1).
   3879                                              getOperand(0));
   3880         else if (AllUsersSelectZero(MachineNode)) {
   3881           ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
   3882                                            MVT::i1, MachineNode->getOperand(1),
   3883                                            MachineNode->getOperand(0));
   3884           SelectSwap = true;
   3885         }
   3886         break;
   3887       case PPC::SELECT_I4:
   3888       case PPC::SELECT_I8:
   3889       case PPC::SELECT_F4:
   3890       case PPC::SELECT_F8:
   3891       case PPC::SELECT_QFRC:
   3892       case PPC::SELECT_QSRC:
   3893       case PPC::SELECT_QBRC:
   3894       case PPC::SELECT_VRRC:
   3895       case PPC::SELECT_VSFRC:
   3896       case PPC::SELECT_VSSRC:
   3897       case PPC::SELECT_VSRC:
   3898         if (Op1Set)
   3899           ResNode = MachineNode->getOperand(1).getNode();
   3900         else if (Op1Unset)
   3901           ResNode = MachineNode->getOperand(2).getNode();
   3902         else if (Op1Not)
   3903           ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(),
   3904                                            SDLoc(MachineNode),
   3905                                            MachineNode->getValueType(0),
   3906                                            MachineNode->getOperand(0).
   3907                                              getOperand(0),
   3908                                            MachineNode->getOperand(2),
   3909                                            MachineNode->getOperand(1));
   3910         break;
   3911       case PPC::BC:
   3912       case PPC::BCn:
   3913         if (Op1Not)
   3914           ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn :
   3915                                                                PPC::BC,
   3916                                            SDLoc(MachineNode),
   3917                                            MVT::Other,
   3918                                            MachineNode->getOperand(0).
   3919                                              getOperand(0),
   3920                                            MachineNode->getOperand(1),
   3921                                            MachineNode->getOperand(2));
   3922         // FIXME: Handle Op1Set, Op1Unset here too.
   3923         break;
   3924       }
   3925 
   3926       // If we're inverting this node because it is used only by selects that
   3927       // we'd like to swap, then swap the selects before the node replacement.
   3928       if (SelectSwap)
   3929         SwapAllSelectUsers(MachineNode);
   3930 
   3931       if (ResNode != MachineNode) {
   3932         DEBUG(dbgs() << "CR Peephole replacing:\nOld:    ");
   3933         DEBUG(MachineNode->dump(CurDAG));
   3934         DEBUG(dbgs() << "\nNew: ");
   3935         DEBUG(ResNode->dump(CurDAG));
   3936         DEBUG(dbgs() << "\n");
   3937 
   3938         ReplaceUses(MachineNode, ResNode);
   3939         IsModified = true;
   3940       }
   3941     }
   3942     if (IsModified)
   3943       CurDAG->RemoveDeadNodes();
   3944   } while (IsModified);
   3945 }
   3946 
   3947 // Gather the set of 32-bit operations that are known to have their
   3948 // higher-order 32 bits zero, where ToPromote contains all such operations.
   3949 static bool PeepholePPC64ZExtGather(SDValue Op32,
   3950                                     SmallPtrSetImpl<SDNode *> &ToPromote) {
   3951   if (!Op32.isMachineOpcode())
   3952     return false;
   3953 
   3954   // First, check for the "frontier" instructions (those that will clear the
   3955   // higher-order 32 bits.
   3956 
   3957   // For RLWINM and RLWNM, we need to make sure that the mask does not wrap
   3958   // around. If it does not, then these instructions will clear the
   3959   // higher-order bits.
   3960   if ((Op32.getMachineOpcode() == PPC::RLWINM ||
   3961        Op32.getMachineOpcode() == PPC::RLWNM) &&
   3962       Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) {
   3963     ToPromote.insert(Op32.getNode());
   3964     return true;
   3965   }
   3966 
   3967   // SLW and SRW always clear the higher-order bits.
   3968   if (Op32.getMachineOpcode() == PPC::SLW ||
   3969       Op32.getMachineOpcode() == PPC::SRW) {
   3970     ToPromote.insert(Op32.getNode());
   3971     return true;
   3972   }
   3973 
   3974   // For LI and LIS, we need the immediate to be positive (so that it is not
   3975   // sign extended).
   3976   if (Op32.getMachineOpcode() == PPC::LI ||
   3977       Op32.getMachineOpcode() == PPC::LIS) {
   3978     if (!isUInt<15>(Op32.getConstantOperandVal(0)))
   3979       return false;
   3980 
   3981     ToPromote.insert(Op32.getNode());
   3982     return true;
   3983   }
   3984 
   3985   // LHBRX and LWBRX always clear the higher-order bits.
   3986   if (Op32.getMachineOpcode() == PPC::LHBRX ||
   3987       Op32.getMachineOpcode() == PPC::LWBRX) {
   3988     ToPromote.insert(Op32.getNode());
   3989     return true;
   3990   }
   3991 
   3992   // CNTLZW always produces a 64-bit value in [0,32], and so is zero extended.
   3993   if (Op32.getMachineOpcode() == PPC::CNTLZW) {
   3994     ToPromote.insert(Op32.getNode());
   3995     return true;
   3996   }
   3997 
   3998   // Next, check for those instructions we can look through.
   3999 
   4000   // Assuming the mask does not wrap around, then the higher-order bits are
   4001   // taken directly from the first operand.
   4002   if (Op32.getMachineOpcode() == PPC::RLWIMI &&
   4003       Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) {
   4004     SmallPtrSet<SDNode *, 16> ToPromote1;
   4005     if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
   4006       return false;
   4007 
   4008     ToPromote.insert(Op32.getNode());
   4009     ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
   4010     return true;
   4011   }
   4012 
   4013   // For OR, the higher-order bits are zero if that is true for both operands.
   4014   // For SELECT_I4, the same is true (but the relevant operand numbers are
   4015   // shifted by 1).
   4016   if (Op32.getMachineOpcode() == PPC::OR ||
   4017       Op32.getMachineOpcode() == PPC::SELECT_I4) {
   4018     unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0;
   4019     SmallPtrSet<SDNode *, 16> ToPromote1;
   4020     if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1))
   4021       return false;
   4022     if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1))
   4023       return false;
   4024 
   4025     ToPromote.insert(Op32.getNode());
   4026     ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
   4027     return true;
   4028   }
   4029 
   4030   // For ORI and ORIS, we need the higher-order bits of the first operand to be
   4031   // zero, and also for the constant to be positive (so that it is not sign
   4032   // extended).
   4033   if (Op32.getMachineOpcode() == PPC::ORI ||
   4034       Op32.getMachineOpcode() == PPC::ORIS) {
   4035     SmallPtrSet<SDNode *, 16> ToPromote1;
   4036     if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
   4037       return false;
   4038     if (!isUInt<15>(Op32.getConstantOperandVal(1)))
   4039       return false;
   4040 
   4041     ToPromote.insert(Op32.getNode());
   4042     ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
   4043     return true;
   4044   }
   4045 
   4046   // The higher-order bits of AND are zero if that is true for at least one of
   4047   // the operands.
   4048   if (Op32.getMachineOpcode() == PPC::AND) {
   4049     SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2;
   4050     bool Op0OK =
   4051       PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
   4052     bool Op1OK =
   4053       PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2);
   4054     if (!Op0OK && !Op1OK)
   4055       return false;
   4056 
   4057     ToPromote.insert(Op32.getNode());
   4058 
   4059     if (Op0OK)
   4060       ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
   4061 
   4062     if (Op1OK)
   4063       ToPromote.insert(ToPromote2.begin(), ToPromote2.end());
   4064 
   4065     return true;
   4066   }
   4067 
   4068   // For ANDI and ANDIS, the higher-order bits are zero if either that is true
   4069   // of the first operand, or if the second operand is positive (so that it is
   4070   // not sign extended).
   4071   if (Op32.getMachineOpcode() == PPC::ANDIo ||
   4072       Op32.getMachineOpcode() == PPC::ANDISo) {
   4073     SmallPtrSet<SDNode *, 16> ToPromote1;
   4074     bool Op0OK =
   4075       PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
   4076     bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1));
   4077     if (!Op0OK && !Op1OK)
   4078       return false;
   4079 
   4080     ToPromote.insert(Op32.getNode());
   4081 
   4082     if (Op0OK)
   4083       ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
   4084 
   4085     return true;
   4086   }
   4087 
   4088   return false;
   4089 }
   4090 
   4091 void PPCDAGToDAGISel::PeepholePPC64ZExt() {
   4092   if (!PPCSubTarget->isPPC64())
   4093     return;
   4094 
   4095   // When we zero-extend from i32 to i64, we use a pattern like this:
   4096   // def : Pat<(i64 (zext i32:$in)),
   4097   //           (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
   4098   //                   0, 32)>;
   4099   // There are several 32-bit shift/rotate instructions, however, that will
   4100   // clear the higher-order bits of their output, rendering the RLDICL
   4101   // unnecessary. When that happens, we remove it here, and redefine the
   4102   // relevant 32-bit operation to be a 64-bit operation.
   4103 
   4104   SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
   4105   ++Position;
   4106 
   4107   bool MadeChange = false;
   4108   while (Position != CurDAG->allnodes_begin()) {
   4109     SDNode *N = &*--Position;
   4110     // Skip dead nodes and any non-machine opcodes.
   4111     if (N->use_empty() || !N->isMachineOpcode())
   4112       continue;
   4113 
   4114     if (N->getMachineOpcode() != PPC::RLDICL)
   4115       continue;
   4116 
   4117     if (N->getConstantOperandVal(1) != 0 ||
   4118         N->getConstantOperandVal(2) != 32)
   4119       continue;
   4120 
   4121     SDValue ISR = N->getOperand(0);
   4122     if (!ISR.isMachineOpcode() ||
   4123         ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG)
   4124       continue;
   4125 
   4126     if (!ISR.hasOneUse())
   4127       continue;
   4128 
   4129     if (ISR.getConstantOperandVal(2) != PPC::sub_32)
   4130       continue;
   4131 
   4132     SDValue IDef = ISR.getOperand(0);
   4133     if (!IDef.isMachineOpcode() ||
   4134         IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF)
   4135       continue;
   4136 
   4137     // We now know that we're looking at a canonical i32 -> i64 zext. See if we
   4138     // can get rid of it.
   4139 
   4140     SDValue Op32 = ISR->getOperand(1);
   4141     if (!Op32.isMachineOpcode())
   4142       continue;
   4143 
   4144     // There are some 32-bit instructions that always clear the high-order 32
   4145     // bits, there are also some instructions (like AND) that we can look
   4146     // through.
   4147     SmallPtrSet<SDNode *, 16> ToPromote;
   4148     if (!PeepholePPC64ZExtGather(Op32, ToPromote))
   4149       continue;
   4150 
   4151     // If the ToPromote set contains nodes that have uses outside of the set
   4152     // (except for the original INSERT_SUBREG), then abort the transformation.
   4153     bool OutsideUse = false;
   4154     for (SDNode *PN : ToPromote) {
   4155       for (SDNode *UN : PN->uses()) {
   4156         if (!ToPromote.count(UN) && UN != ISR.getNode()) {
   4157           OutsideUse = true;
   4158           break;
   4159         }
   4160       }
   4161 
   4162       if (OutsideUse)
   4163         break;
   4164     }
   4165     if (OutsideUse)
   4166       continue;
   4167 
   4168     MadeChange = true;
   4169 
   4170     // We now know that this zero extension can be removed by promoting to
   4171     // nodes in ToPromote to 64-bit operations, where for operations in the
   4172     // frontier of the set, we need to insert INSERT_SUBREGs for their
   4173     // operands.
   4174     for (SDNode *PN : ToPromote) {
   4175       unsigned NewOpcode;
   4176       switch (PN->getMachineOpcode()) {
   4177       default:
   4178         llvm_unreachable("Don't know the 64-bit variant of this instruction");
   4179       case PPC::RLWINM:    NewOpcode = PPC::RLWINM8; break;
   4180       case PPC::RLWNM:     NewOpcode = PPC::RLWNM8; break;
   4181       case PPC::SLW:       NewOpcode = PPC::SLW8; break;
   4182       case PPC::SRW:       NewOpcode = PPC::SRW8; break;
   4183       case PPC::LI:        NewOpcode = PPC::LI8; break;
   4184       case PPC::LIS:       NewOpcode = PPC::LIS8; break;
   4185       case PPC::LHBRX:     NewOpcode = PPC::LHBRX8; break;
   4186       case PPC::LWBRX:     NewOpcode = PPC::LWBRX8; break;
   4187       case PPC::CNTLZW:    NewOpcode = PPC::CNTLZW8; break;
   4188       case PPC::RLWIMI:    NewOpcode = PPC::RLWIMI8; break;
   4189       case PPC::OR:        NewOpcode = PPC::OR8; break;
   4190       case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
   4191       case PPC::ORI:       NewOpcode = PPC::ORI8; break;
   4192       case PPC::ORIS:      NewOpcode = PPC::ORIS8; break;
   4193       case PPC::AND:       NewOpcode = PPC::AND8; break;
   4194       case PPC::ANDIo:     NewOpcode = PPC::ANDIo8; break;
   4195       case PPC::ANDISo:    NewOpcode = PPC::ANDISo8; break;
   4196       }
   4197 
   4198       // Note: During the replacement process, the nodes will be in an
   4199       // inconsistent state (some instructions will have operands with values
   4200       // of the wrong type). Once done, however, everything should be right
   4201       // again.
   4202 
   4203       SmallVector<SDValue, 4> Ops;
   4204       for (const SDValue &V : PN->ops()) {
   4205         if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 &&
   4206             !isa<ConstantSDNode>(V)) {
   4207           SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) };
   4208           SDNode *ReplOp =
   4209             CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V),
   4210                                    ISR.getNode()->getVTList(), ReplOpOps);
   4211           Ops.push_back(SDValue(ReplOp, 0));
   4212         } else {
   4213           Ops.push_back(V);
   4214         }
   4215       }
   4216 
   4217       // Because all to-be-promoted nodes only have users that are other
   4218       // promoted nodes (or the original INSERT_SUBREG), we can safely replace
   4219       // the i32 result value type with i64.
   4220 
   4221       SmallVector<EVT, 2> NewVTs;
   4222       SDVTList VTs = PN->getVTList();
   4223       for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i)
   4224         if (VTs.VTs[i] == MVT::i32)
   4225           NewVTs.push_back(MVT::i64);
   4226         else
   4227           NewVTs.push_back(VTs.VTs[i]);
   4228 
   4229       DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld:    ");
   4230       DEBUG(PN->dump(CurDAG));
   4231 
   4232       CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops);
   4233 
   4234       DEBUG(dbgs() << "\nNew: ");
   4235       DEBUG(PN->dump(CurDAG));
   4236       DEBUG(dbgs() << "\n");
   4237     }
   4238 
   4239     // Now we replace the original zero extend and its associated INSERT_SUBREG
   4240     // with the value feeding the INSERT_SUBREG (which has now been promoted to
   4241     // return an i64).
   4242 
   4243     DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld:    ");
   4244     DEBUG(N->dump(CurDAG));
   4245     DEBUG(dbgs() << "\nNew: ");
   4246     DEBUG(Op32.getNode()->dump(CurDAG));
   4247     DEBUG(dbgs() << "\n");
   4248 
   4249     ReplaceUses(N, Op32.getNode());
   4250   }
   4251 
   4252   if (MadeChange)
   4253     CurDAG->RemoveDeadNodes();
   4254 }
   4255 
   4256 void PPCDAGToDAGISel::PeepholePPC64() {
   4257   // These optimizations are currently supported only for 64-bit SVR4.
   4258   if (PPCSubTarget->isDarwin() || !PPCSubTarget->isPPC64())
   4259     return;
   4260 
   4261   SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
   4262   ++Position;
   4263 
   4264   while (Position != CurDAG->allnodes_begin()) {
   4265     SDNode *N = &*--Position;
   4266     // Skip dead nodes and any non-machine opcodes.
   4267     if (N->use_empty() || !N->isMachineOpcode())
   4268       continue;
   4269 
   4270     unsigned FirstOp;
   4271     unsigned StorageOpcode = N->getMachineOpcode();
   4272 
   4273     switch (StorageOpcode) {
   4274     default: continue;
   4275 
   4276     case PPC::LBZ:
   4277     case PPC::LBZ8:
   4278     case PPC::LD:
   4279     case PPC::LFD:
   4280     case PPC::LFS:
   4281     case PPC::LHA:
   4282     case PPC::LHA8:
   4283     case PPC::LHZ:
   4284     case PPC::LHZ8:
   4285     case PPC::LWA:
   4286     case PPC::LWZ:
   4287     case PPC::LWZ8:
   4288       FirstOp = 0;
   4289       break;
   4290 
   4291     case PPC::STB:
   4292     case PPC::STB8:
   4293     case PPC::STD:
   4294     case PPC::STFD:
   4295     case PPC::STFS:
   4296     case PPC::STH:
   4297     case PPC::STH8:
   4298     case PPC::STW:
   4299     case PPC::STW8:
   4300       FirstOp = 1;
   4301       break;
   4302     }
   4303 
   4304     // If this is a load or store with a zero offset, or within the alignment,
   4305     // we may be able to fold an add-immediate into the memory operation.
   4306     // The check against alignment is below, as it can't occur until we check
   4307     // the arguments to N
   4308     if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
   4309       continue;
   4310 
   4311     SDValue Base = N->getOperand(FirstOp + 1);
   4312     if (!Base.isMachineOpcode())
   4313       continue;
   4314 
   4315     // On targets with fusion, we don't want this to fire and remove a fusion
   4316     // opportunity, unless a) it results in another fusion opportunity or
   4317     // b) optimizing for size.
   4318     if (PPCSubTarget->hasFusion() &&
   4319         (!MF->getFunction()->optForSize() && !Base.hasOneUse()))
   4320       continue;
   4321 
   4322     unsigned Flags = 0;
   4323     bool ReplaceFlags = true;
   4324 
   4325     // When the feeding operation is an add-immediate of some sort,
   4326     // determine whether we need to add relocation information to the
   4327     // target flags on the immediate operand when we fold it into the
   4328     // load instruction.
   4329     //
   4330     // For something like ADDItocL, the relocation information is
   4331     // inferred from the opcode; when we process it in the AsmPrinter,
   4332     // we add the necessary relocation there.  A load, though, can receive
   4333     // relocation from various flavors of ADDIxxx, so we need to carry
   4334     // the relocation information in the target flags.
   4335     switch (Base.getMachineOpcode()) {
   4336     default: continue;
   4337 
   4338     case PPC::ADDI8:
   4339     case PPC::ADDI:
   4340       // In some cases (such as TLS) the relocation information
   4341       // is already in place on the operand, so copying the operand
   4342       // is sufficient.
   4343       ReplaceFlags = false;
   4344       // For these cases, the immediate may not be divisible by 4, in
   4345       // which case the fold is illegal for DS-form instructions.  (The
   4346       // other cases provide aligned addresses and are always safe.)
   4347       if ((StorageOpcode == PPC::LWA ||
   4348            StorageOpcode == PPC::LD  ||
   4349            StorageOpcode == PPC::STD) &&
   4350           (!isa<ConstantSDNode>(Base.getOperand(1)) ||
   4351            Base.getConstantOperandVal(1) % 4 != 0))
   4352         continue;
   4353       break;
   4354     case PPC::ADDIdtprelL:
   4355       Flags = PPCII::MO_DTPREL_LO;
   4356       break;
   4357     case PPC::ADDItlsldL:
   4358       Flags = PPCII::MO_TLSLD_LO;
   4359       break;
   4360     case PPC::ADDItocL:
   4361       Flags = PPCII::MO_TOC_LO;
   4362       break;
   4363     }
   4364 
   4365     SDValue ImmOpnd = Base.getOperand(1);
   4366     int MaxDisplacement = 0;
   4367     if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
   4368       const GlobalValue *GV = GA->getGlobal();
   4369       MaxDisplacement = GV->getAlignment() - 1;
   4370     }
   4371 
   4372     int Offset = N->getConstantOperandVal(FirstOp);
   4373     if (Offset < 0 || Offset > MaxDisplacement)
   4374       continue;
   4375 
   4376     // We found an opportunity.  Reverse the operands from the add
   4377     // immediate and substitute them into the load or store.  If
   4378     // needed, update the target flags for the immediate operand to
   4379     // reflect the necessary relocation information.
   4380     DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase:    ");
   4381     DEBUG(Base->dump(CurDAG));
   4382     DEBUG(dbgs() << "\nN: ");
   4383     DEBUG(N->dump(CurDAG));
   4384     DEBUG(dbgs() << "\n");
   4385 
   4386     // If the relocation information isn't already present on the
   4387     // immediate operand, add it now.
   4388     if (ReplaceFlags) {
   4389       if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
   4390         SDLoc dl(GA);
   4391         const GlobalValue *GV = GA->getGlobal();
   4392         // We can't perform this optimization for data whose alignment
   4393         // is insufficient for the instruction encoding.
   4394         if (GV->getAlignment() < 4 &&
   4395             (StorageOpcode == PPC::LD || StorageOpcode == PPC::STD ||
   4396              StorageOpcode == PPC::LWA || (Offset % 4) != 0)) {
   4397           DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
   4398           continue;
   4399         }
   4400         ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);
   4401       } else if (ConstantPoolSDNode *CP =
   4402                  dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
   4403         const Constant *C = CP->getConstVal();
   4404         ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64,
   4405                                                 CP->getAlignment(),
   4406                                                 Offset, Flags);
   4407       }
   4408     }
   4409 
   4410     if (FirstOp == 1) // Store
   4411       (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
   4412                                        Base.getOperand(0), N->getOperand(3));
   4413     else // Load
   4414       (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
   4415                                        N->getOperand(2));
   4416 
   4417     // The add-immediate may now be dead, in which case remove it.
   4418     if (Base.getNode()->use_empty())
   4419       CurDAG->RemoveDeadNode(Base.getNode());
   4420   }
   4421 }
   4422 
   4423 
   4424 /// createPPCISelDag - This pass converts a legalized DAG into a
   4425 /// PowerPC-specific DAG, ready for instruction scheduling.
   4426 ///
   4427 FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) {
   4428   return new PPCDAGToDAGISel(TM);
   4429 }
   4430