Cross Reference: /external/swiftshader/third_party/llvm-7.0/llvm/lib/Target/X86/X86ISelLowering.cpp

Lines Matching refs:PSHUFB
4361   case X86ISD::PSHUFB:
4400   case X86ISD::PSHUFB:
5906   case X86ISD::PSHUFB: {
8011 /// Look for opportunities to create a VPERMV/VPERMILPV/PSHUFB variable permute
8079       Opcode = X86ISD::PSHUFB;
8085       Opcode = X86ISD::PSHUFB;
8095       Opcode = X86ISD::PSHUFB;
8141                                DAG.getNode(X86ISD::PSHUFB, DL, VT, Ops[1], Idx),
8142                                DAG.getNode(X86ISD::PSHUFB, DL, VT, Ops[0], Idx),
8268 // TODO: Utilize pshufb and zero mask blending to support more efficient
9255 /// Try to lower a shuffle with a single PSHUFB of V1 or V2.
9294     // PSHUFB can't cross lanes, ensure this doesn't happen.
9306       VT, DAG.getNode(X86ISD::PSHUFB, DL, I8VT, DAG.getBitcast(I8VT, V),
10511   // pshufb when available. We can only use more than 2 unpack instructions
10512   // when zero extending i8 elements which also makes it easier to use pshufb.
10523         VT, DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, InputV,
12320 /// Helper to form a PSHUFB-based shuffle+blend, opportunistically avoiding the
12353     V1 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8,
12357     V2 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8,
12491   // If we can't directly blend but can use PSHUFB, that will be better as it
12735   // with PSHUFB. It is important to do this before we attempt to generate any
12737   // lowerings can find an instruction sequence that is faster than a PSHUFB, we
12739   // a PSHUFB in the end. But once we start blending from multiple inputs,
12740   // the complexity of DAG combining bad patterns back into PSHUFB is too high,
12742   // PSHUFB approach because of its ability to zero lanes.
12751     SDValue PSHUFB = lowerVectorShuffleAsBlendOfPSHUFBs(
12756     // important as a single pshufb is significantly faster for that.
12769       // shuffles will both be pshufb, in which case we shouldn't bother with
12780     return PSHUFB;
14163   if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(
14165     return PSHUFB;
14243   if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(
14245     return PSHUFB;
14730   if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(
14732     return PSHUFB;
14770   if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(
14772     return PSHUFB;
15344     // than using VMOVD + VPERMV/PSHUFB sequence ( 2/3 cycles throughput)
17279     // On AVX2, v8i32 -> v8i16 becomes PSHUFB.
17283       // The PSHUFB mask:
17307     // The PSHUFB mask:
22232 // Lower CTLZ using a PSHUFB lookup table implementation.
22241   // Per-nibble leading zero PSHUFB lookup table.
22253   // into lo/hi nibbles and use the PSHUFB LUT to perform CLTZ on each of them.
22273   Lo = DAG.getNode(X86ISD::PSHUFB, DL, CurrVT, InRegLUT, Lo);
22274   Hi = DAG.getNode(X86ISD::PSHUFB, DL, CurrVT, InRegLUT, Hi);
22334   assert(Subtarget.hasSSSE3() && "Expected SSSE3 support for PSHUFB");
24510   // masked out higher ones) for each byte. PSHUFB is used separately with both
24542       DAG.getNode(X86ISD::PSHUFB, DL, ByteVecVT, InRegLUT, HighNibbles);
24544       DAG.getNode(X86ISD::PSHUFB, DL, ByteVecVT, InRegLUT, LowNibbles);
24730   // Perform BITREVERSE using PSHUFB lookups. Each byte is split into
24731   // two nibbles and a PSHUFB lookup to find the bitreverse of each
24756   Lo = DAG.getNode(X86ISD::PSHUFB, DL, VT, LoMask, Lo);
24757   Hi = DAG.getNode(X86ISD::PSHUFB, DL, VT, HiMask, Hi);
25943   case X86ISD::PSHUFB:             return "X86ISD::PSHUFB";
29621 /// for this operation, or into a PSHUFB instruction which is a fully general
29988   // mask, we can replace them with a single PSHUFB instruction profitably.
29989   // Intel's manuals suggest only using PSHUFB if doing so replacing 5
29990   // instructions, but in practice PSHUFB tends to be *very* fast so we're
30016     Res = DAG.getNode(X86ISD::PSHUFB, DL, ByteVT, Res, PSHUFBMaskOp);
30021   // to VPPERM. We match the depth requirement of PSHUFB - VPPERM is never
30022   // slower than PSHUFB on targets that support both.
30154 ///    PSHUFB instruction if available. We do this as the last combining step
30155 ///    to ensure we avoid using PSHUFB if we can implement the shuffle with
30156 ///    a suitable short sequence of other instructions. The PSHUFB will either
30166 /// would simplify under the threshold for PSHUFB formation because of
36650   // SSSE3's pshufb results in less instructions in the cases below.
39130   // is only worth it with SSSE3 (PSHUFB).
39707   case X86ISD::PSHUFB:
OpenGrok