
Lines Matching refs:Shuffle

3931   default: llvm_unreachable("Unknown x86 shuffle node");
3945 default: llvm_unreachable("Unknown x86 shuffle node");
4636 // Subvector should be inserted in the middle - use shuffle
4717 /// This produces a shuffle where the low element of V2 is swizzled into the
4719 /// This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
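As a hedged illustration of that mask shape, here is a tiny standalone helper (hypothetical name, not the LLVM routine) that builds it for a vector of NumElts elements:

// Result element Idx takes the low element of V2 (index NumElts in the
// concatenated <V1, V2> numbering); every other element passes through V1.
static void buildLowElementSwizzleMask(int NumElts, int Idx, int *Mask) {
  for (int i = 0; i != NumElts; ++i)
    Mask[i] = (i == Idx) ? NumElts : i;  // NumElts=4: Idx=0 -> 4,1,2,3; Idx=3 -> 0,1,2,4
}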
4735 /// Calculates the shuffle mask corresponding to the target-specific opcode.
5008 default: llvm_unreachable("unknown target shuffle node");
5011 // If we have a fake unary shuffle, the shuffle mask is spread across two
5023 /// element of the result of the vector shuffle.
5245 // Let the shuffle legalizer deal with blend operations.
5312 // the shuffle mask.
5497 /// 2. A splat shuffle which uses a scalar_to_vector node which comes from
5676 // is 2, as specified by the shuffle.
5739 // Quit if more than 2 vectors to shuffle
6348 // a constant pool load than it is to do a movd + shuffle.
6357 // convert it to a vector with movd (S2V+shuffle to zero extend).
6366 // a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
6428 // movd/movss) to move this into the low element, then shuffle it into
6436 // Splat is obviously ok. Let legalizer expand it to a shuffle.
6439 // Instead of a shuffle like this:
6440 // shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
6442 // shuffle (vload ptr)), undef, <1, 1, 1, 1>
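The arithmetic behind that rewrite is just the load offset divided by the element size; a minimal sketch under that assumption (helper name is illustrative):

// Rather than loading at (ptr + OffsetBytes) and splatting element 0, load the
// whole vector at ptr and splat the element that lives at that offset.
static int splatIndexForOffset(unsigned OffsetBytes, unsigned EltSizeBytes) {
  return (int)(OffsetBytes / EltSizeBytes);  // 4 / 4 == 1, hence <1, 1, 1, 1>
}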
6557 // Check for a build vector from mostly shuffle plus few inserting.
6595 // then it is safe to just drop this shuffle: V[i] is already in the
6734 // Vector shuffle lowering
6740 // a framework that allows reasonably efficient handling of all vector shuffle
6747 /// array input, which is assumed to be a single-input shuffle mask of the kind
6748 /// used by the X86 shuffle instructions (not a fully general
6750 /// in-place shuffle are 'no-op's.
6760 /// This isn't a generic single-input test because in the vector shuffle
6774 /// shuffle mask.
6776 /// X86 divides up its shuffles into in-lane and cross-lane shuffle operations
6787 /// \brief Test whether a shuffle mask is equivalent within each 128-bit lane.
6789 /// This checks a shuffle mask to see if it is performing the same
6790 /// 128-bit lane-relative shuffle in each 128-bit lane. This trivially implies
6794 /// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
6808 // This entry crosses lanes, so there is no way to model this shuffle.
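A simplified, hedged sketch of the repetition check described above (names are illustrative, not the exact LLVM routine): every entry must stay within its own 128-bit lane, and the lane-relative mask must agree across lanes.

// Size is the full mask length, LaneSize the number of elements per 128-bit
// lane, and RepeatedMask receives the lane-relative mask (entries >= LaneSize
// select from the second input).
static bool isLaneRepeatedMask(const int *Mask, int Size, int LaneSize,
                               int *RepeatedMask) {
  for (int i = 0; i != LaneSize; ++i)
    RepeatedMask[i] = -1;
  for (int i = 0; i != Size; ++i) {
    int M = Mask[i];
    if (M < 0)
      continue;                                  // undef matches anything
    if ((M % Size) / LaneSize != i / LaneSize)
      return false;                              // entry crosses lanes
    int LocalM = M % LaneSize + (M >= Size ? LaneSize : 0);
    if (RepeatedMask[i % LaneSize] < 0)
      RepeatedMask[i % LaneSize] = LocalM;
    else if (RepeatedMask[i % LaneSize] != LocalM)
      return false;                              // lanes disagree
  }
  return true;
}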
6823 /// \brief Checks whether a shuffle mask is equivalent to an explicit list of
6826 /// This is a fast way to test a shuffle mask against a fixed pattern:
6858 /// \brief Get a 4-lane 8-bit shuffle immediate for a mask.
6860 /// This helper function produces an 8-bit shuffle immediate corresponding to
6861 /// the ubiquitous shuffle encoding scheme used in x86 instructions for
6868 assert(Mask.size() == 4 && "Only 4-lane shuffle masks");
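That encoding packs two bits per destination lane; a minimal standalone sketch under that assumption (helper name is illustrative):

// Pack a 4-element mask into the 2-bits-per-lane imm8 layout used by
// PSHUFD/PSHUFLW/PSHUFHW/SHUFPS (destination lane 0 lives in bits [1:0]).
// Undef entries (negative) are mapped to 0 here for simplicity.
static unsigned packV4ShuffleImm8(const int Mask[4]) {
  unsigned Imm = 0;
  for (int i = 0; i != 4; ++i)
    Imm |= (unsigned)(Mask[i] < 0 ? 0 : (Mask[i] & 0x3)) << (i * 2);
  return Imm;
}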
6882 /// \brief Compute whether each element of a shuffle is zeroable.
6884 /// A "zeroable" vector shuffle element is one which can be lowered to zero.
6885 /// Either it is an undef element in the shuffle mask, the element of the input
6889 /// shuffle.
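A hedged, simplified sketch of that computation, assuming the per-input zero information has already been summarized as bitmasks (KnownZero1/KnownZero2 are hypothetical inputs; the real routine inspects build_vector constants directly):

#include <cstdint>

// An element is "zeroable" when its mask entry is undef, or when the input
// element it references is known to be constant zero.
static uint64_t computeZeroable(const int *Mask, int Size,
                                uint64_t KnownZero1, uint64_t KnownZero2) {
  uint64_t Zeroable = 0;
  for (int i = 0; i != Size; ++i) {
    int M = Mask[i];
    if (M < 0)
      Zeroable |= 1ull << i;                       // undef mask entry
    else if (M < Size ? ((KnownZero1 >> M) & 1)
                      : ((KnownZero2 >> (M - Size)) & 1))
      Zeroable |= 1ull << i;                       // referenced element is zero
  }
  return Zeroable;
}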
6961 /// \brief Try to emit a bitmask instruction for a shuffle.
7003 /// \brief Try to emit a blend instruction for a shuffle using bit math.
7034 /// \brief Try to emit a blend instruction for a shuffle.
7039 /// that the shuffle mask is a blend, or convertible into a blend with zero.
7109 // If we have AVX2 it is faster to use VPBLENDD when the shuffle fits into
7202 /// then reduce the shuffle to a single-input permutation.
7208 // to reduce the shuffle.
7216 assert(Mask[i] < Size * 2 && "Shuffle input is out of bounds.");
7230 /// \brief Generic routine to decompose a shuffle and blend into independent
7234 /// shuffle+blend operations on newer X86 ISAs where we have very fast blend
7242 // Shuffle the input elements into the desired positions in V1 and V2 and
7257 // input shuffles would be a no-op. We prefer to shuffle inputs as the
7258 // shuffle may be able to fold with a load or other benefit. However, when
7271 /// \brief Try to lower a vector shuffle as a byte rotation.
7276 /// try to generically lower a vector shuffle through such a pattern. It
7279 /// This matches shuffle vectors that look like:
7382 "Can shuffle at most 16 bytes in a 128-bit vector!");
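A hedged sketch of the single-input case of that match (the real lowering also handles two-input rotations and maps the result onto PALIGNR or a PSLLDQ/PSRLDQ pair):

// Returns the rotation amount R if every defined mask entry satisfies
// Mask[i] == (i + R) % Size, or -1 if no consistent rotation exists.
static int matchByteRotation(const int *Mask, int Size) {
  int Rotation = -1;
  for (int i = 0; i != Size; ++i) {
    if (Mask[i] < 0)
      continue;                                  // undef matches any rotation
    int R = (Mask[i] - i + Size) % Size;
    if (Rotation < 0)
      Rotation = R;
    else if (R != Rotation)
      return -1;                                 // inconsistent rotation amount
  }
  return Rotation;
}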
7400 /// \brief Try to lower a vector shuffle as a bit shift (shifts in zeros).
7402 /// Attempts to match a shuffle mask against the PSLL(W/D/Q/DQ) and
7490 /// \brief Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ.
7495 assert(!Zeroable.all() && "Fully zeroable shuffle mask");
7514 assert(Len > 0 && "Zeroable shuffle mask");
7621 /// \brief Lower a vector shuffle as a zero or any extension.
7667 // PUNPCK will catch this in a later shuffle match.
7678 // For any extends we can cheat for larger element sizes and use shuffle
7705 assert(NumElements == (int)Mask.size() && "Unexpected shuffle mask size!");
7776 /// \brief Try to lower a vector shuffle as a zero extension on any microarch.
7779 /// a shuffle which happens to match the pattern of a zero extend. It doesn't
7799 assert((int)Mask.size() == NumElements && "Unexpected shuffle mask size");
7849 // If we fail to find an input, we have a zero-shuffle which should always
7882 // Returns one of the source operands if the shuffle can be reduced to a
7968 // vector shuffle lowering is dead.
8025 // If we have 4 or fewer lanes we can cheaply shuffle the element into
8161 // type than the shuffle, the broadcast element is in essence truncated.
8178 // If we are broadcasting a load that is only used by the shuffle
8200 // Check for whether we can use INSERTPS to perform the shuffle. We only use
8209 assert(Op.getSimpleValueType() == MVT::v4f32 && "Bad shuffle type!");
8212 assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
8278 /// \brief Try to lower a shuffle as a permute of the inputs followed by an
8330 // If we will have to shuffle both inputs to use the unpack, check whether
8331 // we can just unpack first and shuffle the result. If so, skip this unpack.
8336 // Shuffle the inputs into place.
8405 assert(Op.getSimpleValueType() == MVT::v2f64 && "Bad shuffle type!");
8410 assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
8418 // Straight shuffle of a single input vector. Simulate this by using the
8424 // into the shuffle.
8478 /// Tries to lower a 2-lane 64-bit shuffle using shuffle operations provided by
8480 /// it falls back to the floating point shuffle operation with appropriate bit
8486 assert(Op.getSimpleValueType() == MVT::v2i64 && "Bad shuffle type!");
8491 assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
8499 // Straight shuffle of a single input vector. For everything from SSE2
8501 // We have to map the mask as it is actually a v4i32 shuffle instruction.
8606 /// \brief Lower a vector shuffle using the SHUFPS instruction.
8663 // when we detect a SHUFPS pattern but can't easily commute the shuffle to
8672 // shuffle to place them.
8683 // Now we do a normal shuffle of V1 by giving V1 as both operands to
8705 assert(Op.getSimpleValueType() == MVT::v4f32 && "Bad shuffle type!");
8710 assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
8731 // into the shuffle.
8736 // Otherwise, use a straight shuffle of a single input vector. We pass the
8757 // Use INSERTPS if we can complete the shuffle efficiently.
8784 assert(Op.getSimpleValueType() == MVT::v4i32 && "Bad shuffle type!");
8789 assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
8793 // shuffle in many cases.
8807 // Straight shuffle of a single input vector. For everything from SSE2
8809 // We coerce the shuffle pattern to be compatible with UNPCK instructions
8881 /// shuffle lowering, and the most complex part.
8884 /// targeted at the same half of the final vector, and then use a dword shuffle
8893 /// lane must shuffle the *exact* same way. In fact, you must pass a v8 Mask to
8894 /// this routine for it to work correctly. To shuffle a 256-bit or 512-bit i16
8902 assert(Mask.size() == 8 && "Shuffle mask length doesn't match!");
8937 // pre-shuffle the 2-into-2 half to avoid turning it into a 3-into-1 or
8988 // Check for one tricky case: We're fixing a 3<-1 or a 1<-3 shuffle for AToA
8996 // to balance this to ensure we don't form a 3-1 shuffle in the other
9080 // those dwords can then be moved to the correct half with a dword shuffle.
9081 // We use at most one low and one high word shuffle to collect these paired
9082 // inputs into dwords, and finally a dword shuffle to place them.
9306 // just shuffle them into their final position.
9314 // Do a half shuffle for the low mask.
9319 // Do a half shuffle with the high mask after shifting its values down.
9330 /// \brief Helper to form a PSHUFB-based shuffle+blend.
9384 /// This handles both single-input shuffles and combined shuffle/blends with
9389 /// enough inputs, it delegates to a basic UNPCK-based strategy. If the shuffle
9398 assert(Op.getSimpleValueType() == MVT::v8i16 && "Bad shuffle type!");
9407 assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
9497 // can both shuffle and set up the inefficient blend.
9514 /// two. Example shuffle masks:
9535 // The modulus for the shuffle vector entries is based on whether this is
9559 // The shuffle mask must be equal to (i * 2^N) % M.
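A hedged sketch checking exactly that stated condition, with Scale standing in for 2^N and NumElts for M:

// Each defined mask entry i must equal (i * Scale) % NumElts.
static bool matchesScaledMask(const int *Mask, int NumElts, int Scale) {
  for (int i = 0; i != NumElts; ++i)
    if (Mask[i] >= 0 && Mask[i] != (i * Scale) % NumElts)
      return false;
  return true;
}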
9589 assert(Op.getSimpleValueType() == MVT::v16i8 && "Bad shuffle type!");
9594 assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
9626 // Check whether we can widen this to an i16 shuffle by duplicating bytes.
9628 // However, it only makes sense if the pre-duplication shuffle simplifies
9630 // express the pre-duplication shuffle as an i16 shuffle.
9633 // i16 shuffle as well.
9669 // Check if j is already a shuffle of this input. This happens when
9678 // We can't place the inputs into a single half with a simple i16 shuffle, so bail.
9681 // Map this input with the i16 shuffle.
9701 assert(MappedMask < 8 && "Invalid v8 shuffle mask!");
9706 "Conflicting entries in the original shuffle!");
9849 // This will be a single vector shuffle instead of a blend so nuke VHiHalf.
9876 /// This routine breaks down the specific type of 128-bit shuffle and
9900 /// \brief Helper function to test whether a shuffle mask could be
9906 /// NOTE: This must handle normal vector shuffle masks and *target* vector
9907 /// shuffle masks. The latter have the special property of a '-2' representing
9946 // Otherwise we can't safely widen the elements used in this shuffle.
9955 /// \brief Generic routine to split vector shuffle into half-sized shuffles.
9959 /// AVX vector shuffle types.
10045 // a minimal number of high-level vector shuffle nodes.
10088 /// between splitting the shuffle into 128-bit components and stitching those
10141 /// \brief Lower a vector shuffle crossing multiple 128-bit lanes as
10146 /// instructions in the worst case for a single-input cross lane shuffle which
10147 /// is lower than any other fully general cross-lane shuffle strategy I'm aware
10148 /// of. Special cases for each particular shuffle pattern should be handled
10230 // convert the 64-bit shuffle mask selection values into 128-bit
10253 // Shuffle mask values < 4 are selecting elements of V1.
10254 // Shuffle mask values >= 4 are selecting elements of V2.
10276 /// \brief Lower a vector shuffle by first fixing the 128-bit lanes and then
10280 /// in a single-input non-lane-crossing shuffle with a repeating shuffle mask in
10299 // See if we can build a hypothetical 128-bit lane-fixing shuffle mask. Also
10319 // Check that within each lane we have a consistent shuffle mask.
10329 // First shuffle the lanes into place.
10347 // Now do a simple shuffle that isn't lane crossing.
10417 assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
10468 // shuffle. However, if we have AVX2 and either inputs are already in place,
10469 // we will be able to shuffle even across lanes the other input in a single
10499 assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
10516 // When the shuffle is mirrored between the 128-bit lanes of the unit, we can
10552 // shuffle. However, if we have AVX2 and either inputs are already in place,
10553 // we will be able to shuffle even across lanes the other input in a single
10578 assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
10589 // If the shuffle mask is repeated in each 128-bit lane, we have many more
10590 // options to efficiently lower the shuffle.
10620 // If we have a single input shuffle with different shuffle patterns in the
10643 // shuffle.
10670 assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
10675 // shuffle in many cases.
10689 // If the shuffle mask is repeated in each 128-bit lane we can use more
10714 // If the shuffle patterns aren't repeated but it is a single input, directly
10727 // shuffle.
10749 assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
10754 // shuffle in many cases.
10784 // There are no generalized cross-lane shuffle operations available on i16
10792 // As this is a single-input shuffle, the repeated mask should be
10819 // shuffle.
10840 assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
10845 // shuffle in many cases.
10875 // There are no generalized cross-lane shuffle operations available on i8
10895 // shuffle.
10907 /// shuffle or splits it into two 128-bit shuffles and fuses the results back
10966 /// \brief Try to lower a vector shuffle as a 128-bit shuffles.
10972 "Unexpected element type size for 128bit shuffle.");
10976 assert(VT.is512BitVector() && "Unexpected vector size for 128bit shuffle.");
10983 // Convert the 64-bit shuffle mask selection values into 128-bit selection
11026 assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
11048 assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
11066 assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
11088 assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
11106 assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
11121 assert(Mask.size() == 64 && "Unexpected mask size for v64 shuffle!");
11131 /// shuffle or splits it into two 256-bit shuffles and fuses the results back
11179 // The only way to shuffle bits is to sign-extend the mask vector to SIMD
11180 // vector, shuffle and then truncate it back.
11257 // When we create a shuffle node we put the UNDEF node to second operand,
11264 // undef as well. This makes it easier to match the shuffle based solely on
11313 // Commute the shuffle as needed such that more elements come from V1 than
11314 // V2. This allows us to match the shuffle pattern strictly on how many
11417 /// \brief Try to lower a VSELECT instruction to a vector shuffle.
11432 // shuffles and re-use the shuffle lowering path for blends.
11451 // Try to lower this to a blend-style vector shuffle. This can handle all
12676 SDValue Shuffle = getTargetShuffleNode(X86ISD::PSHUFD, dl, MVT::v4i32,
12679 DAG.getBitcast(MVT::v2f64, Shuffle), Sub);
13407 // Prepare truncation shuffle mask
15217 // for v4i32 the shuffle mask will be { 0, 1, -1, -1} {2, 3, -1, -1}
15245 // Lower vector extended loads using a shuffle. If SSSE3 is not available we
15246 // may emit an illegal shuffle but the expansion is still better than scalar
15248 // we'll emit a shuffle and an arithmetic shift.
15251 // the shuffle phase or after the shuffle.
15365 // We can't shuffle using an illegal type.
16072 // Let the shuffle legalizer expand this shift amount node.
18066 // Lower v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle.
18084 // Merge the two vectors back together with a shuffle. This expands into 2
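As a hedged, intrinsics-level illustration of that strategy (one common variant; the odd lanes are isolated here with 64-bit shifts rather than the shuffles the DAG lowering uses):

#include <emmintrin.h>

// Multiply even and odd lanes with PMULUDQ, then shuffle the low 32 bits of
// each 64-bit product back together into a single v4i32 result.
static __m128i mul_v4i32_sse2(__m128i A, __m128i B) {
  __m128i Evens = _mm_mul_epu32(A, B);                       // products of lanes 0 and 2
  __m128i Odds  = _mm_mul_epu32(_mm_srli_epi64(A, 32),
                                _mm_srli_epi64(B, 32));      // products of lanes 1 and 3
  Evens = _mm_shuffle_epi32(Evens, _MM_SHUFFLE(0, 0, 2, 0)); // {p0, p2, x, x}
  Odds  = _mm_shuffle_epi32(Odds,  _MM_SHUFFLE(0, 0, 2, 0)); // {p1, p3, x, x}
  return _mm_unpacklo_epi32(Evens, Odds);                    // {p0, p1, p2, p3}
}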
18224 // Shuffle it back into the right order.
18428 "Splat shuffle referencing second operand");
18508 "Unexpected shuffle index found!");
18519 // Avoid introducing an extract element from a shuffle.
18593 // shifts per-lane and then shuffle the partial results back together.
18762 // all lanes and the upper i64 is ignored. These shuffle masks
19503 // The input vector is used as the shuffle mask that index elements into the
19826 // with a shuffle.
20839 // handle any possible shuffle mask that results.
22613 /// isShuffleHigh128VectorInsertLow - Checks whether the shuffle node is the
22629 /// isShuffleLow128VectorInsertHigh - Checks whether the shuffle node is the
22645 /// PerformShuffleCombine256 - Performs shuffle combines for 256-bit vectors.
22676 // To match the shuffle mask, the first half of the mask should
22744 /// chain of single-use x86 shuffle instructions and accumulated the combined
22745 /// shuffle mask represented by them, this will try to pattern match that mask
22753 assert(!Mask.empty() && "Cannot combine an empty shuffle mask!");
22769 "Invalid shuffle index found!");
22772 // widening of shuffle operands (see function canWidenShuffleElements).
22773 // If the only shuffle index is equal to SM_SentinelZero then propagate
22774 // a zero vector. Otherwise, the combined shuffle mask is a no-op shuffle
22787 // For floating point shuffles, we don't have free copies in the shuffle
22791 // Note that even with AVX we prefer the PSHUFD form of shuffle for integer
22799 unsigned Shuffle;
22805 Shuffle = X86ISD::MOVDDUP;
22810 Shuffle = Lo ? X86ISD::MOVLHPS : X86ISD::MOVHLPS;
22813 if (Depth == 1 && Root->getOpcode() == Shuffle)
22817 if (Shuffle == X86ISD::MOVDDUP)
22818 Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op);
22820 Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op);
22829 unsigned Shuffle = Lo ? X86ISD::MOVSLDUP : X86ISD::MOVSHDUP;
22831 if (Depth == 1 && Root->getOpcode() == Shuffle)
22835 Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op);
22843 unsigned Shuffle = Lo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
22845 if (Depth == 1 && Root->getOpcode() == Shuffle)
22849 Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op);
22867 unsigned Shuffle = Lo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
22868 if (Depth == 1 && Root->getOpcode() == Shuffle)
22883 Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op);
22895 // If we have 3 or more shuffle instructions or a chain involving PSHUFB, we
22930 /// \brief Fully generic combining of x86 shuffle instructions.
22932 /// This should be the last combine run over the x86 shuffle instructions. Once
22934 /// of single-use shuffle instructions, build a generic model of the cumulative
22935 /// shuffle operation, and check for simpler instructions which implement this
22941 /// special-purpose shuffle.
22942 /// 2) Look for sequences of shuffle instructions with 3 or more total
22945 /// to ensure we avoid using PSHUFB if we can implement the shuffle with
22948 /// slightly) more expensive than the other shuffle instructions.
22950 /// Because this is inherently a quadratic operation (for each shuffle in
22952 /// This should never be an issue in practice as the shuffle lowering doesn't
23009 // Merge this shuffle operation's mask into our accumulated mask. Note that
23010 // this shuffle's mask will be the first applied to the input, followed by the
23052 // We can't check for single use, we have to check that this shuffle is the
23061 // Minor canonicalization of the accumulated shuffle mask to make it easier
23065 // performs an equivalent shuffle.
23088 // If we have more than 128-bits, only the low 128-bits of shuffle mask
23113 llvm_unreachable("No valid shuffle instruction found!");
23117 /// \brief Search for a combinable shuffle across a chain ending in pshufd.
23119 /// We walk up the chain and look for a combinable shuffle, skipping over
23120 /// shuffles that we could hoist this shuffle's transformation past without
23127 "Called with something other than an x86 128-bit half shuffle!");
23130 // Walk up a single-use chain looking for a combinable shuffle. Keep a stack
23146 // Found another dword shuffle.
23151 // dword shuffle, and the high words are self-contained.
23161 // dword shuffle, and the low words are self-contained.
23172 // shuffle into a preceding word shuffle.
23177 // Search for a half-shuffle which we can combine with.
23221 // Rebuild the chain around this new shuffle.
23251 /// \brief Search for a combinable shuffle across a chain ending in pshuflw or
23255 /// through shuffles which switch halves trying to find a shuffle of the same
23262 "Called with something other than an x86 128-bit half shuffle!");
23266 // Walk up a single-use chain looking for a combinable shuffle.
23294 // Combine away the bottom node as its shuffle will be accumulated into
23295 // a preceding shuffle.
23312 // Replace the combinable shuffle with the combined one, updating all users
23377 // Look for simplifications involving one or two shuffle instructions.
23384 assert(VT.getVectorElementType() == MVT::i16 && "Bad word shuffle type!");
23387 return SDValue(); // We combined away this shuffle, so we're done.
23406 // Look for shuffle patterns which can be implemented as a single unpack.
23455 /// \brief Try to combine a shuffle into a target-specific add-sub node.
23457 /// We combine this directly on the abstract vector shuffle nodes so it is
23458 /// easier to generically match. We also insert dummy vector shuffle nodes for
23467 // FIXME: It would be easy and harmless to use the target shuffle mask
23480 // We require the first shuffle operand to be the FSUB node, and the second to
23515 /// PerformShuffleCombine - Performs several different shuffle combines.
23541 // the backend might introduce new shuffle dag nodes and bitcasts.
23544 // fold: (shuffle (bitcast (BINOP A, B)), Undef, <Mask>) ->
23545 // (shuffle (BINOP (bitcast A), (bitcast B)), Undef, <Mask>)
23602 SDValue Shuffle =
23604 if (Shuffle.getNode())
23605 return Shuffle;
23607 // Try recursively combining arbitrary sequences of x86 shuffle
23610 // can evaluate how many specialized shuffle instructions are involved in
23624 /// specific shuffle of a load can be folded into a single element load.
23673 // If inputs to shuffle are the same for both ops, then allow 2 uses
23695 // If there's a bitcast before the shuffle, check if the load type and
23709 // Create shuffle node taking into account the case that its a unary shuffle
23710 SDValue Shuffle = (UnaryShuffle) ? DAG.getUNDEF(CurrentVT)
23712 Shuffle = DAG.getVectorShuffle(CurrentVT, dl,
23713 InVec.getOperand(0), Shuffle,
23715 Shuffle = DAG.getBitcast(OriginalVT, Shuffle);
23716 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle,
24386 SDValue Shuffle = transformVSELECTtoBlendVECTOR_SHUFFLE(N, DAG, Subtarget);
24387 if (Shuffle.getNode())
24388 return Shuffle;
25336 // A vector zext_in_reg may be represented as a shuffle,
25339 // We'd like to try to combine that into a shuffle with zero
25353 ShuffleVectorSDNode *Shuffle = cast<ShuffleVectorSDNode>(N0.getOperand(0));
25354 EVT SrcType = Shuffle->getValueType(0);
25356 // We expect a single-source shuffle
25357 if (Shuffle->getOperand(1)->getOpcode() != ISD::UNDEF)
25379 // We expect a shuffle of the form <0, u, u, u, 1, u, u, u...>
25386 if (Shuffle->getMaskElt(i) > 0) {
25392 if (Shuffle->getMaskElt(i) != (int)(i / ZextRatio)) {
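A hedged, simplified sketch of the mask test those lines perform (the real combine works on a ShuffleVectorSDNode; this just states the shape being matched):

// Positions that are multiples of ZextRatio must select consecutive source
// elements 0, 1, 2, ...; the positions in between may only be undef or zero.
static bool isZextInRegMask(const int *Mask, int NumElts, int ZextRatio) {
  for (int i = 0; i != NumElts; ++i) {
    if (i % ZextRatio != 0) {
      if (Mask[i] > 0)
        return false;
    } else if (Mask[i] != i / ZextRatio) {
      return false;
    }
  }
  return true;
}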
25403 // Ok, perform the transformation - replace the shuffle with
25404 // a shuffle of the form <0, k, k, k, 1, k, k, k> with zero
25414 SDValue NewShuffle = DAG.getVectorShuffle(Shuffle->getValueType(0), DL,
25415 Shuffle->getOperand(0), DAG.getConstant(0, DL, SrcType), Mask);
25975 // Create a type on which we perform the shuffle
25987 // Can't shuffle using an illegal type.
26068 // Create a type on which we perform the shuffle
26079 // Can't shuffle using an illegal type.
26161 // Optimize trunc store (of multiple scalars) to shuffle and store.
26198 // Create a type on which we perform the shuffle
26209 // Can't shuffle using an illegal type.
26412 // At least one of the operands should be a vector shuffle.
26433 // If LHS is not a shuffle then pretend it is the shuffle