1 ; RUN: llc < %s -march=x86 -mattr=sse41 -o %t 2 ; RUN: grep punpcklbw %t | count 16 3 ; RUN: grep punpckhbw %t | count 16 4 ; RUN: grep "pshufd" %t | count 16 5 6 ; Should generate with pshufd with masks $0, $85, $170, $255 (each mask is used 4 times) 7 8 ; Splat test for v16i8 9 define <16 x i8 > @shuf_16i8_0(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { 10 entry: 11 %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 0, i32 undef, i32 undef, i32 0, i32 undef, i32 0, i32 0 , i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 > 12 ret <16 x i8 > %tmp6 13 } 14 15 define <16 x i8 > @shuf_16i8_1(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { 16 entry: 17 %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef, i32 undef, i32 undef, i32 undef > 18 ret <16 x i8 > %tmp6 19 } 20 21 define <16 x i8 > @shuf_16i8_2(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { 22 entry: 23 %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 2, i32 undef, i32 undef, i32 2, i32 undef, i32 2, i32 2 , i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2 > 24 ret <16 x i8 > %tmp6 25 } 26 27 define <16 x i8 > @shuf_16i8_3(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { 28 entry: 29 %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 3, i32 undef, i32 undef, i32 3, i32 undef, i32 3, i32 3 , i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3 > 30 ret <16 x i8 > %tmp6 31 } 32 33 34 define <16 x i8 > @shuf_16i8_4(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { 35 entry: 36 %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 4, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef , i32 undef, i32 undef, i32 undef, i32 undef , i32 undef, i32 undef, i32 undef, i32 undef , i32 undef > 37 ret <16 x i8 > %tmp6 38 } 39 40 define <16 x i8 > @shuf_16i8_5(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { 41 entry: 42 %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 5, i32 undef, i32 undef, i32 5, i32 undef, i32 5, i32 5 , i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5 > 43 ret <16 x i8 > %tmp6 44 } 45 46 define <16 x i8 > @shuf_16i8_6(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { 47 entry: 48 %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 6, i32 undef, i32 undef, i32 6, i32 undef, i32 6, i32 6 , i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6 > 49 ret <16 x i8 > %tmp6 50 } 51 52 define <16 x i8 > @shuf_16i8_7(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { 53 entry: 54 %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 7, i32 undef, i32 undef, i32 7, i32 undef, i32 undef, i32 undef , i32 undef, i32 undef, i32 undef, i32 undef , i32 undef , i32 undef, i32 undef, i32 undef , i32 undef > 55 ret <16 x i8 > %tmp6 56 } 57 58 define <16 x i8 > @shuf_16i8_8(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { 59 entry: 60 %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 8, i32 undef, i32 undef, i32 8, i32 undef, i32 8, i32 8 , i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8 > 61 ret <16 x i8 > %tmp6 62 } 63 64 define <16 x i8 > @shuf_16i8_9(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { 65 entry: 66 %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 9, i32 undef, i32 undef, i32 9, i32 undef, i32 9, i32 9 , i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9 > 67 ret <16 x i8 > %tmp6 68 } 69 70 define <16 x i8 > @shuf_16i8_10(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { 71 entry: 72 %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 10, i32 undef, i32 undef, i32 10, i32 undef, i32 10, i32 10 , i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10 > 73 ret <16 x i8 > %tmp6 74 } 75 76 define <16 x i8 > @shuf_16i8_11(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { 77 entry: 78 %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 11, i32 undef, i32 undef, i32 11, i32 undef, i32 11, i32 11 , i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11 > 79 ret <16 x i8 > %tmp6 80 } 81 82 define <16 x i8 > @shuf_16i8_12(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { 83 entry: 84 %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 12, i32 undef, i32 undef, i32 12, i32 undef, i32 12, i32 12 , i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12 > 85 ret <16 x i8 > %tmp6 86 } 87 88 define <16 x i8 > @shuf_16i8_13(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { 89 entry: 90 %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 13, i32 undef, i32 undef, i32 13, i32 undef, i32 13, i32 13 , i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13 > 91 ret <16 x i8 > %tmp6 92 } 93 94 define <16 x i8 > @shuf_16i8_14(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { 95 entry: 96 %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 14, i32 undef, i32 undef, i32 14, i32 undef, i32 14, i32 14 , i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14 > 97 ret <16 x i8 > %tmp6 98 } 99 100 define <16 x i8 > @shuf_16i8_15(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone { 101 entry: 102 %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 15, i32 undef, i32 undef, i32 15, i32 undef, i32 15, i32 15 , i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15 > 103 ret <16 x i8 > %tmp6 104 } 105