1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2 3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3 4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41 5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX1 6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-SLOW 7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-FAST 8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VLBW 9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+avx512vbmi,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VLVBMI 10 11 define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i8> %a, <16 x i8> %b) { 12 ; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 13 ; SSE2: # %bb.0: 14 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 15 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] 16 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 17 ; SSE2-NEXT: retq 18 ; 19 ; 
SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 20 ; SSSE3: # %bb.0: 21 ; SSSE3-NEXT: pxor %xmm1, %xmm1 22 ; SSSE3-NEXT: pshufb %xmm1, %xmm0 23 ; SSSE3-NEXT: retq 24 ; 25 ; SSE41-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 26 ; SSE41: # %bb.0: 27 ; SSE41-NEXT: pxor %xmm1, %xmm1 28 ; SSE41-NEXT: pshufb %xmm1, %xmm0 29 ; SSE41-NEXT: retq 30 ; 31 ; AVX1-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 32 ; AVX1: # %bb.0: 33 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 34 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 35 ; AVX1-NEXT: retq 36 ; 37 ; AVX2OR512VL-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 38 ; AVX2OR512VL: # %bb.0: 39 ; AVX2OR512VL-NEXT: vpbroadcastb %xmm0, %xmm0 40 ; AVX2OR512VL-NEXT: retq 41 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 42 ret <16 x i8> %shuffle 43 } 44 45 define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01(<16 x i8> %a, <16 x i8> %b) { 46 ; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01: 47 ; SSE2: # %bb.0: 48 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 49 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7] 50 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 51 ; SSE2-NEXT: retq 52 ; 53 ; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01: 54 ; SSSE3: # %bb.0: 55 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1] 56 ; SSSE3-NEXT: retq 57 ; 58 ; SSE41-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01: 59 ; SSE41: # %bb.0: 60 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1] 61 ; SSE41-NEXT: retq 62 ; 63 ; AVX-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01: 64 ; AVX: # %bb.0: 65 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = 
xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1] 66 ; AVX-NEXT: retq 67 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 68 ret <16 x i8> %shuffle 69 } 70 71 define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i8> %a, <16 x i8> %b) { 72 ; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08: 73 ; SSE2: # %bb.0: 74 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 75 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 76 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 77 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7] 78 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 79 ; SSE2-NEXT: retq 80 ; 81 ; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08: 82 ; SSSE3: # %bb.0: 83 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8] 84 ; SSSE3-NEXT: retq 85 ; 86 ; SSE41-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08: 87 ; SSE41: # %bb.0: 88 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8] 89 ; SSE41-NEXT: retq 90 ; 91 ; AVX-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08: 92 ; AVX: # %bb.0: 93 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8] 94 ; AVX-NEXT: retq 95 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 96 ret <16 x i8> %shuffle 97 } 98 99 define <16 x i8> @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03(<16 x i8> %a, <16 x i8> %b) { 100 ; SSE-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03: 101 ; SSE: # %bb.0: 102 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 103 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = 
xmm0[0,0,1,1,2,2,3,3] 104 ; SSE-NEXT: retq 105 ; 106 ; AVX1-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03: 107 ; AVX1: # %bb.0: 108 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 109 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 110 ; AVX1-NEXT: retq 111 ; 112 ; AVX2-SLOW-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03: 113 ; AVX2-SLOW: # %bb.0: 114 ; AVX2-SLOW-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 115 ; AVX2-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 116 ; AVX2-SLOW-NEXT: retq 117 ; 118 ; AVX2-FAST-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03: 119 ; AVX2-FAST: # %bb.0: 120 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3] 121 ; AVX2-FAST-NEXT: retq 122 ; 123 ; AVX512VL-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03: 124 ; AVX512VL: # %bb.0: 125 ; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3] 126 ; AVX512VL-NEXT: retq 127 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3> 128 ret <16 x i8> %shuffle 129 } 130 131 define <16 x i8> @shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07(<16 x i8> %a, <16 x i8> %b) { 132 ; SSE-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07: 133 ; SSE: # %bb.0: 134 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 135 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] 136 ; SSE-NEXT: retq 137 ; 138 ; AVX1-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07: 139 ; AVX1: # %bb.0: 140 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 141 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] 142 ; AVX1-NEXT: retq 143 ; 144 ; AVX2-SLOW-LABEL: 
shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07: 145 ; AVX2-SLOW: # %bb.0: 146 ; AVX2-SLOW-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 147 ; AVX2-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] 148 ; AVX2-SLOW-NEXT: retq 149 ; 150 ; AVX2-FAST-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07: 151 ; AVX2-FAST: # %bb.0: 152 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,4,4,4,5,5,5,5,6,6,6,6,7,7,7,7] 153 ; AVX2-FAST-NEXT: retq 154 ; 155 ; AVX512VL-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07: 156 ; AVX512VL: # %bb.0: 157 ; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,4,4,4,5,5,5,5,6,6,6,6,7,7,7,7] 158 ; AVX512VL-NEXT: retq 159 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7> 160 ret <16 x i8> %shuffle 161 } 162 163 define <16 x i8> @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i8> %a, <16 x i8> %b) { 164 ; SSE2-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12: 165 ; SSE2: # %bb.0: 166 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 167 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] 168 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 169 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 170 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7] 171 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6] 172 ; SSE2-NEXT: retq 173 ; 174 ; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12: 175 ; SSSE3: # %bb.0: 176 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12] 177 ; SSSE3-NEXT: retq 178 ; 179 ; SSE41-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12: 180 ; SSE41: # %bb.0: 181 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12] 182 ; SSE41-NEXT: 
retq 183 ; 184 ; AVX-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12: 185 ; AVX: # %bb.0: 186 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12] 187 ; AVX-NEXT: retq 188 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12> 189 ret <16 x i8> %shuffle 190 } 191 192 define <16 x i8> @shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07(<16 x i8> %a, <16 x i8> %b) { 193 ; SSE-LABEL: shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07: 194 ; SSE: # %bb.0: 195 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 196 ; SSE-NEXT: retq 197 ; 198 ; AVX-LABEL: shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07: 199 ; AVX: # %bb.0: 200 ; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 201 ; AVX-NEXT: retq 202 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7> 203 ret <16 x i8> %shuffle 204 } 205 206 define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) { 207 ; SSE-LABEL: shuffle_v16i8_0101010101010101: 208 ; SSE: # %bb.0: 209 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] 210 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 211 ; SSE-NEXT: retq 212 ; 213 ; AVX1-LABEL: shuffle_v16i8_0101010101010101: 214 ; AVX1: # %bb.0: 215 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] 216 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 217 ; AVX1-NEXT: retq 218 ; 219 ; AVX2OR512VL-LABEL: shuffle_v16i8_0101010101010101: 220 ; AVX2OR512VL: # %bb.0: 221 ; AVX2OR512VL-NEXT: vpbroadcastw %xmm0, %xmm0 222 ; AVX2OR512VL-NEXT: retq 223 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, 
i32 1, i32 0, i32 1, i32 0, i32 1> 224 ret <16 x i8> %shuffle 225 } 226 227 define <16 x i8> @shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(<16 x i8> %a, <16 x i8> %b) { 228 ; SSE-LABEL: shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23: 229 ; SSE: # %bb.0: 230 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 231 ; SSE-NEXT: retq 232 ; 233 ; AVX-LABEL: shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23: 234 ; AVX: # %bb.0: 235 ; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 236 ; AVX-NEXT: retq 237 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> 238 ret <16 x i8> %shuffle 239 } 240 241 define <16 x i8> @shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31(<16 x i8> %a, <16 x i8> %b) { 242 ; SSE-LABEL: shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31: 243 ; SSE: # %bb.0: 244 ; SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 245 ; SSE-NEXT: retq 246 ; 247 ; AVX-LABEL: shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31: 248 ; AVX: # %bb.0: 249 ; AVX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 250 ; AVX-NEXT: retq 251 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> 252 ret <16 x i8> %shuffle 253 } 254 255 define <16 x i8> 
@shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07(<16 x i8> %a, <16 x i8> %b) { 256 ; SSE2-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: 257 ; SSE2: # %bb.0: 258 ; SSE2-NEXT: pxor %xmm2, %xmm2 259 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 260 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,0,2,3,4,5,6,7] 261 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 262 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 263 ; SSE2-NEXT: por %xmm2, %xmm0 264 ; SSE2-NEXT: retq 265 ; 266 ; SSSE3-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: 267 ; SSSE3: # %bb.0: 268 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 269 ; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 270 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 271 ; SSSE3-NEXT: movdqa %xmm1, %xmm0 272 ; SSSE3-NEXT: retq 273 ; 274 ; SSE41-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: 275 ; SSE41: # %bb.0: 276 ; SSE41-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 277 ; SSE41-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 278 ; SSE41-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 279 ; SSE41-NEXT: movdqa %xmm1, %xmm0 280 ; SSE41-NEXT: retq 281 ; 282 ; AVX1-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: 283 ; AVX1: # %bb.0: 284 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 285 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 286 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = 
xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 287 ; AVX1-NEXT: retq 288 ; 289 ; AVX2OR512VL-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: 290 ; AVX2OR512VL: # %bb.0: 291 ; AVX2OR512VL-NEXT: vpbroadcastb %xmm1, %xmm1 292 ; AVX2OR512VL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 293 ; AVX2OR512VL-NEXT: retq 294 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7> 295 ret <16 x i8> %shuffle 296 } 297 298 define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12(<16 x i8> %a, <16 x i8> %b) { 299 ; SSE2-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12: 300 ; SSE2: # %bb.0: 301 ; SSE2-NEXT: pxor %xmm1, %xmm1 302 ; SSE2-NEXT: movdqa %xmm0, %xmm2 303 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] 304 ; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7] 305 ; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4] 306 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 307 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7] 308 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4] 309 ; SSE2-NEXT: packuswb %xmm2, %xmm0 310 ; SSE2-NEXT: retq 311 ; 312 ; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12: 313 ; SSSE3: # %bb.0: 314 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] 315 ; SSSE3-NEXT: retq 316 ; 317 ; SSE41-LABEL: 
shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12: 318 ; SSE41: # %bb.0: 319 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] 320 ; SSE41-NEXT: retq 321 ; 322 ; AVX-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12: 323 ; AVX: # %bb.0: 324 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] 325 ; AVX-NEXT: retq 326 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12> 327 ret <16 x i8> %shuffle 328 } 329 330 define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20(<16 x i8> %a, <16 x i8> %b) { 331 ; SSE2-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20: 332 ; SSE2: # %bb.0: 333 ; SSE2-NEXT: pxor %xmm2, %xmm2 334 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 335 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7] 336 ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4] 337 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 338 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7] 339 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4] 340 ; SSE2-NEXT: packuswb %xmm1, %xmm0 341 ; SSE2-NEXT: retq 342 ; 343 ; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20: 344 ; SSSE3: # %bb.0: 345 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 346 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9] 347 ; SSSE3-NEXT: retq 348 ; 349 ; SSE41-LABEL: 
shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20: 350 ; SSE41: # %bb.0: 351 ; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 352 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9] 353 ; SSE41-NEXT: retq 354 ; 355 ; AVX-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20: 356 ; AVX: # %bb.0: 357 ; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 358 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9] 359 ; AVX-NEXT: retq 360 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20> 361 ret <16 x i8> %shuffle 362 } 363 364 define <16 x i8> @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20(<16 x i8> %a, <16 x i8> %b) { 365 ; SSE2-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20: 366 ; SSE2: # %bb.0: 367 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] 368 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 369 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 370 ; SSE2-NEXT: pxor %xmm1, %xmm1 371 ; SSE2-NEXT: movdqa %xmm0, %xmm2 372 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] 373 ; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm2[3,2,1,0,4,5,6,7] 374 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 375 ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,7,6,5,4] 376 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm3[0],xmm1[1] 377 ; SSE2-NEXT: 
pshuflw {{.*#+}} xmm3 = xmm0[3,2,1,0,4,5,6,7] 378 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm2[0,1,2,3,7,6,5,4] 379 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1] 380 ; SSE2-NEXT: packuswb %xmm1, %xmm0 381 ; SSE2-NEXT: retq 382 ; 383 ; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20: 384 ; SSSE3: # %bb.0: 385 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u] 386 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u] 387 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 388 ; SSSE3-NEXT: retq 389 ; 390 ; SSE41-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20: 391 ; SSE41: # %bb.0: 392 ; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u] 393 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u] 394 ; SSE41-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 395 ; SSE41-NEXT: retq 396 ; 397 ; AVX-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20: 398 ; AVX: # %bb.0: 399 ; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u] 400 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u] 401 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 402 ; AVX-NEXT: retq 403 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 31, i32 30, i32 29, i32 28, i32 11, i32 10, i32 9, i32 8, i32 23, i32 22, i32 21, i32 20> 404 ret <16 x i8> %shuffle 405 } 406 407 define <16 x i8> @shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31(<16 x i8> %a, <16 x i8> %b) { 408 ; SSE2-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31: 409 ; SSE2: # %bb.0: 410 ; SSE2-NEXT: movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] 411 ; SSE2-NEXT: andps %xmm2, %xmm0 412 ; SSE2-NEXT: andnps %xmm1, %xmm2 413 ; SSE2-NEXT: orps %xmm2, %xmm0 414 ; SSE2-NEXT: retq 
415 ; 416 ; SSSE3-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31: 417 ; SSSE3: # %bb.0: 418 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u] 419 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] 420 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 421 ; SSSE3-NEXT: retq 422 ; 423 ; SSE41-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31: 424 ; SSE41: # %bb.0: 425 ; SSE41-NEXT: movdqa %xmm0, %xmm2 426 ; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] 427 ; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 428 ; SSE41-NEXT: movdqa %xmm1, %xmm0 429 ; SSE41-NEXT: retq 430 ; 431 ; AVX1OR2-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31: 432 ; AVX1OR2: # %bb.0: 433 ; AVX1OR2-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] 434 ; AVX1OR2-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 435 ; AVX1OR2-NEXT: retq 436 ; 437 ; AVX512VL-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31: 438 ; AVX512VL: # %bb.0: 439 ; AVX512VL-NEXT: movw $-21846, %ax # imm = 0xAAAA 440 ; AVX512VL-NEXT: kmovd %eax, %k1 441 ; AVX512VL-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1} 442 ; AVX512VL-NEXT: retq 443 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31> 444 ret <16 x i8> %shuffle 445 } 446 447 define <16 x i8> @shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31(<16 x i8> %a, <16 x i8> %b) { 448 ; SSE2-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31: 449 ; SSE2: # %bb.0: 450 ; SSE2-NEXT: movaps {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0] 451 ; SSE2-NEXT: andps %xmm2, %xmm0 452 ; SSE2-NEXT: andnps %xmm1, 
%xmm2 453 ; SSE2-NEXT: orps %xmm2, %xmm0 454 ; SSE2-NEXT: retq 455 ; 456 ; SSSE3-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31: 457 ; SSSE3: # %bb.0: 458 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[15] 459 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2],zero,xmm0[4,5,6],zero,xmm0[8,9,10],zero,xmm0[12,13,14],zero 460 ; SSSE3-NEXT: por %xmm1, %xmm0 461 ; SSSE3-NEXT: retq 462 ; 463 ; SSE41-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31: 464 ; SSE41: # %bb.0: 465 ; SSE41-NEXT: movdqa %xmm0, %xmm2 466 ; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0] 467 ; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 468 ; SSE41-NEXT: movdqa %xmm1, %xmm0 469 ; SSE41-NEXT: retq 470 ; 471 ; AVX1OR2-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31: 472 ; AVX1OR2: # %bb.0: 473 ; AVX1OR2-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0] 474 ; AVX1OR2-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 475 ; AVX1OR2-NEXT: retq 476 ; 477 ; AVX512VL-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31: 478 ; AVX512VL: # %bb.0: 479 ; AVX512VL-NEXT: movw $-30584, %ax # imm = 0x8888 480 ; AVX512VL-NEXT: kmovd %eax, %k1 481 ; AVX512VL-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1} 482 ; AVX512VL-NEXT: retq 483 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 4, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 27, i32 12, i32 13, i32 14, i32 31> 484 ret <16 x i8> %shuffle 485 } 486 487 define <16 x i8> @shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz(<16 x i8> %a) { 488 ; SSE-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz: 489 ; SSE: # %bb.0: 490 ; SSE-NEXT: andps {{.*}}(%rip), %xmm0 491 ; SSE-NEXT: retq 492 ; 493 ; AVX-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz: 494 ; AVX: 
# %bb.0: 495 ; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 496 ; AVX-NEXT: retq 497 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 4, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 27, i32 12, i32 13, i32 14, i32 31> 498 ret <16 x i8> %shuffle 499 } 500 501 define <16 x i8> @shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31(<16 x i8> %a, <16 x i8> %b) { 502 ; SSE2-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31: 503 ; SSE2: # %bb.0: 504 ; SSE2-NEXT: movaps {{.*#+}} xmm2 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0] 505 ; SSE2-NEXT: andps %xmm2, %xmm0 506 ; SSE2-NEXT: andnps %xmm1, %xmm2 507 ; SSE2-NEXT: orps %xmm2, %xmm0 508 ; SSE2-NEXT: retq 509 ; 510 ; SSSE3-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31: 511 ; SSSE3: # %bb.0: 512 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,xmm1[4],zero,zero,xmm1[7],zero,zero,zero,zero,xmm1[12],zero,zero,xmm1[15] 513 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3],zero,xmm0[5,6],zero,xmm0[8,9,10,11],zero,xmm0[13,14],zero 514 ; SSSE3-NEXT: por %xmm1, %xmm0 515 ; SSSE3-NEXT: retq 516 ; 517 ; SSE41-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31: 518 ; SSE41: # %bb.0: 519 ; SSE41-NEXT: movdqa %xmm0, %xmm2 520 ; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0] 521 ; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 522 ; SSE41-NEXT: movdqa %xmm1, %xmm0 523 ; SSE41-NEXT: retq 524 ; 525 ; AVX1OR2-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31: 526 ; AVX1OR2: # %bb.0: 527 ; AVX1OR2-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0] 528 ; AVX1OR2-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 529 ; AVX1OR2-NEXT: retq 530 ; 531 ; AVX512VL-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31: 532 ; AVX512VL: # %bb.0: 533 ; AVX512VL-NEXT: movw $-28528, %ax # imm = 
0x9090 534 ; AVX512VL-NEXT: kmovd %eax, %k1 535 ; AVX512VL-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1} 536 ; AVX512VL-NEXT: retq 537 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 13, i32 14, i32 31> 538 ret <16 x i8> %shuffle 539 } 540 541 define <16 x i8> @shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15(<16 x i8> %a, <16 x i8> %b) { 542 ; SSE2-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15: 543 ; SSE2: # %bb.0: 544 ; SSE2-NEXT: movaps {{.*#+}} xmm2 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0] 545 ; SSE2-NEXT: andps %xmm2, %xmm1 546 ; SSE2-NEXT: andnps %xmm0, %xmm2 547 ; SSE2-NEXT: orps %xmm1, %xmm2 548 ; SSE2-NEXT: movaps %xmm2, %xmm0 549 ; SSE2-NEXT: retq 550 ; 551 ; SSSE3-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15: 552 ; SSSE3: # %bb.0: 553 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[4,5,6,7],zero,zero,xmm0[10,11],zero,xmm0[13],zero,xmm0[15] 554 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3],zero,zero,zero,zero,xmm1[8,9],zero,zero,xmm1[12],zero,xmm1[14],zero 555 ; SSSE3-NEXT: por %xmm1, %xmm0 556 ; SSSE3-NEXT: retq 557 ; 558 ; SSE41-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15: 559 ; SSE41: # %bb.0: 560 ; SSE41-NEXT: movdqa %xmm0, %xmm2 561 ; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0] 562 ; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 563 ; SSE41-NEXT: movdqa %xmm2, %xmm0 564 ; SSE41-NEXT: retq 565 ; 566 ; AVX1OR2-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15: 567 ; AVX1OR2: # %bb.0: 568 ; AVX1OR2-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0] 569 ; AVX1OR2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 570 ; AVX1OR2-NEXT: retq 571 ; 572 ; AVX512VL-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15: 573 ; AVX512VL: # %bb.0: 574 ; 
AVX512VL-NEXT: movw $-21264, %ax # imm = 0xACF0 575 ; AVX512VL-NEXT: kmovd %eax, %k1 576 ; AVX512VL-NEXT: vpblendmb %xmm0, %xmm1, %xmm0 {%k1} 577 ; AVX512VL-NEXT: retq 578 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 10, i32 11, i32 28, i32 13, i32 30, i32 15> 579 ret <16 x i8> %shuffle 580 } 581 582 define <16 x i8> @trunc_v4i32_shuffle(<16 x i8> %a) { 583 ; SSE2-LABEL: trunc_v4i32_shuffle: 584 ; SSE2: # %bb.0: 585 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 586 ; SSE2-NEXT: packuswb %xmm0, %xmm0 587 ; SSE2-NEXT: packuswb %xmm0, %xmm0 588 ; SSE2-NEXT: retq 589 ; 590 ; SSSE3-LABEL: trunc_v4i32_shuffle: 591 ; SSSE3: # %bb.0: 592 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] 593 ; SSSE3-NEXT: retq 594 ; 595 ; SSE41-LABEL: trunc_v4i32_shuffle: 596 ; SSE41: # %bb.0: 597 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] 598 ; SSE41-NEXT: retq 599 ; 600 ; AVX-LABEL: trunc_v4i32_shuffle: 601 ; AVX: # %bb.0: 602 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] 603 ; AVX-NEXT: retq 604 %shuffle = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 605 ret <16 x i8> %shuffle 606 } 607 608 define <16 x i8> @stress_test0(<16 x i8> %s.0.1, <16 x i8> %s.0.2, <16 x i8> %s.0.3, <16 x i8> %s.0.4, <16 x i8> %s.0.5, <16 x i8> %s.0.6, <16 x i8> %s.0.7, <16 x i8> %s.0.8, <16 x i8> %s.0.9) { 609 ; We don't have anything useful to check here. This generates 100s of 610 ; instructions. Instead, just make sure we survived codegen. 
611 ; ALL-LABEL: stress_test0: 612 ; ALL: retq 613 entry: 614 %s.1.4 = shufflevector <16 x i8> %s.0.4, <16 x i8> %s.0.5, <16 x i32> <i32 1, i32 22, i32 21, i32 28, i32 3, i32 16, i32 6, i32 1, i32 19, i32 29, i32 12, i32 31, i32 2, i32 3, i32 3, i32 6> 615 %s.1.5 = shufflevector <16 x i8> %s.0.5, <16 x i8> %s.0.6, <16 x i32> <i32 31, i32 20, i32 12, i32 19, i32 2, i32 15, i32 12, i32 31, i32 2, i32 28, i32 2, i32 30, i32 7, i32 8, i32 17, i32 28> 616 %s.1.8 = shufflevector <16 x i8> %s.0.8, <16 x i8> %s.0.9, <16 x i32> <i32 14, i32 10, i32 17, i32 5, i32 17, i32 9, i32 17, i32 21, i32 31, i32 24, i32 16, i32 6, i32 20, i32 28, i32 23, i32 8> 617 %s.2.2 = shufflevector <16 x i8> %s.0.3, <16 x i8> %s.0.4, <16 x i32> <i32 20, i32 9, i32 21, i32 11, i32 11, i32 4, i32 3, i32 18, i32 3, i32 30, i32 4, i32 31, i32 11, i32 24, i32 13, i32 29> 618 %s.3.2 = shufflevector <16 x i8> %s.2.2, <16 x i8> %s.1.4, <16 x i32> <i32 15, i32 13, i32 5, i32 11, i32 7, i32 17, i32 14, i32 22, i32 22, i32 16, i32 7, i32 24, i32 16, i32 22, i32 7, i32 29> 619 %s.5.4 = shufflevector <16 x i8> %s.1.5, <16 x i8> %s.1.8, <16 x i32> <i32 3, i32 13, i32 19, i32 7, i32 23, i32 11, i32 1, i32 9, i32 16, i32 25, i32 2, i32 7, i32 0, i32 21, i32 23, i32 17> 620 %s.6.1 = shufflevector <16 x i8> %s.3.2, <16 x i8> %s.3.2, <16 x i32> <i32 11, i32 2, i32 28, i32 31, i32 27, i32 3, i32 9, i32 27, i32 25, i32 25, i32 14, i32 7, i32 12, i32 28, i32 12, i32 23> 621 %s.7.1 = shufflevector <16 x i8> %s.6.1, <16 x i8> %s.3.2, <16 x i32> <i32 15, i32 29, i32 14, i32 0, i32 29, i32 15, i32 26, i32 30, i32 6, i32 7, i32 2, i32 8, i32 12, i32 10, i32 29, i32 17> 622 %s.7.2 = shufflevector <16 x i8> %s.3.2, <16 x i8> %s.5.4, <16 x i32> <i32 3, i32 29, i32 3, i32 19, i32 undef, i32 20, i32 undef, i32 3, i32 27, i32 undef, i32 undef, i32 11, i32 undef, i32 undef, i32 undef, i32 undef> 623 %s.16.0 = shufflevector <16 x i8> %s.7.1, <16 x i8> %s.7.2, <16 x i32> <i32 13, i32 1, i32 16, i32 16, i32 6, i32 7, i32 29, i32 
18, i32 19, i32 28, i32 undef, i32 undef, i32 31, i32 1, i32 undef, i32 10> 624 ret <16 x i8> %s.16.0 625 } 626 627 define <16 x i8> @undef_test1(<16 x i8> %s.0.5, <16 x i8> %s.0.8, <16 x i8> %s.0.9) noinline nounwind { 628 ; There is nothing interesting to check about these instructions other than 629 ; that they survive codegen. However, we actually do better and delete all of 630 ; them because the result is 'undef'. 631 ; 632 ; ALL-LABEL: undef_test1: 633 ; ALL: # %bb.0: # %entry 634 ; ALL-NEXT: retq 635 entry: 636 %s.1.8 = shufflevector <16 x i8> %s.0.8, <16 x i8> undef, <16 x i32> <i32 9, i32 9, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 6, i32 undef, i32 6, i32 undef, i32 14, i32 14, i32 undef, i32 undef, i32 0> 637 %s.2.4 = shufflevector <16 x i8> undef, <16 x i8> %s.0.5, <16 x i32> <i32 21, i32 undef, i32 undef, i32 19, i32 undef, i32 undef, i32 29, i32 24, i32 21, i32 23, i32 21, i32 17, i32 19, i32 undef, i32 20, i32 22> 638 %s.2.5 = shufflevector <16 x i8> %s.0.5, <16 x i8> undef, <16 x i32> <i32 3, i32 8, i32 undef, i32 7, i32 undef, i32 10, i32 8, i32 0, i32 15, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 9> 639 %s.2.9 = shufflevector <16 x i8> %s.0.9, <16 x i8> undef, <16 x i32> <i32 7, i32 undef, i32 14, i32 7, i32 8, i32 undef, i32 7, i32 8, i32 5, i32 15, i32 undef, i32 1, i32 11, i32 undef, i32 undef, i32 11> 640 %s.3.4 = shufflevector <16 x i8> %s.2.4, <16 x i8> %s.0.5, <16 x i32> <i32 5, i32 0, i32 21, i32 6, i32 15, i32 27, i32 22, i32 21, i32 4, i32 22, i32 19, i32 26, i32 9, i32 26, i32 8, i32 29> 641 %s.3.9 = shufflevector <16 x i8> %s.2.9, <16 x i8> undef, <16 x i32> <i32 8, i32 6, i32 8, i32 1, i32 undef, i32 4, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 6, i32 undef> 642 %s.4.7 = shufflevector <16 x i8> %s.1.8, <16 x i8> %s.2.9, <16 x i32> <i32 9, i32 0, i32 22, i32 20, i32 24, i32 7, i32 21, i32 17, i32 20, i32 12, i32 19, i32 23, i32 2, i32 9, i32 17, i32 
10> 643 %s.4.8 = shufflevector <16 x i8> %s.2.9, <16 x i8> %s.3.9, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 6, i32 10, i32 undef, i32 0, i32 5, i32 undef, i32 9, i32 undef> 644 %s.5.7 = shufflevector <16 x i8> %s.4.7, <16 x i8> %s.4.8, <16 x i32> <i32 16, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 645 %s.8.4 = shufflevector <16 x i8> %s.3.4, <16 x i8> %s.5.7, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 28, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 646 %s.9.4 = shufflevector <16 x i8> %s.8.4, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 10, i32 5> 647 %s.10.4 = shufflevector <16 x i8> %s.9.4, <16 x i8> undef, <16 x i32> <i32 undef, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 648 %s.12.4 = shufflevector <16 x i8> %s.10.4, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 13, i32 undef, i32 undef, i32 undef> 649 650 ret <16 x i8> %s.12.4 651 } 652 653 define <16 x i8> @PR20540(<8 x i8> %a) { 654 ; SSE2-LABEL: PR20540: 655 ; SSE2: # %bb.0: 656 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 657 ; SSE2-NEXT: packuswb %xmm0, %xmm0 658 ; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 659 ; SSE2-NEXT: retq 660 ; 661 ; SSSE3-LABEL: PR20540: 662 ; SSSE3: # %bb.0: 663 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero 664 ; SSSE3-NEXT: retq 665 ; 666 ; SSE41-LABEL: PR20540: 667 ; SSE41: # %bb.0: 668 ; 
SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero 669 ; SSE41-NEXT: retq 670 ; 671 ; AVX-LABEL: PR20540: 672 ; AVX: # %bb.0: 673 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero 674 ; AVX-NEXT: retq ; Widening shuffle (two <8 x i8> inputs, 16-lane result): lanes 0-7 copy %a, lanes 8-15 all use index 8, the first element of the zeroinitializer operand. 675 %shuffle = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 676 ret <16 x i8> %shuffle 677 } 678 679 define <16 x i8> @shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) { 680 ; SSE-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 681 ; SSE: # %bb.0: 682 ; SSE-NEXT: movzbl %dil, %eax 683 ; SSE-NEXT: movd %eax, %xmm0 684 ; SSE-NEXT: retq 685 ; 686 ; AVX-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 687 ; AVX: # %bb.0: 688 ; AVX-NEXT: movzbl %dil, %eax 689 ; AVX-NEXT: vmovd %eax, %xmm0 690 ; AVX-NEXT: retq ; %i is inserted into lane 0 of an undef vector; mask index 16 moves it to result lane 0, and lanes 1-15 read the zeroinitializer operand. 691 %a = insertelement <16 x i8> undef, i8 %i, i32 0 692 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 693 ret <16 x i8> %shuffle 694 } 695 696 define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) { 697 ; SSE2-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 698 ; SSE2: # %bb.0: 699 ; SSE2-NEXT: shll $8, %edi 700 ; SSE2-NEXT: pxor %xmm0, %xmm0 701 ; SSE2-NEXT: pinsrw $2, %edi, %xmm0 702 ; SSE2-NEXT: retq 703 ; 704 ; SSSE3-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 705 ; SSSE3: # %bb.0: 706 ; SSSE3-NEXT: shll $8, %edi 707 ; SSSE3-NEXT: pxor %xmm0, %xmm0 708 ; SSSE3-NEXT: pinsrw $2, %edi, %xmm0 709 ; SSSE3-NEXT: retq 710 ; 711 ; SSE41-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 712 ; SSE41: # %bb.0: 713 ; SSE41-NEXT: pxor 
%xmm0, %xmm0 714 ; SSE41-NEXT: pinsrb $5, %edi, %xmm0 715 ; SSE41-NEXT: retq 716 ; 717 ; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 718 ; AVX: # %bb.0: 719 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 720 ; AVX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0 721 ; AVX-NEXT: retq ; %i is inserted into lane 0 of an undef vector; mask index 16 places it in result lane 5, every other lane reads element 0 of the zeroinitializer operand. 722 %a = insertelement <16 x i8> undef, i8 %i, i32 0 723 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 724 ret <16 x i8> %shuffle 725 } 726 727 define <16 x i8> @shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16(i8 %i) { 728 ; SSE2-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16: 729 ; SSE2: # %bb.0: 730 ; SSE2-NEXT: shll $8, %edi 731 ; SSE2-NEXT: pxor %xmm0, %xmm0 732 ; SSE2-NEXT: pinsrw $7, %edi, %xmm0 733 ; SSE2-NEXT: retq 734 ; 735 ; SSSE3-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16: 736 ; SSSE3: # %bb.0: 737 ; SSSE3-NEXT: shll $8, %edi 738 ; SSSE3-NEXT: pxor %xmm0, %xmm0 739 ; SSSE3-NEXT: pinsrw $7, %edi, %xmm0 740 ; SSSE3-NEXT: retq 741 ; 742 ; SSE41-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16: 743 ; SSE41: # %bb.0: 744 ; SSE41-NEXT: pxor %xmm0, %xmm0 745 ; SSE41-NEXT: pinsrb $15, %edi, %xmm0 746 ; SSE41-NEXT: retq 747 ; 748 ; AVX-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16: 749 ; AVX: # %bb.0: 750 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 751 ; AVX-NEXT: vpinsrb $15, %edi, %xmm0, %xmm0 752 ; AVX-NEXT: retq ; %i is inserted into lane 0 of an undef vector; mask index 16 places it in result lane 15. Lanes 1, 2, 4 and 5 are undef, the rest read the zeroinitializer operand. 753 %a = insertelement <16 x i8> undef, i8 %i, i32 0 754 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16> 755 ret <16 x i8> %shuffle 756 } 757 758 define <16 x i8> @shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) { 759 ; SSE2-LABEL: 
shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 760 ; SSE2: # %bb.0: 761 ; SSE2-NEXT: movzbl %dil, %eax 762 ; SSE2-NEXT: pxor %xmm0, %xmm0 763 ; SSE2-NEXT: pinsrw $1, %eax, %xmm0 764 ; SSE2-NEXT: retq 765 ; 766 ; SSSE3-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 767 ; SSSE3: # %bb.0: 768 ; SSSE3-NEXT: movzbl %dil, %eax 769 ; SSSE3-NEXT: pxor %xmm0, %xmm0 770 ; SSSE3-NEXT: pinsrw $1, %eax, %xmm0 771 ; SSSE3-NEXT: retq 772 ; 773 ; SSE41-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 774 ; SSE41: # %bb.0: 775 ; SSE41-NEXT: pxor %xmm0, %xmm0 776 ; SSE41-NEXT: pinsrb $2, %edi, %xmm0 777 ; SSE41-NEXT: retq 778 ; 779 ; AVX-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 780 ; AVX: # %bb.0: 781 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 782 ; AVX-NEXT: vpinsrb $2, %edi, %xmm0, %xmm0 783 ; AVX-NEXT: retq ; %i is inserted into lane 3 of an undef vector, so mask index 19 (16 + 3) is the lane that holds it; it lands in result lane 2, all other lanes read the zeroinitializer operand. 784 %a = insertelement <16 x i8> undef, i8 %i, i32 3 785 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 19, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 786 ret <16 x i8> %shuffle 787 } 788 789 define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu(<16 x i8> %a) { 790 ; SSE-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu: 791 ; SSE: # %bb.0: 792 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3] 793 ; SSE-NEXT: retq 794 ; 795 ; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu: 796 ; AVX: # %bb.0: 797 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3] 798 ; AVX-NEXT: retq ; Lanes 0-11 read element 0 of the zeroinitializer operand; lane 12 = %a[0] (index 16), lane 14 = %a[2] (index 18); lanes 13 and 15 are undef. 799 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 undef, i32 18, i32 undef> 800 ret <16 x i8> %shuffle 801 } 802 803 
define <16 x i8> @shuffle_v16i8_28_uu_30_31_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(<16 x i8> %a) { 804 ; SSE-LABEL: shuffle_v16i8_28_uu_30_31_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 805 ; SSE: # %bb.0: 806 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 807 ; SSE-NEXT: retq 808 ; 809 ; AVX-LABEL: shuffle_v16i8_28_uu_30_31_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 810 ; AVX: # %bb.0: 811 ; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 812 ; AVX-NEXT: retq ; Lanes 0, 2 and 3 = %a[12], %a[14], %a[15] (indices 28, 30, 31); lane 1 is undef; lanes 4-15 read element 0 of the zeroinitializer operand. 813 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 28, i32 undef, i32 30, i32 31, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 814 ret <16 x i8> %shuffle 815 } 816 817 define <16 x i8> @shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14(<16 x i8> %a, <16 x i8> %b) { 818 ; SSE2-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: 819 ; SSE2: # %bb.0: 820 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 821 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 822 ; SSE2-NEXT: por %xmm1, %xmm0 823 ; SSE2-NEXT: retq 824 ; 825 ; SSSE3-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: 826 ; SSSE3: # %bb.0: 827 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 828 ; SSSE3-NEXT: retq 829 ; 830 ; SSE41-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: 831 ; SSE41: # %bb.0: 832 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 833 ; SSE41-NEXT: retq 834 ; 835 ; AVX-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: 836 ; AVX: # %bb.0: 837 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 838 ; AVX-NEXT: retq ; Lane 0 = %b[15] (index 31); lanes 1-15 = %a[0..14]. 839 %shuffle = shufflevector 
<16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14> 840 ret <16 x i8> %shuffle 841 } 842 843 define <16 x i8> @shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14(<16 x i8> %a, <16 x i8> %b) { 844 ; SSE2-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: 845 ; SSE2: # %bb.0: 846 ; SSE2-NEXT: movdqa %xmm0, %xmm1 847 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 848 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 849 ; SSE2-NEXT: por %xmm1, %xmm0 850 ; SSE2-NEXT: retq 851 ; 852 ; SSSE3-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: 853 ; SSSE3: # %bb.0: 854 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 855 ; SSSE3-NEXT: retq 856 ; 857 ; SSE41-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: 858 ; SSE41: # %bb.0: 859 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 860 ; SSE41-NEXT: retq 861 ; 862 ; AVX-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: 863 ; AVX: # %bb.0: 864 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 865 ; AVX-NEXT: retq ; Single-input rotation: lane 0 = %a[15], lanes 1-15 = %a[0..14]; %b is not referenced by the mask. 866 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14> 867 ret <16 x i8> %shuffle 868 } 869 870 define <16 x i8> @shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00(<16 x i8> %a, <16 x i8> %b) { 871 ; SSE2-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00: 872 ; SSE2: # %bb.0: 873 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero 874 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = 
zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] 875 ; SSE2-NEXT: por %xmm1, %xmm0 876 ; SSE2-NEXT: retq 877 ; 878 ; SSSE3-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00: 879 ; SSSE3: # %bb.0: 880 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0] 881 ; SSSE3-NEXT: retq 882 ; 883 ; SSE41-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00: 884 ; SSE41: # %bb.0: 885 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0] 886 ; SSE41-NEXT: retq 887 ; 888 ; AVX-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00: 889 ; AVX: # %bb.0: 890 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0] 891 ; AVX-NEXT: retq 892 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0> 893 ret <16 x i8> %shuffle 894 } 895 896 define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16(<16 x i8> %a, <16 x i8> %b) { 897 ; SSE2-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16: 898 ; SSE2: # %bb.0: 899 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero 900 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0] 901 ; SSE2-NEXT: por %xmm1, %xmm0 902 ; SSE2-NEXT: retq 903 ; 904 ; SSSE3-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16: 905 ; SSSE3: # %bb.0: 906 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0] 907 ; SSSE3-NEXT: movdqa %xmm1, %xmm0 908 ; SSSE3-NEXT: retq 909 ; 910 ; SSE41-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16: 911 ; SSE41: # %bb.0: 912 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0] 913 ; 
SSE41-NEXT: movdqa %xmm1, %xmm0 914 ; SSE41-NEXT: retq 915 ; 916 ; AVX-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16: 917 ; AVX: # %bb.0: 918 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0] 919 ; AVX-NEXT: retq ; Lanes 0-14 = %a[1..15]; lane 15 = %b[0] (index 16). 920 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16> 921 ret <16 x i8> %shuffle 922 } 923 924 define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00(<16 x i8> %a, <16 x i8> %b) { 925 ; SSE2-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00: 926 ; SSE2: # %bb.0: 927 ; SSE2-NEXT: movdqa %xmm0, %xmm1 928 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero 929 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] 930 ; SSE2-NEXT: por %xmm1, %xmm0 931 ; SSE2-NEXT: retq 932 ; 933 ; SSSE3-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00: 934 ; SSSE3: # %bb.0: 935 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0] 936 ; SSSE3-NEXT: retq 937 ; 938 ; SSE41-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00: 939 ; SSE41: # %bb.0: 940 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0] 941 ; SSE41-NEXT: retq 942 ; 943 ; AVX-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00: 944 ; AVX: # %bb.0: 945 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0] 946 ; AVX-NEXT: retq ; Single-input rotation: lanes 0-14 = %a[1..15], lane 15 = %a[0]; %b is not referenced by the mask. 947 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0> 948 ret <16 x i8> %shuffle 949 } 950 951 define <16 x i8> @shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<16 x i8> %a, <16 x i8> 
%b) { 952 ; SSE2-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 953 ; SSE2: # %bb.0: 954 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 955 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 956 ; SSE2-NEXT: por %xmm1, %xmm0 957 ; SSE2-NEXT: retq 958 ; 959 ; SSSE3-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 960 ; SSSE3: # %bb.0: 961 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 962 ; SSSE3-NEXT: movdqa %xmm1, %xmm0 963 ; SSSE3-NEXT: retq 964 ; 965 ; SSE41-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 966 ; SSE41: # %bb.0: 967 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 968 ; SSE41-NEXT: movdqa %xmm1, %xmm0 969 ; SSE41-NEXT: retq 970 ; 971 ; AVX-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 972 ; AVX: # %bb.0: 973 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 974 ; AVX-NEXT: retq 975 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30> 976 ret <16 x i8> %shuffle 977 } 978 979 ; PR31151 980 define <16 x i8> @shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23(<16 x i8> %val1, <16 x i8> %val2) { 981 ; SSE2-LABEL: shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23: 982 ; SSE2: # %bb.0: 983 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 984 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,2,1,3] 985 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] 986 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 987 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 988 ; SSE2-NEXT: pand %xmm1, %xmm0 989 ; SSE2-NEXT: 
pandn %xmm2, %xmm1 990 ; SSE2-NEXT: por %xmm0, %xmm1 991 ; SSE2-NEXT: movdqa %xmm1, %xmm0 992 ; SSE2-NEXT: retq 993 ; 994 ; SSSE3-LABEL: shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23: 995 ; SSSE3: # %bb.0: 996 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 997 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 998 ; SSSE3-NEXT: retq 999 ; 1000 ; SSE41-LABEL: shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23: 1001 ; SSE41: # %bb.0: 1002 ; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1003 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 1004 ; SSE41-NEXT: retq 1005 ; 1006 ; AVX-LABEL: shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23: 1007 ; AVX: # %bb.0: 1008 ; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1009 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 1010 ; AVX-NEXT: retq 1011 %shuffle = shufflevector <16 x i8> %val1, <16 x i8> %val2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23> 1012 ret <16 x i8> %shuffle 1013 } 1014 1015 define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %a) { 1016 ; SSE2-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu: 1017 ; SSE2: # %bb.0: 1018 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1019 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1020 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3] 1021 ; SSE2-NEXT: retq 1022 ; 1023 ; SSSE3-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu: 1024 ; SSSE3: # %bb.0: 1025 ; SSSE3-NEXT: 
pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1026 ; SSSE3-NEXT: retq 1027 ; 1028 ; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu: 1029 ; SSE41: # %bb.0: 1030 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1031 ; SSE41-NEXT: retq 1032 ; 1033 ; AVX-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu: 1034 ; AVX: # %bb.0: 1035 ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1036 ; AVX-NEXT: retq 1037 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1038 ret <16 x i8> %shuffle 1039 } 1040 1041 define <16 x i8> @shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz(<16 x i8> %a) { 1042 ; SSE2-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz: 1043 ; SSE2: # %bb.0: 1044 ; SSE2-NEXT: pxor %xmm1, %xmm1 1045 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1046 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1047 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1048 ; SSE2-NEXT: retq 1049 ; 1050 ; SSSE3-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz: 1051 ; SSSE3: # %bb.0: 1052 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1053 ; SSSE3-NEXT: retq 1054 ; 1055 ; SSE41-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz: 1056 ; SSE41: # %bb.0: 1057 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = 
xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1058 ; SSE41-NEXT: retq 1059 ; 1060 ; AVX-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz: 1061 ; AVX: # %bb.0: 1062 ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1063 ; AVX-NEXT: retq 1064 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 1, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 1065 ret <16 x i8> %shuffle 1066 } 1067 1068 define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu(<16 x i8> %a) { 1069 ; SSE2-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu: 1070 ; SSE2: # %bb.0: 1071 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1072 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1073 ; SSE2-NEXT: retq 1074 ; 1075 ; SSSE3-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu: 1076 ; SSSE3: # %bb.0: 1077 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1078 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1079 ; SSSE3-NEXT: retq 1080 ; 1081 ; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu: 1082 ; SSE41: # %bb.0: 1083 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1084 ; SSE41-NEXT: retq 1085 ; 1086 ; AVX-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu: 1087 ; AVX: # %bb.0: 1088 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1089 ; AVX-NEXT: retq 1090 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 
undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef> 1091 ret <16 x i8> %shuffle 1092 } 1093 1094 define <16 x i8> @shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz(<16 x i8> %a) { 1095 ; SSE2-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz: 1096 ; SSE2: # %bb.0: 1097 ; SSE2-NEXT: pxor %xmm1, %xmm1 1098 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1099 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1100 ; SSE2-NEXT: retq 1101 ; 1102 ; SSSE3-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz: 1103 ; SSSE3: # %bb.0: 1104 ; SSSE3-NEXT: pxor %xmm1, %xmm1 1105 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1106 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1107 ; SSSE3-NEXT: retq 1108 ; 1109 ; SSE41-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz: 1110 ; SSE41: # %bb.0: 1111 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1112 ; SSE41-NEXT: retq 1113 ; 1114 ; AVX-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz: 1115 ; AVX: # %bb.0: 1116 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1117 ; AVX-NEXT: retq ; Lanes 0, 4, 8 and 12 = %a[0..3]; every other lane uses an index >= 16, i.e. reads the zeroinitializer operand (bytes 0-3 of %a zero-extended to 32-bit lanes). 1118 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 1, i32 21, i32 22, i32 23, i32 2, i32 25, i32 26, i32 27, i32 3, i32 29, i32 30, i32 31> 1119 ret <16 x i8> %shuffle 1120 } 1121 1122 define <16 x i8> @shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu(<16 x i8> %a) { 
1123 ; SSE2-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu: 1124 ; SSE2: # %bb.0: 1125 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1126 ; SSE2-NEXT: retq 1127 ; 1128 ; SSSE3-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu: 1129 ; SSSE3: # %bb.0: 1130 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1131 ; SSSE3-NEXT: retq 1132 ; 1133 ; SSE41-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu: 1134 ; SSE41: # %bb.0: 1135 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1136 ; SSE41-NEXT: retq 1137 ; 1138 ; AVX-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu: 1139 ; AVX: # %bb.0: 1140 ; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1141 ; AVX-NEXT: retq 1142 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7, i32 undef> 1143 ret <16 x i8> %shuffle 1144 } 1145 1146 define <16 x i8> @shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz(<16 x i8> %a) { 1147 ; SSE2-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz: 1148 ; SSE2: # %bb.0: 1149 ; SSE2-NEXT: pxor %xmm1, %xmm1 1150 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1151 ; SSE2-NEXT: retq 1152 ; 1153 ; SSSE3-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz: 1154 ; SSSE3: # %bb.0: 1155 ; SSSE3-NEXT: pxor %xmm1, %xmm1 1156 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1157 ; SSSE3-NEXT: retq 1158 ; 1159 ; SSE41-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz: 1160 ; SSE41: # %bb.0: 1161 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1162 ; SSE41-NEXT: retq 1163 ; 1164 ; AVX-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz: 1165 ; AVX: # %bb.0: 1166 ; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1167 ; AVX-NEXT: retq 1168 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 1, i32 19, i32 2, i32 21, i32 3, i32 23, i32 4, i32 25, i32 5, i32 27, i32 6, i32 29, i32 7, i32 31> 1169 ret <16 x i8> %shuffle 1170 } 1171 1172 define <16 x i8> @shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00(<16 x i8> %a, <16 x i8> %b) { 1173 ; SSE2-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: 1174 ; SSE2: # %bb.0: # %entry 1175 ; SSE2-NEXT: pxor %xmm2, %xmm2 1176 ; SSE2-NEXT: movdqa %xmm0, %xmm3 1177 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 1178 ; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm3[1,3,2,0,4,5,6,7] 1179 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,0,2,1] 1180 ; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [65535,65535,65535,0,65535,0,0,65535] 1181 ; SSE2-NEXT: pand %xmm5, %xmm4 1182 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15] 1183 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,3,0,1] 1184 ; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,2,2,4,5,6,7] 1185 ; SSE2-NEXT: pshufhw 
{{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,7,7] 1186 ; SSE2-NEXT: pandn %xmm2, %xmm5 1187 ; SSE2-NEXT: por %xmm4, %xmm5 1188 ; SSE2-NEXT: psrlq $16, %xmm0 1189 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3] 1190 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[3,1,1,3] 1191 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7] 1192 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,4] 1193 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] 1194 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] 1195 ; SSE2-NEXT: packuswb %xmm5, %xmm2 1196 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [255,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255] 1197 ; SSE2-NEXT: pand %xmm0, %xmm2 1198 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,1,1,3,4,5,6,7] 1199 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] 1200 ; SSE2-NEXT: pandn %xmm1, %xmm0 1201 ; SSE2-NEXT: por %xmm2, %xmm0 1202 ; SSE2-NEXT: retq 1203 ; 1204 ; SSSE3-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: 1205 ; SSSE3: # %bb.0: # %entry 1206 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero 1207 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0] 1208 ; SSSE3-NEXT: por %xmm1, %xmm0 1209 ; SSSE3-NEXT: retq 1210 ; 1211 ; SSE41-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: 1212 ; SSE41: # %bb.0: # %entry 1213 ; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero 1214 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0] 1215 ; SSE41-NEXT: por %xmm1, %xmm0 1216 ; SSE41-NEXT: retq 1217 ; 1218 ; AVX1OR2-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: 1219 ; AVX1OR2: # %bb.0: # %entry 1220 ; AVX1OR2-NEXT: vpshufb {{.*#+}} xmm1 = 
xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero 1221 ; AVX1OR2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0] 1222 ; AVX1OR2-NEXT: vpor %xmm1, %xmm0, %xmm0 1223 ; AVX1OR2-NEXT: retq 1224 ; 1225 ; AVX512VLBW-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: 1226 ; AVX512VLBW: # %bb.0: # %entry 1227 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero 1228 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0] 1229 ; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0 1230 ; AVX512VLBW-NEXT: retq 1231 ; 1232 ; AVX512VLVBMI-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: 1233 ; AVX512VLVBMI: # %bb.0: # %entry 1234 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm2 = <u,10,2,7,22,14,7,2,18,3,1,14,18,9,11,0> 1235 ; AVX512VLVBMI-NEXT: vpermt2b %xmm1, %xmm2, %xmm0 1236 ; AVX512VLVBMI-NEXT: retq 1237 entry: 1238 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 undef, i32 10, i32 2, i32 7, i32 22, i32 14, i32 7, i32 2, i32 18, i32 3, i32 1, i32 14, i32 18, i32 9, i32 11, i32 0> 1239 1240 ret <16 x i8> %shuffle 1241 } 1242 1243 define <16 x i8> @shuffe_v16i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30(<8 x i16> %a0, <8 x i16> %a1) { 1244 ; SSE-LABEL: shuffe_v16i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30: 1245 ; SSE: # %bb.0: 1246 ; SSE-NEXT: psrlw $8, %xmm0 1247 ; SSE-NEXT: psrlw $8, %xmm1 1248 ; SSE-NEXT: packuswb %xmm1, %xmm0 1249 ; SSE-NEXT: retq 1250 ; 1251 ; AVX-LABEL: shuffe_v16i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30: 1252 ; AVX: # %bb.0: 1253 ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0 1254 ; AVX-NEXT: vpsrlw $8, %xmm1, %xmm1 1255 ; AVX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 1256 ; AVX-NEXT: retq 1257 %1 = lshr <8 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 
8, i16 8, i16 8> 1258 %2 = lshr <8 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 1259 %3 = bitcast <8 x i16> %1 to <16 x i8> 1260 %4 = bitcast <8 x i16> %2 to <16 x i8> 1261 %5 = shufflevector <16 x i8> %3, <16 x i8> %4, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> 1262 ret <16 x i8> %5 1263 } 1264 1265 define <16 x i8> @stress_test2(<16 x i8> %s.0.0, <16 x i8> %s.0.1, <16 x i8> %s.0.2) { 1266 ; Nothing interesting to test here. Just make sure we didn't crashe. 1267 ; ALL-LABEL: stress_test2: 1268 ; ALL: retq 1269 entry: 1270 %s.1.0 = shufflevector <16 x i8> %s.0.0, <16 x i8> %s.0.1, <16 x i32> <i32 29, i32 30, i32 2, i32 16, i32 26, i32 21, i32 11, i32 26, i32 26, i32 3, i32 4, i32 5, i32 30, i32 28, i32 15, i32 5> 1271 %s.1.1 = shufflevector <16 x i8> %s.0.1, <16 x i8> %s.0.2, <16 x i32> <i32 31, i32 1, i32 24, i32 12, i32 28, i32 5, i32 2, i32 9, i32 29, i32 1, i32 31, i32 5, i32 6, i32 17, i32 15, i32 22> 1272 %s.2.0 = shufflevector <16 x i8> %s.1.0, <16 x i8> %s.1.1, <16 x i32> <i32 22, i32 1, i32 12, i32 3, i32 30, i32 4, i32 30, i32 undef, i32 1, i32 10, i32 14, i32 18, i32 27, i32 13, i32 16, i32 19> 1273 1274 ret <16 x i8> %s.2.0 1275 } 1276 1277 define void @constant_gets_selected(<4 x i32>* %ptr1, <4 x i32>* %ptr2) { 1278 ; SSE-LABEL: constant_gets_selected: 1279 ; SSE: # %bb.0: # %entry 1280 ; SSE-NEXT: xorps %xmm0, %xmm0 1281 ; SSE-NEXT: movaps %xmm0, (%rdi) 1282 ; SSE-NEXT: movaps %xmm0, (%rsi) 1283 ; SSE-NEXT: retq 1284 ; 1285 ; AVX-LABEL: constant_gets_selected: 1286 ; AVX: # %bb.0: # %entry 1287 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 1288 ; AVX-NEXT: vmovaps %xmm0, (%rdi) 1289 ; AVX-NEXT: vmovaps %xmm0, (%rsi) 1290 ; AVX-NEXT: retq 1291 entry: 1292 %weird_zero = bitcast <4 x i32> zeroinitializer to <16 x i8> 1293 %shuffle.i = shufflevector <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 
undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0>, <16 x i8> %weird_zero, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27> 1294 %weirder_zero = bitcast <16 x i8> %shuffle.i to <4 x i32> 1295 store <4 x i32> %weirder_zero, <4 x i32>* %ptr1, align 16 1296 store <4 x i32> zeroinitializer, <4 x i32>* %ptr2, align 16 1297 ret void 1298 } 1299 1300 ; 1301 ; Shuffle to logical bit shifts 1302 ; 1303 1304 define <16 x i8> @shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14(<16 x i8> %a, <16 x i8> %b) { 1305 ; SSE-LABEL: shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14: 1306 ; SSE: # %bb.0: 1307 ; SSE-NEXT: psllw $8, %xmm0 1308 ; SSE-NEXT: retq 1309 ; 1310 ; AVX-LABEL: shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14: 1311 ; AVX: # %bb.0: 1312 ; AVX-NEXT: vpsllw $8, %xmm0, %xmm0 1313 ; AVX-NEXT: retq 1314 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 0, i32 16, i32 2, i32 16, i32 4, i32 16, i32 6, i32 16, i32 8, i32 16, i32 10, i32 16, i32 12, i32 16, i32 14> 1315 ret <16 x i8> %shuffle 1316 } 1317 1318 define <16 x i8> @shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12(<16 x i8> %a, <16 x i8> %b) { 1319 ; SSE-LABEL: shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12: 1320 ; SSE: # %bb.0: 1321 ; SSE-NEXT: pslld $24, %xmm0 1322 ; SSE-NEXT: retq 1323 ; 1324 ; AVX-LABEL: shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12: 1325 ; AVX: # %bb.0: 1326 ; AVX-NEXT: vpslld $24, %xmm0, %xmm0 1327 ; AVX-NEXT: retq 1328 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 12> 1329 ret <16 x i8> %shuffle 1330 } 1331 1332 define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08(<16 x i8> %a, <16 x i8> %b) { 1333 
; SSE-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08: 1334 ; SSE: # %bb.0: 1335 ; SSE-NEXT: psllq $56, %xmm0 1336 ; SSE-NEXT: retq 1337 ; 1338 ; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08: 1339 ; AVX: # %bb.0: 1340 ; AVX-NEXT: vpsllq $56, %xmm0, %xmm0 1341 ; AVX-NEXT: retq 1342 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 8> 1343 ret <16 x i8> %shuffle 1344 } 1345 1346 define <16 x i8> @shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14(<16 x i8> %a, <16 x i8> %b) { 1347 ; SSE-LABEL: shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14: 1348 ; SSE: # %bb.0: 1349 ; SSE-NEXT: psllq $8, %xmm0 1350 ; SSE-NEXT: retq 1351 ; 1352 ; AVX-LABEL: shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14: 1353 ; AVX: # %bb.0: 1354 ; AVX-NEXT: vpsllq $8, %xmm0, %xmm0 1355 ; AVX-NEXT: retq 1356 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 0, i32 undef, i32 2, i32 3, i32 undef, i32 5, i32 6, i32 16, i32 8, i32 9, i32 undef, i32 11, i32 12, i32 13, i32 14> 1357 ret <16 x i8> %shuffle 1358 } 1359 1360 define <16 x i8> @shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz(<16 x i8> %a, <16 x i8> %b) { 1361 ; SSE-LABEL: shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz: 1362 ; SSE: # %bb.0: 1363 ; SSE-NEXT: psrlw $8, %xmm0 1364 ; SSE-NEXT: retq 1365 ; 1366 ; AVX-LABEL: shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz: 1367 ; AVX: # %bb.0: 1368 ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0 1369 ; AVX-NEXT: retq 1370 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 undef, i32 16, i32 undef, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15, i32 16> 1371 ret <16 x i8> %shuffle 1372 } 1373 1374 define <16 x 
i8> @shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz(<16 x i8> %a, <16 x i8> %b) { 1375 ; SSE-LABEL: shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz: 1376 ; SSE: # %bb.0: 1377 ; SSE-NEXT: psrld $16, %xmm0 1378 ; SSE-NEXT: retq 1379 ; 1380 ; AVX-LABEL: shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz: 1381 ; AVX: # %bb.0: 1382 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm0 1383 ; AVX-NEXT: retq 1384 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 14, i32 15, i32 16, i32 16> 1385 ret <16 x i8> %shuffle 1386 } 1387 1388 define <16 x i8> @shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz(<16 x i8> %a, <16 x i8> %b) { 1389 ; SSE-LABEL: shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz: 1390 ; SSE: # %bb.0: 1391 ; SSE-NEXT: psrlq $56, %xmm0 1392 ; SSE-NEXT: retq 1393 ; 1394 ; AVX-LABEL: shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz: 1395 ; AVX: # %bb.0: 1396 ; AVX-NEXT: vpsrlq $56, %xmm0, %xmm0 1397 ; AVX-NEXT: retq 1398 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 16> 1399 ret <16 x i8> %shuffle 1400 } 1401 1402 define <16 x i8> @PR12412(<16 x i8> %inval1, <16 x i8> %inval2) { 1403 ; SSE2-LABEL: PR12412: 1404 ; SSE2: # %bb.0: # %entry 1405 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] 1406 ; SSE2-NEXT: pand %xmm2, %xmm1 1407 ; SSE2-NEXT: pand %xmm2, %xmm0 1408 ; SSE2-NEXT: packuswb %xmm1, %xmm0 1409 ; SSE2-NEXT: retq 1410 ; 1411 ; SSSE3-LABEL: PR12412: 1412 ; SSSE3: # %bb.0: # %entry 1413 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> 1414 ; SSSE3-NEXT: pshufb %xmm2, %xmm1 1415 ; SSSE3-NEXT: pshufb %xmm2, %xmm0 1416 ; SSSE3-NEXT: 
punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1417 ; SSSE3-NEXT: retq 1418 ; 1419 ; SSE41-LABEL: PR12412: 1420 ; SSE41: # %bb.0: # %entry 1421 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> 1422 ; SSE41-NEXT: pshufb %xmm2, %xmm1 1423 ; SSE41-NEXT: pshufb %xmm2, %xmm0 1424 ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1425 ; SSE41-NEXT: retq 1426 ; 1427 ; AVX-LABEL: PR12412: 1428 ; AVX: # %bb.0: # %entry 1429 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> 1430 ; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1 1431 ; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1432 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1433 ; AVX-NEXT: retq 1434 entry: 1435 %0 = shufflevector <16 x i8> %inval1, <16 x i8> %inval2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> 1436 ret <16 x i8> %0 1437 } 1438 1439 define <16 x i8> @shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz(<16 x i8> %a) { 1440 ; SSE-LABEL: shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz: 1441 ; SSE: # %bb.0: 1442 ; SSE-NEXT: psrld $8, %xmm0 1443 ; SSE-NEXT: retq 1444 ; 1445 ; AVX-LABEL: shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz: 1446 ; AVX: # %bb.0: 1447 ; AVX-NEXT: vpsrld $8, %xmm0, %xmm0 1448 ; AVX-NEXT: retq 1449 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 2, i32 3, i32 16, i32 undef, i32 6, i32 7, i32 16, i32 undef, i32 10, i32 11, i32 16, i32 undef, i32 14, i32 15, i32 16> 1450 ret <16 x i8> %shuffle 1451 } 1452 1453 define <16 x i8> @shuffle_v16i8_bitcast_unpack(<16 x i8> %a, <16 x i8> %b) { 1454 ; SSE-LABEL: shuffle_v16i8_bitcast_unpack: 1455 ; SSE: # %bb.0: 1456 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1457 ; SSE-NEXT: retq 1458 ; 1459 ; AVX-LABEL: 
shuffle_v16i8_bitcast_unpack: 1460 ; AVX: # %bb.0: 1461 ; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1462 ; AVX-NEXT: retq 1463 %shuffle8 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 7, i32 23, i32 6, i32 22, i32 5, i32 21, i32 4, i32 20, i32 3, i32 19, i32 2, i32 18, i32 1, i32 17, i32 0, i32 16> 1464 %bitcast32 = bitcast <16 x i8> %shuffle8 to <4 x float> 1465 %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1466 %bitcast16 = bitcast <4 x float> %shuffle32 to <8 x i16> 1467 %shuffle16 = shufflevector <8 x i16> %bitcast16, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> 1468 %bitcast8 = bitcast <8 x i16> %shuffle16 to <16 x i8> 1469 ret <16 x i8> %bitcast8 1470 } 1471 1472 define <16 x i8> @insert_dup_mem_v16i8_i32(i32* %ptr) { 1473 ; SSE2-LABEL: insert_dup_mem_v16i8_i32: 1474 ; SSE2: # %bb.0: 1475 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1476 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1477 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] 1478 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 1479 ; SSE2-NEXT: retq 1480 ; 1481 ; SSSE3-LABEL: insert_dup_mem_v16i8_i32: 1482 ; SSSE3: # %bb.0: 1483 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1484 ; SSSE3-NEXT: pxor %xmm1, %xmm1 1485 ; SSSE3-NEXT: pshufb %xmm1, %xmm0 1486 ; SSSE3-NEXT: retq 1487 ; 1488 ; SSE41-LABEL: insert_dup_mem_v16i8_i32: 1489 ; SSE41: # %bb.0: 1490 ; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1491 ; SSE41-NEXT: pxor %xmm1, %xmm1 1492 ; SSE41-NEXT: pshufb %xmm1, %xmm0 1493 ; SSE41-NEXT: retq 1494 ; 1495 ; AVX1-LABEL: insert_dup_mem_v16i8_i32: 1496 ; AVX1: # %bb.0: 1497 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1498 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1499 ; AVX1-NEXT: 
vpshufb %xmm1, %xmm0, %xmm0 1500 ; AVX1-NEXT: retq 1501 ; 1502 ; AVX2OR512VL-LABEL: insert_dup_mem_v16i8_i32: 1503 ; AVX2OR512VL: # %bb.0: 1504 ; AVX2OR512VL-NEXT: vpbroadcastb (%rdi), %xmm0 1505 ; AVX2OR512VL-NEXT: retq 1506 %tmp = load i32, i32* %ptr, align 4 1507 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 1508 %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8> 1509 %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> zeroinitializer 1510 ret <16 x i8> %tmp3 1511 } 1512 1513 define <16 x i8> @insert_dup_mem_v16i8_sext_i8(i8* %ptr) { 1514 ; SSE2-LABEL: insert_dup_mem_v16i8_sext_i8: 1515 ; SSE2: # %bb.0: 1516 ; SSE2-NEXT: movsbl (%rdi), %eax 1517 ; SSE2-NEXT: movd %eax, %xmm0 1518 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1519 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] 1520 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 1521 ; SSE2-NEXT: retq 1522 ; 1523 ; SSSE3-LABEL: insert_dup_mem_v16i8_sext_i8: 1524 ; SSSE3: # %bb.0: 1525 ; SSSE3-NEXT: movsbl (%rdi), %eax 1526 ; SSSE3-NEXT: movd %eax, %xmm0 1527 ; SSSE3-NEXT: pxor %xmm1, %xmm1 1528 ; SSSE3-NEXT: pshufb %xmm1, %xmm0 1529 ; SSSE3-NEXT: retq 1530 ; 1531 ; SSE41-LABEL: insert_dup_mem_v16i8_sext_i8: 1532 ; SSE41: # %bb.0: 1533 ; SSE41-NEXT: movsbl (%rdi), %eax 1534 ; SSE41-NEXT: movd %eax, %xmm0 1535 ; SSE41-NEXT: pxor %xmm1, %xmm1 1536 ; SSE41-NEXT: pshufb %xmm1, %xmm0 1537 ; SSE41-NEXT: retq 1538 ; 1539 ; AVX1-LABEL: insert_dup_mem_v16i8_sext_i8: 1540 ; AVX1: # %bb.0: 1541 ; AVX1-NEXT: movsbl (%rdi), %eax 1542 ; AVX1-NEXT: vmovd %eax, %xmm0 1543 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1544 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 1545 ; AVX1-NEXT: retq 1546 ; 1547 ; AVX2OR512VL-LABEL: insert_dup_mem_v16i8_sext_i8: 1548 ; AVX2OR512VL: # %bb.0: 1549 ; AVX2OR512VL-NEXT: vpbroadcastb (%rdi), %xmm0 1550 ; AVX2OR512VL-NEXT: retq 1551 %tmp = load i8, i8* %ptr, align 1 1552 %tmp1 = sext i8 %tmp to i32 1553 %tmp2 = insertelement <4 x i32> 
zeroinitializer, i32 %tmp1, i32 0 1554 %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8> 1555 %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <16 x i32> zeroinitializer 1556 ret <16 x i8> %tmp4 1557 } 1558 1559 define <16 x i8> @insert_dup_elt1_mem_v16i8_i32(i32* %ptr) { 1560 ; SSE2-LABEL: insert_dup_elt1_mem_v16i8_i32: 1561 ; SSE2: # %bb.0: 1562 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1563 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1564 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7] 1565 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 1566 ; SSE2-NEXT: retq 1567 ; 1568 ; SSSE3-LABEL: insert_dup_elt1_mem_v16i8_i32: 1569 ; SSSE3: # %bb.0: 1570 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1571 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 1572 ; SSSE3-NEXT: retq 1573 ; 1574 ; SSE41-LABEL: insert_dup_elt1_mem_v16i8_i32: 1575 ; SSE41: # %bb.0: 1576 ; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1577 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 1578 ; SSE41-NEXT: retq 1579 ; 1580 ; AVX1-LABEL: insert_dup_elt1_mem_v16i8_i32: 1581 ; AVX1: # %bb.0: 1582 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1583 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 1584 ; AVX1-NEXT: retq 1585 ; 1586 ; AVX2OR512VL-LABEL: insert_dup_elt1_mem_v16i8_i32: 1587 ; AVX2OR512VL: # %bb.0: 1588 ; AVX2OR512VL-NEXT: vpbroadcastb 1(%rdi), %xmm0 1589 ; AVX2OR512VL-NEXT: retq 1590 %tmp = load i32, i32* %ptr, align 4 1591 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 1592 %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8> 1593 %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 1594 ret <16 x i8> %tmp3 1595 } 1596 1597 define <16 x i8> @insert_dup_elt2_mem_v16i8_i32(i32* %ptr) { 1598 
; SSE2-LABEL: insert_dup_elt2_mem_v16i8_i32: 1599 ; SSE2: # %bb.0: 1600 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1601 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1602 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,3,4,5,6,7] 1603 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 1604 ; SSE2-NEXT: retq 1605 ; 1606 ; SSSE3-LABEL: insert_dup_elt2_mem_v16i8_i32: 1607 ; SSSE3: # %bb.0: 1608 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1609 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] 1610 ; SSSE3-NEXT: retq 1611 ; 1612 ; SSE41-LABEL: insert_dup_elt2_mem_v16i8_i32: 1613 ; SSE41: # %bb.0: 1614 ; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1615 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] 1616 ; SSE41-NEXT: retq 1617 ; 1618 ; AVX1-LABEL: insert_dup_elt2_mem_v16i8_i32: 1619 ; AVX1: # %bb.0: 1620 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1621 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] 1622 ; AVX1-NEXT: retq 1623 ; 1624 ; AVX2OR512VL-LABEL: insert_dup_elt2_mem_v16i8_i32: 1625 ; AVX2OR512VL: # %bb.0: 1626 ; AVX2OR512VL-NEXT: vpbroadcastb 2(%rdi), %xmm0 1627 ; AVX2OR512VL-NEXT: retq 1628 %tmp = load i32, i32* %ptr, align 4 1629 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 1630 %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8> 1631 %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> 1632 ret <16 x i8> %tmp3 1633 } 1634 1635 define <16 x i8> @insert_dup_elt1_mem_v16i8_sext_i8(i8* %ptr) { 1636 ; SSE2-LABEL: insert_dup_elt1_mem_v16i8_sext_i8: 1637 ; SSE2: # %bb.0: 1638 ; SSE2-NEXT: movsbl (%rdi), %eax 1639 ; SSE2-NEXT: movd %eax, %xmm0 1640 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1641 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = 
xmm0[1,1,2,3,4,5,6,7] 1642 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 1643 ; SSE2-NEXT: retq 1644 ; 1645 ; SSSE3-LABEL: insert_dup_elt1_mem_v16i8_sext_i8: 1646 ; SSSE3: # %bb.0: 1647 ; SSSE3-NEXT: movsbl (%rdi), %eax 1648 ; SSSE3-NEXT: movd %eax, %xmm0 1649 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 1650 ; SSSE3-NEXT: retq 1651 ; 1652 ; SSE41-LABEL: insert_dup_elt1_mem_v16i8_sext_i8: 1653 ; SSE41: # %bb.0: 1654 ; SSE41-NEXT: movsbl (%rdi), %eax 1655 ; SSE41-NEXT: movd %eax, %xmm0 1656 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 1657 ; SSE41-NEXT: retq 1658 ; 1659 ; AVX1-LABEL: insert_dup_elt1_mem_v16i8_sext_i8: 1660 ; AVX1: # %bb.0: 1661 ; AVX1-NEXT: movsbl (%rdi), %eax 1662 ; AVX1-NEXT: vmovd %eax, %xmm0 1663 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 1664 ; AVX1-NEXT: retq 1665 ; 1666 ; AVX2-LABEL: insert_dup_elt1_mem_v16i8_sext_i8: 1667 ; AVX2: # %bb.0: 1668 ; AVX2-NEXT: movsbl (%rdi), %eax 1669 ; AVX2-NEXT: shrl $8, %eax 1670 ; AVX2-NEXT: vmovd %eax, %xmm0 1671 ; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0 1672 ; AVX2-NEXT: retq 1673 ; 1674 ; AVX512VL-LABEL: insert_dup_elt1_mem_v16i8_sext_i8: 1675 ; AVX512VL: # %bb.0: 1676 ; AVX512VL-NEXT: movsbl (%rdi), %eax 1677 ; AVX512VL-NEXT: shrl $8, %eax 1678 ; AVX512VL-NEXT: vpbroadcastb %eax, %xmm0 1679 ; AVX512VL-NEXT: retq 1680 %tmp = load i8, i8* %ptr, align 1 1681 %tmp1 = sext i8 %tmp to i32 1682 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0 1683 %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8> 1684 %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 1685 ret <16 x i8> %tmp4 1686 } 1687 1688 define <16 x i8> @insert_dup_elt2_mem_v16i8_sext_i8(i8* %ptr) { 1689 ; SSE2-LABEL: insert_dup_elt2_mem_v16i8_sext_i8: 1690 ; SSE2: # %bb.0: 1691 ; SSE2-NEXT: movsbl (%rdi), %eax 1692 ; 
SSE2-NEXT: movd %eax, %xmm0 1693 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1694 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,3,4,5,6,7] 1695 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 1696 ; SSE2-NEXT: retq 1697 ; 1698 ; SSSE3-LABEL: insert_dup_elt2_mem_v16i8_sext_i8: 1699 ; SSSE3: # %bb.0: 1700 ; SSSE3-NEXT: movsbl (%rdi), %eax 1701 ; SSSE3-NEXT: movd %eax, %xmm0 1702 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] 1703 ; SSSE3-NEXT: retq 1704 ; 1705 ; SSE41-LABEL: insert_dup_elt2_mem_v16i8_sext_i8: 1706 ; SSE41: # %bb.0: 1707 ; SSE41-NEXT: movsbl (%rdi), %eax 1708 ; SSE41-NEXT: movd %eax, %xmm0 1709 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] 1710 ; SSE41-NEXT: retq 1711 ; 1712 ; AVX1-LABEL: insert_dup_elt2_mem_v16i8_sext_i8: 1713 ; AVX1: # %bb.0: 1714 ; AVX1-NEXT: movsbl (%rdi), %eax 1715 ; AVX1-NEXT: vmovd %eax, %xmm0 1716 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] 1717 ; AVX1-NEXT: retq 1718 ; 1719 ; AVX2-LABEL: insert_dup_elt2_mem_v16i8_sext_i8: 1720 ; AVX2: # %bb.0: 1721 ; AVX2-NEXT: movsbl (%rdi), %eax 1722 ; AVX2-NEXT: shrl $16, %eax 1723 ; AVX2-NEXT: vmovd %eax, %xmm0 1724 ; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0 1725 ; AVX2-NEXT: retq 1726 ; 1727 ; AVX512VL-LABEL: insert_dup_elt2_mem_v16i8_sext_i8: 1728 ; AVX512VL: # %bb.0: 1729 ; AVX512VL-NEXT: movsbl (%rdi), %eax 1730 ; AVX512VL-NEXT: shrl $16, %eax 1731 ; AVX512VL-NEXT: vpbroadcastb %eax, %xmm0 1732 ; AVX512VL-NEXT: retq 1733 %tmp = load i8, i8* %ptr, align 1 1734 %tmp1 = sext i8 %tmp to i32 1735 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0 1736 %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8> 1737 %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> 1738 ret <16 x i8> %tmp4 1739 } 1740 1741 define <16 x i8> @PR31364(i8* 
nocapture readonly %a, i8* nocapture readonly %b) { 1742 ; SSE2-LABEL: PR31364: 1743 ; SSE2: # %bb.0: 1744 ; SSE2-NEXT: movzbl (%rdi), %eax 1745 ; SSE2-NEXT: movzbl (%rsi), %ecx 1746 ; SSE2-NEXT: shll $8, %ecx 1747 ; SSE2-NEXT: orl %eax, %ecx 1748 ; SSE2-NEXT: movzwl %cx, %eax 1749 ; SSE2-NEXT: movd %eax, %xmm1 1750 ; SSE2-NEXT: pxor %xmm0, %xmm0 1751 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 1752 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[1,1,1,3,4,5,6,7] 1753 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,1] 1754 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3] 1755 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7] 1756 ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,4,4,4] 1757 ; SSE2-NEXT: packuswb %xmm1, %xmm0 1758 ; SSE2-NEXT: retq 1759 ; 1760 ; SSSE3-LABEL: PR31364: 1761 ; SSSE3: # %bb.0: 1762 ; SSSE3-NEXT: movzbl (%rdi), %eax 1763 ; SSSE3-NEXT: movzbl (%rsi), %ecx 1764 ; SSSE3-NEXT: shll $8, %ecx 1765 ; SSSE3-NEXT: orl %eax, %ecx 1766 ; SSSE3-NEXT: movzwl %cx, %eax 1767 ; SSSE3-NEXT: movd %eax, %xmm0 1768 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1],zero,xmm0[1,1,1,1,1,0,0,0] 1769 ; SSSE3-NEXT: retq 1770 ; 1771 ; SSE41-LABEL: PR31364: 1772 ; SSE41: # %bb.0: 1773 ; SSE41-NEXT: pxor %xmm0, %xmm0 1774 ; SSE41-NEXT: pinsrb $0, (%rdi), %xmm0 1775 ; SSE41-NEXT: pinsrb $1, (%rsi), %xmm0 1776 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1],zero,xmm0[1,1,1,1,1,0,0,0] 1777 ; SSE41-NEXT: retq 1778 ; 1779 ; AVX-LABEL: PR31364: 1780 ; AVX: # %bb.0: 1781 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 1782 ; AVX-NEXT: vpinsrb $0, (%rdi), %xmm0, %xmm0 1783 ; AVX-NEXT: vpinsrb $1, (%rsi), %xmm0, %xmm0 1784 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1],zero,xmm0[1,1,1,1,1,0,0,0] 1785 ; AVX-NEXT: retq 1786 %v0 = load i8, i8* %a, align 1 1787 %vecins = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 0, i8 
undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %v0, i32 0 1788 %v1 = load i8, i8* %b, align 1 1789 %vecins2 = insertelement <16 x i8> %vecins, i8 %v1, i32 1 1790 %result = shufflevector <16 x i8> %vecins2, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 3, i32 1, i32 1, i32 1, i32 1, i32 1, i32 0, i32 0, i32 0> 1791 ret <16 x i8> %result 1792 } 1793 1794 define <16 x i8> @PR31301(i8* nocapture readonly %x, i8* nocapture readonly %y) { 1795 ; SSE2-LABEL: PR31301: 1796 ; SSE2: # %bb.0: # %entry 1797 ; SSE2-NEXT: movzbl (%rdi), %eax 1798 ; SSE2-NEXT: movd %eax, %xmm0 1799 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1800 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] 1801 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 1802 ; SSE2-NEXT: movzbl (%rsi), %eax 1803 ; SSE2-NEXT: movd %eax, %xmm1 1804 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1805 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7] 1806 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] 1807 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1808 ; SSE2-NEXT: retq 1809 ; 1810 ; SSSE3-LABEL: PR31301: 1811 ; SSSE3: # %bb.0: # %entry 1812 ; SSSE3-NEXT: movzbl (%rdi), %eax 1813 ; SSSE3-NEXT: movd %eax, %xmm0 1814 ; SSSE3-NEXT: pxor %xmm1, %xmm1 1815 ; SSSE3-NEXT: pshufb %xmm1, %xmm0 1816 ; SSSE3-NEXT: movzbl (%rsi), %eax 1817 ; SSSE3-NEXT: movd %eax, %xmm2 1818 ; SSSE3-NEXT: pshufb %xmm1, %xmm2 1819 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 1820 ; SSSE3-NEXT: retq 1821 ; 1822 ; SSE41-LABEL: PR31301: 1823 ; SSE41: # %bb.0: # %entry 1824 ; SSE41-NEXT: movzbl (%rdi), %eax 1825 ; 
SSE41-NEXT: movd %eax, %xmm0 1826 ; SSE41-NEXT: pxor %xmm1, %xmm1 1827 ; SSE41-NEXT: pshufb %xmm1, %xmm0 1828 ; SSE41-NEXT: movzbl (%rsi), %eax 1829 ; SSE41-NEXT: movd %eax, %xmm2 1830 ; SSE41-NEXT: pshufb %xmm1, %xmm2 1831 ; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 1832 ; SSE41-NEXT: retq 1833 ; 1834 ; AVX1-LABEL: PR31301: 1835 ; AVX1: # %bb.0: # %entry 1836 ; AVX1-NEXT: movzbl (%rdi), %eax 1837 ; AVX1-NEXT: vmovd %eax, %xmm0 1838 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1839 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 1840 ; AVX1-NEXT: movzbl (%rsi), %eax 1841 ; AVX1-NEXT: vmovd %eax, %xmm2 1842 ; AVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm1 1843 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1844 ; AVX1-NEXT: retq 1845 ; 1846 ; AVX2OR512VL-LABEL: PR31301: 1847 ; AVX2OR512VL: # %bb.0: # %entry 1848 ; AVX2OR512VL-NEXT: vpbroadcastb (%rdi), %xmm0 1849 ; AVX2OR512VL-NEXT: vpbroadcastb (%rsi), %xmm1 1850 ; AVX2OR512VL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1851 ; AVX2OR512VL-NEXT: retq 1852 entry: 1853 %0 = load i8, i8* %x, align 1 1854 %1 = insertelement <16 x i8> undef, i8 %0, i32 0 1855 %lane = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1856 %2 = load i8, i8* %y, align 1 1857 %3 = insertelement <16 x i8> undef, i8 %2, i32 0 1858 %lane3 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1859 %vzip.i = 
shufflevector <16 x i8> %lane, <16 x i8> %lane3, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> 1860 ret <16 x i8> %vzip.i 1861 } 1862