1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2 3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3 4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41 5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX1 6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-SLOW 7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-FAST 8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VL-SLOW 9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VL-FAST 10 11 define <8 x i16> @shuffle_v8i16_01012323(<8 x i16> %a, <8 x i16> %b) { 12 ; SSE-LABEL: shuffle_v8i16_01012323: 13 ; SSE: # %bb.0: 14 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 15 ; SSE-NEXT: retq 16 ; 17 ; AVX-LABEL: shuffle_v8i16_01012323: 18 ; AVX: # %bb.0: 19 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1] 20 ; AVX-NEXT: retq 21 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3> 22 
ret <8 x i16> %shuffle 23 } 24 define <8 x i16> @shuffle_v8i16_67452301(<8 x i16> %a, <8 x i16> %b) { 25 ; SSE-LABEL: shuffle_v8i16_67452301: 26 ; SSE: # %bb.0: 27 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0] 28 ; SSE-NEXT: retq 29 ; 30 ; AVX-LABEL: shuffle_v8i16_67452301: 31 ; AVX: # %bb.0: 32 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] 33 ; AVX-NEXT: retq 34 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1> 35 ret <8 x i16> %shuffle 36 } 37 define <8 x i16> @shuffle_v8i16_456789AB(<8 x i16> %a, <8 x i16> %b) { 38 ; SSE2-LABEL: shuffle_v8i16_456789AB: 39 ; SSE2: # %bb.0: 40 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] 41 ; SSE2-NEXT: retq 42 ; 43 ; SSSE3-LABEL: shuffle_v8i16_456789AB: 44 ; SSSE3: # %bb.0: 45 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] 46 ; SSSE3-NEXT: movdqa %xmm1, %xmm0 47 ; SSSE3-NEXT: retq 48 ; 49 ; SSE41-LABEL: shuffle_v8i16_456789AB: 50 ; SSE41: # %bb.0: 51 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] 52 ; SSE41-NEXT: movdqa %xmm1, %xmm0 53 ; SSE41-NEXT: retq 54 ; 55 ; AVX-LABEL: shuffle_v8i16_456789AB: 56 ; AVX: # %bb.0: 57 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] 58 ; AVX-NEXT: retq 59 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 60 ret <8 x i16> %shuffle 61 } 62 63 define <8 x i16> @shuffle_v8i16_00000000(<8 x i16> %a, <8 x i16> %b) { 64 ; SSE-LABEL: shuffle_v8i16_00000000: 65 ; SSE: # %bb.0: 66 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] 67 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 68 ; SSE-NEXT: retq 69 ; 70 ; AVX1-LABEL: shuffle_v8i16_00000000: 71 ; AVX1: # %bb.0: 72 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] 73 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 74 ; AVX1-NEXT: retq 75 ; 76 ; 
AVX2OR512VL-LABEL: shuffle_v8i16_00000000: 77 ; AVX2OR512VL: # %bb.0: 78 ; AVX2OR512VL-NEXT: vpbroadcastw %xmm0, %xmm0 79 ; AVX2OR512VL-NEXT: retq 80 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 81 ret <8 x i16> %shuffle 82 } 83 define <8 x i16> @shuffle_v8i16_00004444(<8 x i16> %a, <8 x i16> %b) { 84 ; SSE-LABEL: shuffle_v8i16_00004444: 85 ; SSE: # %bb.0: 86 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 87 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 88 ; SSE-NEXT: retq 89 ; 90 ; AVX1-LABEL: shuffle_v8i16_00004444: 91 ; AVX1: # %bb.0: 92 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 93 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 94 ; AVX1-NEXT: retq 95 ; 96 ; AVX2-SLOW-LABEL: shuffle_v8i16_00004444: 97 ; AVX2-SLOW: # %bb.0: 98 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 99 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 100 ; AVX2-SLOW-NEXT: retq 101 ; 102 ; AVX2-FAST-LABEL: shuffle_v8i16_00004444: 103 ; AVX2-FAST: # %bb.0: 104 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9] 105 ; AVX2-FAST-NEXT: retq 106 ; 107 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_00004444: 108 ; AVX512VL-SLOW: # %bb.0: 109 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 110 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 111 ; AVX512VL-SLOW-NEXT: retq 112 ; 113 ; AVX512VL-FAST-LABEL: shuffle_v8i16_00004444: 114 ; AVX512VL-FAST: # %bb.0: 115 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9] 116 ; AVX512VL-FAST-NEXT: retq 117 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4> 118 ret <8 x i16> %shuffle 119 } 120 define <8 x i16> @shuffle_v8i16_u0u1u2u3(<8 x i16> %a, <8 x i16> %b) { 121 ; SSE-LABEL: shuffle_v8i16_u0u1u2u3: 122 ; SSE: # %bb.0: 123 ; SSE-NEXT: 
punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 124 ; SSE-NEXT: retq 125 ; 126 ; AVX-LABEL: shuffle_v8i16_u0u1u2u3: 127 ; AVX: # %bb.0: 128 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 129 ; AVX-NEXT: retq 130 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3> 131 ret <8 x i16> %shuffle 132 } 133 define <8 x i16> @shuffle_v8i16_u4u5u6u7(<8 x i16> %a, <8 x i16> %b) { 134 ; SSE-LABEL: shuffle_v8i16_u4u5u6u7: 135 ; SSE: # %bb.0: 136 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] 137 ; SSE-NEXT: retq 138 ; 139 ; AVX-LABEL: shuffle_v8i16_u4u5u6u7: 140 ; AVX: # %bb.0: 141 ; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] 142 ; AVX-NEXT: retq 143 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7> 144 ret <8 x i16> %shuffle 145 } 146 define <8 x i16> @shuffle_v8i16_31206745(<8 x i16> %a, <8 x i16> %b) { 147 ; SSE-LABEL: shuffle_v8i16_31206745: 148 ; SSE: # %bb.0: 149 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7] 150 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] 151 ; SSE-NEXT: retq 152 ; 153 ; AVX1-LABEL: shuffle_v8i16_31206745: 154 ; AVX1: # %bb.0: 155 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7] 156 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] 157 ; AVX1-NEXT: retq 158 ; 159 ; AVX2-SLOW-LABEL: shuffle_v8i16_31206745: 160 ; AVX2-SLOW: # %bb.0: 161 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7] 162 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] 163 ; AVX2-SLOW-NEXT: retq 164 ; 165 ; AVX2-FAST-LABEL: shuffle_v8i16_31206745: 166 ; AVX2-FAST: # %bb.0: 167 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,2,3,4,5,0,1,12,13,14,15,8,9,10,11] 168 ; AVX2-FAST-NEXT: retq 169 ; 170 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_31206745: 171 ; AVX512VL-SLOW: # %bb.0: 172 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = 
xmm0[3,1,2,0,4,5,6,7] 173 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] 174 ; AVX512VL-SLOW-NEXT: retq 175 ; 176 ; AVX512VL-FAST-LABEL: shuffle_v8i16_31206745: 177 ; AVX512VL-FAST: # %bb.0: 178 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,2,3,4,5,0,1,12,13,14,15,8,9,10,11] 179 ; AVX512VL-FAST-NEXT: retq 180 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 5> 181 ret <8 x i16> %shuffle 182 } 183 define <8 x i16> @shuffle_v8i16_44440000(<8 x i16> %a, <8 x i16> %b) { 184 ; SSE2-LABEL: shuffle_v8i16_44440000: 185 ; SSE2: # %bb.0: 186 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3] 187 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 188 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 189 ; SSE2-NEXT: retq 190 ; 191 ; SSSE3-LABEL: shuffle_v8i16_44440000: 192 ; SSSE3: # %bb.0: 193 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1] 194 ; SSSE3-NEXT: retq 195 ; 196 ; SSE41-LABEL: shuffle_v8i16_44440000: 197 ; SSE41: # %bb.0: 198 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1] 199 ; SSE41-NEXT: retq 200 ; 201 ; AVX-LABEL: shuffle_v8i16_44440000: 202 ; AVX: # %bb.0: 203 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1] 204 ; AVX-NEXT: retq 205 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0> 206 ret <8 x i16> %shuffle 207 } 208 define <8 x i16> @shuffle_v8i16_23016745(<8 x i16> %a, <8 x i16> %b) { 209 ; SSE-LABEL: shuffle_v8i16_23016745: 210 ; SSE: # %bb.0: 211 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] 212 ; SSE-NEXT: retq 213 ; 214 ; AVX-LABEL: shuffle_v8i16_23016745: 215 ; AVX: # %bb.0: 216 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,0,3,2] 217 ; AVX-NEXT: retq 218 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5> 219 ret <8 x i16> %shuffle 
220 } 221 define <8 x i16> @shuffle_v8i16_23026745(<8 x i16> %a, <8 x i16> %b) { 222 ; SSE-LABEL: shuffle_v8i16_23026745: 223 ; SSE: # %bb.0: 224 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7] 225 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] 226 ; SSE-NEXT: retq 227 ; 228 ; AVX1-LABEL: shuffle_v8i16_23026745: 229 ; AVX1: # %bb.0: 230 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7] 231 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] 232 ; AVX1-NEXT: retq 233 ; 234 ; AVX2-SLOW-LABEL: shuffle_v8i16_23026745: 235 ; AVX2-SLOW: # %bb.0: 236 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7] 237 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] 238 ; AVX2-SLOW-NEXT: retq 239 ; 240 ; AVX2-FAST-LABEL: shuffle_v8i16_23026745: 241 ; AVX2-FAST: # %bb.0: 242 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,4,5,12,13,14,15,8,9,10,11] 243 ; AVX2-FAST-NEXT: retq 244 ; 245 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_23026745: 246 ; AVX512VL-SLOW: # %bb.0: 247 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7] 248 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] 249 ; AVX512VL-SLOW-NEXT: retq 250 ; 251 ; AVX512VL-FAST-LABEL: shuffle_v8i16_23026745: 252 ; AVX512VL-FAST: # %bb.0: 253 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,4,5,12,13,14,15,8,9,10,11] 254 ; AVX512VL-FAST-NEXT: retq 255 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 5> 256 ret <8 x i16> %shuffle 257 } 258 define <8 x i16> @shuffle_v8i16_23016747(<8 x i16> %a, <8 x i16> %b) { 259 ; SSE-LABEL: shuffle_v8i16_23016747: 260 ; SSE: # %bb.0: 261 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3] 262 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7] 263 ; SSE-NEXT: retq 264 ; 265 ; AVX1-LABEL: shuffle_v8i16_23016747: 266 ; AVX1: # %bb.0: 267 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3] 268 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = 
xmm0[0,1,2,3,6,7,4,7] 269 ; AVX1-NEXT: retq 270 ; 271 ; AVX2-SLOW-LABEL: shuffle_v8i16_23016747: 272 ; AVX2-SLOW: # %bb.0: 273 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3] 274 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7] 275 ; AVX2-SLOW-NEXT: retq 276 ; 277 ; AVX2-FAST-LABEL: shuffle_v8i16_23016747: 278 ; AVX2-FAST: # %bb.0: 279 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3,12,13,14,15,8,9,14,15] 280 ; AVX2-FAST-NEXT: retq 281 ; 282 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_23016747: 283 ; AVX512VL-SLOW: # %bb.0: 284 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3] 285 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7] 286 ; AVX512VL-SLOW-NEXT: retq 287 ; 288 ; AVX512VL-FAST-LABEL: shuffle_v8i16_23016747: 289 ; AVX512VL-FAST: # %bb.0: 290 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3,12,13,14,15,8,9,14,15] 291 ; AVX512VL-FAST-NEXT: retq 292 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 7> 293 ret <8 x i16> %shuffle 294 } 295 define <8 x i16> @shuffle_v8i16_75643120(<8 x i16> %a, <8 x i16> %b) { 296 ; SSE2-LABEL: shuffle_v8i16_75643120: 297 ; SSE2: # %bb.0: 298 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 299 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7] 300 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4] 301 ; SSE2-NEXT: retq 302 ; 303 ; SSSE3-LABEL: shuffle_v8i16_75643120: 304 ; SSSE3: # %bb.0: 305 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1] 306 ; SSSE3-NEXT: retq 307 ; 308 ; SSE41-LABEL: shuffle_v8i16_75643120: 309 ; SSE41: # %bb.0: 310 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1] 311 ; SSE41-NEXT: retq 312 ; 313 ; AVX-LABEL: shuffle_v8i16_75643120: 314 ; AVX: # %bb.0: 315 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1] 316 ; AVX-NEXT: retq 317 %shuffle = shufflevector 
<8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 0> 318 ret <8 x i16> %shuffle 319 } 320 321 define <8 x i16> @shuffle_v8i16_10545410(<8 x i16> %a, <8 x i16> %b) { 322 ; SSE2-LABEL: shuffle_v8i16_10545410: 323 ; SSE2: # %bb.0: 324 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0] 325 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] 326 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] 327 ; SSE2-NEXT: retq 328 ; 329 ; SSSE3-LABEL: shuffle_v8i16_10545410: 330 ; SSSE3: # %bb.0: 331 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1] 332 ; SSSE3-NEXT: retq 333 ; 334 ; SSE41-LABEL: shuffle_v8i16_10545410: 335 ; SSE41: # %bb.0: 336 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1] 337 ; SSE41-NEXT: retq 338 ; 339 ; AVX-LABEL: shuffle_v8i16_10545410: 340 ; AVX: # %bb.0: 341 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1] 342 ; AVX-NEXT: retq 343 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 0> 344 ret <8 x i16> %shuffle 345 } 346 define <8 x i16> @shuffle_v8i16_54105410(<8 x i16> %a, <8 x i16> %b) { 347 ; SSE2-LABEL: shuffle_v8i16_54105410: 348 ; SSE2: # %bb.0: 349 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0] 350 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7] 351 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] 352 ; SSE2-NEXT: retq 353 ; 354 ; SSSE3-LABEL: shuffle_v8i16_54105410: 355 ; SSSE3: # %bb.0: 356 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1] 357 ; SSSE3-NEXT: retq 358 ; 359 ; SSE41-LABEL: shuffle_v8i16_54105410: 360 ; SSE41: # %bb.0: 361 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1] 362 ; SSE41-NEXT: retq 363 ; 364 ; AVX-LABEL: shuffle_v8i16_54105410: 365 ; AVX: # %bb.0: 366 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1] 367 
; AVX-NEXT: retq 368 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 0> 369 ret <8 x i16> %shuffle 370 } 371 define <8 x i16> @shuffle_v8i16_54101054(<8 x i16> %a, <8 x i16> %b) { 372 ; SSE2-LABEL: shuffle_v8i16_54101054: 373 ; SSE2: # %bb.0: 374 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0] 375 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7] 376 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4] 377 ; SSE2-NEXT: retq 378 ; 379 ; SSSE3-LABEL: shuffle_v8i16_54101054: 380 ; SSSE3: # %bb.0: 381 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9] 382 ; SSSE3-NEXT: retq 383 ; 384 ; SSE41-LABEL: shuffle_v8i16_54101054: 385 ; SSE41: # %bb.0: 386 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9] 387 ; SSE41-NEXT: retq 388 ; 389 ; AVX-LABEL: shuffle_v8i16_54101054: 390 ; AVX: # %bb.0: 391 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9] 392 ; AVX-NEXT: retq 393 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 4> 394 ret <8 x i16> %shuffle 395 } 396 define <8 x i16> @shuffle_v8i16_04400440(<8 x i16> %a, <8 x i16> %b) { 397 ; SSE2-LABEL: shuffle_v8i16_04400440: 398 ; SSE2: # %bb.0: 399 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0] 400 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7] 401 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,4,6] 402 ; SSE2-NEXT: retq 403 ; 404 ; SSSE3-LABEL: shuffle_v8i16_04400440: 405 ; SSSE3: # %bb.0: 406 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1] 407 ; SSSE3-NEXT: retq 408 ; 409 ; SSE41-LABEL: shuffle_v8i16_04400440: 410 ; SSE41: # %bb.0: 411 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1] 412 ; SSE41-NEXT: retq 413 ; 414 ; AVX-LABEL: shuffle_v8i16_04400440: 415 ; AVX: # %bb.0: 416 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = 
xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1] 417 ; AVX-NEXT: retq 418 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 0> 419 ret <8 x i16> %shuffle 420 } 421 define <8 x i16> @shuffle_v8i16_40044004(<8 x i16> %a, <8 x i16> %b) { 422 ; SSE2-LABEL: shuffle_v8i16_40044004: 423 ; SSE2: # %bb.0: 424 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0] 425 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,0,2,4,5,6,7] 426 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,4] 427 ; SSE2-NEXT: retq 428 ; 429 ; SSSE3-LABEL: shuffle_v8i16_40044004: 430 ; SSSE3: # %bb.0: 431 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9] 432 ; SSSE3-NEXT: retq 433 ; 434 ; SSE41-LABEL: shuffle_v8i16_40044004: 435 ; SSE41: # %bb.0: 436 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9] 437 ; SSE41-NEXT: retq 438 ; 439 ; AVX-LABEL: shuffle_v8i16_40044004: 440 ; AVX: # %bb.0: 441 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9] 442 ; AVX-NEXT: retq 443 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 4> 444 ret <8 x i16> %shuffle 445 } 446 447 define <8 x i16> @shuffle_v8i16_26405173(<8 x i16> %a, <8 x i16> %b) { 448 ; SSE2-LABEL: shuffle_v8i16_26405173: 449 ; SSE2: # %bb.0: 450 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] 451 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4] 452 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1] 453 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7] 454 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7] 455 ; SSE2-NEXT: retq 456 ; 457 ; SSSE3-LABEL: shuffle_v8i16_26405173: 458 ; SSSE3: # %bb.0: 459 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7] 460 ; SSSE3-NEXT: retq 461 ; 462 ; SSE41-LABEL: shuffle_v8i16_26405173: 463 ; SSE41: # %bb.0: 464 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = 
xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7] 465 ; SSE41-NEXT: retq 466 ; 467 ; AVX-LABEL: shuffle_v8i16_26405173: 468 ; AVX: # %bb.0: 469 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7] 470 ; AVX-NEXT: retq 471 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 3> 472 ret <8 x i16> %shuffle 473 } 474 define <8 x i16> @shuffle_v8i16_20645173(<8 x i16> %a, <8 x i16> %b) { 475 ; SSE2-LABEL: shuffle_v8i16_20645173: 476 ; SSE2: # %bb.0: 477 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] 478 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4] 479 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1] 480 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,2,3,4,5,6,7] 481 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7] 482 ; SSE2-NEXT: retq 483 ; 484 ; SSSE3-LABEL: shuffle_v8i16_20645173: 485 ; SSSE3: # %bb.0: 486 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7] 487 ; SSSE3-NEXT: retq 488 ; 489 ; SSE41-LABEL: shuffle_v8i16_20645173: 490 ; SSE41: # %bb.0: 491 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7] 492 ; SSE41-NEXT: retq 493 ; 494 ; AVX-LABEL: shuffle_v8i16_20645173: 495 ; AVX: # %bb.0: 496 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7] 497 ; AVX-NEXT: retq 498 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 3> 499 ret <8 x i16> %shuffle 500 } 501 define <8 x i16> @shuffle_v8i16_26401375(<8 x i16> %a, <8 x i16> %b) { 502 ; SSE2-LABEL: shuffle_v8i16_26401375: 503 ; SSE2: # %bb.0: 504 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] 505 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4] 506 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2] 507 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7] 508 ; SSE2-NEXT: retq 509 ; 510 ; SSSE3-LABEL: 
shuffle_v8i16_26401375: 511 ; SSSE3: # %bb.0: 512 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11] 513 ; SSSE3-NEXT: retq 514 ; 515 ; SSE41-LABEL: shuffle_v8i16_26401375: 516 ; SSE41: # %bb.0: 517 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11] 518 ; SSE41-NEXT: retq 519 ; 520 ; AVX-LABEL: shuffle_v8i16_26401375: 521 ; AVX: # %bb.0: 522 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11] 523 ; AVX-NEXT: retq 524 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 5> 525 ret <8 x i16> %shuffle 526 } 527 528 define <8 x i16> @shuffle_v8i16_66751643(<8 x i16> %a, <8 x i16> %b) { 529 ; SSE2-LABEL: shuffle_v8i16_66751643: 530 ; SSE2: # %bb.0: 531 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,3,4,5,6,7] 532 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7] 533 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,0] 534 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,3,2,4,5,6,7] 535 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,4,6] 536 ; SSE2-NEXT: retq 537 ; 538 ; SSSE3-LABEL: shuffle_v8i16_66751643: 539 ; SSSE3: # %bb.0: 540 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7] 541 ; SSSE3-NEXT: retq 542 ; 543 ; SSE41-LABEL: shuffle_v8i16_66751643: 544 ; SSE41: # %bb.0: 545 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7] 546 ; SSE41-NEXT: retq 547 ; 548 ; AVX-LABEL: shuffle_v8i16_66751643: 549 ; AVX: # %bb.0: 550 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7] 551 ; AVX-NEXT: retq 552 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 3> 553 ret <8 x i16> %shuffle 554 } 555 556 define <8 x i16> @shuffle_v8i16_60514754(<8 x i16> %a, <8 x i16> %b) { 557 ; SSE2-LABEL: shuffle_v8i16_60514754: 558 ; SSE2: # %bb.0: 559 ; SSE2-NEXT: pshufhw 
{{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7] 560 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 561 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7] 562 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,5,6] 563 ; SSE2-NEXT: retq 564 ; 565 ; SSSE3-LABEL: shuffle_v8i16_60514754: 566 ; SSSE3: # %bb.0: 567 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9] 568 ; SSSE3-NEXT: retq 569 ; 570 ; SSE41-LABEL: shuffle_v8i16_60514754: 571 ; SSE41: # %bb.0: 572 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9] 573 ; SSE41-NEXT: retq 574 ; 575 ; AVX-LABEL: shuffle_v8i16_60514754: 576 ; AVX: # %bb.0: 577 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9] 578 ; AVX-NEXT: retq 579 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 0, i32 5, i32 1, i32 4, i32 7, i32 5, i32 4> 580 ret <8 x i16> %shuffle 581 } 582 583 define <8 x i16> @shuffle_v8i16_00444444(<8 x i16> %a, <8 x i16> %b) { 584 ; SSE2-LABEL: shuffle_v8i16_00444444: 585 ; SSE2: # %bb.0: 586 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 587 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7] 588 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 589 ; SSE2-NEXT: retq 590 ; 591 ; SSSE3-LABEL: shuffle_v8i16_00444444: 592 ; SSSE3: # %bb.0: 593 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9] 594 ; SSSE3-NEXT: retq 595 ; 596 ; SSE41-LABEL: shuffle_v8i16_00444444: 597 ; SSE41: # %bb.0: 598 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9] 599 ; SSE41-NEXT: retq 600 ; 601 ; AVX-LABEL: shuffle_v8i16_00444444: 602 ; AVX: # %bb.0: 603 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9] 604 ; AVX-NEXT: retq 605 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> 606 ret <8 x i16> %shuffle 607 } 608 define <8 x i16> @shuffle_v8i16_44004444(<8 x 
i16> %a, <8 x i16> %b) { 609 ; SSE2-LABEL: shuffle_v8i16_44004444: 610 ; SSE2: # %bb.0: 611 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 612 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,0,0,4,5,6,7] 613 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 614 ; SSE2-NEXT: retq 615 ; 616 ; SSSE3-LABEL: shuffle_v8i16_44004444: 617 ; SSSE3: # %bb.0: 618 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9] 619 ; SSSE3-NEXT: retq 620 ; 621 ; SSE41-LABEL: shuffle_v8i16_44004444: 622 ; SSE41: # %bb.0: 623 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9] 624 ; SSE41-NEXT: retq 625 ; 626 ; AVX-LABEL: shuffle_v8i16_44004444: 627 ; AVX: # %bb.0: 628 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9] 629 ; AVX-NEXT: retq 630 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4> 631 ret <8 x i16> %shuffle 632 } 633 define <8 x i16> @shuffle_v8i16_04404444(<8 x i16> %a, <8 x i16> %b) { 634 ; SSE2-LABEL: shuffle_v8i16_04404444: 635 ; SSE2: # %bb.0: 636 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 637 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7] 638 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 639 ; SSE2-NEXT: retq 640 ; 641 ; SSSE3-LABEL: shuffle_v8i16_04404444: 642 ; SSSE3: # %bb.0: 643 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9] 644 ; SSSE3-NEXT: retq 645 ; 646 ; SSE41-LABEL: shuffle_v8i16_04404444: 647 ; SSE41: # %bb.0: 648 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9] 649 ; SSE41-NEXT: retq 650 ; 651 ; AVX-LABEL: shuffle_v8i16_04404444: 652 ; AVX: # %bb.0: 653 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9] 654 ; AVX-NEXT: retq 655 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4> 656 ret <8 x i16> %shuffle 657 } 658 define <8 x i16> 
@shuffle_v8i16_04400000(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_04400000:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_04400000:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_04400000:
; SSE41: # %bb.0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_04400000:
; AVX: # %bb.0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i16> %shuffle
}
; Unary shuffle of %a producing words <0,4,4,0,4,5,6,7>; the high half is left in place.
; The fast-variable-shuffle runs expect one vpshufb, the others a (v)pshufd + (v)pshuflw pair.
define <8 x i16> @shuffle_v8i16_04404567(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_04404567:
; SSE: # %bb.0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE-NEXT: retq
;
; AVX1-LABEL: shuffle_v8i16_04404567:
; AVX1: # %bb.0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: shuffle_v8i16_04404567:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: shuffle_v8i16_04404567:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,10,11,12,13,14,15]
; AVX2-FAST-NEXT: retq
;
; AVX512VL-SLOW-LABEL: shuffle_v8i16_04404567:
; AVX512VL-SLOW: # %bb.0:
;
AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i16_04404567:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,10,11,12,13,14,15]
; AVX512VL-FAST-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %shuffle
}

; Unary shuffle of %a producing words <0,undef,4,4,4,4,4,4> (variant of 00444444 with word 1 undef).
define <8 x i16> @shuffle_v8i16_0X444444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_0X444444:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,2,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_0X444444:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_0X444444:
; SSE41: # %bb.0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_0X444444:
; AVX: # %bb.0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}
; Unary shuffle of %a producing words <4,4,undef,0,4,4,4,4>.
define <8 x i16> @shuffle_v8i16_44X04444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_44X04444:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_44X04444:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 =
xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_44X04444:
; SSE41: # %bb.0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_44X04444:
; AVX: # %bb.0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}
; Unary shuffle of %a producing words <undef,4,4,0,4,4,4,4>.
define <8 x i16> @shuffle_v8i16_X4404444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_X4404444:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_X4404444:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_X4404444:
; SSE41: # %bb.0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_X4404444:
; AVX: # %bb.0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}

; Unary shuffle of %a: low half is words <0,1,2,7>, high half is undef.
define <8 x i16> @shuffle_v8i16_0127XXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_0127XXXX:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_0127XXXX:
; SSSE3: # %bb.0:
; SSSE3-NEXT:
pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_0127XXXX:
; SSE41: # %bb.0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_0127XXXX:
; AVX: # %bb.0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuffle
}

; Unary shuffle of %a: low half is undef, high half is words <4,5,6,3>.
define <8 x i16> @shuffle_v8i16_XXXX4563(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_XXXX4563:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_XXXX4563:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_XXXX4563:
; SSE41: # %bb.0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_XXXX4563:
; AVX: # %bb.0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 3>
  ret <8 x i16> %shuffle
}

; Unary shuffle of %a: low half is words <4,5,6,3>, high half is undef.
define <8 x i16> @shuffle_v8i16_4563XXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_4563XXXX:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_4563XXXX:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_4563XXXX:
; SSE41: # %bb.0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_4563XXXX:
; AVX: # %bb.0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuffle
}

; Unary shuffle of %a producing words <0,1,2,7,4,5,6,3> (words 3 and 7 exchanged).
define <8 x i16> @shuffle_v8i16_01274563(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_01274563:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_01274563:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_01274563:
; SSE41: # %bb.0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_01274563:
; AVX: # %bb.0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 3>
  ret <8 x i16> %shuffle
}

; Unary shuffle of %a producing words <4,5,6,3,0,1,2,7>.
define <8 x i16> @shuffle_v8i16_45630127(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_45630127:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
; SSE2-NEXT:
pshufd {{.*#+}} xmm0 = xmm0[2,0,3,1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_45630127:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_45630127:
; SSE41: # %bb.0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_45630127:
; AVX: # %bb.0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 7>
  ret <8 x i16> %shuffle
}

; Unary shuffle of %a producing words <3,7,1,0,2,7,3,5>.
; The SSE2 run expects a six-instruction pshufd/pshuflw/pshufhw chain, the other runs one byte shuffle.
define <8 x i16> @shuffle_v8i16_37102735(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_37102735:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,5,6]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_37102735:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_37102735:
; SSE41: # %bb.0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_37102735:
; AVX: # %bb.0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 5>
  ret <8 x i16> %shuffle
}

; Two-input shuffle interleaving the low four words of %a and %b: <0,8,1,9,2,10,3,11>.
define <8 x i16>
@shuffle_v8i16_08192a3b(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_08192a3b:
; SSE: # %bb.0:
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_08192a3b:
; AVX: # %bb.0:
; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  ret <8 x i16> %shuffle
}

; Interleaves the low four words of %a with the high four words of %b: <0,12,1,13,2,14,3,15>.
; Expected lowering first swaps the halves of %b, then does an unpack-low.
define <8 x i16> @shuffle_v8i16_0c1d2e3f(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_0c1d2e3f:
; SSE: # %bb.0:
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_0c1d2e3f:
; AVX: # %bb.0:
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 2, i32 14, i32 3, i32 15>
  ret <8 x i16> %shuffle
}

; Two-input shuffle interleaving the high four words of %a and %b: <4,12,5,13,6,14,7,15>.
define <8 x i16> @shuffle_v8i16_4c5d6e7f(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_4c5d6e7f:
; SSE: # %bb.0:
; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_4c5d6e7f:
; AVX: # %bb.0:
; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <8 x i16> %shuffle
}

; Interleaves the high four words of %a with the low four words of %b: <4,8,5,9,6,10,7,11>.
define <8 x i16>
@shuffle_v8i16_48596a7b(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_48596a7b:
; SSE: # %bb.0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_48596a7b:
; AVX: # %bb.0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 8, i32 5, i32 9, i32 6, i32 10, i32 7, i32 11>
  ret <8 x i16> %shuffle
}

; Interleaves words {0,1,6,7} of %a with the same words of %b: <0,8,1,9,6,14,7,15>.
; Expected lowering permutes both inputs with pshufd before a single unpack-low.
define <8 x i16> @shuffle_v8i16_08196e7f(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_08196e7f:
; SSE: # %bb.0:
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_08196e7f:
; AVX: # %bb.0:
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 6, i32 14, i32 7, i32 15>
  ret <8 x i16> %shuffle
}

; Interleaves words {0,1,6,7} of %a with words {4,5,0,1} of %b: <0,12,1,13,6,8,7,9>.
define <8 x i16> @shuffle_v8i16_0c1d6879(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_0c1d6879:
; SSE: # %bb.0:
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_0c1d6879:
; AVX: # %bb.0:
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 6, i32 8, i32 7, i32 9>
  ret <8 x i16> %shuffle
}

; Two-input shuffle <1,0,9,8,3,2,11,10>, built from the low four words of each input.
; Expected lowering is an unpack-low followed by an in-register word permute.
define <8 x i16> @shuffle_v8i16_109832ba(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_109832ba:
; SSE: # %bb.0:
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
; SSE-NEXT: retq
;
; AVX1-LABEL: shuffle_v8i16_109832ba:
; AVX1: # %bb.0:
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: shuffle_v8i16_109832ba:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: shuffle_v8i16_109832ba:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,6,7,2,3,12,13,8,9,14,15,10,11]
; AVX2-FAST-NEXT: retq
;
; AVX512VL-SLOW-LABEL: shuffle_v8i16_109832ba:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm0 =
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i16_109832ba:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,6,7,2,3,12,13,8,9,14,15,10,11]
; AVX512VL-FAST-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 9, i32 8, i32 3, i32 2, i32 11, i32 10>
  ret <8 x i16> %shuffle
}

; Commuted low interleave, %b words first: <8,0,9,1,10,2,11,3>.
; The non-VEX runs need an extra movdqa because the unpack result lands in xmm1.
define <8 x i16> @shuffle_v8i16_8091a2b3(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_8091a2b3:
; SSE: # %bb.0:
; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_8091a2b3:
; AVX: # %bb.0:
; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3>
  ret <8 x i16> %shuffle
}
; Commuted high interleave, %b words first (mask <12,4,13,5,14,6,15,7>).
define <8 x i16> @shuffle_v8i16_c4d5e6f7(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_c4d5e6f7:
; SSE: # %bb.0:
; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_c4d5e6f7:
; AVX: # %bb.0:
; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32
4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
  ret <8 x i16> %shuffle
}

; Low half is words <0,2,1,3> of %a, high half is words <4,6,5,7> of %b (mask <0,2,1,3,12,14,13,15>).
; Each half is permuted on its own and then merged; the merge instruction differs per run configuration.
define <8 x i16> @shuffle_v8i16_0213cedf(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_0213cedf:
; SSE2: # %bb.0:
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,1,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,6,5,7]
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_0213cedf:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,1,3,4,5,6,7]
; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,6,5,7]
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_0213cedf:
; SSE41: # %bb.0:
; SSE41-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v8i16_0213cedf:
; AVX1: # %bb.0:
; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: shuffle_v8i16_0213cedf:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: shuffle_v8i16_0213cedf:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,12,13,10,11,14,15,8,9,12,13,10,11,14,15]
; AVX2-FAST-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; AVX2-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-FAST-NEXT: retq
;
;
AVX512VL-SLOW-LABEL: shuffle_v8i16_0213cedf:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; AVX512VL-SLOW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i16_0213cedf:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,12,13,10,11,14,15,8,9,12,13,10,11,14,15]
; AVX512VL-FAST-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; AVX512VL-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-FAST-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 1, i32 3, i32 12, i32 14, i32 13, i32 15>
  ret <8 x i16> %shuffle
}

; Low half is <a4,a4,a3,b2> (mask <4,4,3,10>), high half is undef.
; Only one word comes from %b, so most runs blend that word in and then permute.
define <8 x i16> @shuffle_v8i16_443aXXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_443aXXXX:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,0,65535,65535,65535,65535,65535]
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pandn %xmm1, %xmm2
; SSE2-NEXT: por %xmm0, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,1,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_443aXXXX:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[4,5,u,u,u,u,u,u,u,u]
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7],zero,zero,xmm0[u,u,u,u,u,u,u,u]
; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_443aXXXX:
; SSE41: # %bb.0:
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
; SSE41-NEXT: pshuflw {{.*#+}} xmm0 =
xmm0[0,0,3,2,4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v8i16_443aXXXX:
; AVX1: # %bb.0:
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: shuffle_v8i16_443aXXXX:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: shuffle_v8i16_443aXXXX:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7,4,5,8,9,10,11,12,13,14,15]
; AVX2-FAST-NEXT: retq
;
; AVX512VL-SLOW-LABEL: shuffle_v8i16_443aXXXX:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i16_443aXXXX:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7,4,5,8,9,10,11,12,13,14,15]
; AVX512VL-FAST-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 3, i32 10, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuffle
}

; Low half is <a0,a3,a2,b5> (mask <0,3,2,13>), high half is undef.
define <8 x i16> @shuffle_v8i16_032dXXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_032dXXXX:
; SSE2: # %bb.0:
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT: shufps
{{.*#+}} xmm1 = xmm1[3,1,2,0]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,6,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_032dXXXX:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u]
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_032dXXXX:
; SSE41: # %bb.0:
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v8i16_032dXXXX:
; AVX1: # %bb.0:
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
; AVX1-NEXT: retq
;
; AVX2OR512VL-LABEL: shuffle_v8i16_032dXXXX:
; AVX2OR512VL: # %bb.0:
; AVX2OR512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
; AVX2OR512VL-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 3, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuffle
}
; Only result word 3 is defined, taken from word 5 of %b; every other lane is undef.
define <8 x i16> @shuffle_v8i16_XXXdXXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_XXXdXXXX:
; SSE: # %bb.0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_XXXdXXXX:
; AVX: # %bb.0:
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[2,2,3,3]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 13, i32
undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuffle
}

; Low half is <a0,a1,a2,b5> (mask <0,1,2,13>), high half is undef.
; %a's words stay in place, so the expected code moves b5 into lane 3 and blends it in.
define <8 x i16> @shuffle_v8i16_012dXXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_012dXXXX:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535]
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; SSE2-NEXT: pandn %xmm1, %xmm2
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_012dXXXX:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u]
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_012dXXXX:
; SSE41: # %bb.0:
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_012dXXXX:
; AVX: # %bb.0:
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuffle
}

; Low half is undef, high half is <b4,b5,b6,a3> (mask <12,13,14,3>).
define <8 x i16> @shuffle_v8i16_XXXXcde3(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_XXXXcde3:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535,65535,65535,65535,0]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE2-NEXT: pandn %xmm0, %xmm2
; SSE2-NEXT: por %xmm1, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_XXXXcde3:
; SSSE3: # %bb.0:
;
SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm0[6,7]
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,8,9,10,11,12,13],zero,zero
; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_XXXXcde3:
; SSE41: # %bb.0:
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v8i16_XXXXcde3:
; AVX1: # %bb.0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
; AVX1-NEXT: retq
;
; AVX2OR512VL-LABEL: shuffle_v8i16_XXXXcde3:
; AVX2OR512VL: # %bb.0:
; AVX2OR512VL-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
; AVX2OR512VL-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 3>
  ret <8 x i16> %shuffle
}

; Low half is <b4,b5,b6,a3> (mask <12,13,14,3>), high half is undef.
define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_cde3XXXX:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pandn %xmm0, %xmm2
; SSE2-NEXT: por %xmm1, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_cde3XXXX:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[6,7,u,u,u,u,u,u,u,u]
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13],zero,zero,xmm1[u,u,u,u,u,u,u,u]
; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_cde3XXXX:
; SSE41: # %bb.0:
; SSE41-NEXT:
pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_cde3XXXX:
; AVX: # %bb.0:
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 13, i32 14, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuffle
}

; Fully defined two-input shuffle <a0,a1,a2,b5,b4,b5,b6,a3> (mask <0,1,2,13,12,13,14,3>).
; The SSE2 run expects a long eight-instruction sequence; later runs use a blend plus one byte shuffle.
define <8 x i16> @shuffle_v8i16_012dcde3(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_012dcde3:
; SSE2: # %bb.0:
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3,2,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[3,1,2,0,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,3,0,2,4,5,6,7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_012dcde3:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,8,9,10,11,12,13],zero,zero
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7]
; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_012dcde3:
; SSE41: # %bb.0:
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v8i16_012dcde3:
; AVX1: # %bb.0:
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 =
xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7] 1432 ; AVX1-NEXT: retq 1433 ; 1434 ; AVX2OR512VL-LABEL: shuffle_v8i16_012dcde3: 1435 ; AVX2OR512VL: # %bb.0: 1436 ; AVX2OR512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] 1437 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7] 1438 ; AVX2OR512VL-NEXT: retq 1439 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 12, i32 13, i32 14, i32 3> 1440 ret <8 x i16> %shuffle 1441 } 1442 1443 define <8 x i16> @shuffle_v8i16_0923cde7(<8 x i16> %a, <8 x i16> %b) { 1444 ; SSE2-LABEL: shuffle_v8i16_0923cde7: 1445 ; SSE2: # %bb.0: 1446 ; SSE2-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535] 1447 ; SSE2-NEXT: andps %xmm2, %xmm0 1448 ; SSE2-NEXT: andnps %xmm1, %xmm2 1449 ; SSE2-NEXT: orps %xmm2, %xmm0 1450 ; SSE2-NEXT: retq 1451 ; 1452 ; SSSE3-LABEL: shuffle_v8i16_0923cde7: 1453 ; SSSE3: # %bb.0: 1454 ; SSSE3-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535] 1455 ; SSSE3-NEXT: andps %xmm2, %xmm0 1456 ; SSSE3-NEXT: andnps %xmm1, %xmm2 1457 ; SSSE3-NEXT: orps %xmm2, %xmm0 1458 ; SSSE3-NEXT: retq 1459 ; 1460 ; SSE41-LABEL: shuffle_v8i16_0923cde7: 1461 ; SSE41: # %bb.0: 1462 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7] 1463 ; SSE41-NEXT: retq 1464 ; 1465 ; AVX-LABEL: shuffle_v8i16_0923cde7: 1466 ; AVX: # %bb.0: 1467 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7] 1468 ; AVX-NEXT: retq 1469 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 12, i32 13, i32 14, i32 7> 1470 ret <8 x i16> %shuffle 1471 } 1472 1473 define <8 x i16> @shuffle_v8i16_XXX1X579(<8 x i16> %a, <8 x i16> %b) { 1474 ; SSE2-LABEL: shuffle_v8i16_XXX1X579: 1475 ; SSE2: # %bb.0: 1476 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,2,0] 1477 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,65535,65535,0] 1478 ; SSE2-NEXT: pshuflw 
{{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7] 1479 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7] 1480 ; SSE2-NEXT: pand %xmm1, %xmm0 1481 ; SSE2-NEXT: pandn %xmm2, %xmm1 1482 ; SSE2-NEXT: por %xmm0, %xmm1 1483 ; SSE2-NEXT: movdqa %xmm1, %xmm0 1484 ; SSE2-NEXT: retq 1485 ; 1486 ; SSSE3-LABEL: shuffle_v8i16_XXX1X579: 1487 ; SSSE3: # %bb.0: 1488 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u],zero,zero,xmm1[u,u],zero,zero,zero,zero,xmm1[2,3] 1489 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,2,3,u,u,10,11,14,15],zero,zero 1490 ; SSSE3-NEXT: por %xmm1, %xmm0 1491 ; SSSE3-NEXT: retq 1492 ; 1493 ; SSE41-LABEL: shuffle_v8i16_XXX1X579: 1494 ; SSE41: # %bb.0: 1495 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0] 1496 ; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7] 1497 ; SSE41-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7] 1498 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 1499 ; SSE41-NEXT: retq 1500 ; 1501 ; AVX1-LABEL: shuffle_v8i16_XXX1X579: 1502 ; AVX1: # %bb.0: 1503 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0] 1504 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7] 1505 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7] 1506 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 1507 ; AVX1-NEXT: retq 1508 ; 1509 ; AVX2-SLOW-LABEL: shuffle_v8i16_XXX1X579: 1510 ; AVX2-SLOW: # %bb.0: 1511 ; AVX2-SLOW-NEXT: vpbroadcastd %xmm1, %xmm1 1512 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7] 1513 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7] 1514 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 1515 ; AVX2-SLOW-NEXT: retq 1516 ; 1517 ; AVX2-FAST-LABEL: shuffle_v8i16_XXX1X579: 1518 ; AVX2-FAST: # %bb.0: 1519 ; AVX2-FAST-NEXT: vpbroadcastd %xmm1, %xmm1 1520 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,2,3,8,9,10,11,14,15,14,15] 1521 ; AVX2-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 1522 ; 
AVX2-FAST-NEXT: retq 1523 ; 1524 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_XXX1X579: 1525 ; AVX512VL-SLOW: # %bb.0: 1526 ; AVX512VL-SLOW-NEXT: vpbroadcastd %xmm1, %xmm1 1527 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7] 1528 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7] 1529 ; AVX512VL-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 1530 ; AVX512VL-SLOW-NEXT: retq 1531 ; 1532 ; AVX512VL-FAST-LABEL: shuffle_v8i16_XXX1X579: 1533 ; AVX512VL-FAST: # %bb.0: 1534 ; AVX512VL-FAST-NEXT: vpbroadcastd %xmm1, %xmm1 1535 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,2,3,8,9,10,11,14,15,14,15] 1536 ; AVX512VL-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 1537 ; AVX512VL-FAST-NEXT: retq 1538 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 9> 1539 ret <8 x i16> %shuffle 1540 } 1541 1542 define <8 x i16> @shuffle_v8i16_XX4X8acX(<8 x i16> %a, <8 x i16> %b) { 1543 ; SSE2-LABEL: shuffle_v8i16_XX4X8acX: 1544 ; SSE2: # %bb.0: 1545 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] 1546 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0] 1547 ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7] 1548 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,2],xmm1[2,3] 1549 ; SSE2-NEXT: retq 1550 ; 1551 ; SSSE3-LABEL: shuffle_v8i16_XX4X8acX: 1552 ; SSSE3: # %bb.0: 1553 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,8,9,u,u],zero,zero,zero,zero,zero,zero,xmm0[u,u] 1554 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u],zero,zero,xmm1[u,u,0,1,4,5,8,9,u,u] 1555 ; SSSE3-NEXT: por %xmm1, %xmm0 1556 ; SSSE3-NEXT: retq 1557 ; 1558 ; SSE41-LABEL: shuffle_v8i16_XX4X8acX: 1559 ; SSE41: # %bb.0: 1560 ; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5] 1561 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] 1562 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 1563 ; SSE41-NEXT: retq 
1564 ; 1565 ; AVX1-LABEL: shuffle_v8i16_XX4X8acX: 1566 ; AVX1: # %bb.0: 1567 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5] 1568 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] 1569 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 1570 ; AVX1-NEXT: retq 1571 ; 1572 ; AVX2OR512VL-LABEL: shuffle_v8i16_XX4X8acX: 1573 ; AVX2OR512VL: # %bb.0: 1574 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5] 1575 ; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] 1576 ; AVX2OR512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] 1577 ; AVX2OR512VL-NEXT: retq 1578 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 undef> 1579 ret <8 x i16> %shuffle 1580 } 1581 1582 define <8 x i16> @shuffle_v8i16_8zzzzzzz(i16 %i) { 1583 ; SSE-LABEL: shuffle_v8i16_8zzzzzzz: 1584 ; SSE: # %bb.0: 1585 ; SSE-NEXT: movzwl %di, %eax 1586 ; SSE-NEXT: movd %eax, %xmm0 1587 ; SSE-NEXT: retq 1588 ; 1589 ; AVX-LABEL: shuffle_v8i16_8zzzzzzz: 1590 ; AVX: # %bb.0: 1591 ; AVX-NEXT: movzwl %di, %eax 1592 ; AVX-NEXT: vmovd %eax, %xmm0 1593 ; AVX-NEXT: retq 1594 %a = insertelement <8 x i16> undef, i16 %i, i32 0 1595 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1596 ret <8 x i16> %shuffle 1597 } 1598 1599 define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) { 1600 ; SSE-LABEL: shuffle_v8i16_z8zzzzzz: 1601 ; SSE: # %bb.0: 1602 ; SSE-NEXT: pxor %xmm0, %xmm0 1603 ; SSE-NEXT: pinsrw $1, %edi, %xmm0 1604 ; SSE-NEXT: retq 1605 ; 1606 ; AVX-LABEL: shuffle_v8i16_z8zzzzzz: 1607 ; AVX: # %bb.0: 1608 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 1609 ; AVX-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 1610 ; AVX-NEXT: retq 1611 %a = insertelement <8 x i16> undef, i16 %i, i32 0 1612 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 
5, i32 4, i32 3> 1613 ret <8 x i16> %shuffle 1614 } 1615 1616 define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) { 1617 ; SSE-LABEL: shuffle_v8i16_zzzzz8zz: 1618 ; SSE: # %bb.0: 1619 ; SSE-NEXT: pxor %xmm0, %xmm0 1620 ; SSE-NEXT: pinsrw $5, %edi, %xmm0 1621 ; SSE-NEXT: retq 1622 ; 1623 ; AVX-LABEL: shuffle_v8i16_zzzzz8zz: 1624 ; AVX: # %bb.0: 1625 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 1626 ; AVX-NEXT: vpinsrw $5, %edi, %xmm0, %xmm0 1627 ; AVX-NEXT: retq 1628 %a = insertelement <8 x i16> undef, i16 %i, i32 0 1629 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0> 1630 ret <8 x i16> %shuffle 1631 } 1632 1633 define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) { 1634 ; SSE-LABEL: shuffle_v8i16_zuuzuuz8: 1635 ; SSE: # %bb.0: 1636 ; SSE-NEXT: pxor %xmm0, %xmm0 1637 ; SSE-NEXT: pinsrw $7, %edi, %xmm0 1638 ; SSE-NEXT: retq 1639 ; 1640 ; AVX-LABEL: shuffle_v8i16_zuuzuuz8: 1641 ; AVX: # %bb.0: 1642 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 1643 ; AVX-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 1644 ; AVX-NEXT: retq 1645 %a = insertelement <8 x i16> undef, i16 %i, i32 0 1646 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8> 1647 ret <8 x i16> %shuffle 1648 } 1649 1650 define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) { 1651 ; SSE-LABEL: shuffle_v8i16_zzBzzzzz: 1652 ; SSE: # %bb.0: 1653 ; SSE-NEXT: pxor %xmm0, %xmm0 1654 ; SSE-NEXT: pinsrw $2, %edi, %xmm0 1655 ; SSE-NEXT: retq 1656 ; 1657 ; AVX-LABEL: shuffle_v8i16_zzBzzzzz: 1658 ; AVX: # %bb.0: 1659 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 1660 ; AVX-NEXT: vpinsrw $2, %edi, %xmm0, %xmm0 1661 ; AVX-NEXT: retq 1662 %a = insertelement <8 x i16> undef, i16 %i, i32 3 1663 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7> 1664 ret <8 x i16> %shuffle 1665 } 1666 1667 define <8 x i16> 
@shuffle_v8i16_def01234(<8 x i16> %a, <8 x i16> %b) { 1668 ; SSE2-LABEL: shuffle_v8i16_def01234: 1669 ; SSE2: # %bb.0: 1670 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1671 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 1672 ; SSE2-NEXT: por %xmm1, %xmm0 1673 ; SSE2-NEXT: retq 1674 ; 1675 ; SSSE3-LABEL: shuffle_v8i16_def01234: 1676 ; SSSE3: # %bb.0: 1677 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 1678 ; SSSE3-NEXT: retq 1679 ; 1680 ; SSE41-LABEL: shuffle_v8i16_def01234: 1681 ; SSE41: # %bb.0: 1682 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 1683 ; SSE41-NEXT: retq 1684 ; 1685 ; AVX-LABEL: shuffle_v8i16_def01234: 1686 ; AVX: # %bb.0: 1687 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 1688 ; AVX-NEXT: retq 1689 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4> 1690 ret <8 x i16> %shuffle 1691 } 1692 1693 define <8 x i16> @shuffle_v8i16_ueuu123u(<8 x i16> %a, <8 x i16> %b) { 1694 ; SSE2-LABEL: shuffle_v8i16_ueuu123u: 1695 ; SSE2: # %bb.0: 1696 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1697 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 1698 ; SSE2-NEXT: por %xmm1, %xmm0 1699 ; SSE2-NEXT: retq 1700 ; 1701 ; SSSE3-LABEL: shuffle_v8i16_ueuu123u: 1702 ; SSSE3: # %bb.0: 1703 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 1704 ; SSSE3-NEXT: retq 1705 ; 1706 ; SSE41-LABEL: shuffle_v8i16_ueuu123u: 1707 ; SSE41: # %bb.0: 1708 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 1709 ; SSE41-NEXT: retq 1710 ; 1711 ; AVX-LABEL: shuffle_v8i16_ueuu123u: 1712 ; AVX: # %bb.0: 1713 ; 
AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 1714 ; AVX-NEXT: retq 1715 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 14, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef> 1716 ret <8 x i16> %shuffle 1717 } 1718 1719 define <8 x i16> @shuffle_v8i16_56701234(<8 x i16> %a, <8 x i16> %b) { 1720 ; SSE2-LABEL: shuffle_v8i16_56701234: 1721 ; SSE2: # %bb.0: 1722 ; SSE2-NEXT: movdqa %xmm0, %xmm1 1723 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1724 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 1725 ; SSE2-NEXT: por %xmm1, %xmm0 1726 ; SSE2-NEXT: retq 1727 ; 1728 ; SSSE3-LABEL: shuffle_v8i16_56701234: 1729 ; SSSE3: # %bb.0: 1730 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 1731 ; SSSE3-NEXT: retq 1732 ; 1733 ; SSE41-LABEL: shuffle_v8i16_56701234: 1734 ; SSE41: # %bb.0: 1735 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 1736 ; SSE41-NEXT: retq 1737 ; 1738 ; AVX-LABEL: shuffle_v8i16_56701234: 1739 ; AVX: # %bb.0: 1740 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 1741 ; AVX-NEXT: retq 1742 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4> 1743 ret <8 x i16> %shuffle 1744 } 1745 1746 define <8 x i16> @shuffle_v8i16_u6uu123u(<8 x i16> %a, <8 x i16> %b) { 1747 ; SSE2-LABEL: shuffle_v8i16_u6uu123u: 1748 ; SSE2: # %bb.0: 1749 ; SSE2-NEXT: movdqa %xmm0, %xmm1 1750 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1751 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 1752 ; SSE2-NEXT: por %xmm1, %xmm0 1753 ; SSE2-NEXT: retq 1754 ; 1755 ; SSSE3-LABEL: shuffle_v8i16_u6uu123u: 1756 ; SSSE3: # %bb.0: 1757 ; SSSE3-NEXT: 
palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 1758 ; SSSE3-NEXT: retq 1759 ; 1760 ; SSE41-LABEL: shuffle_v8i16_u6uu123u: 1761 ; SSE41: # %bb.0: 1762 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 1763 ; SSE41-NEXT: retq 1764 ; 1765 ; AVX-LABEL: shuffle_v8i16_u6uu123u: 1766 ; AVX: # %bb.0: 1767 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 1768 ; AVX-NEXT: retq 1769 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef> 1770 ret <8 x i16> %shuffle 1771 } 1772 1773 define <8 x i16> @shuffle_v8i16_uuuu123u(<8 x i16> %a, <8 x i16> %b) { 1774 ; SSE-LABEL: shuffle_v8i16_uuuu123u: 1775 ; SSE: # %bb.0: 1776 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 1777 ; SSE-NEXT: retq 1778 ; 1779 ; AVX-LABEL: shuffle_v8i16_uuuu123u: 1780 ; AVX: # %bb.0: 1781 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 1782 ; AVX-NEXT: retq 1783 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef> 1784 ret <8 x i16> %shuffle 1785 } 1786 1787 define <8 x i16> @shuffle_v8i16_bcdef012(<8 x i16> %a, <8 x i16> %b) { 1788 ; SSE2-LABEL: shuffle_v8i16_bcdef012: 1789 ; SSE2: # %bb.0: 1790 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1791 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5] 1792 ; SSE2-NEXT: por %xmm1, %xmm0 1793 ; SSE2-NEXT: retq 1794 ; 1795 ; SSSE3-LABEL: shuffle_v8i16_bcdef012: 1796 ; SSSE3: # %bb.0: 1797 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] 1798 ; SSSE3-NEXT: retq 1799 ; 1800 ; SSE41-LABEL: shuffle_v8i16_bcdef012: 1801 ; SSE41: # %bb.0: 1802 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = 
xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] 1803 ; SSE41-NEXT: retq 1804 ; 1805 ; AVX-LABEL: shuffle_v8i16_bcdef012: 1806 ; AVX: # %bb.0: 1807 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] 1808 ; AVX-NEXT: retq 1809 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2> 1810 ret <8 x i16> %shuffle 1811 } 1812 1813 define <8 x i16> @shuffle_v8i16_ucdeuu1u(<8 x i16> %a, <8 x i16> %b) { 1814 ; SSE2-LABEL: shuffle_v8i16_ucdeuu1u: 1815 ; SSE2: # %bb.0: 1816 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1817 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5] 1818 ; SSE2-NEXT: por %xmm1, %xmm0 1819 ; SSE2-NEXT: retq 1820 ; 1821 ; SSSE3-LABEL: shuffle_v8i16_ucdeuu1u: 1822 ; SSSE3: # %bb.0: 1823 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] 1824 ; SSSE3-NEXT: retq 1825 ; 1826 ; SSE41-LABEL: shuffle_v8i16_ucdeuu1u: 1827 ; SSE41: # %bb.0: 1828 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] 1829 ; SSE41-NEXT: retq 1830 ; 1831 ; AVX-LABEL: shuffle_v8i16_ucdeuu1u: 1832 ; AVX: # %bb.0: 1833 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] 1834 ; AVX-NEXT: retq 1835 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 1, i32 undef> 1836 ret <8 x i16> %shuffle 1837 } 1838 1839 define <8 x i16> @shuffle_v8i16_34567012(<8 x i16> %a, <8 x i16> %b) { 1840 ; SSE2-LABEL: shuffle_v8i16_34567012: 1841 ; SSE2: # %bb.0: 1842 ; SSE2-NEXT: movdqa %xmm0, %xmm1 1843 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1844 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5] 1845 ; SSE2-NEXT: 
por %xmm1, %xmm0 1846 ; SSE2-NEXT: retq 1847 ; 1848 ; SSSE3-LABEL: shuffle_v8i16_34567012: 1849 ; SSSE3: # %bb.0: 1850 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 1851 ; SSSE3-NEXT: retq 1852 ; 1853 ; SSE41-LABEL: shuffle_v8i16_34567012: 1854 ; SSE41: # %bb.0: 1855 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 1856 ; SSE41-NEXT: retq 1857 ; 1858 ; AVX-LABEL: shuffle_v8i16_34567012: 1859 ; AVX: # %bb.0: 1860 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 1861 ; AVX-NEXT: retq 1862 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2> 1863 ret <8 x i16> %shuffle 1864 } 1865 1866 define <8 x i16> @shuffle_v8i16_u456uu1u(<8 x i16> %a, <8 x i16> %b) { 1867 ; SSE2-LABEL: shuffle_v8i16_u456uu1u: 1868 ; SSE2: # %bb.0: 1869 ; SSE2-NEXT: movdqa %xmm0, %xmm1 1870 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1871 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5] 1872 ; SSE2-NEXT: por %xmm1, %xmm0 1873 ; SSE2-NEXT: retq 1874 ; 1875 ; SSSE3-LABEL: shuffle_v8i16_u456uu1u: 1876 ; SSSE3: # %bb.0: 1877 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 1878 ; SSSE3-NEXT: retq 1879 ; 1880 ; SSE41-LABEL: shuffle_v8i16_u456uu1u: 1881 ; SSE41: # %bb.0: 1882 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 1883 ; SSE41-NEXT: retq 1884 ; 1885 ; AVX-LABEL: shuffle_v8i16_u456uu1u: 1886 ; AVX: # %bb.0: 1887 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 1888 ; AVX-NEXT: retq 1889 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 undef> 1890 ret <8 x i16> %shuffle 1891 } 1892 1893 define <8 x i16> @shuffle_v8i16_u456uuuu(<8 x i16> %a, <8 x 
i16> %b) { 1894 ; SSE-LABEL: shuffle_v8i16_u456uuuu: 1895 ; SSE: # %bb.0: 1896 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1897 ; SSE-NEXT: retq 1898 ; 1899 ; AVX-LABEL: shuffle_v8i16_u456uuuu: 1900 ; AVX: # %bb.0: 1901 ; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1902 ; AVX-NEXT: retq 1903 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef> 1904 ret <8 x i16> %shuffle 1905 } 1906 1907 define <8 x i16> @shuffle_v8i16_3456789a(<8 x i16> %a, <8 x i16> %b) { 1908 ; SSE2-LABEL: shuffle_v8i16_3456789a: 1909 ; SSE2: # %bb.0: 1910 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1911 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5] 1912 ; SSE2-NEXT: por %xmm1, %xmm0 1913 ; SSE2-NEXT: retq 1914 ; 1915 ; SSSE3-LABEL: shuffle_v8i16_3456789a: 1916 ; SSSE3: # %bb.0: 1917 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] 1918 ; SSSE3-NEXT: movdqa %xmm1, %xmm0 1919 ; SSSE3-NEXT: retq 1920 ; 1921 ; SSE41-LABEL: shuffle_v8i16_3456789a: 1922 ; SSE41: # %bb.0: 1923 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] 1924 ; SSE41-NEXT: movdqa %xmm1, %xmm0 1925 ; SSE41-NEXT: retq 1926 ; 1927 ; AVX-LABEL: shuffle_v8i16_3456789a: 1928 ; AVX: # %bb.0: 1929 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] 1930 ; AVX-NEXT: retq 1931 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10> 1932 ret <8 x i16> %shuffle 1933 } 1934 1935 define <8 x i16> @shuffle_v8i16_u456uu9u(<8 x i16> %a, <8 x i16> %b) { 1936 ; SSE2-LABEL: shuffle_v8i16_u456uu9u: 1937 ; SSE2: # %bb.0: 1938 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = 
xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1939 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5] 1940 ; SSE2-NEXT: por %xmm1, %xmm0 1941 ; SSE2-NEXT: retq 1942 ; 1943 ; SSSE3-LABEL: shuffle_v8i16_u456uu9u: 1944 ; SSSE3: # %bb.0: 1945 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] 1946 ; SSSE3-NEXT: movdqa %xmm1, %xmm0 1947 ; SSSE3-NEXT: retq 1948 ; 1949 ; SSE41-LABEL: shuffle_v8i16_u456uu9u: 1950 ; SSE41: # %bb.0: 1951 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] 1952 ; SSE41-NEXT: movdqa %xmm1, %xmm0 1953 ; SSE41-NEXT: retq 1954 ; 1955 ; AVX-LABEL: shuffle_v8i16_u456uu9u: 1956 ; AVX: # %bb.0: 1957 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] 1958 ; AVX-NEXT: retq 1959 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 9, i32 undef> 1960 ret <8 x i16> %shuffle 1961 } 1962 1963 define <8 x i16> @shuffle_v8i16_56789abc(<8 x i16> %a, <8 x i16> %b) { 1964 ; SSE2-LABEL: shuffle_v8i16_56789abc: 1965 ; SSE2: # %bb.0: 1966 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1967 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9] 1968 ; SSE2-NEXT: por %xmm1, %xmm0 1969 ; SSE2-NEXT: retq 1970 ; 1971 ; SSSE3-LABEL: shuffle_v8i16_56789abc: 1972 ; SSSE3: # %bb.0: 1973 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] 1974 ; SSSE3-NEXT: movdqa %xmm1, %xmm0 1975 ; SSSE3-NEXT: retq 1976 ; 1977 ; SSE41-LABEL: shuffle_v8i16_56789abc: 1978 ; SSE41: # %bb.0: 1979 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] 1980 ; SSE41-NEXT: movdqa %xmm1, %xmm0 1981 ; SSE41-NEXT: retq 1982 ; 1983 ; AVX-LABEL: shuffle_v8i16_56789abc: 1984 ; AVX: # %bb.0: 1985 
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] 1986 ; AVX-NEXT: retq 1987 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12> 1988 ret <8 x i16> %shuffle 1989 } 1990 1991 define <8 x i16> @shuffle_v8i16_u6uu9abu(<8 x i16> %a, <8 x i16> %b) { 1992 ; SSE2-LABEL: shuffle_v8i16_u6uu9abu: 1993 ; SSE2: # %bb.0: 1994 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1995 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9] 1996 ; SSE2-NEXT: por %xmm1, %xmm0 1997 ; SSE2-NEXT: retq 1998 ; 1999 ; SSSE3-LABEL: shuffle_v8i16_u6uu9abu: 2000 ; SSSE3: # %bb.0: 2001 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] 2002 ; SSSE3-NEXT: movdqa %xmm1, %xmm0 2003 ; SSSE3-NEXT: retq 2004 ; 2005 ; SSE41-LABEL: shuffle_v8i16_u6uu9abu: 2006 ; SSE41: # %bb.0: 2007 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] 2008 ; SSE41-NEXT: movdqa %xmm1, %xmm0 2009 ; SSE41-NEXT: retq 2010 ; 2011 ; AVX-LABEL: shuffle_v8i16_u6uu9abu: 2012 ; AVX: # %bb.0: 2013 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] 2014 ; AVX-NEXT: retq 2015 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef> 2016 ret <8 x i16> %shuffle 2017 } 2018 2019 define <8 x i16> @shuffle_v8i16_0uuu1uuu(<8 x i16> %a) { 2020 ; SSE2-LABEL: shuffle_v8i16_0uuu1uuu: 2021 ; SSE2: # %bb.0: 2022 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] 2023 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7] 2024 ; SSE2-NEXT: retq 2025 ; 2026 ; SSSE3-LABEL: shuffle_v8i16_0uuu1uuu: 2027 ; SSSE3: # %bb.0: 2028 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] 2029 ; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7] 2030 ; SSSE3-NEXT: 
retq 2031 ; 2032 ; SSE41-LABEL: shuffle_v8i16_0uuu1uuu: 2033 ; SSE41: # %bb.0: 2034 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 2035 ; SSE41-NEXT: retq 2036 ; 2037 ; AVX-LABEL: shuffle_v8i16_0uuu1uuu: 2038 ; AVX: # %bb.0: 2039 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 2040 ; AVX-NEXT: retq 2041 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef> 2042 ret <8 x i16> %shuffle 2043 } 2044 2045 define <8 x i16> @shuffle_v8i16_0zzz1zzz(<8 x i16> %a) { 2046 ; SSE2-LABEL: shuffle_v8i16_0zzz1zzz: 2047 ; SSE2: # %bb.0: 2048 ; SSE2-NEXT: pxor %xmm1, %xmm1 2049 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2050 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2051 ; SSE2-NEXT: retq 2052 ; 2053 ; SSSE3-LABEL: shuffle_v8i16_0zzz1zzz: 2054 ; SSSE3: # %bb.0: 2055 ; SSSE3-NEXT: pxor %xmm1, %xmm1 2056 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2057 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2058 ; SSSE3-NEXT: retq 2059 ; 2060 ; SSE41-LABEL: shuffle_v8i16_0zzz1zzz: 2061 ; SSE41: # %bb.0: 2062 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 2063 ; SSE41-NEXT: retq 2064 ; 2065 ; AVX-LABEL: shuffle_v8i16_0zzz1zzz: 2066 ; AVX: # %bb.0: 2067 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 2068 ; AVX-NEXT: retq 2069 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15> 2070 ret <8 x i16> %shuffle 2071 } 2072 2073 define <8 x i16> @shuffle_v8i16_0u1u2u3u(<8 x i16> %a) { 2074 ; SSE2-LABEL: shuffle_v8i16_0u1u2u3u: 2075 ; SSE2: # %bb.0: 2076 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = 
xmm0[0,0,1,1,2,2,3,3] 2077 ; SSE2-NEXT: retq 2078 ; 2079 ; SSSE3-LABEL: shuffle_v8i16_0u1u2u3u: 2080 ; SSSE3: # %bb.0: 2081 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 2082 ; SSSE3-NEXT: retq 2083 ; 2084 ; SSE41-LABEL: shuffle_v8i16_0u1u2u3u: 2085 ; SSE41: # %bb.0: 2086 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2087 ; SSE41-NEXT: retq 2088 ; 2089 ; AVX-LABEL: shuffle_v8i16_0u1u2u3u: 2090 ; AVX: # %bb.0: 2091 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2092 ; AVX-NEXT: retq 2093 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef> 2094 ret <8 x i16> %shuffle 2095 } 2096 2097 define <8 x i16> @shuffle_v8i16_0z1z2z3z(<8 x i16> %a) { 2098 ; SSE2-LABEL: shuffle_v8i16_0z1z2z3z: 2099 ; SSE2: # %bb.0: 2100 ; SSE2-NEXT: pxor %xmm1, %xmm1 2101 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2102 ; SSE2-NEXT: retq 2103 ; 2104 ; SSSE3-LABEL: shuffle_v8i16_0z1z2z3z: 2105 ; SSSE3: # %bb.0: 2106 ; SSSE3-NEXT: pxor %xmm1, %xmm1 2107 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2108 ; SSSE3-NEXT: retq 2109 ; 2110 ; SSE41-LABEL: shuffle_v8i16_0z1z2z3z: 2111 ; SSE41: # %bb.0: 2112 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2113 ; SSE41-NEXT: retq 2114 ; 2115 ; AVX-LABEL: shuffle_v8i16_0z1z2z3z: 2116 ; AVX: # %bb.0: 2117 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2118 ; AVX-NEXT: retq 2119 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15> 2120 ret <8 x i16> %shuffle 2121 } 2122 2123 define <8 x i16> @shuffle_v8i16_01100110(<8 x i16> %a) { 2124 ; SSE-LABEL: shuffle_v8i16_01100110: 2125 ; 
SSE: # %bb.0: 2126 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7] 2127 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 2128 ; SSE-NEXT: retq 2129 ; 2130 ; AVX1-LABEL: shuffle_v8i16_01100110: 2131 ; AVX1: # %bb.0: 2132 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7] 2133 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 2134 ; AVX1-NEXT: retq 2135 ; 2136 ; AVX2-SLOW-LABEL: shuffle_v8i16_01100110: 2137 ; AVX2-SLOW: # %bb.0: 2138 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7] 2139 ; AVX2-SLOW-NEXT: vpbroadcastq %xmm0, %xmm0 2140 ; AVX2-SLOW-NEXT: retq 2141 ; 2142 ; AVX2-FAST-LABEL: shuffle_v8i16_01100110: 2143 ; AVX2-FAST: # %bb.0: 2144 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1] 2145 ; AVX2-FAST-NEXT: retq 2146 ; 2147 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_01100110: 2148 ; AVX512VL-SLOW: # %bb.0: 2149 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7] 2150 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 2151 ; AVX512VL-SLOW-NEXT: retq 2152 ; 2153 ; AVX512VL-FAST-LABEL: shuffle_v8i16_01100110: 2154 ; AVX512VL-FAST: # %bb.0: 2155 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1] 2156 ; AVX512VL-FAST-NEXT: retq 2157 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 1, i32 0> 2158 ret <8 x i16> %shuffle 2159 } 2160 2161 define <8 x i16> @shuffle_v8i16_01u0u110(<8 x i16> %a) { 2162 ; SSE-LABEL: shuffle_v8i16_01u0u110: 2163 ; SSE: # %bb.0: 2164 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7] 2165 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 2166 ; SSE-NEXT: retq 2167 ; 2168 ; AVX1-LABEL: shuffle_v8i16_01u0u110: 2169 ; AVX1: # %bb.0: 2170 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7] 2171 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 2172 ; AVX1-NEXT: retq 2173 ; 2174 ; AVX2-SLOW-LABEL: shuffle_v8i16_01u0u110: 2175 ; AVX2-SLOW: # 
%bb.0:
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
; AVX2-SLOW-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v8i16_01u0u110:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v8i16_01u0u110:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i16_01u0u110:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
; AVX512VL-FAST-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 undef, i32 0, i32 undef, i32 1, i32 1, i32 0>
  ret <8 x i16> %shuffle
}

; Single-input shuffle taking elements 4,6,7 and 6,7,5 of %a with two undef
; lanes in between; the second operand (zero) is never selected.
define <8 x i16> @shuffle_v8i16_467uu675(<8 x i16> %a) {
; SSE-LABEL: shuffle_v8i16_467uu675:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-NEXT:    retq
;
; AVX1-LABEL: shuffle_v8i16_467uu675:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: shuffle_v8i16_467uu675:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: shuffle_v8i16_467uu675:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,12,13,14,15,10,11,8,9,12,13,14,15,10,11]
; AVX2-FAST-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v8i16_467uu675:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i16_467uu675:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,12,13,14,15,10,11,8,9,12,13,14,15,10,11]
; AVX512VL-FAST-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 4, i32 6, i32 7, i32 undef, i32 undef, i32 6, i32 7, i32 5>
  ret <8 x i16> %shuffle
}

;
; Shuffle to logical bit shifts
;

; Zero in the even lanes and elements 0,2,4,6 of %a in the odd lanes; expected
; to lower to a left shift by 16 of each 32-bit lane.
define <8 x i16> @shuffle_v8i16_z0z2z4z6(<8 x i16> %a) {
; SSE-LABEL: shuffle_v8i16_z0z2z4z6:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld $16, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_z0z2z4z6:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslld $16, %xmm0, %xmm0
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 8, i32 2, i32 8, i32 4, i32 8, i32 6>
  ret <8 x i16> %shuffle
}

; Elements 0 and 4 of %a moved to the top of each 64-bit half, zero elsewhere;
; expected to lower to a left shift by 48 of each 64-bit lane.
define <8 x i16> @shuffle_v8i16_zzz0zzz4(<8 x i16> %a) {
; SSE-LABEL: shuffle_v8i16_zzz0zzz4:
; SSE:       # %bb.0:
; SSE-NEXT:    psllq $48, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_zzz0zzz4:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsllq $48, %xmm0, %xmm0
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 8, i32 0, i32 8, i32 8, i32 8, i32 4>
  ret <8 x i16> %shuffle
}

; Same idea with undef lanes (X) in the mask; expected to lower to a left
; shift by 32 of each 64-bit lane.
define <8 x i16> @shuffle_v8i16_zz01zX4X(<8 x i16> %a) {
; SSE-LABEL: shuffle_v8i16_zz01zX4X:
; SSE:       # %bb.0:
; SSE-NEXT:    psllq $32, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_zz01zX4X:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsllq $32, %xmm0, %xmm0
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 0, i32 1, i32 8, i32 undef, i32 4, i32 undef>
  ret <8 x i16> %shuffle
}

; One zero shifted into the bottom of each 64-bit half (one undef lane);
; expected to lower to a left shift by 16 of each 64-bit lane.
define <8 x i16> @shuffle_v8i16_z0X2z456(<8 x i16> %a) {
; SSE-LABEL: shuffle_v8i16_z0X2z456:
; SSE:       # %bb.0:
; SSE-NEXT:    psllq $16, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_z0X2z456:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsllq $16, %xmm0, %xmm0
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 undef, i32 2, i32 8, i32 4, i32 5, i32 6>
  ret <8 x i16> %shuffle
}

; Odd elements of %a in the even lanes and zero in the odd lanes (one undef
; lane); expected to lower to a logical right shift by 16 of each 32-bit lane.
define <8 x i16> @shuffle_v8i16_1z3zXz7z(<8 x i16> %a) {
; SSE-LABEL: shuffle_v8i16_1z3zXz7z:
; SSE:       # %bb.0:
; SSE-NEXT:    psrld $16, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_1z3zXz7z:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrld $16, %xmm0, %xmm0
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 8, i32 3, i32 8, i32 undef, i32 8, i32 7, i32 8>
  ret <8 x i16> %shuffle
}

; One zero shifted into the top of each 64-bit half (one undef lane); expected
; to lower to a logical right shift by 16 of each 64-bit lane.
define <8 x i16> @shuffle_v8i16_1X3z567z(<8 x i16> %a) {
; SSE-LABEL: shuffle_v8i16_1X3z567z:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlq $16, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_1X3z567z:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlq $16, %xmm0, %xmm0
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 undef, i32 3, i32 8, i32 5, i32 6, i32 7, i32 8>
  ret <8 x i16> %shuffle
}

; Upper two elements of each 64-bit half moved down, zero above; expected to
; lower to a logical right shift by 32 of each 64-bit lane.
define <8 x i16> @shuffle_v8i16_23zz67zz(<8 x i16> %a) {
; SSE-LABEL: shuffle_v8i16_23zz67zz:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlq $32, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_23zz67zz:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlq $32, %xmm0, %xmm0
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 2, i32 3, i32 8, i32 8, i32 6, i32 7, i32 8, i32 8>
  ret <8 x i16> %shuffle
}

; Element 3 in lane 0, zeros and undef lanes elsewhere; expected to lower to a
; logical right shift by 48 of each 64-bit lane.
define <8 x i16> @shuffle_v8i16_3zXXXzzz(<8 x i16> %a) {
; SSE-LABEL: shuffle_v8i16_3zXXXzzz:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlq $48, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_3zXXXzzz:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlq $48, %xmm0, %xmm0
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 3, i32 8, i32 undef, i32 undef, i32 undef, i32 8, i32 8, i32 8>
  ret <8 x i16> %shuffle
}

; Low 64 bits of %a kept in place (with undef lanes) and the upper half zeroed;
; expected to lower to a zero-extending 64-bit move.
define <8 x i16> @shuffle_v8i16_01u3zzuz(<8 x i16> %a) {
; SSE-LABEL: shuffle_v8i16_01u3zzuz:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_01u3zzuz:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 undef, i32 3, i32 8, i32 8, i32 undef, i32 8>
  ret <8 x i16> %shuffle
}

; Identity shuffle of %a except that lane 1 is taken from the zero vector
; (mask index 9).
define <8 x i16> @shuffle_v8i16_0z234567(<8 x i16> %a) {
; SSE2-LABEL: shuffle_v8i16_0z234567:
; SSE2:       # %bb.0:
; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_0z234567:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_0z234567:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_0z234567:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %shuffle
}

; Keeps lanes 0, 5 and 7 of %a in place and zeroes lanes 1-4 and 6.
define <8 x i16> @shuffle_v8i16_0zzzz5z7(<8 x i16> %a) {
; SSE2-LABEL: shuffle_v8i16_0zzzz5z7:
; SSE2:       # %bb.0:
; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_0zzzz5z7:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_0zzzz5z7:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_0zzzz5z7:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 5, i32 8, i32 7>
  ret <8 x i16> %shuffle
}

; Keeps lanes 0-6 of %a in place and zeroes only lane 7 (mask index 15 selects
; from the zero vector).
; Review note - the mask used to carry index 9 in lane 1, which zeroed that
; lane as well, contradicted the function name and repeated the lane-1 case of
; shuffle_v8i16_0z234567. The blend assertions below were adjusted by hand for
; the corrected mask; regenerate with utils/update_llc_test_checks.py to
; confirm them.
define <8 x i16> @shuffle_v8i16_0123456z(<8 x i16> %a) {
; SSE2-LABEL: shuffle_v8i16_0123456z:
; SSE2:       # %bb.0:
; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_0123456z:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_0123456z:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_0123456z:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
  ret <8 x i16> %shuffle
}

; Two-input shuffle mixing elements of %a (3, 5) and %b (indices 15, 12, 12)
; with three undef lanes.
define <8 x i16> @shuffle_v8i16_fu3ucc5u(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_fu3ucc5u:
; SSE:       # %bb.0:
; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
; SSE-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_fu3ucc5u:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; AVX-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
; AVX-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 15, i32 undef, i32 3, i32 undef, i32 12, i32 12, i32 5, i32 undef>
  ret <8 x i16> %shuffle
}

; Zero in lane 0 followed by elements 0-5 of %a (last lane undef); expected to
; lower to a whole-register byte shift left by two bytes.
define <8 x i16> @shuffle_v8i16_8012345u(<8 x i16> %a) {
; SSE-LABEL: shuffle_v8i16_8012345u:
; SSE:       # %bb.0:
; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_8012345u:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 undef>

  ret <8 x i16> %shuffle
}

; Blend written as and/or bit masks on <2 x i64>: the top 32 bits are kept
; from %a and the low 96 bits from %b, then the two are OR-ed together.
define <8 x i16> @mask_v8i16_012345ef(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: mask_v8i16_012345ef:
; SSE2:       # %bb.0:
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: mask_v8i16_012345ef:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
; SSSE3-NEXT:    movaps %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: mask_v8i16_012345ef:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: mask_v8i16_012345ef:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
; AVX-NEXT:    retq
  %1 = bitcast <8 x i16> %a to <2 x i64>
  %2 = bitcast <8 x i16> %b to <2 x i64>
  %3 = and <2 x i64> %1, <i64 0, i64 -4294967296>
  %4 = and <2 x i64> %2, <i64 -1, i64 4294967295>
  %5 = or <2 x i64> %4, %3
  %6 = bitcast <2 x i64> %5 to <8 x i16>
  ret <8 x i16> %6
}

; Loads an i32, inserts it into lane 0 of a zero <4 x i32>, reinterprets the
; vector as <8 x i16> and splats i16 element 0.
define <8 x i16> @insert_dup_mem_v8i16_i32(i32* %ptr) {
; SSE-LABEL: insert_dup_mem_v8i16_i32:
; SSE:       # %bb.0:
; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: insert_dup_mem_v8i16_i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: insert_dup_mem_v8i16_i32:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpbroadcastw (%rdi), %xmm0
; AVX2OR512VL-NEXT:    retq
  %tmp = load i32, i32* %ptr, align 4
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
  %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %tmp3
}

; Loads an i16, sign-extends it to i32, inserts it into lane 0 of a zero
; <4 x i32>, reinterprets the vector as <8 x i16> and splats i16 element 0.
define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) {
; SSE-LABEL: insert_dup_mem_v8i16_sext_i16:
; SSE:       # %bb.0:
; SSE-NEXT:    movswl (%rdi), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: insert_dup_mem_v8i16_sext_i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movswl (%rdi), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_dup_mem_v8i16_sext_i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movswl (%rdi), %eax
; AVX2-NEXT:    vmovd %eax, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: insert_dup_mem_v8i16_sext_i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    movswl (%rdi), %eax
; AVX512VL-NEXT:    vpbroadcastw %eax, %xmm0
; AVX512VL-NEXT:    retq
  %tmp = load i16, i16* %ptr, align 2
  %tmp1 = sext i16 %tmp to i32
  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
  %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
  %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %tmp4
}

; Same as insert_dup_mem_v8i16_i32 but splats i16 element 1, i.e. the upper
; half of the loaded i32.
define <8 x i16> @insert_dup_elt1_mem_v8i16_i32(i32* %ptr) {
; SSE-LABEL: insert_dup_elt1_mem_v8i16_i32:
; SSE:       # %bb.0:
; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: insert_dup_elt1_mem_v8i16_i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: insert_dup_elt1_mem_v8i16_i32:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpbroadcastw 2(%rdi), %xmm0
; AVX2OR512VL-NEXT:    retq
  %tmp = load i32, i32* %ptr, align 4
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
  %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i16> %tmp3
}

; Inserts the loaded i32 into lane 1 of a zero <4 x i32> and splats i16
; element 3, i.e. the upper half of that inserted i32.
define <8 x i16> @insert_dup_elt3_mem_v8i16_i32(i32* %ptr) {
; SSE2-LABEL: insert_dup_elt3_mem_v8i16_i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,0,1,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_dup_elt3_mem_v8i16_i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_dup_elt3_mem_v8i16_i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: insert_dup_elt3_mem_v8i16_i32:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpbroadcastw 2(%rdi), %xmm0
; AVX2OR512VL-NEXT:    retq
  %tmp = load i32, i32* %ptr, align 4
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
  %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i16> %tmp3
}

; Sign-extended i16 load inserted into lane 0 of a zero <4 x i32>; splats i16
; element 1, i.e. the upper (sign) half of the extended value.
define <8 x i16> @insert_dup_elt1_mem_v8i16_sext_i16(i16* %ptr) {
; SSE-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
; SSE:       # %bb.0:
; SSE-NEXT:    movswl (%rdi), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movswl (%rdi), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movswl (%rdi), %eax
; AVX2-NEXT:    shrl $16, %eax
; AVX2-NEXT:    vmovd %eax, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    movswl (%rdi), %eax
; AVX512VL-NEXT:    shrl $16, %eax
; AVX512VL-NEXT:    vpbroadcastw %eax, %xmm0
; AVX512VL-NEXT:    retq
  %tmp = load i16, i16* %ptr, align 2
  %tmp1 = sext i16 %tmp to i32
  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
  %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
  %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i16> %tmp4
}

; Sign-extended i16 load inserted into lane 1 of a zero <4 x i32>; splats i16
; element 3, i.e. the upper (sign) half of the inserted value.
define <8 x i16> @insert_dup_elt3_mem_v8i16_sext_i16(i16* %ptr) {
; SSE2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movswl (%rdi), %eax
; SSE2-NEXT:    movd %eax, %xmm0
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,0,1,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movswl (%rdi), %eax
; SSSE3-NEXT:    movd %eax, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movswl (%rdi), %eax
; SSE41-NEXT:    movd %eax, %xmm0
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movswl (%rdi), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movswl (%rdi), %eax
; AVX2-NEXT:    shrl $16, %eax
; AVX2-NEXT:    vmovd %eax, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    movswl (%rdi), %eax
; AVX512VL-NEXT:    shrl $16, %eax
; AVX512VL-NEXT:    vpbroadcastw %eax, %xmm0
; AVX512VL-NEXT:    retq
  %tmp = load i16, i16* %ptr, align 2
  %tmp1 = sext i16 %tmp to i32
  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 1
  %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
  %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i16> %tmp4
}
