; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2

; Lowering tests for shufflevector on <8 x i16> at five x86-64 feature levels
; (baseline, +ssse3, +sse4.1, +avx, +avx2), one llc invocation per level above.
;
; Naming convention - the suffix of every function name spells its eight mask
; elements in lane order, one character per lane. Digits 0-7 select lanes of
; %a, hex digits 8-f (upper or lower case) select lanes of %b, and 'u' or 'X'
; marks an undef lane.
;
; Prefix layout - the three non-AVX invocations share the generic SSE prefix
; and the two AVX invocations share the generic AVX prefix. A function carries
; per-level blocks (SSE2, SSSE3, SSE41, AVX1, AVX2) only where the expected
; code differs between levels.
;
; The assertion comments are generated output. Regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

define <8 x i16> @shuffle_v8i16_01012323(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_01012323:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_01012323:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_67452301(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_67452301:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_67452301:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_456789AB(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_456789AB:
; SSE2: # BB#0:
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_456789AB:
; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_456789AB:
; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_456789AB:
; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_00000000(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_00000000:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_00000000:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_00000000:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v8i16_00000000:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v8i16_00000000:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_00004444(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_00004444:
; SSE: # BB#0:
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_00004444:
; AVX: # BB#0:
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_u0u1u2u3(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_u0u1u2u3:
; SSE: # BB#0:
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_u0u1u2u3:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_u4u5u6u7(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_u4u5u6u7:
; SSE: # BB#0:
; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_u4u5u6u7:
; AVX: # BB#0:
; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_31206745(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_31206745:
; SSE: # BB#0:
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_31206745:
; AVX: # BB#0:
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 5>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_44440000(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_44440000:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_44440000:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_44440000:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_44440000:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_23016745(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_23016745:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_23016745:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_23026745(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_23026745:
; SSE: # BB#0:
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_23026745:
; AVX: # BB#0:
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 5>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_23016747(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_23016747:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_23016747:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 7>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_75643120(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_75643120:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_75643120:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_75643120:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_75643120:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 0>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_10545410(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_10545410:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_10545410:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_10545410:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_10545410:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 0>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_54105410(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_54105410:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_54105410:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_54105410:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_54105410:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 0>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_54101054(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_54101054:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_54101054:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_54101054:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_54101054:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 4>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_04400440(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_04400440:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,4,6]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_04400440:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_04400440:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_04400440:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 0>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_40044004(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_40044004:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,0,2,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_40044004:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_40044004:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_40044004:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 4>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_26405173(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_26405173:
; SSE2: # BB#0:
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_26405173:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_26405173:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_26405173:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 3>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_20645173(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_20645173:
; SSE2: # BB#0:
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_20645173:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_20645173:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_20645173:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 3>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_26401375(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_26401375:
; SSE2: # BB#0:
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_26401375:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_26401375:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_26401375:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 5>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_66751643(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_66751643:
; SSE2: # BB#0:
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,3,2,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,4,6]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_66751643:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_66751643:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_66751643:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 3>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_60514754(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_60514754:
; SSE2: # BB#0:
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,5,6]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_60514754:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_60514754:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_60514754:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 6, i32 0, i32 5, i32 1, i32 4, i32 7, i32 5, i32 4>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_00444444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_00444444:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_00444444:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_00444444:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_00444444:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_44004444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_44004444:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,0,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_44004444:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_44004444:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_44004444:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_04404444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_04404444:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_04404444:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_04404444:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_04404444:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_04400000(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_04400000:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_04400000:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_04400000:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_04400000:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_04404567(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_04404567:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_04404567:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_0X444444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_0X444444:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,2,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_0X444444:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_0X444444:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_0X444444:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_44X04444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_44X04444:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_44X04444:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_44X04444:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_44X04444:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_X4404444(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_X4404444:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_X4404444:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_X4404444:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_X4404444:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_0127XXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_0127XXXX:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_0127XXXX:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_0127XXXX:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_0127XXXX:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_XXXX4563(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_XXXX4563:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_XXXX4563:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_XXXX4563:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_XXXX4563:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 3>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_4563XXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_4563XXXX:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_4563XXXX:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_4563XXXX:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_4563XXXX:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_01274563(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_01274563:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_01274563:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_01274563:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_01274563:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 3>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_45630127(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_45630127:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,3,1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_45630127:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_45630127:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_45630127:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 7>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_37102735(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_37102735:
; SSE2: # BB#0:
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,5,6]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_37102735:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_37102735:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_37102735:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 5>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_08192a3b(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_08192a3b:
; SSE: # BB#0:
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_08192a3b:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_0c1d2e3f(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_0c1d2e3f:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_0c1d2e3f:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 2, i32 14, i32 3, i32 15>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_4c5d6e7f(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_4c5d6e7f:
; SSE: # BB#0:
; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_4c5d6e7f:
; AVX: # BB#0:
; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_48596a7b(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_48596a7b:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_48596a7b:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 8, i32 5, i32 9, i32 6, i32 10, i32 7, i32 11>
  ret <8 x i16> %shuffle
}

define <8 x i16> @shuffle_v8i16_08196e7f(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_08196e7f:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
; SSE-NEXT: 
punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 922 ; SSE-NEXT: retq 923 ; 924 ; AVX-LABEL: shuffle_v8i16_08196e7f: 925 ; AVX: # BB#0: 926 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,3,2,3] 927 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] 928 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 929 ; AVX-NEXT: retq 930 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 6, i32 14, i32 7, i32 15> 931 ret <8 x i16> %shuffle 932 } 933 934 define <8 x i16> @shuffle_v8i16_0c1d6879(<8 x i16> %a, <8 x i16> %b) { 935 ; SSE-LABEL: shuffle_v8i16_0c1d6879: 936 ; SSE: # BB#0: 937 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,0,2,3] 938 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] 939 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 940 ; SSE-NEXT: retq 941 ; 942 ; AVX-LABEL: shuffle_v8i16_0c1d6879: 943 ; AVX: # BB#0: 944 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,0,2,3] 945 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3] 946 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 947 ; AVX-NEXT: retq 948 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 6, i32 8, i32 7, i32 9> 949 ret <8 x i16> %shuffle 950 } 951 952 define <8 x i16> @shuffle_v8i16_109832ba(<8 x i16> %a, <8 x i16> %b) { 953 ; SSE-LABEL: shuffle_v8i16_109832ba: 954 ; SSE: # BB#0: 955 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 956 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7] 957 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5] 958 ; SSE-NEXT: retq 959 ; 960 ; AVX-LABEL: shuffle_v8i16_109832ba: 961 ; AVX: # BB#0: 962 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 963 ; AVX-NEXT: vpshuflw 
{{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7] 964 ; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5] 965 ; AVX-NEXT: retq 966 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 9, i32 8, i32 3, i32 2, i32 11, i32 10> 967 ret <8 x i16> %shuffle 968 } 969 970 define <8 x i16> @shuffle_v8i16_8091a2b3(<8 x i16> %a, <8 x i16> %b) { 971 ; SSE-LABEL: shuffle_v8i16_8091a2b3: 972 ; SSE: # BB#0: 973 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 974 ; SSE-NEXT: movdqa %xmm1, %xmm0 975 ; SSE-NEXT: retq 976 ; 977 ; AVX-LABEL: shuffle_v8i16_8091a2b3: 978 ; AVX: # BB#0: 979 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 980 ; AVX-NEXT: retq 981 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3> 982 ret <8 x i16> %shuffle 983 } 984 define <8 x i16> @shuffle_v8i16_c4d5e6f7(<8 x i16> %a, <8 x i16> %b) { 985 ; SSE-LABEL: shuffle_v8i16_c4d5e6f7: 986 ; SSE: # BB#0: 987 ; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 988 ; SSE-NEXT: movdqa %xmm1, %xmm0 989 ; SSE-NEXT: retq 990 ; 991 ; AVX-LABEL: shuffle_v8i16_c4d5e6f7: 992 ; AVX: # BB#0: 993 ; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 994 ; AVX-NEXT: retq 995 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7> 996 ret <8 x i16> %shuffle 997 } 998 999 define <8 x i16> @shuffle_v8i16_0213cedf(<8 x i16> %a, <8 x i16> %b) { 1000 ; SSE-LABEL: shuffle_v8i16_0213cedf: 1001 ; SSE: # BB#0: 1002 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] 1003 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 1004 ; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7] 1005 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1006 ; SSE-NEXT: retq 1007 ; 
1008 ; AVX-LABEL: shuffle_v8i16_0213cedf: 1009 ; AVX: # BB#0: 1010 ; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] 1011 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 1012 ; AVX-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7] 1013 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1014 ; AVX-NEXT: retq 1015 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 1, i32 3, i32 12, i32 14, i32 13, i32 15> 1016 ret <8 x i16> %shuffle 1017 } 1018 1019 define <8 x i16> @shuffle_v8i16_443aXXXX(<8 x i16> %a, <8 x i16> %b) { 1020 ; SSE2-LABEL: shuffle_v8i16_443aXXXX: 1021 ; SSE2: # BB#0: 1022 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,0,65535,65535,65535,65535,65535] 1023 ; SSE2-NEXT: pand %xmm2, %xmm0 1024 ; SSE2-NEXT: pandn %xmm1, %xmm2 1025 ; SSE2-NEXT: por %xmm0, %xmm2 1026 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,1,2,3] 1027 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7] 1028 ; SSE2-NEXT: retq 1029 ; 1030 ; SSSE3-LABEL: shuffle_v8i16_443aXXXX: 1031 ; SSSE3: # BB#0: 1032 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[4,5,u,u,u,u,u,u,u,u] 1033 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7],zero,zero,xmm0[u,u,u,u,u,u,u,u] 1034 ; SSSE3-NEXT: por %xmm1, %xmm0 1035 ; SSSE3-NEXT: retq 1036 ; 1037 ; SSE41-LABEL: shuffle_v8i16_443aXXXX: 1038 ; SSE41: # BB#0: 1039 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7] 1040 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3] 1041 ; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7] 1042 ; SSE41-NEXT: retq 1043 ; 1044 ; AVX-LABEL: shuffle_v8i16_443aXXXX: 1045 ; AVX: # BB#0: 1046 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7] 1047 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3] 1048 ; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7] 1049 ; AVX-NEXT: retq 1050 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 3, i32 10, i32 undef, i32 undef, 
i32 undef, i32 undef> 1051 ret <8 x i16> %shuffle 1052 } 1053 1054 define <8 x i16> @shuffle_v8i16_032dXXXX(<8 x i16> %a, <8 x i16> %b) { 1055 ; SSE2-LABEL: shuffle_v8i16_032dXXXX: 1056 ; SSE2: # BB#0: 1057 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] 1058 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,0] 1059 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,7] 1060 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3] 1061 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7] 1062 ; SSE2-NEXT: retq 1063 ; 1064 ; SSSE3-LABEL: shuffle_v8i16_032dXXXX: 1065 ; SSSE3: # BB#0: 1066 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u] 1067 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u] 1068 ; SSSE3-NEXT: por %xmm1, %xmm0 1069 ; SSSE3-NEXT: retq 1070 ; 1071 ; SSE41-LABEL: shuffle_v8i16_032dXXXX: 1072 ; SSE41: # BB#0: 1073 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 1074 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3] 1075 ; SSE41-NEXT: retq 1076 ; 1077 ; AVX1-LABEL: shuffle_v8i16_032dXXXX: 1078 ; AVX1: # BB#0: 1079 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 1080 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3] 1081 ; AVX1-NEXT: retq 1082 ; 1083 ; AVX2-LABEL: shuffle_v8i16_032dXXXX: 1084 ; AVX2: # BB#0: 1085 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] 1086 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3] 1087 ; AVX2-NEXT: retq 1088 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 3, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef> 1089 ret <8 x i16> %shuffle 1090 } 1091 define <8 x i16> @shuffle_v8i16_XXXdXXXX(<8 x i16> %a, <8 x i16> %b) { 1092 ; SSE-LABEL: shuffle_v8i16_XXXdXXXX: 1093 ; SSE: # BB#0: 1094 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3] 1095 ; SSE-NEXT: retq 1096 ; 1097 ; 
AVX-LABEL: shuffle_v8i16_XXXdXXXX: 1098 ; AVX: # BB#0: 1099 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,2,3,3] 1100 ; AVX-NEXT: retq 1101 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 13, i32 undef, i32 undef, i32 undef, i32 undef> 1102 ret <8 x i16> %shuffle 1103 } 1104 1105 define <8 x i16> @shuffle_v8i16_012dXXXX(<8 x i16> %a, <8 x i16> %b) { 1106 ; SSE2-LABEL: shuffle_v8i16_012dXXXX: 1107 ; SSE2: # BB#0: 1108 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535] 1109 ; SSE2-NEXT: pand %xmm2, %xmm0 1110 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] 1111 ; SSE2-NEXT: pandn %xmm1, %xmm2 1112 ; SSE2-NEXT: por %xmm2, %xmm0 1113 ; SSE2-NEXT: retq 1114 ; 1115 ; SSSE3-LABEL: shuffle_v8i16_012dXXXX: 1116 ; SSSE3: # BB#0: 1117 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u] 1118 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u] 1119 ; SSSE3-NEXT: por %xmm1, %xmm0 1120 ; SSSE3-NEXT: retq 1121 ; 1122 ; SSE41-LABEL: shuffle_v8i16_012dXXXX: 1123 ; SSE41: # BB#0: 1124 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] 1125 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] 1126 ; SSE41-NEXT: retq 1127 ; 1128 ; AVX-LABEL: shuffle_v8i16_012dXXXX: 1129 ; AVX: # BB#0: 1130 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] 1131 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] 1132 ; AVX-NEXT: retq 1133 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef> 1134 ret <8 x i16> %shuffle 1135 } 1136 1137 define <8 x i16> @shuffle_v8i16_XXXXcde3(<8 x i16> %a, <8 x i16> %b) { 1138 ; SSE2-LABEL: shuffle_v8i16_XXXXcde3: 1139 ; SSE2: # BB#0: 1140 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535,65535,65535,65535,0] 1141 ; SSE2-NEXT: pand %xmm2, %xmm1 1142 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 
= xmm0[0,1,0,1] 1143 ; SSE2-NEXT: pandn %xmm0, %xmm2 1144 ; SSE2-NEXT: por %xmm1, %xmm2 1145 ; SSE2-NEXT: movdqa %xmm2, %xmm0 1146 ; SSE2-NEXT: retq 1147 ; 1148 ; SSSE3-LABEL: shuffle_v8i16_XXXXcde3: 1149 ; SSSE3: # BB#0: 1150 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm0[6,7] 1151 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,8,9,10,11,12,13],zero,zero 1152 ; SSSE3-NEXT: por %xmm1, %xmm0 1153 ; SSSE3-NEXT: retq 1154 ; 1155 ; SSE41-LABEL: shuffle_v8i16_XXXXcde3: 1156 ; SSE41: # BB#0: 1157 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 1158 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7] 1159 ; SSE41-NEXT: retq 1160 ; 1161 ; AVX1-LABEL: shuffle_v8i16_XXXXcde3: 1162 ; AVX1: # BB#0: 1163 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 1164 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7] 1165 ; AVX1-NEXT: retq 1166 ; 1167 ; AVX2-LABEL: shuffle_v8i16_XXXXcde3: 1168 ; AVX2: # BB#0: 1169 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0 1170 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7] 1171 ; AVX2-NEXT: retq 1172 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 3> 1173 ret <8 x i16> %shuffle 1174 } 1175 1176 define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) { 1177 ; SSE2-LABEL: shuffle_v8i16_cde3XXXX: 1178 ; SSE2: # BB#0: 1179 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535] 1180 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 1181 ; SSE2-NEXT: pand %xmm2, %xmm1 1182 ; SSE2-NEXT: pandn %xmm0, %xmm2 1183 ; SSE2-NEXT: por %xmm1, %xmm2 1184 ; SSE2-NEXT: movdqa %xmm2, %xmm0 1185 ; SSE2-NEXT: retq 1186 ; 1187 ; SSSE3-LABEL: shuffle_v8i16_cde3XXXX: 1188 ; SSSE3: # BB#0: 1189 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[6,7,u,u,u,u,u,u,u,u] 1190 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = 
xmm1[8,9,10,11,12,13],zero,zero,xmm1[u,u,u,u,u,u,u,u] 1191 ; SSSE3-NEXT: por %xmm1, %xmm0 1192 ; SSSE3-NEXT: retq 1193 ; 1194 ; SSE41-LABEL: shuffle_v8i16_cde3XXXX: 1195 ; SSE41: # BB#0: 1196 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 1197 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7] 1198 ; SSE41-NEXT: retq 1199 ; 1200 ; AVX-LABEL: shuffle_v8i16_cde3XXXX: 1201 ; AVX: # BB#0: 1202 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 1203 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7] 1204 ; AVX-NEXT: retq 1205 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 13, i32 14, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> 1206 ret <8 x i16> %shuffle 1207 } 1208 1209 define <8 x i16> @shuffle_v8i16_012dcde3(<8 x i16> %a, <8 x i16> %b) { 1210 ; SSE2-LABEL: shuffle_v8i16_012dcde3: 1211 ; SSE2: # BB#0: 1212 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] 1213 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,3,2,1] 1214 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7] 1215 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0] 1216 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7] 1217 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,7] 1218 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1] 1219 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,3,0,2,4,5,6,7] 1220 ; SSE2-NEXT: retq 1221 ; 1222 ; SSSE3-LABEL: shuffle_v8i16_012dcde3: 1223 ; SSSE3: # BB#0: 1224 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,8,9,10,11,12,13],zero,zero 1225 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7] 1226 ; SSSE3-NEXT: por %xmm1, %xmm0 1227 ; SSSE3-NEXT: retq 1228 ; 1229 ; SSE41-LABEL: shuffle_v8i16_012dcde3: 1230 ; SSE41: # BB#0: 1231 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 1232 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7] 1233 ; SSE41-NEXT: retq 
1234 ; 1235 ; AVX1-LABEL: shuffle_v8i16_012dcde3: 1236 ; AVX1: # BB#0: 1237 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 1238 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7] 1239 ; AVX1-NEXT: retq 1240 ; 1241 ; AVX2-LABEL: shuffle_v8i16_012dcde3: 1242 ; AVX2: # BB#0: 1243 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] 1244 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7] 1245 ; AVX2-NEXT: retq 1246 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 12, i32 13, i32 14, i32 3> 1247 ret <8 x i16> %shuffle 1248 } 1249 1250 define <8 x i16> @shuffle_v8i16_0923cde7(<8 x i16> %a, <8 x i16> %b) { 1251 ; SSE2-LABEL: shuffle_v8i16_0923cde7: 1252 ; SSE2: # BB#0: 1253 ; SSE2-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535] 1254 ; SSE2-NEXT: andps %xmm2, %xmm0 1255 ; SSE2-NEXT: andnps %xmm1, %xmm2 1256 ; SSE2-NEXT: orps %xmm2, %xmm0 1257 ; SSE2-NEXT: retq 1258 ; 1259 ; SSSE3-LABEL: shuffle_v8i16_0923cde7: 1260 ; SSSE3: # BB#0: 1261 ; SSSE3-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535] 1262 ; SSSE3-NEXT: andps %xmm2, %xmm0 1263 ; SSSE3-NEXT: andnps %xmm1, %xmm2 1264 ; SSSE3-NEXT: orps %xmm2, %xmm0 1265 ; SSSE3-NEXT: retq 1266 ; 1267 ; SSE41-LABEL: shuffle_v8i16_0923cde7: 1268 ; SSE41: # BB#0: 1269 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7] 1270 ; SSE41-NEXT: retq 1271 ; 1272 ; AVX-LABEL: shuffle_v8i16_0923cde7: 1273 ; AVX: # BB#0: 1274 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7] 1275 ; AVX-NEXT: retq 1276 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 12, i32 13, i32 14, i32 7> 1277 ret <8 x i16> %shuffle 1278 } 1279 1280 define <8 x i16> @shuffle_v8i16_XXX1X579(<8 x i16> %a, <8 x i16> %b) { 1281 ; SSE2-LABEL: shuffle_v8i16_XXX1X579: 1282 ; SSE2: # BB#0: 1283 ; SSE2-NEXT: pshufd 
{{.*#+}} xmm2 = xmm1[0,1,2,0] 1284 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,65535,65535,0] 1285 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7] 1286 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7] 1287 ; SSE2-NEXT: pand %xmm1, %xmm0 1288 ; SSE2-NEXT: pandn %xmm2, %xmm1 1289 ; SSE2-NEXT: por %xmm0, %xmm1 1290 ; SSE2-NEXT: movdqa %xmm1, %xmm0 1291 ; SSE2-NEXT: retq 1292 ; 1293 ; SSSE3-LABEL: shuffle_v8i16_XXX1X579: 1294 ; SSSE3: # BB#0: 1295 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u],zero,zero,xmm1[u,u],zero,zero,zero,zero,xmm1[2,3] 1296 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,2,3,u,u,10,11,14,15],zero,zero 1297 ; SSSE3-NEXT: por %xmm1, %xmm0 1298 ; SSSE3-NEXT: retq 1299 ; 1300 ; SSE41-LABEL: shuffle_v8i16_XXX1X579: 1301 ; SSE41: # BB#0: 1302 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0] 1303 ; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7] 1304 ; SSE41-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7] 1305 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 1306 ; SSE41-NEXT: retq 1307 ; 1308 ; AVX1-LABEL: shuffle_v8i16_XXX1X579: 1309 ; AVX1: # BB#0: 1310 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0] 1311 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7] 1312 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7] 1313 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 1314 ; AVX1-NEXT: retq 1315 ; 1316 ; AVX2-LABEL: shuffle_v8i16_XXX1X579: 1317 ; AVX2: # BB#0: 1318 ; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1 1319 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7] 1320 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7] 1321 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 1322 ; AVX2-NEXT: retq 1323 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 9> 1324 ret <8 x i16> %shuffle 1325 } 1326 1327 define <8 x i16> 
@shuffle_v8i16_XX4X8acX(<8 x i16> %a, <8 x i16> %b) { 1328 ; SSE2-LABEL: shuffle_v8i16_XX4X8acX: 1329 ; SSE2: # BB#0: 1330 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3] 1331 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7] 1332 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0] 1333 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7] 1334 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] 1335 ; SSE2-NEXT: retq 1336 ; 1337 ; SSSE3-LABEL: shuffle_v8i16_XX4X8acX: 1338 ; SSSE3: # BB#0: 1339 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,8,9,u,u],zero,zero,zero,zero,zero,zero,xmm0[u,u] 1340 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u],zero,zero,xmm1[u,u,0,1,4,5,8,9,u,u] 1341 ; SSSE3-NEXT: por %xmm1, %xmm0 1342 ; SSSE3-NEXT: retq 1343 ; 1344 ; SSE41-LABEL: shuffle_v8i16_XX4X8acX: 1345 ; SSE41: # BB#0: 1346 ; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5] 1347 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] 1348 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 1349 ; SSE41-NEXT: retq 1350 ; 1351 ; AVX1-LABEL: shuffle_v8i16_XX4X8acX: 1352 ; AVX1: # BB#0: 1353 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5] 1354 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] 1355 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 1356 ; AVX1-NEXT: retq 1357 ; 1358 ; AVX2-LABEL: shuffle_v8i16_XX4X8acX: 1359 ; AVX2: # BB#0: 1360 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5] 1361 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] 1362 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] 1363 ; AVX2-NEXT: retq 1364 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 undef> 1365 ret <8 x i16> %shuffle 1366 } 1367 1368 define <8 x i16> @shuffle_v8i16_8zzzzzzz(i16 %i) { 1369 ; SSE-LABEL: shuffle_v8i16_8zzzzzzz: 1370 ; SSE: # BB#0: 1371 ; SSE-NEXT: movzwl %di, %eax 
1372 ; SSE-NEXT: movd %eax, %xmm0 1373 ; SSE-NEXT: retq 1374 ; 1375 ; AVX-LABEL: shuffle_v8i16_8zzzzzzz: 1376 ; AVX: # BB#0: 1377 ; AVX-NEXT: movzwl %di, %eax 1378 ; AVX-NEXT: vmovd %eax, %xmm0 1379 ; AVX-NEXT: retq 1380 %a = insertelement <8 x i16> undef, i16 %i, i32 0 1381 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1382 ret <8 x i16> %shuffle 1383 } 1384 1385 define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) { 1386 ; SSE-LABEL: shuffle_v8i16_z8zzzzzz: 1387 ; SSE: # BB#0: 1388 ; SSE-NEXT: pxor %xmm0, %xmm0 1389 ; SSE-NEXT: pinsrw $1, %edi, %xmm0 1390 ; SSE-NEXT: retq 1391 ; 1392 ; AVX-LABEL: shuffle_v8i16_z8zzzzzz: 1393 ; AVX: # BB#0: 1394 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 1395 ; AVX-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 1396 ; AVX-NEXT: retq 1397 %a = insertelement <8 x i16> undef, i16 %i, i32 0 1398 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3> 1399 ret <8 x i16> %shuffle 1400 } 1401 1402 define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) { 1403 ; SSE-LABEL: shuffle_v8i16_zzzzz8zz: 1404 ; SSE: # BB#0: 1405 ; SSE-NEXT: pxor %xmm0, %xmm0 1406 ; SSE-NEXT: pinsrw $5, %edi, %xmm0 1407 ; SSE-NEXT: retq 1408 ; 1409 ; AVX-LABEL: shuffle_v8i16_zzzzz8zz: 1410 ; AVX: # BB#0: 1411 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 1412 ; AVX-NEXT: vpinsrw $5, %edi, %xmm0, %xmm0 1413 ; AVX-NEXT: retq 1414 %a = insertelement <8 x i16> undef, i16 %i, i32 0 1415 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0> 1416 ret <8 x i16> %shuffle 1417 } 1418 1419 define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) { 1420 ; SSE-LABEL: shuffle_v8i16_zuuzuuz8: 1421 ; SSE: # BB#0: 1422 ; SSE-NEXT: pxor %xmm0, %xmm0 1423 ; SSE-NEXT: pinsrw $7, %edi, %xmm0 1424 ; SSE-NEXT: retq 1425 ; 1426 ; AVX-LABEL: shuffle_v8i16_zuuzuuz8: 1427 ; AVX: # BB#0: 1428 ; 
AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 1429 ; AVX-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 1430 ; AVX-NEXT: retq 1431 %a = insertelement <8 x i16> undef, i16 %i, i32 0 1432 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8> 1433 ret <8 x i16> %shuffle 1434 } 1435 1436 define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) { 1437 ; SSE-LABEL: shuffle_v8i16_zzBzzzzz: 1438 ; SSE: # BB#0: 1439 ; SSE-NEXT: pxor %xmm0, %xmm0 1440 ; SSE-NEXT: pinsrw $2, %edi, %xmm0 1441 ; SSE-NEXT: retq 1442 ; 1443 ; AVX-LABEL: shuffle_v8i16_zzBzzzzz: 1444 ; AVX: # BB#0: 1445 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 1446 ; AVX-NEXT: vpinsrw $2, %edi, %xmm0, %xmm0 1447 ; AVX-NEXT: retq 1448 %a = insertelement <8 x i16> undef, i16 %i, i32 3 1449 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7> 1450 ret <8 x i16> %shuffle 1451 } 1452 1453 define <8 x i16> @shuffle_v8i16_def01234(<8 x i16> %a, <8 x i16> %b) { 1454 ; SSE2-LABEL: shuffle_v8i16_def01234: 1455 ; SSE2: # BB#0: 1456 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1457 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 1458 ; SSE2-NEXT: por %xmm1, %xmm0 1459 ; SSE2-NEXT: retq 1460 ; 1461 ; SSSE3-LABEL: shuffle_v8i16_def01234: 1462 ; SSSE3: # BB#0: 1463 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 1464 ; SSSE3-NEXT: retq 1465 ; 1466 ; SSE41-LABEL: shuffle_v8i16_def01234: 1467 ; SSE41: # BB#0: 1468 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 1469 ; SSE41-NEXT: retq 1470 ; 1471 ; AVX-LABEL: shuffle_v8i16_def01234: 1472 ; AVX: # BB#0: 1473 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 1474 ; AVX-NEXT: retq 1475 %shuffle = shufflevector <8 x i16> 
%a, <8 x i16> %b, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4> 1476 ret <8 x i16> %shuffle 1477 } 1478 1479 define <8 x i16> @shuffle_v8i16_ueuu123u(<8 x i16> %a, <8 x i16> %b) { 1480 ; SSE2-LABEL: shuffle_v8i16_ueuu123u: 1481 ; SSE2: # BB#0: 1482 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1483 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 1484 ; SSE2-NEXT: por %xmm1, %xmm0 1485 ; SSE2-NEXT: retq 1486 ; 1487 ; SSSE3-LABEL: shuffle_v8i16_ueuu123u: 1488 ; SSSE3: # BB#0: 1489 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 1490 ; SSSE3-NEXT: retq 1491 ; 1492 ; SSE41-LABEL: shuffle_v8i16_ueuu123u: 1493 ; SSE41: # BB#0: 1494 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 1495 ; SSE41-NEXT: retq 1496 ; 1497 ; AVX-LABEL: shuffle_v8i16_ueuu123u: 1498 ; AVX: # BB#0: 1499 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 1500 ; AVX-NEXT: retq 1501 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 14, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef> 1502 ret <8 x i16> %shuffle 1503 } 1504 1505 define <8 x i16> @shuffle_v8i16_56701234(<8 x i16> %a, <8 x i16> %b) { 1506 ; SSE2-LABEL: shuffle_v8i16_56701234: 1507 ; SSE2: # BB#0: 1508 ; SSE2-NEXT: movdqa %xmm0, %xmm1 1509 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1510 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 1511 ; SSE2-NEXT: por %xmm1, %xmm0 1512 ; SSE2-NEXT: retq 1513 ; 1514 ; SSSE3-LABEL: shuffle_v8i16_56701234: 1515 ; SSSE3: # BB#0: 1516 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 1517 ; SSSE3-NEXT: retq 1518 ; 1519 ; SSE41-LABEL: shuffle_v8i16_56701234: 1520 ; SSE41: # BB#0: 1521 ; 
SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 1522 ; SSE41-NEXT: retq 1523 ; 1524 ; AVX-LABEL: shuffle_v8i16_56701234: 1525 ; AVX: # BB#0: 1526 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 1527 ; AVX-NEXT: retq 1528 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4> 1529 ret <8 x i16> %shuffle 1530 } 1531 1532 define <8 x i16> @shuffle_v8i16_u6uu123u(<8 x i16> %a, <8 x i16> %b) { 1533 ; SSE2-LABEL: shuffle_v8i16_u6uu123u: 1534 ; SSE2: # BB#0: 1535 ; SSE2-NEXT: movdqa %xmm0, %xmm1 1536 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1537 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 1538 ; SSE2-NEXT: por %xmm1, %xmm0 1539 ; SSE2-NEXT: retq 1540 ; 1541 ; SSSE3-LABEL: shuffle_v8i16_u6uu123u: 1542 ; SSSE3: # BB#0: 1543 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 1544 ; SSSE3-NEXT: retq 1545 ; 1546 ; SSE41-LABEL: shuffle_v8i16_u6uu123u: 1547 ; SSE41: # BB#0: 1548 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 1549 ; SSE41-NEXT: retq 1550 ; 1551 ; AVX-LABEL: shuffle_v8i16_u6uu123u: 1552 ; AVX: # BB#0: 1553 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 1554 ; AVX-NEXT: retq 1555 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef> 1556 ret <8 x i16> %shuffle 1557 } 1558 1559 define <8 x i16> @shuffle_v8i16_uuuu123u(<8 x i16> %a, <8 x i16> %b) { 1560 ; SSE-LABEL: shuffle_v8i16_uuuu123u: 1561 ; SSE: # BB#0: 1562 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 1563 ; SSE-NEXT: retq 1564 ; 1565 ; AVX-LABEL: shuffle_v8i16_uuuu123u: 1566 ; AVX: # BB#0: 1567 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = 
zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 1568 ; AVX-NEXT: retq 1569 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef> 1570 ret <8 x i16> %shuffle 1571 } 1572 1573 define <8 x i16> @shuffle_v8i16_bcdef012(<8 x i16> %a, <8 x i16> %b) { 1574 ; SSE2-LABEL: shuffle_v8i16_bcdef012: 1575 ; SSE2: # BB#0: 1576 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1577 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5] 1578 ; SSE2-NEXT: por %xmm1, %xmm0 1579 ; SSE2-NEXT: retq 1580 ; 1581 ; SSSE3-LABEL: shuffle_v8i16_bcdef012: 1582 ; SSSE3: # BB#0: 1583 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] 1584 ; SSSE3-NEXT: retq 1585 ; 1586 ; SSE41-LABEL: shuffle_v8i16_bcdef012: 1587 ; SSE41: # BB#0: 1588 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] 1589 ; SSE41-NEXT: retq 1590 ; 1591 ; AVX-LABEL: shuffle_v8i16_bcdef012: 1592 ; AVX: # BB#0: 1593 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] 1594 ; AVX-NEXT: retq 1595 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2> 1596 ret <8 x i16> %shuffle 1597 } 1598 1599 define <8 x i16> @shuffle_v8i16_ucdeuu1u(<8 x i16> %a, <8 x i16> %b) { 1600 ; SSE2-LABEL: shuffle_v8i16_ucdeuu1u: 1601 ; SSE2: # BB#0: 1602 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1603 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5] 1604 ; SSE2-NEXT: por %xmm1, %xmm0 1605 ; SSE2-NEXT: retq 1606 ; 1607 ; SSSE3-LABEL: shuffle_v8i16_ucdeuu1u: 1608 ; SSSE3: # BB#0: 1609 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] 1610 ; 
SSSE3-NEXT: retq 1611 ; 1612 ; SSE41-LABEL: shuffle_v8i16_ucdeuu1u: 1613 ; SSE41: # BB#0: 1614 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] 1615 ; SSE41-NEXT: retq 1616 ; 1617 ; AVX-LABEL: shuffle_v8i16_ucdeuu1u: 1618 ; AVX: # BB#0: 1619 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] 1620 ; AVX-NEXT: retq 1621 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 1, i32 undef> 1622 ret <8 x i16> %shuffle 1623 } 1624 1625 define <8 x i16> @shuffle_v8i16_34567012(<8 x i16> %a, <8 x i16> %b) { 1626 ; SSE2-LABEL: shuffle_v8i16_34567012: 1627 ; SSE2: # BB#0: 1628 ; SSE2-NEXT: movdqa %xmm0, %xmm1 1629 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1630 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5] 1631 ; SSE2-NEXT: por %xmm1, %xmm0 1632 ; SSE2-NEXT: retq 1633 ; 1634 ; SSSE3-LABEL: shuffle_v8i16_34567012: 1635 ; SSSE3: # BB#0: 1636 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 1637 ; SSSE3-NEXT: retq 1638 ; 1639 ; SSE41-LABEL: shuffle_v8i16_34567012: 1640 ; SSE41: # BB#0: 1641 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 1642 ; SSE41-NEXT: retq 1643 ; 1644 ; AVX-LABEL: shuffle_v8i16_34567012: 1645 ; AVX: # BB#0: 1646 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 1647 ; AVX-NEXT: retq 1648 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2> 1649 ret <8 x i16> %shuffle 1650 } 1651 1652 define <8 x i16> @shuffle_v8i16_u456uu1u(<8 x i16> %a, <8 x i16> %b) { 1653 ; SSE2-LABEL: shuffle_v8i16_u456uu1u: 1654 ; SSE2: # BB#0: 1655 ; SSE2-NEXT: movdqa %xmm0, %xmm1 1656 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = 
xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1657 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5] 1658 ; SSE2-NEXT: por %xmm1, %xmm0 1659 ; SSE2-NEXT: retq 1660 ; 1661 ; SSSE3-LABEL: shuffle_v8i16_u456uu1u: 1662 ; SSSE3: # BB#0: 1663 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 1664 ; SSSE3-NEXT: retq 1665 ; 1666 ; SSE41-LABEL: shuffle_v8i16_u456uu1u: 1667 ; SSE41: # BB#0: 1668 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 1669 ; SSE41-NEXT: retq 1670 ; 1671 ; AVX-LABEL: shuffle_v8i16_u456uu1u: 1672 ; AVX: # BB#0: 1673 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 1674 ; AVX-NEXT: retq 1675 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 undef> 1676 ret <8 x i16> %shuffle 1677 } 1678 1679 define <8 x i16> @shuffle_v8i16_u456uuuu(<8 x i16> %a, <8 x i16> %b) { 1680 ; SSE-LABEL: shuffle_v8i16_u456uuuu: 1681 ; SSE: # BB#0: 1682 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1683 ; SSE-NEXT: retq 1684 ; 1685 ; AVX-LABEL: shuffle_v8i16_u456uuuu: 1686 ; AVX: # BB#0: 1687 ; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1688 ; AVX-NEXT: retq 1689 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef> 1690 ret <8 x i16> %shuffle 1691 } 1692 1693 define <8 x i16> @shuffle_v8i16_3456789a(<8 x i16> %a, <8 x i16> %b) { 1694 ; SSE2-LABEL: shuffle_v8i16_3456789a: 1695 ; SSE2: # BB#0: 1696 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1697 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5] 1698 ; SSE2-NEXT: por %xmm1, %xmm0 1699 ; 
SSE2-NEXT: retq 1700 ; 1701 ; SSSE3-LABEL: shuffle_v8i16_3456789a: 1702 ; SSSE3: # BB#0: 1703 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] 1704 ; SSSE3-NEXT: movdqa %xmm1, %xmm0 1705 ; SSSE3-NEXT: retq 1706 ; 1707 ; SSE41-LABEL: shuffle_v8i16_3456789a: 1708 ; SSE41: # BB#0: 1709 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] 1710 ; SSE41-NEXT: movdqa %xmm1, %xmm0 1711 ; SSE41-NEXT: retq 1712 ; 1713 ; AVX-LABEL: shuffle_v8i16_3456789a: 1714 ; AVX: # BB#0: 1715 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] 1716 ; AVX-NEXT: retq 1717 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10> 1718 ret <8 x i16> %shuffle 1719 } 1720 1721 define <8 x i16> @shuffle_v8i16_u456uu9u(<8 x i16> %a, <8 x i16> %b) { 1722 ; SSE2-LABEL: shuffle_v8i16_u456uu9u: 1723 ; SSE2: # BB#0: 1724 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1725 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5] 1726 ; SSE2-NEXT: por %xmm1, %xmm0 1727 ; SSE2-NEXT: retq 1728 ; 1729 ; SSSE3-LABEL: shuffle_v8i16_u456uu9u: 1730 ; SSSE3: # BB#0: 1731 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] 1732 ; SSSE3-NEXT: movdqa %xmm1, %xmm0 1733 ; SSSE3-NEXT: retq 1734 ; 1735 ; SSE41-LABEL: shuffle_v8i16_u456uu9u: 1736 ; SSE41: # BB#0: 1737 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] 1738 ; SSE41-NEXT: movdqa %xmm1, %xmm0 1739 ; SSE41-NEXT: retq 1740 ; 1741 ; AVX-LABEL: shuffle_v8i16_u456uu9u: 1742 ; AVX: # BB#0: 1743 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] 1744 ; AVX-NEXT: retq 1745 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, 
i32 9, i32 undef> 1746 ret <8 x i16> %shuffle 1747 } 1748 1749 define <8 x i16> @shuffle_v8i16_56789abc(<8 x i16> %a, <8 x i16> %b) { 1750 ; SSE2-LABEL: shuffle_v8i16_56789abc: 1751 ; SSE2: # BB#0: 1752 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1753 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9] 1754 ; SSE2-NEXT: por %xmm1, %xmm0 1755 ; SSE2-NEXT: retq 1756 ; 1757 ; SSSE3-LABEL: shuffle_v8i16_56789abc: 1758 ; SSSE3: # BB#0: 1759 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] 1760 ; SSSE3-NEXT: movdqa %xmm1, %xmm0 1761 ; SSSE3-NEXT: retq 1762 ; 1763 ; SSE41-LABEL: shuffle_v8i16_56789abc: 1764 ; SSE41: # BB#0: 1765 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] 1766 ; SSE41-NEXT: movdqa %xmm1, %xmm0 1767 ; SSE41-NEXT: retq 1768 ; 1769 ; AVX-LABEL: shuffle_v8i16_56789abc: 1770 ; AVX: # BB#0: 1771 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] 1772 ; AVX-NEXT: retq 1773 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12> 1774 ret <8 x i16> %shuffle 1775 } 1776 1777 define <8 x i16> @shuffle_v8i16_u6uu9abu(<8 x i16> %a, <8 x i16> %b) { 1778 ; SSE2-LABEL: shuffle_v8i16_u6uu9abu: 1779 ; SSE2: # BB#0: 1780 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1781 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9] 1782 ; SSE2-NEXT: por %xmm1, %xmm0 1783 ; SSE2-NEXT: retq 1784 ; 1785 ; SSSE3-LABEL: shuffle_v8i16_u6uu9abu: 1786 ; SSSE3: # BB#0: 1787 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] 1788 ; SSSE3-NEXT: movdqa %xmm1, %xmm0 1789 ; SSSE3-NEXT: retq 1790 ; 1791 ; SSE41-LABEL: shuffle_v8i16_u6uu9abu: 1792 ; SSE41: # BB#0: 1793 ; 
SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] 1794 ; SSE41-NEXT: movdqa %xmm1, %xmm0 1795 ; SSE41-NEXT: retq 1796 ; 1797 ; AVX-LABEL: shuffle_v8i16_u6uu9abu: 1798 ; AVX: # BB#0: 1799 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] 1800 ; AVX-NEXT: retq 1801 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef> 1802 ret <8 x i16> %shuffle 1803 } 1804 1805 define <8 x i16> @shuffle_v8i16_0uuu1uuu(<8 x i16> %a) { 1806 ; SSE2-LABEL: shuffle_v8i16_0uuu1uuu: 1807 ; SSE2: # BB#0: 1808 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] 1809 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7] 1810 ; SSE2-NEXT: retq 1811 ; 1812 ; SSSE3-LABEL: shuffle_v8i16_0uuu1uuu: 1813 ; SSSE3: # BB#0: 1814 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] 1815 ; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7] 1816 ; SSSE3-NEXT: retq 1817 ; 1818 ; SSE41-LABEL: shuffle_v8i16_0uuu1uuu: 1819 ; SSE41: # BB#0: 1820 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1821 ; SSE41-NEXT: retq 1822 ; 1823 ; AVX-LABEL: shuffle_v8i16_0uuu1uuu: 1824 ; AVX: # BB#0: 1825 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1826 ; AVX-NEXT: retq 1827 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef> 1828 ret <8 x i16> %shuffle 1829 } 1830 1831 define <8 x i16> @shuffle_v8i16_0zzz1zzz(<8 x i16> %a) { 1832 ; SSE2-LABEL: shuffle_v8i16_0zzz1zzz: 1833 ; SSE2: # BB#0: 1834 ; SSE2-NEXT: pxor %xmm1, %xmm1 1835 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1836 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1837 ; SSE2-NEXT: retq 1838 ; 1839 ; SSSE3-LABEL: shuffle_v8i16_0zzz1zzz: 1840 ; 
SSSE3: # BB#0: 1841 ; SSSE3-NEXT: pxor %xmm1, %xmm1 1842 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1843 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1844 ; SSSE3-NEXT: retq 1845 ; 1846 ; SSE41-LABEL: shuffle_v8i16_0zzz1zzz: 1847 ; SSE41: # BB#0: 1848 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1849 ; SSE41-NEXT: retq 1850 ; 1851 ; AVX-LABEL: shuffle_v8i16_0zzz1zzz: 1852 ; AVX: # BB#0: 1853 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1854 ; AVX-NEXT: retq 1855 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15> 1856 ret <8 x i16> %shuffle 1857 } 1858 1859 define <8 x i16> @shuffle_v8i16_0u1u2u3u(<8 x i16> %a) { 1860 ; SSE2-LABEL: shuffle_v8i16_0u1u2u3u: 1861 ; SSE2: # BB#0: 1862 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1863 ; SSE2-NEXT: retq 1864 ; 1865 ; SSSE3-LABEL: shuffle_v8i16_0u1u2u3u: 1866 ; SSSE3: # BB#0: 1867 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1868 ; SSSE3-NEXT: retq 1869 ; 1870 ; SSE41-LABEL: shuffle_v8i16_0u1u2u3u: 1871 ; SSE41: # BB#0: 1872 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1873 ; SSE41-NEXT: retq 1874 ; 1875 ; AVX-LABEL: shuffle_v8i16_0u1u2u3u: 1876 ; AVX: # BB#0: 1877 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1878 ; AVX-NEXT: retq 1879 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef> 1880 ret <8 x i16> %shuffle 1881 } 1882 1883 define <8 x i16> @shuffle_v8i16_0z1z2z3z(<8 x i16> %a) { 1884 ; SSE2-LABEL: shuffle_v8i16_0z1z2z3z: 1885 ; SSE2: # BB#0: 1886 ; SSE2-NEXT: pxor %xmm1, %xmm1 1887 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1888 ; SSE2-NEXT: retq 1889 ; 1890 ; SSSE3-LABEL: shuffle_v8i16_0z1z2z3z: 1891 ; SSSE3: # BB#0: 1892 ; SSSE3-NEXT: pxor %xmm1, %xmm1 1893 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1894 ; SSSE3-NEXT: retq 1895 ; 1896 ; SSE41-LABEL: shuffle_v8i16_0z1z2z3z: 1897 ; SSE41: # BB#0: 1898 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1899 ; SSE41-NEXT: retq 1900 ; 1901 ; AVX-LABEL: shuffle_v8i16_0z1z2z3z: 1902 ; AVX: # BB#0: 1903 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1904 ; AVX-NEXT: retq 1905 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15> 1906 ret <8 x i16> %shuffle 1907 } 1908 1909 ; 1910 ; Shuffle to logical bit shifts 1911 ; 1912 define <8 x i16> @shuffle_v8i16_z0z2z4z6(<8 x i16> %a) { 1913 ; SSE-LABEL: shuffle_v8i16_z0z2z4z6: 1914 ; SSE: # BB#0: 1915 ; SSE-NEXT: pslld $16, %xmm0 1916 ; SSE-NEXT: retq 1917 ; 1918 ; AVX-LABEL: shuffle_v8i16_z0z2z4z6: 1919 ; AVX: # BB#0: 1920 ; AVX-NEXT: vpslld $16, %xmm0, %xmm0 1921 ; AVX-NEXT: retq 1922 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 8, i32 2, i32 8, i32 4, i32 8, i32 6> 1923 ret <8 x i16> %shuffle 1924 } 1925 1926 define <8 x i16> @shuffle_v8i16_zzz0zzz4(<8 x i16> %a) { 1927 ; SSE-LABEL: shuffle_v8i16_zzz0zzz4: 1928 ; SSE: # BB#0: 1929 ; SSE-NEXT: psllq $48, %xmm0 1930 ; SSE-NEXT: retq 1931 ; 1932 ; AVX-LABEL: shuffle_v8i16_zzz0zzz4: 1933 ; AVX: # BB#0: 1934 ; AVX-NEXT: vpsllq $48, %xmm0, %xmm0 1935 ; AVX-NEXT: retq 1936 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 8, i32 0, i32 8, i32 8, i32 8, i32 4> 1937 ret <8 x i16> %shuffle 1938 } 1939 1940 define <8 x i16> @shuffle_v8i16_zz01zX4X(<8 x i16> %a) { 1941 ; 
SSE-LABEL: shuffle_v8i16_zz01zX4X: 1942 ; SSE: # BB#0: 1943 ; SSE-NEXT: psllq $32, %xmm0 1944 ; SSE-NEXT: retq 1945 ; 1946 ; AVX-LABEL: shuffle_v8i16_zz01zX4X: 1947 ; AVX: # BB#0: 1948 ; AVX-NEXT: vpsllq $32, %xmm0, %xmm0 1949 ; AVX-NEXT: retq 1950 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 0, i32 1, i32 8, i32 undef, i32 4, i32 undef> 1951 ret <8 x i16> %shuffle 1952 } 1953 1954 define <8 x i16> @shuffle_v8i16_z0X2z456(<8 x i16> %a) { 1955 ; SSE-LABEL: shuffle_v8i16_z0X2z456: 1956 ; SSE: # BB#0: 1957 ; SSE-NEXT: psllq $16, %xmm0 1958 ; SSE-NEXT: retq 1959 ; 1960 ; AVX-LABEL: shuffle_v8i16_z0X2z456: 1961 ; AVX: # BB#0: 1962 ; AVX-NEXT: vpsllq $16, %xmm0, %xmm0 1963 ; AVX-NEXT: retq 1964 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 undef, i32 2, i32 8, i32 4, i32 5, i32 6> 1965 ret <8 x i16> %shuffle 1966 } 1967 1968 define <8 x i16> @shuffle_v8i16_1z3zXz7z(<8 x i16> %a) { 1969 ; SSE-LABEL: shuffle_v8i16_1z3zXz7z: 1970 ; SSE: # BB#0: 1971 ; SSE-NEXT: psrld $16, %xmm0 1972 ; SSE-NEXT: retq 1973 ; 1974 ; AVX-LABEL: shuffle_v8i16_1z3zXz7z: 1975 ; AVX: # BB#0: 1976 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm0 1977 ; AVX-NEXT: retq 1978 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 8, i32 3, i32 8, i32 undef, i32 8, i32 7, i32 8> 1979 ret <8 x i16> %shuffle 1980 } 1981 1982 define <8 x i16> @shuffle_v8i16_1X3z567z(<8 x i16> %a) { 1983 ; SSE-LABEL: shuffle_v8i16_1X3z567z: 1984 ; SSE: # BB#0: 1985 ; SSE-NEXT: psrlq $16, %xmm0 1986 ; SSE-NEXT: retq 1987 ; 1988 ; AVX-LABEL: shuffle_v8i16_1X3z567z: 1989 ; AVX: # BB#0: 1990 ; AVX-NEXT: vpsrlq $16, %xmm0, %xmm0 1991 ; AVX-NEXT: retq 1992 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 undef, i32 3, i32 8, i32 5, i32 6, i32 7, i32 8> 1993 ret <8 x i16> %shuffle 1994 } 1995 1996 define <8 x i16> @shuffle_v8i16_23zz67zz(<8 x i16> %a) { 1997 ; SSE-LABEL: 
shuffle_v8i16_23zz67zz: 1998 ; SSE: # BB#0: 1999 ; SSE-NEXT: psrlq $32, %xmm0 2000 ; SSE-NEXT: retq 2001 ; 2002 ; AVX-LABEL: shuffle_v8i16_23zz67zz: 2003 ; AVX: # BB#0: 2004 ; AVX-NEXT: vpsrlq $32, %xmm0, %xmm0 2005 ; AVX-NEXT: retq 2006 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 2, i32 3, i32 8, i32 8, i32 6, i32 7, i32 8, i32 8> 2007 ret <8 x i16> %shuffle 2008 } 2009 2010 define <8 x i16> @shuffle_v8i16_3zXXXzzz(<8 x i16> %a) { 2011 ; SSE-LABEL: shuffle_v8i16_3zXXXzzz: 2012 ; SSE: # BB#0: 2013 ; SSE-NEXT: psrlq $48, %xmm0 2014 ; SSE-NEXT: retq 2015 ; 2016 ; AVX-LABEL: shuffle_v8i16_3zXXXzzz: 2017 ; AVX: # BB#0: 2018 ; AVX-NEXT: vpsrlq $48, %xmm0, %xmm0 2019 ; AVX-NEXT: retq 2020 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 3, i32 8, i32 undef, i32 undef, i32 undef, i32 8, i32 8, i32 8> 2021 ret <8 x i16> %shuffle 2022 } 2023 2024 define <8 x i16> @shuffle_v8i16_01u3zzuz(<8 x i16> %a) { 2025 ; SSE-LABEL: shuffle_v8i16_01u3zzuz: 2026 ; SSE: # BB#0: 2027 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 2028 ; SSE-NEXT: retq 2029 ; 2030 ; AVX-LABEL: shuffle_v8i16_01u3zzuz: 2031 ; AVX: # BB#0: 2032 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 2033 ; AVX-NEXT: retq 2034 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 undef, i32 3, i32 8, i32 8, i32 undef, i32 8> 2035 ret <8 x i16> %shuffle 2036 } 2037 2038 define <8 x i16> @shuffle_v8i16_0z234567(<8 x i16> %a) { 2039 ; SSE2-LABEL: shuffle_v8i16_0z234567: 2040 ; SSE2: # BB#0: 2041 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0 2042 ; SSE2-NEXT: retq 2043 ; 2044 ; SSSE3-LABEL: shuffle_v8i16_0z234567: 2045 ; SSSE3: # BB#0: 2046 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0 2047 ; SSSE3-NEXT: retq 2048 ; 2049 ; SSE41-LABEL: shuffle_v8i16_0z234567: 2050 ; SSE41: # BB#0: 2051 ; SSE41-NEXT: pxor %xmm1, %xmm1 2052 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7] 2053 ; SSE41-NEXT: retq 2054 ; 2055 ; 
; AVX-LABEL: shuffle_v8i16_0z234567:
; AVX:       # BB#0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %shuffle
}

; Lanes 0, 5 and 7 pass through from %a; lanes 1-4 and 6 use mask index 8,
; i.e. lane 0 of the zeroinitializer operand, so they are zero.
define <8 x i16> @shuffle_v8i16_0zzzz5z7(<8 x i16> %a) {
; SSE2-LABEL: shuffle_v8i16_0zzzz5z7:
; SSE2:       # BB#0:
; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_0zzzz5z7:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_0zzzz5z7:
; SSE41:       # BB#0:
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_0zzzz5z7:
; AVX:       # BB#0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 5, i32 8, i32 7>
  ret <8 x i16> %shuffle
}

; Lanes 0-6 pass through from %a and only lane 7 is zero (mask index 15
; selects from the zeroinitializer operand), as the function name states.
; The mask previously used index 9 in lane 1, which also zeroed that lane and
; made this test overlap with shuffle_v8i16_0z234567.
; NOTE: the SSE41/AVX blend lines below were updated by hand to match the
; corrected mask; re-run utils/update_llc_test_checks.py to confirm them.
define <8 x i16> @shuffle_v8i16_0123456z(<8 x i16> %a) {
; SSE2-LABEL: shuffle_v8i16_0123456z:
; SSE2:       # BB#0:
; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v8i16_0123456z:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v8i16_0123456z:
; SSE41:       # BB#0:
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_0123456z:
; AVX:       # BB#0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
  ret <8 x i16> %shuffle
}

; Two-input shuffle with undef lanes: lane 0 = %b[7], lane 2 = %a[3],
; lanes 4 and 5 = %b[4], lane 6 = %a[5]; lanes 1, 3 and 7 are undef.
define <8 x i16> @shuffle_v8i16_fu3ucc5u(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_fu3ucc5u:
; SSE:       # BB#0:
; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
; SSE-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_fu3ucc5u:
; AVX:       # BB#0:
; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; AVX-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
; AVX-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 15, i32 undef, i32 3, i32 undef, i32 12, i32 12, i32 5, i32 undef>
  ret <8 x i16> %shuffle
}

; Mask index 8 is lane 0 of the zeroinitializer operand, so lane 0 is zero and
; lanes 1-6 hold %a[0..5]; lane 7 is undef. The expected code is a single
; two-byte left byte-shift with zero fill.
define <8 x i16> @shuffle_v8i16_8012345u(<8 x i16> %a) {
; SSE-LABEL: shuffle_v8i16_8012345u:
; SSE:       # BB#0:
; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v8i16_8012345u:
; AVX:       # BB#0:
; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 undef>

  ret <8 x i16> %shuffle
}

define <8 x i16> @insert_dup_mem_v8i16_i32(i32* %ptr) {
; SSE2-LABEL: insert_dup_mem_v8i16_i32:
; SSE2:       # BB#0:
; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] 2155 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2156 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 2157 ; SSE2-NEXT: retq 2158 ; 2159 ; SSSE3-LABEL: insert_dup_mem_v8i16_i32: 2160 ; SSSE3: # BB#0: 2161 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2162 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 2163 ; SSSE3-NEXT: retq 2164 ; 2165 ; SSE41-LABEL: insert_dup_mem_v8i16_i32: 2166 ; SSE41: # BB#0: 2167 ; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2168 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 2169 ; SSE41-NEXT: retq 2170 ; 2171 ; AVX1-LABEL: insert_dup_mem_v8i16_i32: 2172 ; AVX1: # BB#0: 2173 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2174 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 2175 ; AVX1-NEXT: retq 2176 ; 2177 ; AVX2-LABEL: insert_dup_mem_v8i16_i32: 2178 ; AVX2: # BB#0: 2179 ; AVX2-NEXT: vpbroadcastw (%rdi), %xmm0 2180 ; AVX2-NEXT: retq 2181 %tmp = load i32, i32* %ptr, align 4 2182 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 2183 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16> 2184 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> zeroinitializer 2185 ret <8 x i16> %tmp3 2186 } 2187 2188 define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) { 2189 ; SSE2-LABEL: insert_dup_mem_v8i16_sext_i16: 2190 ; SSE2: # BB#0: 2191 ; SSE2-NEXT: movswl (%rdi), %eax 2192 ; SSE2-NEXT: movd %eax, %xmm0 2193 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] 2194 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2195 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 2196 ; SSE2-NEXT: retq 2197 ; 2198 ; SSSE3-LABEL: insert_dup_mem_v8i16_sext_i16: 2199 ; SSSE3: # BB#0: 2200 ; SSSE3-NEXT: movswl (%rdi), %eax 2201 ; SSSE3-NEXT: movd %eax, %xmm0 2202 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 
2203 ; SSSE3-NEXT: retq 2204 ; 2205 ; SSE41-LABEL: insert_dup_mem_v8i16_sext_i16: 2206 ; SSE41: # BB#0: 2207 ; SSE41-NEXT: movswl (%rdi), %eax 2208 ; SSE41-NEXT: movd %eax, %xmm0 2209 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 2210 ; SSE41-NEXT: retq 2211 ; 2212 ; AVX1-LABEL: insert_dup_mem_v8i16_sext_i16: 2213 ; AVX1: # BB#0: 2214 ; AVX1-NEXT: movswl (%rdi), %eax 2215 ; AVX1-NEXT: vmovd %eax, %xmm0 2216 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 2217 ; AVX1-NEXT: retq 2218 ; 2219 ; AVX2-LABEL: insert_dup_mem_v8i16_sext_i16: 2220 ; AVX2: # BB#0: 2221 ; AVX2-NEXT: movswl (%rdi), %eax 2222 ; AVX2-NEXT: vmovd %eax, %xmm0 2223 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0 2224 ; AVX2-NEXT: retq 2225 %tmp = load i16, i16* %ptr, align 2 2226 %tmp1 = sext i16 %tmp to i32 2227 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0 2228 %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16> 2229 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> zeroinitializer 2230 ret <8 x i16> %tmp4 2231 } 2232 2233 define <8 x i16> @insert_dup_elt1_mem_v8i16_i32(i32* %ptr) { 2234 ; SSE2-LABEL: insert_dup_elt1_mem_v8i16_i32: 2235 ; SSE2: # BB#0: 2236 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2237 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] 2238 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7] 2239 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5] 2240 ; SSE2-NEXT: retq 2241 ; 2242 ; SSSE3-LABEL: insert_dup_elt1_mem_v8i16_i32: 2243 ; SSSE3: # BB#0: 2244 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2245 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3] 2246 ; SSSE3-NEXT: retq 2247 ; 2248 ; SSE41-LABEL: insert_dup_elt1_mem_v8i16_i32: 2249 ; SSE41: # BB#0: 2250 ; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2251 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3] 2252 ; SSE41-NEXT: retq 2253 ; 2254 ; AVX1-LABEL: 
insert_dup_elt1_mem_v8i16_i32: 2255 ; AVX1: # BB#0: 2256 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2257 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3] 2258 ; AVX1-NEXT: retq 2259 ; 2260 ; AVX2-LABEL: insert_dup_elt1_mem_v8i16_i32: 2261 ; AVX2: # BB#0: 2262 ; AVX2-NEXT: vpbroadcastw 2(%rdi), %xmm0 2263 ; AVX2-NEXT: retq 2264 %tmp = load i32, i32* %ptr, align 4 2265 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 2266 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16> 2267 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 2268 ret <8 x i16> %tmp3 2269 } 2270 2271 define <8 x i16> @insert_dup_elt3_mem_v8i16_i32(i32* %ptr) { 2272 ; SSE2-LABEL: insert_dup_elt3_mem_v8i16_i32: 2273 ; SSE2: # BB#0: 2274 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2275 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,0] 2276 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7] 2277 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7] 2278 ; SSE2-NEXT: retq 2279 ; 2280 ; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_i32: 2281 ; SSSE3: # BB#0: 2282 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2283 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3] 2284 ; SSSE3-NEXT: retq 2285 ; 2286 ; SSE41-LABEL: insert_dup_elt3_mem_v8i16_i32: 2287 ; SSE41: # BB#0: 2288 ; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2289 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3] 2290 ; SSE41-NEXT: retq 2291 ; 2292 ; AVX1-LABEL: insert_dup_elt3_mem_v8i16_i32: 2293 ; AVX1: # BB#0: 2294 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2295 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3] 2296 ; AVX1-NEXT: retq 2297 ; 2298 ; AVX2-LABEL: insert_dup_elt3_mem_v8i16_i32: 2299 ; AVX2: # BB#0: 2300 ; AVX2-NEXT: vpbroadcastw 2(%rdi), %xmm0 2301 ; AVX2-NEXT: retq 2302 %tmp = load i32, 
i32* %ptr, align 4 2303 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1 2304 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16> 2305 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 2306 ret <8 x i16> %tmp3 2307 } 2308 2309 define <8 x i16> @insert_dup_elt1_mem_v8i16_sext_i16(i16* %ptr) { 2310 ; SSE2-LABEL: insert_dup_elt1_mem_v8i16_sext_i16: 2311 ; SSE2: # BB#0: 2312 ; SSE2-NEXT: movswl (%rdi), %eax 2313 ; SSE2-NEXT: movd %eax, %xmm0 2314 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] 2315 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7] 2316 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5] 2317 ; SSE2-NEXT: retq 2318 ; 2319 ; SSSE3-LABEL: insert_dup_elt1_mem_v8i16_sext_i16: 2320 ; SSSE3: # BB#0: 2321 ; SSSE3-NEXT: movswl (%rdi), %eax 2322 ; SSSE3-NEXT: movd %eax, %xmm0 2323 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3] 2324 ; SSSE3-NEXT: retq 2325 ; 2326 ; SSE41-LABEL: insert_dup_elt1_mem_v8i16_sext_i16: 2327 ; SSE41: # BB#0: 2328 ; SSE41-NEXT: movswl (%rdi), %eax 2329 ; SSE41-NEXT: movd %eax, %xmm0 2330 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3] 2331 ; SSE41-NEXT: retq 2332 ; 2333 ; AVX1-LABEL: insert_dup_elt1_mem_v8i16_sext_i16: 2334 ; AVX1: # BB#0: 2335 ; AVX1-NEXT: movswl (%rdi), %eax 2336 ; AVX1-NEXT: vmovd %eax, %xmm0 2337 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3] 2338 ; AVX1-NEXT: retq 2339 ; 2340 ; AVX2-LABEL: insert_dup_elt1_mem_v8i16_sext_i16: 2341 ; AVX2: # BB#0: 2342 ; AVX2-NEXT: movswl (%rdi), %eax 2343 ; AVX2-NEXT: shrl $16, %eax 2344 ; AVX2-NEXT: vmovd %eax, %xmm0 2345 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0 2346 ; AVX2-NEXT: retq 2347 %tmp = load i16, i16* %ptr, align 2 2348 %tmp1 = sext i16 %tmp to i32 2349 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0 2350 %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16> 2351 %tmp4 = shufflevector 
<8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 2352 ret <8 x i16> %tmp4 2353 } 2354 2355 define <8 x i16> @insert_dup_elt3_mem_v8i16_sext_i16(i16* %ptr) { 2356 ; SSE2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16: 2357 ; SSE2: # BB#0: 2358 ; SSE2-NEXT: movswl (%rdi), %eax 2359 ; SSE2-NEXT: movd %eax, %xmm0 2360 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,0] 2361 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7] 2362 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7] 2363 ; SSE2-NEXT: retq 2364 ; 2365 ; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_sext_i16: 2366 ; SSSE3: # BB#0: 2367 ; SSSE3-NEXT: movswl (%rdi), %eax 2368 ; SSSE3-NEXT: movd %eax, %xmm0 2369 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3] 2370 ; SSSE3-NEXT: retq 2371 ; 2372 ; SSE41-LABEL: insert_dup_elt3_mem_v8i16_sext_i16: 2373 ; SSE41: # BB#0: 2374 ; SSE41-NEXT: movswl (%rdi), %eax 2375 ; SSE41-NEXT: movd %eax, %xmm0 2376 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3] 2377 ; SSE41-NEXT: retq 2378 ; 2379 ; AVX1-LABEL: insert_dup_elt3_mem_v8i16_sext_i16: 2380 ; AVX1: # BB#0: 2381 ; AVX1-NEXT: movswl (%rdi), %eax 2382 ; AVX1-NEXT: vmovd %eax, %xmm0 2383 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3] 2384 ; AVX1-NEXT: retq 2385 ; 2386 ; AVX2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16: 2387 ; AVX2: # BB#0: 2388 ; AVX2-NEXT: movswl (%rdi), %eax 2389 ; AVX2-NEXT: shrl $16, %eax 2390 ; AVX2-NEXT: vmovd %eax, %xmm0 2391 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0 2392 ; AVX2-NEXT: retq 2393 %tmp = load i16, i16* %ptr, align 2 2394 %tmp1 = sext i16 %tmp to i32 2395 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 1 2396 %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16> 2397 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 2398 ret <8 x i16> %tmp4 2399 } 2400