; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=ALL,SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefixes=ALL,SSE,SSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=ALL,SSE,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=ALL,SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=ALL,AVX,AVX1OR2,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX,AVX1OR2,AVX2OR512VL,AVX2,AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=ALL,AVX,AVX1OR2,AVX2OR512VL,AVX2,AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+fast-variable-shuffle | FileCheck %s --check-prefixes=ALL,AVX,AVX2OR512VL,AVX512VL

; Single-source <4 x i32> shuffles.
;
; Each function below is named after its shuffle mask. Every mask index is in
; the range 0-3, so all result lanes are taken from %a and %b is never
; selected. The expected lowering is one pshufd on the SSE configurations and
; one vpermilps on the AVX configurations, with the immediate equal to the mask.
;
; The check lines are generated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; Mask <0,0,0,1>.
define <4 x i32> @shuffle_v4i32_0001(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: shuffle_v4i32_0001:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v4i32_0001:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
  ret <4 x i32> %shuffle
}

; Mask <0,0,2,0>.
define <4 x i32> @shuffle_v4i32_0020(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: shuffle_v4i32_0020:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,2,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v4i32_0020:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,0]
; AVX-NEXT:    retq
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
  ret <4 x i32> %shuffle
}

; Mask <0,1,1,2>.
define <4 x i32> @shuffle_v4i32_0112(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: shuffle_v4i32_0112:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v4i32_0112:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,2]
; AVX-NEXT:    retq
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
  ret <4 x i32> %shuffle
}

; Mask <0,3,0,0>.
define <4 x i32> @shuffle_v4i32_0300(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: shuffle_v4i32_0300:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v4i32_0300:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,3,0,0]
; AVX-NEXT:    retq
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
  ret <4 x i32> %shuffle
}

; Mask <1,0,0,0>.
define <4 x i32> @shuffle_v4i32_1000(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: shuffle_v4i32_1000:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v4i32_1000:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,0,0,0]
; AVX-NEXT:    retq
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  ret <4 x i32> %shuffle
}

; Mask <2,2,0,0>.
define <4 x i32> @shuffle_v4i32_2200(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: shuffle_v4i32_2200:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v4i32_2200:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,0,0]
; AVX-NEXT:    retq
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
  ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_3330(<4 x i32> %a, <4 x i32> %b) { 90 ; SSE-LABEL: shuffle_v4i32_3330: 91 ; SSE: # %bb.0: 92 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,0] 93 ; SSE-NEXT: retq 94 ; 95 ; AVX-LABEL: shuffle_v4i32_3330: 96 ; AVX: # %bb.0: 97 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,0] 98 ; AVX-NEXT: retq 99 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0> 100 ret <4 x i32> %shuffle 101 } 102 define <4 x i32> @shuffle_v4i32_3210(<4 x i32> %a, <4 x i32> %b) { 103 ; SSE-LABEL: shuffle_v4i32_3210: 104 ; SSE: # %bb.0: 105 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0] 106 ; SSE-NEXT: retq 107 ; 108 ; AVX-LABEL: shuffle_v4i32_3210: 109 ; AVX: # %bb.0: 110 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] 111 ; AVX-NEXT: retq 112 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 113 ret <4 x i32> %shuffle 114 } 115 116 define <4 x i32> @shuffle_v4i32_2121(<4 x i32> %a, <4 x i32> %b) { 117 ; SSE-LABEL: shuffle_v4i32_2121: 118 ; SSE: # %bb.0: 119 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,1] 120 ; SSE-NEXT: retq 121 ; 122 ; AVX-LABEL: shuffle_v4i32_2121: 123 ; AVX: # %bb.0: 124 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,2,1] 125 ; AVX-NEXT: retq 126 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 1, i32 2, i32 1> 127 ret <4 x i32> %shuffle 128 } 129 130 define <4 x float> @shuffle_v4f32_0001(<4 x float> %a, <4 x float> %b) { 131 ; SSE-LABEL: shuffle_v4f32_0001: 132 ; SSE: # %bb.0: 133 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,1] 134 ; SSE-NEXT: retq 135 ; 136 ; AVX-LABEL: shuffle_v4f32_0001: 137 ; AVX: # %bb.0: 138 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,1] 139 ; AVX-NEXT: retq 140 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1> 141 ret <4 x float> %shuffle 142 } 143 define <4 x float> @shuffle_v4f32_0020(<4 x float> %a, <4 x float> %b) { 144 ; SSE-LABEL: 
shuffle_v4f32_0020: 145 ; SSE: # %bb.0: 146 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,2,0] 147 ; SSE-NEXT: retq 148 ; 149 ; AVX-LABEL: shuffle_v4f32_0020: 150 ; AVX: # %bb.0: 151 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,0] 152 ; AVX-NEXT: retq 153 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0> 154 ret <4 x float> %shuffle 155 } 156 define <4 x float> @shuffle_v4f32_0300(<4 x float> %a, <4 x float> %b) { 157 ; SSE-LABEL: shuffle_v4f32_0300: 158 ; SSE: # %bb.0: 159 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3,0,0] 160 ; SSE-NEXT: retq 161 ; 162 ; AVX-LABEL: shuffle_v4f32_0300: 163 ; AVX: # %bb.0: 164 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,3,0,0] 165 ; AVX-NEXT: retq 166 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0> 167 ret <4 x float> %shuffle 168 } 169 define <4 x float> @shuffle_v4f32_1000(<4 x float> %a, <4 x float> %b) { 170 ; SSE-LABEL: shuffle_v4f32_1000: 171 ; SSE: # %bb.0: 172 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0,0,0] 173 ; SSE-NEXT: retq 174 ; 175 ; AVX-LABEL: shuffle_v4f32_1000: 176 ; AVX: # %bb.0: 177 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,0,0,0] 178 ; AVX-NEXT: retq 179 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0> 180 ret <4 x float> %shuffle 181 } 182 define <4 x float> @shuffle_v4f32_2200(<4 x float> %a, <4 x float> %b) { 183 ; SSE-LABEL: shuffle_v4f32_2200: 184 ; SSE: # %bb.0: 185 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,2,0,0] 186 ; SSE-NEXT: retq 187 ; 188 ; AVX-LABEL: shuffle_v4f32_2200: 189 ; AVX: # %bb.0: 190 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,0,0] 191 ; AVX-NEXT: retq 192 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0> 193 ret <4 x float> %shuffle 194 } 195 define <4 x float> @shuffle_v4f32_3330(<4 x float> %a, <4 x float> %b) { 196 ; SSE-LABEL: shuffle_v4f32_3330: 197 ; SSE: # %bb.0: 198 ; SSE-NEXT: shufps 
{{.*#+}} xmm0 = xmm0[3,3,3,0] 199 ; SSE-NEXT: retq 200 ; 201 ; AVX-LABEL: shuffle_v4f32_3330: 202 ; AVX: # %bb.0: 203 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,0] 204 ; AVX-NEXT: retq 205 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0> 206 ret <4 x float> %shuffle 207 } 208 define <4 x float> @shuffle_v4f32_3210(<4 x float> %a, <4 x float> %b) { 209 ; SSE-LABEL: shuffle_v4f32_3210: 210 ; SSE: # %bb.0: 211 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0] 212 ; SSE-NEXT: retq 213 ; 214 ; AVX-LABEL: shuffle_v4f32_3210: 215 ; AVX: # %bb.0: 216 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] 217 ; AVX-NEXT: retq 218 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 219 ret <4 x float> %shuffle 220 } 221 define <4 x float> @shuffle_v4f32_0011(<4 x float> %a, <4 x float> %b) { 222 ; SSE-LABEL: shuffle_v4f32_0011: 223 ; SSE: # %bb.0: 224 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1] 225 ; SSE-NEXT: retq 226 ; 227 ; AVX-LABEL: shuffle_v4f32_0011: 228 ; AVX: # %bb.0: 229 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1] 230 ; AVX-NEXT: retq 231 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 1, i32 1> 232 ret <4 x float> %shuffle 233 } 234 define <4 x float> @shuffle_v4f32_2233(<4 x float> %a, <4 x float> %b) { 235 ; SSE-LABEL: shuffle_v4f32_2233: 236 ; SSE: # %bb.0: 237 ; SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3] 238 ; SSE-NEXT: retq 239 ; 240 ; AVX-LABEL: shuffle_v4f32_2233: 241 ; AVX: # %bb.0: 242 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3] 243 ; AVX-NEXT: retq 244 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 3, i32 3> 245 ret <4 x float> %shuffle 246 } 247 define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) { 248 ; SSE2-LABEL: shuffle_v4f32_0022: 249 ; SSE2: # %bb.0: 250 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,2,2] 251 ; SSE2-NEXT: retq 252 ; 
253 ; SSE3-LABEL: shuffle_v4f32_0022: 254 ; SSE3: # %bb.0: 255 ; SSE3-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] 256 ; SSE3-NEXT: retq 257 ; 258 ; SSSE3-LABEL: shuffle_v4f32_0022: 259 ; SSSE3: # %bb.0: 260 ; SSSE3-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] 261 ; SSSE3-NEXT: retq 262 ; 263 ; SSE41-LABEL: shuffle_v4f32_0022: 264 ; SSE41: # %bb.0: 265 ; SSE41-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] 266 ; SSE41-NEXT: retq 267 ; 268 ; AVX-LABEL: shuffle_v4f32_0022: 269 ; AVX: # %bb.0: 270 ; AVX-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] 271 ; AVX-NEXT: retq 272 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2> 273 ret <4 x float> %shuffle 274 } 275 define <4 x float> @shuffle_v4f32_1133(<4 x float> %a, <4 x float> %b) { 276 ; SSE2-LABEL: shuffle_v4f32_1133: 277 ; SSE2: # %bb.0: 278 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,3,3] 279 ; SSE2-NEXT: retq 280 ; 281 ; SSE3-LABEL: shuffle_v4f32_1133: 282 ; SSE3: # %bb.0: 283 ; SSE3-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 284 ; SSE3-NEXT: retq 285 ; 286 ; SSSE3-LABEL: shuffle_v4f32_1133: 287 ; SSSE3: # %bb.0: 288 ; SSSE3-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 289 ; SSSE3-NEXT: retq 290 ; 291 ; SSE41-LABEL: shuffle_v4f32_1133: 292 ; SSE41: # %bb.0: 293 ; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 294 ; SSE41-NEXT: retq 295 ; 296 ; AVX-LABEL: shuffle_v4f32_1133: 297 ; AVX: # %bb.0: 298 ; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 299 ; AVX-NEXT: retq 300 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3> 301 ret <4 x float> %shuffle 302 } 303 304 define <4 x float> @shuffle_v4f32_0145(<4 x float> %a, <4 x float> %b) { 305 ; SSE-LABEL: shuffle_v4f32_0145: 306 ; SSE: # %bb.0: 307 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 308 ; SSE-NEXT: retq 309 ; 310 ; AVX-LABEL: shuffle_v4f32_0145: 311 ; AVX: # %bb.0: 312 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 313 ; AVX-NEXT: retq 314 %shuffle 
= shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 315 ret <4 x float> %shuffle 316 } 317 318 define <4 x float> @shuffle_v4f32_6723(<4 x float> %a, <4 x float> %b) { 319 ; SSE-LABEL: shuffle_v4f32_6723: 320 ; SSE: # %bb.0: 321 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] 322 ; SSE-NEXT: retq 323 ; 324 ; AVX-LABEL: shuffle_v4f32_6723: 325 ; AVX: # %bb.0: 326 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] 327 ; AVX-NEXT: retq 328 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 2, i32 3> 329 ret <4 x float> %shuffle 330 } 331 332 define <4 x i32> @shuffle_v4i32_0124(<4 x i32> %a, <4 x i32> %b) { 333 ; SSE2-LABEL: shuffle_v4i32_0124: 334 ; SSE2: # %bb.0: 335 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0] 336 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0] 337 ; SSE2-NEXT: retq 338 ; 339 ; SSE3-LABEL: shuffle_v4i32_0124: 340 ; SSE3: # %bb.0: 341 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0] 342 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0] 343 ; SSE3-NEXT: retq 344 ; 345 ; SSSE3-LABEL: shuffle_v4i32_0124: 346 ; SSSE3: # %bb.0: 347 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0] 348 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0] 349 ; SSSE3-NEXT: retq 350 ; 351 ; SSE41-LABEL: shuffle_v4i32_0124: 352 ; SSE41: # %bb.0: 353 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0] 354 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7] 355 ; SSE41-NEXT: retq 356 ; 357 ; AVX1-LABEL: shuffle_v4i32_0124: 358 ; AVX1: # %bb.0: 359 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,1,2,0] 360 ; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3] 361 ; AVX1-NEXT: retq 362 ; 363 ; AVX2OR512VL-LABEL: shuffle_v4i32_0124: 364 ; AVX2OR512VL: # %bb.0: 365 ; AVX2OR512VL-NEXT: vbroadcastss %xmm1, %xmm1 366 ; AVX2OR512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3] 367 ; AVX2OR512VL-NEXT: retq 368 %shuffle = shufflevector <4 x i32> %a, 
<4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4> 369 ret <4 x i32> %shuffle 370 } 371 define <4 x i32> @shuffle_v4i32_0142(<4 x i32> %a, <4 x i32> %b) { 372 ; SSE2-LABEL: shuffle_v4i32_0142: 373 ; SSE2: # %bb.0: 374 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0] 375 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2] 376 ; SSE2-NEXT: retq 377 ; 378 ; SSE3-LABEL: shuffle_v4i32_0142: 379 ; SSE3: # %bb.0: 380 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0] 381 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2] 382 ; SSE3-NEXT: retq 383 ; 384 ; SSSE3-LABEL: shuffle_v4i32_0142: 385 ; SSSE3: # %bb.0: 386 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0] 387 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2] 388 ; SSSE3-NEXT: retq 389 ; 390 ; SSE41-LABEL: shuffle_v4i32_0142: 391 ; SSE41: # %bb.0: 392 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] 393 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,2] 394 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7] 395 ; SSE41-NEXT: retq 396 ; 397 ; AVX1-LABEL: shuffle_v4i32_0142: 398 ; AVX1: # %bb.0: 399 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,1,0,1] 400 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,2,2] 401 ; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3] 402 ; AVX1-NEXT: retq 403 ; 404 ; AVX2OR512VL-LABEL: shuffle_v4i32_0142: 405 ; AVX2OR512VL: # %bb.0: 406 ; AVX2OR512VL-NEXT: vpbroadcastq %xmm1, %xmm1 407 ; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,2,2] 408 ; AVX2OR512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3] 409 ; AVX2OR512VL-NEXT: retq 410 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2> 411 ret <4 x i32> %shuffle 412 } 413 define <4 x i32> @shuffle_v4i32_0412(<4 x i32> %a, <4 x i32> %b) { 414 ; SSE2-LABEL: shuffle_v4i32_0412: 415 ; SSE2: # %bb.0: 416 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0] 417 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,2] 418 
; SSE2-NEXT: movaps %xmm1, %xmm0 419 ; SSE2-NEXT: retq 420 ; 421 ; SSE3-LABEL: shuffle_v4i32_0412: 422 ; SSE3: # %bb.0: 423 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0] 424 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,2] 425 ; SSE3-NEXT: movaps %xmm1, %xmm0 426 ; SSE3-NEXT: retq 427 ; 428 ; SSSE3-LABEL: shuffle_v4i32_0412: 429 ; SSSE3: # %bb.0: 430 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0] 431 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,2] 432 ; SSSE3-NEXT: movaps %xmm1, %xmm0 433 ; SSSE3-NEXT: retq 434 ; 435 ; SSE41-LABEL: shuffle_v4i32_0412: 436 ; SSE41: # %bb.0: 437 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] 438 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,2] 439 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7] 440 ; SSE41-NEXT: retq 441 ; 442 ; AVX1-LABEL: shuffle_v4i32_0412: 443 ; AVX1: # %bb.0: 444 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1] 445 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,2] 446 ; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3] 447 ; AVX1-NEXT: retq 448 ; 449 ; AVX2OR512VL-LABEL: shuffle_v4i32_0412: 450 ; AVX2OR512VL: # %bb.0: 451 ; AVX2OR512VL-NEXT: vbroadcastss %xmm1, %xmm1 452 ; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,2] 453 ; AVX2OR512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3] 454 ; AVX2OR512VL-NEXT: retq 455 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2> 456 ret <4 x i32> %shuffle 457 } 458 define <4 x i32> @shuffle_v4i32_4012(<4 x i32> %a, <4 x i32> %b) { 459 ; SSE2-LABEL: shuffle_v4i32_4012: 460 ; SSE2: # %bb.0: 461 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0] 462 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2] 463 ; SSE2-NEXT: movaps %xmm1, %xmm0 464 ; SSE2-NEXT: retq 465 ; 466 ; SSE3-LABEL: shuffle_v4i32_4012: 467 ; SSE3: # %bb.0: 468 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0] 469 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = 
xmm1[0,2],xmm0[1,2] 470 ; SSE3-NEXT: movaps %xmm1, %xmm0 471 ; SSE3-NEXT: retq 472 ; 473 ; SSSE3-LABEL: shuffle_v4i32_4012: 474 ; SSSE3: # %bb.0: 475 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0] 476 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2] 477 ; SSSE3-NEXT: movaps %xmm1, %xmm0 478 ; SSSE3-NEXT: retq 479 ; 480 ; SSE41-LABEL: shuffle_v4i32_4012: 481 ; SSE41: # %bb.0: 482 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,2] 483 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7] 484 ; SSE41-NEXT: retq 485 ; 486 ; AVX-LABEL: shuffle_v4i32_4012: 487 ; AVX: # %bb.0: 488 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,2] 489 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] 490 ; AVX-NEXT: retq 491 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2> 492 ret <4 x i32> %shuffle 493 } 494 define <4 x i32> @shuffle_v4i32_0145(<4 x i32> %a, <4 x i32> %b) { 495 ; SSE-LABEL: shuffle_v4i32_0145: 496 ; SSE: # %bb.0: 497 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 498 ; SSE-NEXT: retq 499 ; 500 ; AVX-LABEL: shuffle_v4i32_0145: 501 ; AVX: # %bb.0: 502 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 503 ; AVX-NEXT: retq 504 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 505 ret <4 x i32> %shuffle 506 } 507 define <4 x i32> @shuffle_v4i32_0451(<4 x i32> %a, <4 x i32> %b) { 508 ; SSE2-LABEL: shuffle_v4i32_0451: 509 ; SSE2: # %bb.0: 510 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 511 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] 512 ; SSE2-NEXT: retq 513 ; 514 ; SSE3-LABEL: shuffle_v4i32_0451: 515 ; SSE3: # %bb.0: 516 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 517 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] 518 ; SSE3-NEXT: retq 519 ; 520 ; SSSE3-LABEL: shuffle_v4i32_0451: 521 ; SSSE3: # %bb.0: 522 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 523 ; 
SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] 524 ; SSSE3-NEXT: retq 525 ; 526 ; SSE41-LABEL: shuffle_v4i32_0451: 527 ; SSE41: # %bb.0: 528 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] 529 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 530 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7] 531 ; SSE41-NEXT: retq 532 ; 533 ; AVX1-LABEL: shuffle_v4i32_0451: 534 ; AVX1: # %bb.0: 535 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1] 536 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1] 537 ; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] 538 ; AVX1-NEXT: retq 539 ; 540 ; AVX2OR512VL-LABEL: shuffle_v4i32_0451: 541 ; AVX2OR512VL: # %bb.0: 542 ; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] 543 ; AVX2OR512VL-NEXT: vpbroadcastq %xmm0, %xmm0 544 ; AVX2OR512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] 545 ; AVX2OR512VL-NEXT: retq 546 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1> 547 ret <4 x i32> %shuffle 548 } 549 define <4 x i32> @shuffle_v4i32_4501(<4 x i32> %a, <4 x i32> %b) { 550 ; SSE-LABEL: shuffle_v4i32_4501: 551 ; SSE: # %bb.0: 552 ; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] 553 ; SSE-NEXT: movaps %xmm1, %xmm0 554 ; SSE-NEXT: retq 555 ; 556 ; AVX-LABEL: shuffle_v4i32_4501: 557 ; AVX: # %bb.0: 558 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] 559 ; AVX-NEXT: retq 560 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1> 561 ret <4 x i32> %shuffle 562 } 563 define <4 x i32> @shuffle_v4i32_4015(<4 x i32> %a, <4 x i32> %b) { 564 ; SSE2-LABEL: shuffle_v4i32_4015: 565 ; SSE2: # %bb.0: 566 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 567 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3] 568 ; SSE2-NEXT: retq 569 ; 570 ; SSE3-LABEL: shuffle_v4i32_4015: 571 ; SSE3: # %bb.0: 572 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 573 ; SSE3-NEXT: pshufd 
{{.*#+}} xmm0 = xmm0[1,0,2,3] 574 ; SSE3-NEXT: retq 575 ; 576 ; SSSE3-LABEL: shuffle_v4i32_4015: 577 ; SSSE3: # %bb.0: 578 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 579 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3] 580 ; SSSE3-NEXT: retq 581 ; 582 ; SSE41-LABEL: shuffle_v4i32_4015: 583 ; SSE41: # %bb.0: 584 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] 585 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 586 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7] 587 ; SSE41-NEXT: retq 588 ; 589 ; AVX1-LABEL: shuffle_v4i32_4015: 590 ; AVX1: # %bb.0: 591 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,1,0,1] 592 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1] 593 ; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3] 594 ; AVX1-NEXT: retq 595 ; 596 ; AVX2OR512VL-LABEL: shuffle_v4i32_4015: 597 ; AVX2OR512VL: # %bb.0: 598 ; AVX2OR512VL-NEXT: vpbroadcastq %xmm1, %xmm1 599 ; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 600 ; AVX2OR512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3] 601 ; AVX2OR512VL-NEXT: retq 602 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5> 603 ret <4 x i32> %shuffle 604 } 605 606 define <4 x float> @shuffle_v4f32_4zzz(<4 x float> %a) { 607 ; SSE2-LABEL: shuffle_v4f32_4zzz: 608 ; SSE2: # %bb.0: 609 ; SSE2-NEXT: xorps %xmm1, %xmm1 610 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 611 ; SSE2-NEXT: movaps %xmm1, %xmm0 612 ; SSE2-NEXT: retq 613 ; 614 ; SSE3-LABEL: shuffle_v4f32_4zzz: 615 ; SSE3: # %bb.0: 616 ; SSE3-NEXT: xorps %xmm1, %xmm1 617 ; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 618 ; SSE3-NEXT: movaps %xmm1, %xmm0 619 ; SSE3-NEXT: retq 620 ; 621 ; SSSE3-LABEL: shuffle_v4f32_4zzz: 622 ; SSSE3: # %bb.0: 623 ; SSSE3-NEXT: xorps %xmm1, %xmm1 624 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 625 ; SSSE3-NEXT: movaps %xmm1, %xmm0 626 ; SSSE3-NEXT: retq 627 ; 628 ; SSE41-LABEL: 
shuffle_v4f32_4zzz: 629 ; SSE41: # %bb.0: 630 ; SSE41-NEXT: xorps %xmm1, %xmm1 631 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] 632 ; SSE41-NEXT: retq 633 ; 634 ; AVX-LABEL: shuffle_v4f32_4zzz: 635 ; AVX: # %bb.0: 636 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 637 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] 638 ; AVX-NEXT: retq 639 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3> 640 ret <4 x float> %shuffle 641 } 642 643 define <4 x float> @shuffle_v4f32_z4zz(<4 x float> %a) { 644 ; SSE2-LABEL: shuffle_v4f32_z4zz: 645 ; SSE2: # %bb.0: 646 ; SSE2-NEXT: xorps %xmm1, %xmm1 647 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] 648 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3] 649 ; SSE2-NEXT: retq 650 ; 651 ; SSE3-LABEL: shuffle_v4f32_z4zz: 652 ; SSE3: # %bb.0: 653 ; SSE3-NEXT: xorps %xmm1, %xmm1 654 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] 655 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3] 656 ; SSE3-NEXT: retq 657 ; 658 ; SSSE3-LABEL: shuffle_v4f32_z4zz: 659 ; SSSE3: # %bb.0: 660 ; SSSE3-NEXT: xorps %xmm1, %xmm1 661 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] 662 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3] 663 ; SSSE3-NEXT: retq 664 ; 665 ; SSE41-LABEL: shuffle_v4f32_z4zz: 666 ; SSE41: # %bb.0: 667 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero 668 ; SSE41-NEXT: retq 669 ; 670 ; AVX-LABEL: shuffle_v4f32_z4zz: 671 ; AVX: # %bb.0: 672 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero 673 ; AVX-NEXT: retq 674 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 2, i32 4, i32 3, i32 0> 675 ret <4 x float> %shuffle 676 } 677 678 define <4 x float> @shuffle_v4f32_zz4z(<4 x float> %a) { 679 ; SSE2-LABEL: shuffle_v4f32_zz4z: 680 ; SSE2: # %bb.0: 681 ; SSE2-NEXT: xorps %xmm1, %xmm1 682 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0] 683 ; SSE2-NEXT: shufps {{.*#+}} 
xmm1 = xmm1[0,1],xmm0[0,2] 684 ; SSE2-NEXT: movaps %xmm1, %xmm0 685 ; SSE2-NEXT: retq 686 ; 687 ; SSE3-LABEL: shuffle_v4f32_zz4z: 688 ; SSE3: # %bb.0: 689 ; SSE3-NEXT: xorps %xmm1, %xmm1 690 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0] 691 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2] 692 ; SSE3-NEXT: movaps %xmm1, %xmm0 693 ; SSE3-NEXT: retq 694 ; 695 ; SSSE3-LABEL: shuffle_v4f32_zz4z: 696 ; SSSE3: # %bb.0: 697 ; SSSE3-NEXT: xorps %xmm1, %xmm1 698 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0] 699 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2] 700 ; SSSE3-NEXT: movaps %xmm1, %xmm0 701 ; SSSE3-NEXT: retq 702 ; 703 ; SSE41-LABEL: shuffle_v4f32_zz4z: 704 ; SSE41: # %bb.0: 705 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,zero,xmm0[0],zero 706 ; SSE41-NEXT: retq 707 ; 708 ; AVX-LABEL: shuffle_v4f32_zz4z: 709 ; AVX: # %bb.0: 710 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,zero,xmm0[0],zero 711 ; AVX-NEXT: retq 712 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 0, i32 4, i32 0> 713 ret <4 x float> %shuffle 714 } 715 716 define <4 x float> @shuffle_v4f32_zuu4(<4 x float> %a) { 717 ; SSE2-LABEL: shuffle_v4f32_zuu4: 718 ; SSE2: # %bb.0: 719 ; SSE2-NEXT: xorps %xmm1, %xmm1 720 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] 721 ; SSE2-NEXT: movaps %xmm1, %xmm0 722 ; SSE2-NEXT: retq 723 ; 724 ; SSE3-LABEL: shuffle_v4f32_zuu4: 725 ; SSE3: # %bb.0: 726 ; SSE3-NEXT: xorps %xmm1, %xmm1 727 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] 728 ; SSE3-NEXT: movaps %xmm1, %xmm0 729 ; SSE3-NEXT: retq 730 ; 731 ; SSSE3-LABEL: shuffle_v4f32_zuu4: 732 ; SSSE3: # %bb.0: 733 ; SSSE3-NEXT: xorps %xmm1, %xmm1 734 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] 735 ; SSSE3-NEXT: movaps %xmm1, %xmm0 736 ; SSSE3-NEXT: retq 737 ; 738 ; SSE41-LABEL: shuffle_v4f32_zuu4: 739 ; SSE41: # %bb.0: 740 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[0] 741 ; SSE41-NEXT: retq 742 ; 743 ; 
AVX-LABEL: shuffle_v4f32_zuu4: 744 ; AVX: # %bb.0: 745 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[0] 746 ; AVX-NEXT: retq 747 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 4> 748 ret <4 x float> %shuffle 749 } 750 751 define <4 x float> @shuffle_v4f32_zzz7(<4 x float> %a) { 752 ; SSE2-LABEL: shuffle_v4f32_zzz7: 753 ; SSE2: # %bb.0: 754 ; SSE2-NEXT: xorps %xmm1, %xmm1 755 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0] 756 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] 757 ; SSE2-NEXT: movaps %xmm1, %xmm0 758 ; SSE2-NEXT: retq 759 ; 760 ; SSE3-LABEL: shuffle_v4f32_zzz7: 761 ; SSE3: # %bb.0: 762 ; SSE3-NEXT: xorps %xmm1, %xmm1 763 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0] 764 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] 765 ; SSE3-NEXT: movaps %xmm1, %xmm0 766 ; SSE3-NEXT: retq 767 ; 768 ; SSSE3-LABEL: shuffle_v4f32_zzz7: 769 ; SSSE3: # %bb.0: 770 ; SSSE3-NEXT: xorps %xmm1, %xmm1 771 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0] 772 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] 773 ; SSSE3-NEXT: movaps %xmm1, %xmm0 774 ; SSSE3-NEXT: retq 775 ; 776 ; SSE41-LABEL: shuffle_v4f32_zzz7: 777 ; SSE41: # %bb.0: 778 ; SSE41-NEXT: xorps %xmm1, %xmm1 779 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] 780 ; SSE41-NEXT: retq 781 ; 782 ; AVX-LABEL: shuffle_v4f32_zzz7: 783 ; AVX: # %bb.0: 784 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 785 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] 786 ; AVX-NEXT: retq 787 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 7> 788 ret <4 x float> %shuffle 789 } 790 791 define <4 x float> @shuffle_v4f32_z6zz(<4 x float> %a) { 792 ; SSE2-LABEL: shuffle_v4f32_z6zz: 793 ; SSE2: # %bb.0: 794 ; SSE2-NEXT: xorps %xmm1, %xmm1 795 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0] 796 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = 
xmm0[2,0],xmm1[2,3] 797 ; SSE2-NEXT: retq 798 ; 799 ; SSE3-LABEL: shuffle_v4f32_z6zz: 800 ; SSE3: # %bb.0: 801 ; SSE3-NEXT: xorps %xmm1, %xmm1 802 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0] 803 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3] 804 ; SSE3-NEXT: retq 805 ; 806 ; SSSE3-LABEL: shuffle_v4f32_z6zz: 807 ; SSSE3: # %bb.0: 808 ; SSSE3-NEXT: xorps %xmm1, %xmm1 809 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0] 810 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3] 811 ; SSSE3-NEXT: retq 812 ; 813 ; SSE41-LABEL: shuffle_v4f32_z6zz: 814 ; SSE41: # %bb.0: 815 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[2],zero,zero 816 ; SSE41-NEXT: retq 817 ; 818 ; AVX-LABEL: shuffle_v4f32_z6zz: 819 ; AVX: # %bb.0: 820 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm0[2],zero,zero 821 ; AVX-NEXT: retq 822 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3> 823 ret <4 x float> %shuffle 824 } 825 826 define <4 x float> @shuffle_v4f32_0z23(<4 x float> %a) { 827 ; SSE2-LABEL: shuffle_v4f32_0z23: 828 ; SSE2: # %bb.0: 829 ; SSE2-NEXT: xorps %xmm1, %xmm1 830 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0] 831 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3] 832 ; SSE2-NEXT: movaps %xmm1, %xmm0 833 ; SSE2-NEXT: retq 834 ; 835 ; SSE3-LABEL: shuffle_v4f32_0z23: 836 ; SSE3: # %bb.0: 837 ; SSE3-NEXT: xorps %xmm1, %xmm1 838 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0] 839 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3] 840 ; SSE3-NEXT: movaps %xmm1, %xmm0 841 ; SSE3-NEXT: retq 842 ; 843 ; SSSE3-LABEL: shuffle_v4f32_0z23: 844 ; SSSE3: # %bb.0: 845 ; SSSE3-NEXT: xorps %xmm1, %xmm1 846 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0] 847 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3] 848 ; SSSE3-NEXT: movaps %xmm1, %xmm0 849 ; SSSE3-NEXT: retq 850 ; 851 ; SSE41-LABEL: shuffle_v4f32_0z23: 852 ; SSE41: # %bb.0: 853 ; SSE41-NEXT: xorps %xmm1, %xmm1 
854 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3] 855 ; SSE41-NEXT: retq 856 ; 857 ; AVX-LABEL: shuffle_v4f32_0z23: 858 ; AVX: # %bb.0: 859 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 860 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3] 861 ; AVX-NEXT: retq 862 %shuffle = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 2, i32 3> 863 ret <4 x float> %shuffle 864 } 865 866 define <4 x float> @shuffle_v4f32_01z3(<4 x float> %a) { 867 ; SSE2-LABEL: shuffle_v4f32_01z3: 868 ; SSE2: # %bb.0: 869 ; SSE2-NEXT: xorps %xmm1, %xmm1 870 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0] 871 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2] 872 ; SSE2-NEXT: retq 873 ; 874 ; SSE3-LABEL: shuffle_v4f32_01z3: 875 ; SSE3: # %bb.0: 876 ; SSE3-NEXT: xorps %xmm1, %xmm1 877 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0] 878 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2] 879 ; SSE3-NEXT: retq 880 ; 881 ; SSSE3-LABEL: shuffle_v4f32_01z3: 882 ; SSSE3: # %bb.0: 883 ; SSSE3-NEXT: xorps %xmm1, %xmm1 884 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0] 885 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2] 886 ; SSSE3-NEXT: retq 887 ; 888 ; SSE41-LABEL: shuffle_v4f32_01z3: 889 ; SSE41: # %bb.0: 890 ; SSE41-NEXT: xorps %xmm1, %xmm1 891 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3] 892 ; SSE41-NEXT: retq 893 ; 894 ; AVX-LABEL: shuffle_v4f32_01z3: 895 ; AVX: # %bb.0: 896 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 897 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3] 898 ; AVX-NEXT: retq 899 %shuffle = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 3> 900 ret <4 x float> %shuffle 901 } 902 903 define <4 x float> @shuffle_v4f32_012z(<4 x float> %a) { 904 ; SSE2-LABEL: shuffle_v4f32_012z: 905 ; SSE2: # %bb.0: 906 ; SSE2-NEXT: xorps %xmm1, %xmm1 907 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[2,0] 908 ; 
SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0] 909 ; SSE2-NEXT: retq 910 ; 911 ; SSE3-LABEL: shuffle_v4f32_012z: 912 ; SSE3: # %bb.0: 913 ; SSE3-NEXT: xorps %xmm1, %xmm1 914 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[2,0] 915 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0] 916 ; SSE3-NEXT: retq 917 ; 918 ; SSSE3-LABEL: shuffle_v4f32_012z: 919 ; SSSE3: # %bb.0: 920 ; SSSE3-NEXT: xorps %xmm1, %xmm1 921 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[2,0] 922 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0] 923 ; SSSE3-NEXT: retq 924 ; 925 ; SSE41-LABEL: shuffle_v4f32_012z: 926 ; SSE41: # %bb.0: 927 ; SSE41-NEXT: xorps %xmm1, %xmm1 928 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3] 929 ; SSE41-NEXT: retq 930 ; 931 ; AVX-LABEL: shuffle_v4f32_012z: 932 ; AVX: # %bb.0: 933 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 934 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3] 935 ; AVX-NEXT: retq 936 %shuffle = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 7> 937 ret <4 x float> %shuffle 938 } 939 940 define <4 x float> @shuffle_v4f32_0zz3(<4 x float> %a) { 941 ; SSE2-LABEL: shuffle_v4f32_0zz3: 942 ; SSE2: # %bb.0: 943 ; SSE2-NEXT: xorps %xmm1, %xmm1 944 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],xmm1[1,2] 945 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1] 946 ; SSE2-NEXT: retq 947 ; 948 ; SSE3-LABEL: shuffle_v4f32_0zz3: 949 ; SSE3: # %bb.0: 950 ; SSE3-NEXT: xorps %xmm1, %xmm1 951 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],xmm1[1,2] 952 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1] 953 ; SSE3-NEXT: retq 954 ; 955 ; SSSE3-LABEL: shuffle_v4f32_0zz3: 956 ; SSSE3: # %bb.0: 957 ; SSSE3-NEXT: xorps %xmm1, %xmm1 958 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],xmm1[1,2] 959 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1] 960 ; SSSE3-NEXT: retq 961 ; 962 ; SSE41-LABEL: shuffle_v4f32_0zz3: 963 ; SSE41: # %bb.0: 964 ; SSE41-NEXT: xorps %xmm1, %xmm1 965 ; SSE41-NEXT: blendps 
{{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] 966 ; SSE41-NEXT: retq 967 ; 968 ; AVX-LABEL: shuffle_v4f32_0zz3: 969 ; AVX: # %bb.0: 970 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 971 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] 972 ; AVX-NEXT: retq 973 %shuffle = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 3> 974 ret <4 x float> %shuffle 975 } 976 977 define <4 x float> @shuffle_v4f32_0z2z(<4 x float> %v) { 978 ; SSE2-LABEL: shuffle_v4f32_0z2z: 979 ; SSE2: # %bb.0: 980 ; SSE2-NEXT: xorps %xmm1, %xmm1 981 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,0] 982 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3] 983 ; SSE2-NEXT: retq 984 ; 985 ; SSE3-LABEL: shuffle_v4f32_0z2z: 986 ; SSE3: # %bb.0: 987 ; SSE3-NEXT: xorps %xmm1, %xmm1 988 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,0] 989 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3] 990 ; SSE3-NEXT: retq 991 ; 992 ; SSSE3-LABEL: shuffle_v4f32_0z2z: 993 ; SSSE3: # %bb.0: 994 ; SSSE3-NEXT: xorps %xmm1, %xmm1 995 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,0] 996 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3] 997 ; SSSE3-NEXT: retq 998 ; 999 ; SSE41-LABEL: shuffle_v4f32_0z2z: 1000 ; SSE41: # %bb.0: 1001 ; SSE41-NEXT: xorps %xmm1, %xmm1 1002 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] 1003 ; SSE41-NEXT: retq 1004 ; 1005 ; AVX-LABEL: shuffle_v4f32_0z2z: 1006 ; AVX: # %bb.0: 1007 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 1008 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] 1009 ; AVX-NEXT: retq 1010 %shuffle = shufflevector <4 x float> %v, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 0, i32 4, i32 2, i32 4> 1011 ret <4 x float> %shuffle 1012 } 1013 1014 define <4 x float> @shuffle_v4f32_u051(<4 x float> %a, <4 x float> %b) { 1015 ; SSE-LABEL: shuffle_v4f32_u051: 1016 ; SSE: # %bb.0: 1017 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = 
xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1018 ; SSE-NEXT: movaps %xmm1, %xmm0 1019 ; SSE-NEXT: retq 1020 ; 1021 ; AVX-LABEL: shuffle_v4f32_u051: 1022 ; AVX: # %bb.0: 1023 ; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1024 ; AVX-NEXT: retq 1025 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 undef, i32 0, i32 5, i32 1> 1026 ret <4 x float> %shuffle 1027 } 1028 1029 define <4 x float> @shuffle_v4f32_0zz4(<4 x float> %a, <4 x float> %b) { 1030 ; SSE2-LABEL: shuffle_v4f32_0zz4: 1031 ; SSE2: # %bb.0: 1032 ; SSE2-NEXT: xorps %xmm2, %xmm2 1033 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm2[2,0] 1034 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0] 1035 ; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3] 1036 ; SSE2-NEXT: movaps %xmm2, %xmm0 1037 ; SSE2-NEXT: retq 1038 ; 1039 ; SSE3-LABEL: shuffle_v4f32_0zz4: 1040 ; SSE3: # %bb.0: 1041 ; SSE3-NEXT: xorps %xmm2, %xmm2 1042 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm2[2,0] 1043 ; SSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0] 1044 ; SSE3-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3] 1045 ; SSE3-NEXT: movaps %xmm2, %xmm0 1046 ; SSE3-NEXT: retq 1047 ; 1048 ; SSSE3-LABEL: shuffle_v4f32_0zz4: 1049 ; SSSE3: # %bb.0: 1050 ; SSSE3-NEXT: xorps %xmm2, %xmm2 1051 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm2[2,0] 1052 ; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0] 1053 ; SSSE3-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3] 1054 ; SSSE3-NEXT: movaps %xmm2, %xmm0 1055 ; SSSE3-NEXT: retq 1056 ; 1057 ; SSE41-LABEL: shuffle_v4f32_0zz4: 1058 ; SSE41: # %bb.0: 1059 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0] 1060 ; SSE41-NEXT: retq 1061 ; 1062 ; AVX-LABEL: shuffle_v4f32_0zz4: 1063 ; AVX: # %bb.0: 1064 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0] 1065 ; AVX-NEXT: retq 1066 %shuffle = shufflevector <4 x float> %b, <4 x float> zeroinitializer, <4 x i32> <i32 undef, i32 5, i32 6, i32 0> 1067 %shuffle1 = shufflevector <4 x 
float> %a, <4 x float> %shuffle, <4 x i32> <i32 0, i32 5, i32 6, i32 7> 1068 ret <4 x float> %shuffle1 1069 } 1070 1071 define <4 x float> @shuffle_v4f32_0zz6(<4 x float> %a, <4 x float> %b) { 1072 ; SSE2-LABEL: shuffle_v4f32_0zz6: 1073 ; SSE2: # %bb.0: 1074 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2] 1075 ; SSE2-NEXT: xorps %xmm1, %xmm1 1076 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[0,3] 1077 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0,1,3] 1078 ; SSE2-NEXT: movaps %xmm1, %xmm0 1079 ; SSE2-NEXT: retq 1080 ; 1081 ; SSE3-LABEL: shuffle_v4f32_0zz6: 1082 ; SSE3: # %bb.0: 1083 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2] 1084 ; SSE3-NEXT: xorps %xmm1, %xmm1 1085 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[0,3] 1086 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0,1,3] 1087 ; SSE3-NEXT: movaps %xmm1, %xmm0 1088 ; SSE3-NEXT: retq 1089 ; 1090 ; SSSE3-LABEL: shuffle_v4f32_0zz6: 1091 ; SSSE3: # %bb.0: 1092 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2] 1093 ; SSSE3-NEXT: xorps %xmm1, %xmm1 1094 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[0,3] 1095 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0,1,3] 1096 ; SSSE3-NEXT: movaps %xmm1, %xmm0 1097 ; SSSE3-NEXT: retq 1098 ; 1099 ; SSE41-LABEL: shuffle_v4f32_0zz6: 1100 ; SSE41: # %bb.0: 1101 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[2] 1102 ; SSE41-NEXT: retq 1103 ; 1104 ; AVX-LABEL: shuffle_v4f32_0zz6: 1105 ; AVX: # %bb.0: 1106 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[2] 1107 ; AVX-NEXT: retq 1108 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 undef, i32 undef, i32 6> 1109 %shuffle1 = shufflevector <4 x float> zeroinitializer, <4 x float> %shuffle, <4 x i32> <i32 4, i32 1, i32 2, i32 7> 1110 ret <4 x float> %shuffle1 1111 } 1112 1113 define <4 x float> @shuffle_v4f32_0z24(<4 x float> %a, <4 x float> %b) { 1114 ; SSE2-LABEL: shuffle_v4f32_0z24: 1115 ; SSE2: # %bb.0: 1116 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = 
xmm1[0,0],xmm0[2,0] 1117 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0] 1118 ; SSE2-NEXT: xorps %xmm1, %xmm1 1119 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0] 1120 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3] 1121 ; SSE2-NEXT: movaps %xmm1, %xmm0 1122 ; SSE2-NEXT: retq 1123 ; 1124 ; SSE3-LABEL: shuffle_v4f32_0z24: 1125 ; SSE3: # %bb.0: 1126 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0] 1127 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0] 1128 ; SSE3-NEXT: xorps %xmm1, %xmm1 1129 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0] 1130 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3] 1131 ; SSE3-NEXT: movaps %xmm1, %xmm0 1132 ; SSE3-NEXT: retq 1133 ; 1134 ; SSSE3-LABEL: shuffle_v4f32_0z24: 1135 ; SSSE3: # %bb.0: 1136 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0] 1137 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0] 1138 ; SSSE3-NEXT: xorps %xmm1, %xmm1 1139 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0] 1140 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3] 1141 ; SSSE3-NEXT: movaps %xmm1, %xmm0 1142 ; SSSE3-NEXT: retq 1143 ; 1144 ; SSE41-LABEL: shuffle_v4f32_0z24: 1145 ; SSE41: # %bb.0: 1146 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],xmm1[0] 1147 ; SSE41-NEXT: retq 1148 ; 1149 ; AVX-LABEL: shuffle_v4f32_0z24: 1150 ; AVX: # %bb.0: 1151 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],xmm1[0] 1152 ; AVX-NEXT: retq 1153 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 undef, i32 2, i32 4> 1154 %shuffle1 = shufflevector <4 x float> zeroinitializer, <4 x float> %shuffle, <4 x i32> <i32 4, i32 1, i32 6, i32 7> 1155 ret <4 x float> %shuffle1 1156 } 1157 1158 define <4 x i32> @shuffle_v4i32_4zzz(<4 x i32> %a) { 1159 ; SSE2-LABEL: shuffle_v4i32_4zzz: 1160 ; SSE2: # %bb.0: 1161 ; SSE2-NEXT: xorps %xmm1, %xmm1 1162 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 1163 ; SSE2-NEXT: movaps %xmm1, %xmm0 1164 ; SSE2-NEXT: 
retq 1165 ; 1166 ; SSE3-LABEL: shuffle_v4i32_4zzz: 1167 ; SSE3: # %bb.0: 1168 ; SSE3-NEXT: xorps %xmm1, %xmm1 1169 ; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 1170 ; SSE3-NEXT: movaps %xmm1, %xmm0 1171 ; SSE3-NEXT: retq 1172 ; 1173 ; SSSE3-LABEL: shuffle_v4i32_4zzz: 1174 ; SSSE3: # %bb.0: 1175 ; SSSE3-NEXT: xorps %xmm1, %xmm1 1176 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 1177 ; SSSE3-NEXT: movaps %xmm1, %xmm0 1178 ; SSSE3-NEXT: retq 1179 ; 1180 ; SSE41-LABEL: shuffle_v4i32_4zzz: 1181 ; SSE41: # %bb.0: 1182 ; SSE41-NEXT: xorps %xmm1, %xmm1 1183 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] 1184 ; SSE41-NEXT: retq 1185 ; 1186 ; AVX-LABEL: shuffle_v4i32_4zzz: 1187 ; AVX: # %bb.0: 1188 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 1189 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] 1190 ; AVX-NEXT: retq 1191 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3> 1192 ret <4 x i32> %shuffle 1193 } 1194 1195 define <4 x i32> @shuffle_v4i32_z4zz(<4 x i32> %a) { 1196 ; SSE2-LABEL: shuffle_v4i32_z4zz: 1197 ; SSE2: # %bb.0: 1198 ; SSE2-NEXT: xorps %xmm1, %xmm1 1199 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 1200 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1] 1201 ; SSE2-NEXT: retq 1202 ; 1203 ; SSE3-LABEL: shuffle_v4i32_z4zz: 1204 ; SSE3: # %bb.0: 1205 ; SSE3-NEXT: xorps %xmm1, %xmm1 1206 ; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 1207 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1] 1208 ; SSE3-NEXT: retq 1209 ; 1210 ; SSSE3-LABEL: shuffle_v4i32_z4zz: 1211 ; SSSE3: # %bb.0: 1212 ; SSSE3-NEXT: xorps %xmm1, %xmm1 1213 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 1214 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1] 1215 ; SSSE3-NEXT: retq 1216 ; 1217 ; SSE41-LABEL: shuffle_v4i32_z4zz: 1218 ; SSE41: # %bb.0: 1219 ; SSE41-NEXT: pxor %xmm1, %xmm1 1220 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7] 1221 ; SSE41-NEXT: pshufd 
{{.*#+}} xmm0 = xmm1[1,0,1,1] 1222 ; SSE41-NEXT: retq 1223 ; 1224 ; AVX1-LABEL: shuffle_v4i32_z4zz: 1225 ; AVX1: # %bb.0: 1226 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 1227 ; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] 1228 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,0,1,1] 1229 ; AVX1-NEXT: retq 1230 ; 1231 ; AVX2-SLOW-LABEL: shuffle_v4i32_z4zz: 1232 ; AVX2-SLOW: # %bb.0: 1233 ; AVX2-SLOW-NEXT: vxorps %xmm1, %xmm1, %xmm1 1234 ; AVX2-SLOW-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] 1235 ; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,0,1,1] 1236 ; AVX2-SLOW-NEXT: retq 1237 ; 1238 ; AVX2-FAST-LABEL: shuffle_v4i32_z4zz: 1239 ; AVX2-FAST: # %bb.0: 1240 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero 1241 ; AVX2-FAST-NEXT: retq 1242 ; 1243 ; AVX512VL-LABEL: shuffle_v4i32_z4zz: 1244 ; AVX512VL: # %bb.0: 1245 ; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero 1246 ; AVX512VL-NEXT: retq 1247 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 2, i32 4, i32 3, i32 0> 1248 ret <4 x i32> %shuffle 1249 } 1250 1251 define <4 x i32> @shuffle_v4i32_zz4z(<4 x i32> %a) { 1252 ; SSE2-LABEL: shuffle_v4i32_zz4z: 1253 ; SSE2: # %bb.0: 1254 ; SSE2-NEXT: xorps %xmm1, %xmm1 1255 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 1256 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1] 1257 ; SSE2-NEXT: retq 1258 ; 1259 ; SSE3-LABEL: shuffle_v4i32_zz4z: 1260 ; SSE3: # %bb.0: 1261 ; SSE3-NEXT: xorps %xmm1, %xmm1 1262 ; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 1263 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1] 1264 ; SSE3-NEXT: retq 1265 ; 1266 ; SSSE3-LABEL: shuffle_v4i32_zz4z: 1267 ; SSSE3: # %bb.0: 1268 ; SSSE3-NEXT: xorps %xmm1, %xmm1 1269 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 1270 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1] 1271 ; SSSE3-NEXT: retq 1272 ; 1273 ; 
SSE41-LABEL: shuffle_v4i32_zz4z: 1274 ; SSE41: # %bb.0: 1275 ; SSE41-NEXT: pxor %xmm1, %xmm1 1276 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7] 1277 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1] 1278 ; SSE41-NEXT: retq 1279 ; 1280 ; AVX1-LABEL: shuffle_v4i32_zz4z: 1281 ; AVX1: # %bb.0: 1282 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 1283 ; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] 1284 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,0,1] 1285 ; AVX1-NEXT: retq 1286 ; 1287 ; AVX2-SLOW-LABEL: shuffle_v4i32_zz4z: 1288 ; AVX2-SLOW: # %bb.0: 1289 ; AVX2-SLOW-NEXT: vxorps %xmm1, %xmm1, %xmm1 1290 ; AVX2-SLOW-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] 1291 ; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,0,1] 1292 ; AVX2-SLOW-NEXT: retq 1293 ; 1294 ; AVX2-FAST-LABEL: shuffle_v4i32_zz4z: 1295 ; AVX2-FAST: # %bb.0: 1296 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero 1297 ; AVX2-FAST-NEXT: retq 1298 ; 1299 ; AVX512VL-LABEL: shuffle_v4i32_zz4z: 1300 ; AVX512VL: # %bb.0: 1301 ; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero 1302 ; AVX512VL-NEXT: retq 1303 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 4, i32 0> 1304 ret <4 x i32> %shuffle 1305 } 1306 1307 define <4 x i32> @shuffle_v4i32_zuu4(<4 x i32> %a) { 1308 ; SSE-LABEL: shuffle_v4i32_zuu4: 1309 ; SSE: # %bb.0: 1310 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3] 1311 ; SSE-NEXT: retq 1312 ; 1313 ; AVX-LABEL: shuffle_v4i32_zuu4: 1314 ; AVX: # %bb.0: 1315 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3] 1316 ; AVX-NEXT: retq 1317 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 4> 1318 ret <4 x i32> %shuffle 1319 } 
1320 1321 define <4 x i32> @shuffle_v4i32_z6zz(<4 x i32> %a) { 1322 ; SSE2-LABEL: shuffle_v4i32_z6zz: 1323 ; SSE2: # %bb.0: 1324 ; SSE2-NEXT: xorps %xmm1, %xmm1 1325 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0] 1326 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3] 1327 ; SSE2-NEXT: retq 1328 ; 1329 ; SSE3-LABEL: shuffle_v4i32_z6zz: 1330 ; SSE3: # %bb.0: 1331 ; SSE3-NEXT: xorps %xmm1, %xmm1 1332 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0] 1333 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3] 1334 ; SSE3-NEXT: retq 1335 ; 1336 ; SSSE3-LABEL: shuffle_v4i32_z6zz: 1337 ; SSSE3: # %bb.0: 1338 ; SSSE3-NEXT: xorps %xmm1, %xmm1 1339 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0] 1340 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3] 1341 ; SSSE3-NEXT: retq 1342 ; 1343 ; SSE41-LABEL: shuffle_v4i32_z6zz: 1344 ; SSE41: # %bb.0: 1345 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3] 1346 ; SSE41-NEXT: pxor %xmm0, %xmm0 1347 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7] 1348 ; SSE41-NEXT: retq 1349 ; 1350 ; AVX1-LABEL: shuffle_v4i32_z6zz: 1351 ; AVX1: # %bb.0: 1352 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3] 1353 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 1354 ; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3] 1355 ; AVX1-NEXT: retq 1356 ; 1357 ; AVX2-SLOW-LABEL: shuffle_v4i32_z6zz: 1358 ; AVX2-SLOW: # %bb.0: 1359 ; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3] 1360 ; AVX2-SLOW-NEXT: vxorps %xmm1, %xmm1, %xmm1 1361 ; AVX2-SLOW-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3] 1362 ; AVX2-SLOW-NEXT: retq 1363 ; 1364 ; AVX2-FAST-LABEL: shuffle_v4i32_z6zz: 1365 ; AVX2-FAST: # %bb.0: 1366 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[8,9,10,11],zero,zero,zero,zero,zero,zero,zero,zero 1367 ; AVX2-FAST-NEXT: retq 1368 ; 1369 ; AVX512VL-LABEL: shuffle_v4i32_z6zz: 1370 ; AVX512VL: # %bb.0: 1371 ; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = 
zero,zero,zero,zero,xmm0[8,9,10,11],zero,zero,zero,zero,zero,zero,zero,zero 1372 ; AVX512VL-NEXT: retq 1373 %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3> 1374 ret <4 x i32> %shuffle 1375 } 1376 1377 define <4 x i32> @shuffle_v4i32_7012(<4 x i32> %a, <4 x i32> %b) { 1378 ; SSE2-LABEL: shuffle_v4i32_7012: 1379 ; SSE2: # %bb.0: 1380 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[0,0] 1381 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2] 1382 ; SSE2-NEXT: movaps %xmm1, %xmm0 1383 ; SSE2-NEXT: retq 1384 ; 1385 ; SSE3-LABEL: shuffle_v4i32_7012: 1386 ; SSE3: # %bb.0: 1387 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[0,0] 1388 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2] 1389 ; SSE3-NEXT: movaps %xmm1, %xmm0 1390 ; SSE3-NEXT: retq 1391 ; 1392 ; SSSE3-LABEL: shuffle_v4i32_7012: 1393 ; SSSE3: # %bb.0: 1394 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11] 1395 ; SSSE3-NEXT: retq 1396 ; 1397 ; SSE41-LABEL: shuffle_v4i32_7012: 1398 ; SSE41: # %bb.0: 1399 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11] 1400 ; SSE41-NEXT: retq 1401 ; 1402 ; AVX-LABEL: shuffle_v4i32_7012: 1403 ; AVX: # %bb.0: 1404 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11] 1405 ; AVX-NEXT: retq 1406 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 0, i32 1, i32 2> 1407 ret <4 x i32> %shuffle 1408 } 1409 1410 define <4 x i32> @shuffle_v4i32_6701(<4 x i32> %a, <4 x i32> %b) { 1411 ; SSE2-LABEL: shuffle_v4i32_6701: 1412 ; SSE2: # %bb.0: 1413 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0] 1414 ; SSE2-NEXT: movapd %xmm1, %xmm0 1415 ; SSE2-NEXT: retq 1416 ; 1417 ; SSE3-LABEL: shuffle_v4i32_6701: 1418 ; SSE3: # %bb.0: 1419 ; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0] 1420 ; SSE3-NEXT: movapd %xmm1, %xmm0 1421 ; SSE3-NEXT: retq 1422 ; 1423 ; SSSE3-LABEL: shuffle_v4i32_6701: 1424 
; SSSE3: # %bb.0: 1425 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] 1426 ; SSSE3-NEXT: retq 1427 ; 1428 ; SSE41-LABEL: shuffle_v4i32_6701: 1429 ; SSE41: # %bb.0: 1430 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] 1431 ; SSE41-NEXT: retq 1432 ; 1433 ; AVX-LABEL: shuffle_v4i32_6701: 1434 ; AVX: # %bb.0: 1435 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] 1436 ; AVX-NEXT: retq 1437 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1> 1438 ret <4 x i32> %shuffle 1439 } 1440 1441 define <4 x i32> @shuffle_v4i32_5670(<4 x i32> %a, <4 x i32> %b) { 1442 ; SSE2-LABEL: shuffle_v4i32_5670: 1443 ; SSE2: # %bb.0: 1444 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0] 1445 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[2,0] 1446 ; SSE2-NEXT: movaps %xmm1, %xmm0 1447 ; SSE2-NEXT: retq 1448 ; 1449 ; SSE3-LABEL: shuffle_v4i32_5670: 1450 ; SSE3: # %bb.0: 1451 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0] 1452 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[2,0] 1453 ; SSE3-NEXT: movaps %xmm1, %xmm0 1454 ; SSE3-NEXT: retq 1455 ; 1456 ; SSSE3-LABEL: shuffle_v4i32_5670: 1457 ; SSSE3: # %bb.0: 1458 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3] 1459 ; SSSE3-NEXT: retq 1460 ; 1461 ; SSE41-LABEL: shuffle_v4i32_5670: 1462 ; SSE41: # %bb.0: 1463 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3] 1464 ; SSE41-NEXT: retq 1465 ; 1466 ; AVX-LABEL: shuffle_v4i32_5670: 1467 ; AVX: # %bb.0: 1468 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3] 1469 ; AVX-NEXT: retq 1470 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 5, i32 6, i32 7, i32 0> 1471 ret <4 x i32> %shuffle 1472 } 1473 1474 define <4 x i32> @shuffle_v4i32_1234(<4 x i32> %a, <4 x i32> %b) { 1475 ; SSE2-LABEL: 
shuffle_v4i32_1234: 1476 ; SSE2: # %bb.0: 1477 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0] 1478 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0] 1479 ; SSE2-NEXT: retq 1480 ; 1481 ; SSE3-LABEL: shuffle_v4i32_1234: 1482 ; SSE3: # %bb.0: 1483 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0] 1484 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0] 1485 ; SSE3-NEXT: retq 1486 ; 1487 ; SSSE3-LABEL: shuffle_v4i32_1234: 1488 ; SSSE3: # %bb.0: 1489 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3] 1490 ; SSSE3-NEXT: movdqa %xmm1, %xmm0 1491 ; SSSE3-NEXT: retq 1492 ; 1493 ; SSE41-LABEL: shuffle_v4i32_1234: 1494 ; SSE41: # %bb.0: 1495 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3] 1496 ; SSE41-NEXT: movdqa %xmm1, %xmm0 1497 ; SSE41-NEXT: retq 1498 ; 1499 ; AVX-LABEL: shuffle_v4i32_1234: 1500 ; AVX: # %bb.0: 1501 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3] 1502 ; AVX-NEXT: retq 1503 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4> 1504 ret <4 x i32> %shuffle 1505 } 1506 1507 define <4 x i32> @shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b) { 1508 ; SSE2-LABEL: shuffle_v4i32_2345: 1509 ; SSE2: # %bb.0: 1510 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] 1511 ; SSE2-NEXT: retq 1512 ; 1513 ; SSE3-LABEL: shuffle_v4i32_2345: 1514 ; SSE3: # %bb.0: 1515 ; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] 1516 ; SSE3-NEXT: retq 1517 ; 1518 ; SSSE3-LABEL: shuffle_v4i32_2345: 1519 ; SSSE3: # %bb.0: 1520 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] 1521 ; SSSE3-NEXT: movdqa %xmm1, %xmm0 1522 ; SSSE3-NEXT: retq 1523 ; 1524 ; SSE41-LABEL: shuffle_v4i32_2345: 1525 ; SSE41: # %bb.0: 1526 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] 1527 ; SSE41-NEXT: movdqa %xmm1, %xmm0 1528 ; SSE41-NEXT: retq 1529 ; 1530 ; 
AVX-LABEL: shuffle_v4i32_2345: 1531 ; AVX: # %bb.0: 1532 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] 1533 ; AVX-NEXT: retq 1534 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 1535 ret <4 x i32> %shuffle 1536 } 1537 1538 ; PR22391 1539 define <4 x i32> @shuffle_v4i32_2456(<4 x i32> %a, <4 x i32> %b) { 1540 ; SSE2-LABEL: shuffle_v4i32_2456: 1541 ; SSE2: # %bb.0: 1542 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0] 1543 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2] 1544 ; SSE2-NEXT: retq 1545 ; 1546 ; SSE3-LABEL: shuffle_v4i32_2456: 1547 ; SSE3: # %bb.0: 1548 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0] 1549 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2] 1550 ; SSE3-NEXT: retq 1551 ; 1552 ; SSSE3-LABEL: shuffle_v4i32_2456: 1553 ; SSSE3: # %bb.0: 1554 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,2] 1555 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11] 1556 ; SSSE3-NEXT: movdqa %xmm1, %xmm0 1557 ; SSSE3-NEXT: retq 1558 ; 1559 ; SSE41-LABEL: shuffle_v4i32_2456: 1560 ; SSE41: # %bb.0: 1561 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,2] 1562 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11] 1563 ; SSE41-NEXT: movdqa %xmm1, %xmm0 1564 ; SSE41-NEXT: retq 1565 ; 1566 ; AVX-LABEL: shuffle_v4i32_2456: 1567 ; AVX: # %bb.0: 1568 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,2,2] 1569 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11] 1570 ; AVX-NEXT: retq 1571 %s1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 2> 1572 %s2 = shufflevector <4 x i32> %s1, <4 x i32> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6> 1573 ret <4 x i32> %s2 1574 } 1575 1576 define <4 x i32> @shuffle_v4i32_40u1(<4 x i32> %a, <4 x i32> %b) { 1577 ; SSE-LABEL: shuffle_v4i32_40u1: 1578 ; SSE: # %bb.0: 1579 ; SSE-NEXT: unpcklps {{.*#+}} 
xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1580 ; SSE-NEXT: movaps %xmm1, %xmm0 1581 ; SSE-NEXT: retq 1582 ; 1583 ; AVX-LABEL: shuffle_v4i32_40u1: 1584 ; AVX: # %bb.0: 1585 ; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1586 ; AVX-NEXT: retq 1587 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 undef, i32 1> 1588 ret <4 x i32> %shuffle 1589 } 1590 1591 define <4 x i32> @shuffle_v4i32_3456(<4 x i32> %a, <4 x i32> %b) { 1592 ; SSE2-LABEL: shuffle_v4i32_3456: 1593 ; SSE2: # %bb.0: 1594 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[0,0] 1595 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2] 1596 ; SSE2-NEXT: retq 1597 ; 1598 ; SSE3-LABEL: shuffle_v4i32_3456: 1599 ; SSE3: # %bb.0: 1600 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[0,0] 1601 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2] 1602 ; SSE3-NEXT: retq 1603 ; 1604 ; SSSE3-LABEL: shuffle_v4i32_3456: 1605 ; SSSE3: # %bb.0: 1606 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11] 1607 ; SSSE3-NEXT: movdqa %xmm1, %xmm0 1608 ; SSSE3-NEXT: retq 1609 ; 1610 ; SSE41-LABEL: shuffle_v4i32_3456: 1611 ; SSE41: # %bb.0: 1612 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11] 1613 ; SSE41-NEXT: movdqa %xmm1, %xmm0 1614 ; SSE41-NEXT: retq 1615 ; 1616 ; AVX-LABEL: shuffle_v4i32_3456: 1617 ; AVX: # %bb.0: 1618 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11] 1619 ; AVX-NEXT: retq 1620 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6> 1621 ret <4 x i32> %shuffle 1622 } 1623 1624 define <4 x i32> @shuffle_v4i32_0u1u(<4 x i32> %a, <4 x i32> %b) { 1625 ; SSE2-LABEL: shuffle_v4i32_0u1u: 1626 ; SSE2: # %bb.0: 1627 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3] 1628 ; SSE2-NEXT: retq 1629 ; 1630 ; SSE3-LABEL: shuffle_v4i32_0u1u: 1631 ; SSE3: # %bb.0: 1632 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3] 
1633 ; SSE3-NEXT: retq 1634 ; 1635 ; SSSE3-LABEL: shuffle_v4i32_0u1u: 1636 ; SSSE3: # %bb.0: 1637 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3] 1638 ; SSSE3-NEXT: retq 1639 ; 1640 ; SSE41-LABEL: shuffle_v4i32_0u1u: 1641 ; SSE41: # %bb.0: 1642 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 1643 ; SSE41-NEXT: retq 1644 ; 1645 ; AVX-LABEL: shuffle_v4i32_0u1u: 1646 ; AVX: # %bb.0: 1647 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 1648 ; AVX-NEXT: retq 1649 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef> 1650 ret <4 x i32> %shuffle 1651 } 1652 1653 define <4 x i32> @shuffle_v4i32_0z1z(<4 x i32> %a) { 1654 ; SSE2-LABEL: shuffle_v4i32_0z1z: 1655 ; SSE2: # %bb.0: 1656 ; SSE2-NEXT: xorps %xmm1, %xmm1 1657 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1658 ; SSE2-NEXT: retq 1659 ; 1660 ; SSE3-LABEL: shuffle_v4i32_0z1z: 1661 ; SSE3: # %bb.0: 1662 ; SSE3-NEXT: xorps %xmm1, %xmm1 1663 ; SSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1664 ; SSE3-NEXT: retq 1665 ; 1666 ; SSSE3-LABEL: shuffle_v4i32_0z1z: 1667 ; SSSE3: # %bb.0: 1668 ; SSSE3-NEXT: xorps %xmm1, %xmm1 1669 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1670 ; SSSE3-NEXT: retq 1671 ; 1672 ; SSE41-LABEL: shuffle_v4i32_0z1z: 1673 ; SSE41: # %bb.0: 1674 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 1675 ; SSE41-NEXT: retq 1676 ; 1677 ; AVX-LABEL: shuffle_v4i32_0z1z: 1678 ; AVX: # %bb.0: 1679 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 1680 ; AVX-NEXT: retq 1681 %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 1, i32 7> 1682 ret <4 x i32> %shuffle 1683 } 1684 1685 define <4 x i32> @shuffle_v4i32_01zu(<4 x i32> %a) { 1686 ; SSE-LABEL: shuffle_v4i32_01zu: 1687 ; SSE: # %bb.0: 1688 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 1689 ; SSE-NEXT: retq 1690 ; 1691 ; AVX-LABEL: 
shuffle_v4i32_01zu: 1692 ; AVX: # %bb.0: 1693 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 1694 ; AVX-NEXT: retq 1695 %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 7, i32 undef> 1696 ret <4 x i32> %shuffle 1697 } 1698 1699 define <4 x i32> @shuffle_v4i32_0z23(<4 x i32> %a) { 1700 ; SSE2-LABEL: shuffle_v4i32_0z23: 1701 ; SSE2: # %bb.0: 1702 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0 1703 ; SSE2-NEXT: retq 1704 ; 1705 ; SSE3-LABEL: shuffle_v4i32_0z23: 1706 ; SSE3: # %bb.0: 1707 ; SSE3-NEXT: andps {{.*}}(%rip), %xmm0 1708 ; SSE3-NEXT: retq 1709 ; 1710 ; SSSE3-LABEL: shuffle_v4i32_0z23: 1711 ; SSSE3: # %bb.0: 1712 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0 1713 ; SSSE3-NEXT: retq 1714 ; 1715 ; SSE41-LABEL: shuffle_v4i32_0z23: 1716 ; SSE41: # %bb.0: 1717 ; SSE41-NEXT: xorps %xmm1, %xmm1 1718 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3] 1719 ; SSE41-NEXT: retq 1720 ; 1721 ; AVX-LABEL: shuffle_v4i32_0z23: 1722 ; AVX: # %bb.0: 1723 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 1724 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3] 1725 ; AVX-NEXT: retq 1726 %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 2, i32 3> 1727 ret <4 x i32> %shuffle 1728 } 1729 1730 define <4 x i32> @shuffle_v4i32_01z3(<4 x i32> %a) { 1731 ; SSE2-LABEL: shuffle_v4i32_01z3: 1732 ; SSE2: # %bb.0: 1733 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0 1734 ; SSE2-NEXT: retq 1735 ; 1736 ; SSE3-LABEL: shuffle_v4i32_01z3: 1737 ; SSE3: # %bb.0: 1738 ; SSE3-NEXT: andps {{.*}}(%rip), %xmm0 1739 ; SSE3-NEXT: retq 1740 ; 1741 ; SSSE3-LABEL: shuffle_v4i32_01z3: 1742 ; SSSE3: # %bb.0: 1743 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0 1744 ; SSSE3-NEXT: retq 1745 ; 1746 ; SSE41-LABEL: shuffle_v4i32_01z3: 1747 ; SSE41: # %bb.0: 1748 ; SSE41-NEXT: xorps %xmm1, %xmm1 1749 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3] 1750 ; SSE41-NEXT: retq 1751 ; 1752 ; AVX-LABEL: shuffle_v4i32_01z3: 1753 ; AVX: # 
%bb.0: 1754 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 1755 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3] 1756 ; AVX-NEXT: retq 1757 %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 3> 1758 ret <4 x i32> %shuffle 1759 } 1760 1761 define <4 x i32> @shuffle_v4i32_012z(<4 x i32> %a) { 1762 ; SSE2-LABEL: shuffle_v4i32_012z: 1763 ; SSE2: # %bb.0: 1764 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0 1765 ; SSE2-NEXT: retq 1766 ; 1767 ; SSE3-LABEL: shuffle_v4i32_012z: 1768 ; SSE3: # %bb.0: 1769 ; SSE3-NEXT: andps {{.*}}(%rip), %xmm0 1770 ; SSE3-NEXT: retq 1771 ; 1772 ; SSSE3-LABEL: shuffle_v4i32_012z: 1773 ; SSSE3: # %bb.0: 1774 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0 1775 ; SSSE3-NEXT: retq 1776 ; 1777 ; SSE41-LABEL: shuffle_v4i32_012z: 1778 ; SSE41: # %bb.0: 1779 ; SSE41-NEXT: xorps %xmm1, %xmm1 1780 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3] 1781 ; SSE41-NEXT: retq 1782 ; 1783 ; AVX-LABEL: shuffle_v4i32_012z: 1784 ; AVX: # %bb.0: 1785 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 1786 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3] 1787 ; AVX-NEXT: retq 1788 %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 7> 1789 ret <4 x i32> %shuffle 1790 } 1791 1792 define <4 x i32> @shuffle_v4i32_0zz3(<4 x i32> %a) { 1793 ; SSE2-LABEL: shuffle_v4i32_0zz3: 1794 ; SSE2: # %bb.0: 1795 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0 1796 ; SSE2-NEXT: retq 1797 ; 1798 ; SSE3-LABEL: shuffle_v4i32_0zz3: 1799 ; SSE3: # %bb.0: 1800 ; SSE3-NEXT: andps {{.*}}(%rip), %xmm0 1801 ; SSE3-NEXT: retq 1802 ; 1803 ; SSSE3-LABEL: shuffle_v4i32_0zz3: 1804 ; SSSE3: # %bb.0: 1805 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0 1806 ; SSSE3-NEXT: retq 1807 ; 1808 ; SSE41-LABEL: shuffle_v4i32_0zz3: 1809 ; SSE41: # %bb.0: 1810 ; SSE41-NEXT: xorps %xmm1, %xmm1 1811 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] 1812 ; SSE41-NEXT: retq 1813 ; 1814 ; AVX-LABEL: shuffle_v4i32_0zz3: 
1815 ; AVX: # %bb.0: 1816 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 1817 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] 1818 ; AVX-NEXT: retq 1819 %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 3> 1820 ret <4 x i32> %shuffle 1821 } 1822 1823 define <4 x i32> @shuffle_v4i32_bitcast_0415(<4 x i32> %a, <4 x i32> %b) { 1824 ; SSE-LABEL: shuffle_v4i32_bitcast_0415: 1825 ; SSE: # %bb.0: 1826 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1827 ; SSE-NEXT: retq 1828 ; 1829 ; AVX-LABEL: shuffle_v4i32_bitcast_0415: 1830 ; AVX: # %bb.0: 1831 ; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1832 ; AVX-NEXT: retq 1833 %shuffle32 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 0, i32 4> 1834 %bitcast64 = bitcast <4 x i32> %shuffle32 to <2 x double> 1835 %shuffle64 = shufflevector <2 x double> %bitcast64, <2 x double> undef, <2 x i32> <i32 1, i32 0> 1836 %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x i32> 1837 ret <4 x i32> %bitcast32 1838 } 1839 1840 define <4 x float> @shuffle_v4f32_bitcast_4401(<4 x float> %a, <4 x i32> %b) { 1841 ; SSE-LABEL: shuffle_v4f32_bitcast_4401: 1842 ; SSE: # %bb.0: 1843 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,1] 1844 ; SSE-NEXT: movaps %xmm1, %xmm0 1845 ; SSE-NEXT: retq 1846 ; 1847 ; AVX1OR2-LABEL: shuffle_v4f32_bitcast_4401: 1848 ; AVX1OR2: # %bb.0: 1849 ; AVX1OR2-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,0],xmm0[0,1] 1850 ; AVX1OR2-NEXT: retq 1851 ; 1852 ; AVX512VL-LABEL: shuffle_v4f32_bitcast_4401: 1853 ; AVX512VL: # %bb.0: 1854 ; AVX512VL-NEXT: vbroadcastss %xmm1, %xmm1 1855 ; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1856 ; AVX512VL-NEXT: retq 1857 %1 = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1> 1858 %2 = bitcast <4 x i32> %1 to <2 x double> 1859 %3 = bitcast <4 x float> %a to <2 x double> 1860 %4 = shufflevector <2 x double> %2, <2 x double> 
%3, <2 x i32> <i32 0, i32 2> 1861 %5 = bitcast <2 x double> %4 to <4 x float> 1862 ret <4 x float> %5 1863 } 1864 1865 define <4 x float> @shuffle_v4f32_bitcast_0045(<4 x float> %a, <4 x i32> %b) { 1866 ; SSE-LABEL: shuffle_v4f32_bitcast_0045: 1867 ; SSE: # %bb.0: 1868 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,1] 1869 ; SSE-NEXT: retq 1870 ; 1871 ; AVX-LABEL: shuffle_v4f32_bitcast_0045: 1872 ; AVX: # %bb.0: 1873 ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,1] 1874 ; AVX-NEXT: retq 1875 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1> 1876 %2 = bitcast <4 x i32> %b to <4 x float> 1877 %3 = shufflevector <4 x float> %1, <4 x float> %2, <4 x i32> <i32 1, i32 0, i32 4, i32 5> 1878 ret <4 x float> %3 1879 } 1880 1881 define <4 x float> @mask_v4f32_4127(<4 x float> %a, <4 x float> %b) { 1882 ; SSE2-LABEL: mask_v4f32_4127: 1883 ; SSE2: # %bb.0: 1884 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],xmm0[1,2] 1885 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,3,1] 1886 ; SSE2-NEXT: movaps %xmm1, %xmm0 1887 ; SSE2-NEXT: retq 1888 ; 1889 ; SSE3-LABEL: mask_v4f32_4127: 1890 ; SSE3: # %bb.0: 1891 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],xmm0[1,2] 1892 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,3,1] 1893 ; SSE3-NEXT: movaps %xmm1, %xmm0 1894 ; SSE3-NEXT: retq 1895 ; 1896 ; SSSE3-LABEL: mask_v4f32_4127: 1897 ; SSSE3: # %bb.0: 1898 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],xmm0[1,2] 1899 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,3,1] 1900 ; SSSE3-NEXT: movaps %xmm1, %xmm0 1901 ; SSSE3-NEXT: retq 1902 ; 1903 ; SSE41-LABEL: mask_v4f32_4127: 1904 ; SSE41: # %bb.0: 1905 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3] 1906 ; SSE41-NEXT: retq 1907 ; 1908 ; AVX-LABEL: mask_v4f32_4127: 1909 ; AVX: # %bb.0: 1910 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3] 1911 ; AVX-NEXT: retq 1912 %1 = bitcast <4 x float> %a to <4 x i32> 1913 %2 = bitcast <4 x float> %b to <4 x i32> 1914 %3 = and 
<4 x i32> %1, <i32 0, i32 -1, i32 -1, i32 0> 1915 %4 = and <4 x i32> %2, <i32 -1, i32 0, i32 0, i32 -1> 1916 %5 = or <4 x i32> %4, %3 1917 %6 = bitcast <4 x i32> %5 to <4 x float> 1918 ret <4 x float> %6 1919 } 1920 1921 define <4 x float> @mask_v4f32_0127(<4 x float> %a, <4 x float> %b) { 1922 ; SSE2-LABEL: mask_v4f32_0127: 1923 ; SSE2: # %bb.0: 1924 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0] 1925 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] 1926 ; SSE2-NEXT: movaps %xmm1, %xmm0 1927 ; SSE2-NEXT: retq 1928 ; 1929 ; SSE3-LABEL: mask_v4f32_0127: 1930 ; SSE3: # %bb.0: 1931 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0] 1932 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] 1933 ; SSE3-NEXT: movaps %xmm1, %xmm0 1934 ; SSE3-NEXT: retq 1935 ; 1936 ; SSSE3-LABEL: mask_v4f32_0127: 1937 ; SSSE3: # %bb.0: 1938 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0] 1939 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] 1940 ; SSSE3-NEXT: movaps %xmm1, %xmm0 1941 ; SSSE3-NEXT: retq 1942 ; 1943 ; SSE41-LABEL: mask_v4f32_0127: 1944 ; SSE41: # %bb.0: 1945 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] 1946 ; SSE41-NEXT: retq 1947 ; 1948 ; AVX-LABEL: mask_v4f32_0127: 1949 ; AVX: # %bb.0: 1950 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] 1951 ; AVX-NEXT: retq 1952 %1 = bitcast <4 x float> %a to <2 x i64> 1953 %2 = bitcast <4 x float> %b to <2 x i64> 1954 %3 = and <2 x i64> %1, <i64 0, i64 -4294967296> 1955 %4 = and <2 x i64> %2, <i64 -1, i64 4294967295> 1956 %5 = or <2 x i64> %4, %3 1957 %6 = bitcast <2 x i64> %5 to <4 x float> 1958 ret <4 x float> %6 1959 } 1960 1961 define <4 x i32> @mask_v4i32_0127(<4 x i32> %a, <4 x i32> %b) { 1962 ; SSE2-LABEL: mask_v4i32_0127: 1963 ; SSE2: # %bb.0: 1964 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0] 1965 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] 1966 ; SSE2-NEXT: movaps %xmm1, %xmm0 1967 ; SSE2-NEXT: retq 1968 ; 1969 ; SSE3-LABEL: mask_v4i32_0127: 
1970 ; SSE3: # %bb.0: 1971 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0] 1972 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] 1973 ; SSE3-NEXT: movaps %xmm1, %xmm0 1974 ; SSE3-NEXT: retq 1975 ; 1976 ; SSSE3-LABEL: mask_v4i32_0127: 1977 ; SSSE3: # %bb.0: 1978 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0] 1979 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] 1980 ; SSSE3-NEXT: movaps %xmm1, %xmm0 1981 ; SSSE3-NEXT: retq 1982 ; 1983 ; SSE41-LABEL: mask_v4i32_0127: 1984 ; SSE41: # %bb.0: 1985 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] 1986 ; SSE41-NEXT: retq 1987 ; 1988 ; AVX-LABEL: mask_v4i32_0127: 1989 ; AVX: # %bb.0: 1990 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] 1991 ; AVX-NEXT: retq 1992 %1 = bitcast <4 x i32> %a to <2 x i64> 1993 %2 = bitcast <4 x i32> %b to <2 x i64> 1994 %3 = and <2 x i64> %1, <i64 0, i64 -4294967296> 1995 %4 = and <2 x i64> %2, <i64 -1, i64 4294967295> 1996 %5 = or <2 x i64> %4, %3 1997 %6 = bitcast <2 x i64> %5 to <4 x i32> 1998 ret <4 x i32> %6 1999 } 2000 2001 define <4 x float> @broadcast_v4f32_0101_from_v2f32(<2 x float>* %x) { 2002 ; SSE2-LABEL: broadcast_v4f32_0101_from_v2f32: 2003 ; SSE2: # %bb.0: 2004 ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2005 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 2006 ; SSE2-NEXT: retq 2007 ; 2008 ; SSE3-LABEL: broadcast_v4f32_0101_from_v2f32: 2009 ; SSE3: # %bb.0: 2010 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] 2011 ; SSE3-NEXT: retq 2012 ; 2013 ; SSSE3-LABEL: broadcast_v4f32_0101_from_v2f32: 2014 ; SSSE3: # %bb.0: 2015 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] 2016 ; SSSE3-NEXT: retq 2017 ; 2018 ; SSE41-LABEL: broadcast_v4f32_0101_from_v2f32: 2019 ; SSE41: # %bb.0: 2020 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] 2021 ; SSE41-NEXT: retq 2022 ; 2023 ; AVX-LABEL: broadcast_v4f32_0101_from_v2f32: 2024 ; AVX: # %bb.0: 2025 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] 2026 ; AVX-NEXT: retq 2027 %1 = load <2 x float>, <2 x 
float>* %x, align 1 2028 %2 = shufflevector <2 x float> %1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 2029 ret <4 x float> %2 2030 } 2031 2032 define <4 x i32> @insert_reg_and_zero_v4i32(i32 %a) { 2033 ; SSE-LABEL: insert_reg_and_zero_v4i32: 2034 ; SSE: # %bb.0: 2035 ; SSE-NEXT: movd %edi, %xmm0 2036 ; SSE-NEXT: retq 2037 ; 2038 ; AVX-LABEL: insert_reg_and_zero_v4i32: 2039 ; AVX: # %bb.0: 2040 ; AVX-NEXT: vmovd %edi, %xmm0 2041 ; AVX-NEXT: retq 2042 %v = insertelement <4 x i32> undef, i32 %a, i32 0 2043 %shuffle = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7> 2044 ret <4 x i32> %shuffle 2045 } 2046 2047 define <4 x i32> @insert_mem_and_zero_v4i32(i32* %ptr) { 2048 ; SSE-LABEL: insert_mem_and_zero_v4i32: 2049 ; SSE: # %bb.0: 2050 ; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2051 ; SSE-NEXT: retq 2052 ; 2053 ; AVX-LABEL: insert_mem_and_zero_v4i32: 2054 ; AVX: # %bb.0: 2055 ; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2056 ; AVX-NEXT: retq 2057 %a = load i32, i32* %ptr 2058 %v = insertelement <4 x i32> undef, i32 %a, i32 0 2059 %shuffle = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7> 2060 ret <4 x i32> %shuffle 2061 } 2062 2063 define <4 x float> @insert_reg_and_zero_v4f32(float %a) { 2064 ; SSE2-LABEL: insert_reg_and_zero_v4f32: 2065 ; SSE2: # %bb.0: 2066 ; SSE2-NEXT: xorps %xmm1, %xmm1 2067 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 2068 ; SSE2-NEXT: movaps %xmm1, %xmm0 2069 ; SSE2-NEXT: retq 2070 ; 2071 ; SSE3-LABEL: insert_reg_and_zero_v4f32: 2072 ; SSE3: # %bb.0: 2073 ; SSE3-NEXT: xorps %xmm1, %xmm1 2074 ; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 2075 ; SSE3-NEXT: movaps %xmm1, %xmm0 2076 ; SSE3-NEXT: retq 2077 ; 2078 ; SSSE3-LABEL: insert_reg_and_zero_v4f32: 2079 ; SSSE3: # %bb.0: 2080 ; SSSE3-NEXT: xorps %xmm1, %xmm1 2081 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] 2082 ; SSSE3-NEXT: movaps 
%xmm1, %xmm0 2083 ; SSSE3-NEXT: retq 2084 ; 2085 ; SSE41-LABEL: insert_reg_and_zero_v4f32: 2086 ; SSE41: # %bb.0: 2087 ; SSE41-NEXT: xorps %xmm1, %xmm1 2088 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] 2089 ; SSE41-NEXT: retq 2090 ; 2091 ; AVX-LABEL: insert_reg_and_zero_v4f32: 2092 ; AVX: # %bb.0: 2093 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 2094 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] 2095 ; AVX-NEXT: retq 2096 %v = insertelement <4 x float> undef, float %a, i32 0 2097 %shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7> 2098 ret <4 x float> %shuffle 2099 } 2100 2101 define <4 x float> @insert_mem_and_zero_v4f32(float* %ptr) { 2102 ; SSE-LABEL: insert_mem_and_zero_v4f32: 2103 ; SSE: # %bb.0: 2104 ; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2105 ; SSE-NEXT: retq 2106 ; 2107 ; AVX-LABEL: insert_mem_and_zero_v4f32: 2108 ; AVX: # %bb.0: 2109 ; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2110 ; AVX-NEXT: retq 2111 %a = load float, float* %ptr 2112 %v = insertelement <4 x float> undef, float %a, i32 0 2113 %shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7> 2114 ret <4 x float> %shuffle 2115 } 2116 2117 define <4 x i32> @insert_reg_lo_v4i32(i64 %a, <4 x i32> %b) { 2118 ; SSE2-LABEL: insert_reg_lo_v4i32: 2119 ; SSE2: # %bb.0: 2120 ; SSE2-NEXT: movq %rdi, %xmm1 2121 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] 2122 ; SSE2-NEXT: retq 2123 ; 2124 ; SSE3-LABEL: insert_reg_lo_v4i32: 2125 ; SSE3: # %bb.0: 2126 ; SSE3-NEXT: movq %rdi, %xmm1 2127 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] 2128 ; SSE3-NEXT: retq 2129 ; 2130 ; SSSE3-LABEL: insert_reg_lo_v4i32: 2131 ; SSSE3: # %bb.0: 2132 ; SSSE3-NEXT: movq %rdi, %xmm1 2133 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] 2134 ; SSSE3-NEXT: retq 2135 ; 2136 ; SSE41-LABEL: insert_reg_lo_v4i32: 2137 ; SSE41: # %bb.0: 2138 ; SSE41-NEXT: movq %rdi, %xmm1 
2139 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] 2140 ; SSE41-NEXT: retq 2141 ; 2142 ; AVX1-LABEL: insert_reg_lo_v4i32: 2143 ; AVX1: # %bb.0: 2144 ; AVX1-NEXT: vmovq %rdi, %xmm1 2145 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] 2146 ; AVX1-NEXT: retq 2147 ; 2148 ; AVX2OR512VL-LABEL: insert_reg_lo_v4i32: 2149 ; AVX2OR512VL: # %bb.0: 2150 ; AVX2OR512VL-NEXT: vmovq %rdi, %xmm1 2151 ; AVX2OR512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] 2152 ; AVX2OR512VL-NEXT: retq 2153 %a.cast = bitcast i64 %a to <2 x i32> 2154 %v = shufflevector <2 x i32> %a.cast, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 2155 %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7> 2156 ret <4 x i32> %shuffle 2157 } 2158 2159 define <4 x i32> @insert_mem_lo_v4i32(<2 x i32>* %ptr, <4 x i32> %b) { 2160 ; SSE2-LABEL: insert_mem_lo_v4i32: 2161 ; SSE2: # %bb.0: 2162 ; SSE2-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1] 2163 ; SSE2-NEXT: retq 2164 ; 2165 ; SSE3-LABEL: insert_mem_lo_v4i32: 2166 ; SSE3: # %bb.0: 2167 ; SSE3-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1] 2168 ; SSE3-NEXT: retq 2169 ; 2170 ; SSSE3-LABEL: insert_mem_lo_v4i32: 2171 ; SSSE3: # %bb.0: 2172 ; SSSE3-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1] 2173 ; SSSE3-NEXT: retq 2174 ; 2175 ; SSE41-LABEL: insert_mem_lo_v4i32: 2176 ; SSE41: # %bb.0: 2177 ; SSE41-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 2178 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] 2179 ; SSE41-NEXT: retq 2180 ; 2181 ; AVX-LABEL: insert_mem_lo_v4i32: 2182 ; AVX: # %bb.0: 2183 ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 2184 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] 2185 ; AVX-NEXT: retq 2186 %a = load <2 x i32>, <2 x i32>* %ptr 2187 %v = shufflevector <2 x i32> %a, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 2188 %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7> 2189 ret <4 x i32> 
%shuffle 2190 } 2191 2192 define <4 x i32> @insert_reg_hi_v4i32(i64 %a, <4 x i32> %b) { 2193 ; SSE-LABEL: insert_reg_hi_v4i32: 2194 ; SSE: # %bb.0: 2195 ; SSE-NEXT: movq %rdi, %xmm1 2196 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2197 ; SSE-NEXT: retq 2198 ; 2199 ; AVX-LABEL: insert_reg_hi_v4i32: 2200 ; AVX: # %bb.0: 2201 ; AVX-NEXT: vmovq %rdi, %xmm1 2202 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2203 ; AVX-NEXT: retq 2204 %a.cast = bitcast i64 %a to <2 x i32> 2205 %v = shufflevector <2 x i32> %a.cast, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 2206 %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1> 2207 ret <4 x i32> %shuffle 2208 } 2209 2210 define <4 x i32> @insert_mem_hi_v4i32(<2 x i32>* %ptr, <4 x i32> %b) { 2211 ; SSE-LABEL: insert_mem_hi_v4i32: 2212 ; SSE: # %bb.0: 2213 ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 2214 ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2215 ; SSE-NEXT: retq 2216 ; 2217 ; AVX-LABEL: insert_mem_hi_v4i32: 2218 ; AVX: # %bb.0: 2219 ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 2220 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2221 ; AVX-NEXT: retq 2222 %a = load <2 x i32>, <2 x i32>* %ptr 2223 %v = shufflevector <2 x i32> %a, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 2224 %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1> 2225 ret <4 x i32> %shuffle 2226 } 2227 2228 define <4 x float> @insert_reg_lo_v4f32(double %a, <4 x float> %b) { 2229 ; SSE2-LABEL: insert_reg_lo_v4f32: 2230 ; SSE2: # %bb.0: 2231 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] 2232 ; SSE2-NEXT: movapd %xmm1, %xmm0 2233 ; SSE2-NEXT: retq 2234 ; 2235 ; SSE3-LABEL: insert_reg_lo_v4f32: 2236 ; SSE3: # %bb.0: 2237 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] 2238 ; SSE3-NEXT: movapd %xmm1, %xmm0 2239 ; SSE3-NEXT: retq 2240 ; 2241 ; SSSE3-LABEL: insert_reg_lo_v4f32: 2242 ; SSSE3: # %bb.0: 2243 ; 
SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] 2244 ; SSSE3-NEXT: movapd %xmm1, %xmm0 2245 ; SSSE3-NEXT: retq 2246 ; 2247 ; SSE41-LABEL: insert_reg_lo_v4f32: 2248 ; SSE41: # %bb.0: 2249 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] 2250 ; SSE41-NEXT: retq 2251 ; 2252 ; AVX-LABEL: insert_reg_lo_v4f32: 2253 ; AVX: # %bb.0: 2254 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] 2255 ; AVX-NEXT: retq 2256 %a.cast = bitcast double %a to <2 x float> 2257 %v = shufflevector <2 x float> %a.cast, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 2258 %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7> 2259 ret <4 x float> %shuffle 2260 } 2261 2262 define <4 x float> @insert_mem_lo_v4f32(<2 x float>* %ptr, <4 x float> %b) { 2263 ; SSE-LABEL: insert_mem_lo_v4f32: 2264 ; SSE: # %bb.0: 2265 ; SSE-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1] 2266 ; SSE-NEXT: retq 2267 ; 2268 ; AVX-LABEL: insert_mem_lo_v4f32: 2269 ; AVX: # %bb.0: 2270 ; AVX-NEXT: vmovlpd {{.*#+}} xmm0 = mem[0],xmm0[1] 2271 ; AVX-NEXT: retq 2272 %a = load <2 x float>, <2 x float>* %ptr 2273 %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 2274 %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7> 2275 ret <4 x float> %shuffle 2276 } 2277 2278 define <4 x float> @insert_reg_hi_v4f32(double %a, <4 x float> %b) { 2279 ; SSE-LABEL: insert_reg_hi_v4f32: 2280 ; SSE: # %bb.0: 2281 ; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] 2282 ; SSE-NEXT: movaps %xmm1, %xmm0 2283 ; SSE-NEXT: retq 2284 ; 2285 ; AVX-LABEL: insert_reg_hi_v4f32: 2286 ; AVX: # %bb.0: 2287 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] 2288 ; AVX-NEXT: retq 2289 %a.cast = bitcast double %a to <2 x float> 2290 %v = shufflevector <2 x float> %a.cast, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 2291 %shuffle = shufflevector <4 x float> %v, <4 x float> 
%b, <4 x i32> <i32 4, i32 5, i32 0, i32 1> 2292 ret <4 x float> %shuffle 2293 } 2294 2295 define <4 x float> @insert_mem_hi_v4f32(<2 x float>* %ptr, <4 x float> %b) { 2296 ; SSE-LABEL: insert_mem_hi_v4f32: 2297 ; SSE: # %bb.0: 2298 ; SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] 2299 ; SSE-NEXT: retq 2300 ; 2301 ; AVX-LABEL: insert_mem_hi_v4f32: 2302 ; AVX: # %bb.0: 2303 ; AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] 2304 ; AVX-NEXT: retq 2305 %a = load <2 x float>, <2 x float>* %ptr 2306 %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 2307 %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1> 2308 ret <4 x float> %shuffle 2309 } 2310 2311 ; PR21137 2312 define <4 x float> @shuffle_mem_v4f32_3210(<4 x float>* %ptr) { 2313 ; SSE-LABEL: shuffle_mem_v4f32_3210: 2314 ; SSE: # %bb.0: 2315 ; SSE-NEXT: movaps (%rdi), %xmm0 2316 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0] 2317 ; SSE-NEXT: retq 2318 ; 2319 ; AVX-LABEL: shuffle_mem_v4f32_3210: 2320 ; AVX: # %bb.0: 2321 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,2,1,0] 2322 ; AVX-NEXT: retq 2323 %a = load <4 x float>, <4 x float>* %ptr 2324 %shuffle = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 2325 ret <4 x float> %shuffle 2326 } 2327 2328 define <4 x i32> @insert_dup_mem_v4i32(i32* %ptr) { 2329 ; SSE-LABEL: insert_dup_mem_v4i32: 2330 ; SSE: # %bb.0: 2331 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2332 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2333 ; SSE-NEXT: retq 2334 ; 2335 ; AVX-LABEL: insert_dup_mem_v4i32: 2336 ; AVX: # %bb.0: 2337 ; AVX-NEXT: vbroadcastss (%rdi), %xmm0 2338 ; AVX-NEXT: retq 2339 %tmp = load i32, i32* %ptr, align 4 2340 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 2341 %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer 2342 ret <4 x i32> %tmp2 2343 } 2344 2345 ; 2346 ; Shuffle to 
; logical bit shifts
;

; Lanes 0 and 2 are taken from the all-zero second operand (mask index 4),
; lane 1 is %a[0] and lane 3 is left undefined. The autogenerated checks
; below expect this to lower to one 64-bit logical left shift by 32.
define <4 x i32> @shuffle_v4i32_z0zX(<4 x i32> %a) {
; SSE-LABEL: shuffle_v4i32_z0zX:
; SSE:       # %bb.0:
; SSE-NEXT:    psllq $32, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v4i32_z0zX:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsllq $32, %xmm0, %xmm0
; AVX-NEXT:    retq
  %moved.up = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 0, i32 4, i32 undef>
  ret <4 x i32> %moved.up
}

; Lanes 0 and 2 are %a[1] and %a[3]; lanes 1 and 3 are taken from the
; all-zero second operand (mask index 4). The autogenerated checks below
; expect this to lower to one 64-bit logical right shift by 32.
define <4 x i32> @shuffle_v4i32_1z3z(<4 x i32> %a) {
; SSE-LABEL: shuffle_v4i32_1z3z:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlq $32, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v4i32_1z3z:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlq $32, %xmm0, %xmm0
; AVX-NEXT:    retq
  %moved.down = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 4, i32 3, i32 4>
  ret <4 x i32> %moved.down
}