; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VL

; Lowering tests for 2-element (v2i64 / v2f64) vector shuffles.  The digits in
; each function name encode the shuffle mask over the concatenation <%a, %b>:
; indices 0-1 select from %a, indices 2-3 select from %b, 'z' denotes a zero
; element (second operand is zeroinitializer) and 'u' denotes undef.  The
; "_copy" variants add a leading %nonce argument so the inputs arrive in
; xmm1/xmm2 instead of xmm0/xmm1.
; Do not edit the CHECK lines by hand; regenerate with
; utils/update_llc_test_checks.py.

define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_00:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: shuffle_v2i64_00:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v2i64_00:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v2i64_00:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_10:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_10:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_11:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_11:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_22:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: shuffle_v2i64_22:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm1[0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v2i64_22:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v2i64_22:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpbroadcastq %xmm1, %xmm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_32:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm1[2,3,0,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_33:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_33:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm1[2,3,2,3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
  ret <2 x i64> %shuffle
}

define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_00:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2f64_00:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2f64_00:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2f64_00:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_00:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_10:
; SSE:       # %bb.0:
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_10:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    retq

  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
  ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_11:
; SSE:       # %bb.0:
; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_11:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_22:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0,0]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2f64_22:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2f64_22:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2f64_22:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_22:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm1[0,0]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
  ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_32:
; SSE:       # %bb.0:
; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
; AVX-NEXT:    retq

  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
  ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_33:
; SSE:       # %bb.0:
; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_33:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm1[1,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
  ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_03:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2f64_03:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE3-NEXT:    movapd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2f64_03:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSSE3-NEXT:    movapd %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2f64_03:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_03:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_21:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2f64_21:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2f64_21:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2f64_21:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_21:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
  ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_u2(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_u2:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2f64_u2:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2f64_u2:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2f64_u2:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_u2:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm1[0,0]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 undef, i32 2>
  ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_3u(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_3u:
; SSE:       # %bb.0:
; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_3u:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 undef>
  ret <2 x double> %shuffle
}

define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_02:
; SSE:       # %bb.0:
; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_02:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_02_copy:
; SSE:       # %bb.0:
; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_02_copy:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm2[0]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_03:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_03:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE3-NEXT:    movapd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_03:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSSE3-NEXT:    movapd %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_03:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_03:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_03_copy:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSE2-NEXT:    movapd %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_03_copy:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSE3-NEXT:    movapd %xmm2, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_03_copy:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSSE3-NEXT:    movapd %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_03_copy:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3]
; SSE41-NEXT:    movaps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_03_copy:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_12:
; SSE2:       # %bb.0:
; SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_12:
; SSE3:       # %bb.0:
; SSE3-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_12:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_12:
; SSE41:       # %bb.0:
; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_12:
; AVX:       # %bb.0:
; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_12_copy:
; SSE2:       # %bb.0:
; SSE2-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_12_copy:
; SSE3:       # %bb.0:
; SSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
; SSE3-NEXT:    movapd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_12_copy:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; SSSE3-NEXT:    movdqa %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_12_copy:
; SSE41:       # %bb.0:
; SSE41-NEXT:    palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_12_copy:
; AVX:       # %bb.0:
; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_13:
; SSE:       # %bb.0:
; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_13:
; AVX:       # %bb.0:
; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_13_copy:
; SSE:       # %bb.0:
; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_13_copy:
; AVX:       # %bb.0:
; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm2[1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_20:
; SSE:       # %bb.0:
; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_20:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_20_copy:
; SSE:       # %bb.0:
; SSE-NEXT:    movlhps {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_20_copy:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm2[0],xmm1[0]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_21:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_21:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_21:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_21:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_21:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_21_copy:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_21_copy:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE3-NEXT:    movapd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_21_copy:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSSE3-NEXT:    movapd %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_21_copy:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3]
; SSE41-NEXT:    movaps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_21_copy:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_30:
; SSE2:       # %bb.0:
; SSE2-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_30:
; SSE3:       # %bb.0:
; SSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; SSE3-NEXT:    movapd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_30:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_30:
; SSE41:       # %bb.0:
; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_30:
; AVX:       # %bb.0:
; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_30_copy:
; SSE2:       # %bb.0:
; SSE2-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
; SSE2-NEXT:    movapd %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_30_copy:
; SSE3:       # %bb.0:
; SSE3-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
; SSE3-NEXT:    movapd %xmm2, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_30_copy:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_30_copy:
; SSE41:       # %bb.0:
; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_30_copy:
; AVX:       # %bb.0:
; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_31:
; SSE:       # %bb.0:
; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_31:
; AVX:       # %bb.0:
; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_31_copy:
; SSE:       # %bb.0:
; SSE-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_31_copy:
; AVX:       # %bb.0:
; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm2[1],xmm1[1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
  ret <2 x i64> %shuffle
}

define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
; SSE-LABEL: shuffle_v2i64_0z:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_0z:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}

define <2 x i64> @shuffle_v2i64_1z(<2 x i64> %a) {
; SSE-LABEL: shuffle_v2i64_1z:
; SSE:       # %bb.0:
; SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_1z:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %shuffle
}

define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
; SSE-LABEL: shuffle_v2i64_z0:
; SSE:       # %bb.0:
; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_z0:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %shuffle
}

define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
; SSE2-LABEL: shuffle_v2i64_z1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorpd %xmm1, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_z1:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorpd %xmm1, %xmm1
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_z1:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorpd %xmm1, %xmm1
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_z1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_z1:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
  ret <2 x i64> %shuffle
}

define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_0z:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_0z:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}

define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_1z:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_1z:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
  ret <2 x double> %shuffle
}

define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_z0:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_z0:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
  ret <2 x double> %shuffle
}

define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
; SSE2-LABEL: shuffle_v2f64_z1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorpd %xmm1, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2f64_z1:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorpd %xmm1, %xmm1
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2f64_z1:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorpd %xmm1, %xmm1
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2f64_z1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_z1:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
  ret <2 x double> %shuffle
}

define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_bitcast_1z:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_bitcast_1z:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX-NEXT:    retq
  %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
  %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
  %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
  ret <2 x double> %bitcast64
}

define <2 x i64> @shuffle_v2i64_bitcast_z123(<2 x i64> %x) {
; SSE2-LABEL: shuffle_v2i64_bitcast_z123:
; SSE2:       # %bb.0:
; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_bitcast_z123:
; SSE3:       # %bb.0:
; SSE3-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_bitcast_z123:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_bitcast_z123:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_bitcast_z123:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT:    retq
  %bitcast32 = bitcast <2 x i64> %x to <4 x float>
  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x i64>
  %and = and <2 x i64> %bitcast64, <i64 -4294967296, i64 -1>
  ret <2 x i64> %and
}

define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
; SSE-LABEL: insert_reg_and_zero_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %rdi, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_reg_and_zero_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovq %rdi, %xmm0
; AVX-NEXT:    retq
  %v = insertelement <2 x i64> undef, i64 %a, i32 0
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}

define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
; SSE-LABEL: insert_mem_and_zero_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_mem_and_zero_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    retq
  %a = load i64, i64* %ptr
  %v = insertelement <2 x i64> undef, i64 %a, i32 0
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}

define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
; SSE-LABEL: insert_reg_and_zero_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_reg_and_zero_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT:    retq
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}

define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
; SSE-LABEL: insert_mem_and_zero_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_mem_and_zero_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    retq
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}

define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
; SSE2-LABEL: insert_reg_lo_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq %rdi, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_reg_lo_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movq %rdi, %xmm1
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_reg_lo_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movq %rdi, %xmm1
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_reg_lo_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pinsrq $0, %rdi, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_reg_lo_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrq $0, %rdi, %xmm0, %xmm0
; AVX-NEXT:    retq
  %v = insertelement <2 x i64> undef, i64 %a, i32 0
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}

define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
; SSE2-LABEL: insert_mem_lo_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_mem_lo_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_mem_lo_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_mem_lo_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pinsrq $0, (%rdi), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_mem_lo_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrq $0, (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = load i64, i64* %ptr
  %v = insertelement <2 x i64> undef, i64 %a, i32 0
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}

define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
; SSE2-LABEL: insert_reg_hi_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq %rdi, %xmm1
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_reg_hi_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movq %rdi, %xmm1
; SSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_reg_hi_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movq %rdi, %xmm1
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_reg_hi_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pinsrq $1, %rdi, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_reg_hi_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm0
; AVX-NEXT:    retq
  %v = insertelement <2 x i64> undef, i64 %a, i32 0
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %shuffle
}

define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
; SSE2-LABEL: insert_mem_hi_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_mem_hi_v2i64:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE3-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_mem_hi_v2i64:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSSE3-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_mem_hi_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pinsrq $1, (%rdi), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_mem_hi_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrq $1, (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = load i64, i64* %ptr
  %v = insertelement <2 x i64> undef, i64 %a, i32 0
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %shuffle
}

define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
; SSE2-LABEL: insert_reg_lo_v2f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
;
SSE2-NEXT: movapd %xmm1, %xmm0 1090 ; SSE2-NEXT: retq 1091 ; 1092 ; SSE3-LABEL: insert_reg_lo_v2f64: 1093 ; SSE3: # %bb.0: 1094 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] 1095 ; SSE3-NEXT: movapd %xmm1, %xmm0 1096 ; SSE3-NEXT: retq 1097 ; 1098 ; SSSE3-LABEL: insert_reg_lo_v2f64: 1099 ; SSSE3: # %bb.0: 1100 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] 1101 ; SSSE3-NEXT: movapd %xmm1, %xmm0 1102 ; SSSE3-NEXT: retq 1103 ; 1104 ; SSE41-LABEL: insert_reg_lo_v2f64: 1105 ; SSE41: # %bb.0: 1106 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] 1107 ; SSE41-NEXT: retq 1108 ; 1109 ; AVX-LABEL: insert_reg_lo_v2f64: 1110 ; AVX: # %bb.0: 1111 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] 1112 ; AVX-NEXT: retq 1113 %v = insertelement <2 x double> undef, double %a, i32 0 1114 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3> 1115 ret <2 x double> %shuffle 1116 } 1117 1118 define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) { 1119 ; SSE-LABEL: insert_mem_lo_v2f64: 1120 ; SSE: # %bb.0: 1121 ; SSE-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1] 1122 ; SSE-NEXT: retq 1123 ; 1124 ; AVX-LABEL: insert_mem_lo_v2f64: 1125 ; AVX: # %bb.0: 1126 ; AVX-NEXT: vmovlpd {{.*#+}} xmm0 = mem[0],xmm0[1] 1127 ; AVX-NEXT: retq 1128 %a = load double, double* %ptr 1129 %v = insertelement <2 x double> undef, double %a, i32 0 1130 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3> 1131 ret <2 x double> %shuffle 1132 } 1133 1134 define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) { 1135 ; SSE-LABEL: insert_reg_hi_v2f64: 1136 ; SSE: # %bb.0: 1137 ; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] 1138 ; SSE-NEXT: movaps %xmm1, %xmm0 1139 ; SSE-NEXT: retq 1140 ; 1141 ; AVX-LABEL: insert_reg_hi_v2f64: 1142 ; AVX: # %bb.0: 1143 ; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1144 ; AVX-NEXT: retq 1145 %v = insertelement <2 x double> undef, double %a, i32 0 1146 %shuffle = 
shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0> 1147 ret <2 x double> %shuffle 1148 } 1149 1150 define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) { 1151 ; SSE-LABEL: insert_mem_hi_v2f64: 1152 ; SSE: # %bb.0: 1153 ; SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] 1154 ; SSE-NEXT: retq 1155 ; 1156 ; AVX-LABEL: insert_mem_hi_v2f64: 1157 ; AVX: # %bb.0: 1158 ; AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] 1159 ; AVX-NEXT: retq 1160 %a = load double, double* %ptr 1161 %v = insertelement <2 x double> undef, double %a, i32 0 1162 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0> 1163 ret <2 x double> %shuffle 1164 } 1165 1166 define <2 x double> @insert_dup_reg_v2f64(double %a) { 1167 ; SSE2-LABEL: insert_dup_reg_v2f64: 1168 ; SSE2: # %bb.0: 1169 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] 1170 ; SSE2-NEXT: retq 1171 ; 1172 ; SSE3-LABEL: insert_dup_reg_v2f64: 1173 ; SSE3: # %bb.0: 1174 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] 1175 ; SSE3-NEXT: retq 1176 ; 1177 ; SSSE3-LABEL: insert_dup_reg_v2f64: 1178 ; SSSE3: # %bb.0: 1179 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] 1180 ; SSSE3-NEXT: retq 1181 ; 1182 ; SSE41-LABEL: insert_dup_reg_v2f64: 1183 ; SSE41: # %bb.0: 1184 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] 1185 ; SSE41-NEXT: retq 1186 ; 1187 ; AVX-LABEL: insert_dup_reg_v2f64: 1188 ; AVX: # %bb.0: 1189 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 1190 ; AVX-NEXT: retq 1191 %v = insertelement <2 x double> undef, double %a, i32 0 1192 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0> 1193 ret <2 x double> %shuffle 1194 } 1195 1196 define <2 x double> @insert_dup_mem_v2f64(double* %ptr) { 1197 ; SSE2-LABEL: insert_dup_mem_v2f64: 1198 ; SSE2: # %bb.0: 1199 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1200 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] 1201 ; SSE2-NEXT: retq 1202 ; 1203 ; SSE3-LABEL: insert_dup_mem_v2f64: 1204 ; SSE3: 
# %bb.0: 1205 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] 1206 ; SSE3-NEXT: retq 1207 ; 1208 ; SSSE3-LABEL: insert_dup_mem_v2f64: 1209 ; SSSE3: # %bb.0: 1210 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] 1211 ; SSSE3-NEXT: retq 1212 ; 1213 ; SSE41-LABEL: insert_dup_mem_v2f64: 1214 ; SSE41: # %bb.0: 1215 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] 1216 ; SSE41-NEXT: retq 1217 ; 1218 ; AVX-LABEL: insert_dup_mem_v2f64: 1219 ; AVX: # %bb.0: 1220 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] 1221 ; AVX-NEXT: retq 1222 %a = load double, double* %ptr 1223 %v = insertelement <2 x double> undef, double %a, i32 0 1224 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0> 1225 ret <2 x double> %shuffle 1226 } 1227 1228 define <2 x double> @insert_dup_mem128_v2f64(<2 x double>* %ptr) nounwind { 1229 ; SSE2-LABEL: insert_dup_mem128_v2f64: 1230 ; SSE2: # %bb.0: 1231 ; SSE2-NEXT: movaps (%rdi), %xmm0 1232 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] 1233 ; SSE2-NEXT: retq 1234 ; 1235 ; SSE3-LABEL: insert_dup_mem128_v2f64: 1236 ; SSE3: # %bb.0: 1237 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] 1238 ; SSE3-NEXT: retq 1239 ; 1240 ; SSSE3-LABEL: insert_dup_mem128_v2f64: 1241 ; SSSE3: # %bb.0: 1242 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] 1243 ; SSSE3-NEXT: retq 1244 ; 1245 ; SSE41-LABEL: insert_dup_mem128_v2f64: 1246 ; SSE41: # %bb.0: 1247 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] 1248 ; SSE41-NEXT: retq 1249 ; 1250 ; AVX-LABEL: insert_dup_mem128_v2f64: 1251 ; AVX: # %bb.0: 1252 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] 1253 ; AVX-NEXT: retq 1254 %v = load <2 x double>, <2 x double>* %ptr 1255 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0> 1256 ret <2 x double> %shuffle 1257 } 1258 1259 1260 define <2 x i64> @insert_dup_mem_v2i64(i64* %ptr) { 1261 ; SSE-LABEL: insert_dup_mem_v2i64: 1262 ; SSE: # %bb.0: 1263 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 1264 ; SSE-NEXT: pshufd {{.*#+}} xmm0 
= xmm0[0,1,0,1] 1265 ; SSE-NEXT: retq 1266 ; 1267 ; AVX1-LABEL: insert_dup_mem_v2i64: 1268 ; AVX1: # %bb.0: 1269 ; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1270 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1] 1271 ; AVX1-NEXT: retq 1272 ; 1273 ; AVX2-LABEL: insert_dup_mem_v2i64: 1274 ; AVX2: # %bb.0: 1275 ; AVX2-NEXT: vpbroadcastq (%rdi), %xmm0 1276 ; AVX2-NEXT: retq 1277 ; 1278 ; AVX512VL-LABEL: insert_dup_mem_v2i64: 1279 ; AVX512VL: # %bb.0: 1280 ; AVX512VL-NEXT: vpbroadcastq (%rdi), %xmm0 1281 ; AVX512VL-NEXT: retq 1282 %tmp = load i64, i64* %ptr, align 1 1283 %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0 1284 %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <2 x i32> zeroinitializer 1285 ret <2 x i64> %tmp2 1286 } 1287 1288 define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) { 1289 ; SSE-LABEL: shuffle_mem_v2f64_10: 1290 ; SSE: # %bb.0: 1291 ; SSE-NEXT: movapd (%rdi), %xmm0 1292 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0] 1293 ; SSE-NEXT: retq 1294 ; 1295 ; AVX-LABEL: shuffle_mem_v2f64_10: 1296 ; AVX: # %bb.0: 1297 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0] 1298 ; AVX-NEXT: retq 1299 1300 %a = load <2 x double>, <2 x double>* %ptr 1301 %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0> 1302 ret <2 x double> %shuffle 1303 } 1304 1305 define <2 x double> @shuffle_mem_v2f64_31(<2 x double> %a, <2 x double>* %b) { 1306 ; SSE-LABEL: shuffle_mem_v2f64_31: 1307 ; SSE: # %bb.0: 1308 ; SSE-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] 1309 ; SSE-NEXT: retq 1310 ; 1311 ; AVX-LABEL: shuffle_mem_v2f64_31: 1312 ; AVX: # %bb.0: 1313 ; AVX-NEXT: vmovlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] 1314 ; AVX-NEXT: retq 1315 %c = load <2 x double>, <2 x double>* %b 1316 %f = shufflevector <2 x double> %a, <2 x double> %c, <2 x i32> <i32 3, i32 1> 1317 ret <2 x double> %f 1318 } 1319