; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VL
;
; Codegen tests for two-element 128-bit vector shuffles (<2 x i64> and
; <2 x double>) on x86-64. Each function is compiled once per invocation of
; llc above and its assembly is compared against the check lines for the
; prefixes that invocation enables.
;
; Reading guide:
;  * In a two-operand shufflevector on 2-element vectors, mask indices 0-1
;    select from the first operand and 2-3 select from the second.
;  * For the plain shuffle_* tests the digits at the end of the function name
;    are the shuffle mask; a 'z' marks a lane taken from a zeroinitializer
;    operand.
;  * The *_copy variants take an unused leading %nonce argument; their
;    expected code reads xmm1/xmm2 and writes the result to xmm0.
;  * The check lines are machine generated (see the first line of this file);
;    regenerate them with the script rather than editing them by hand.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Mask <0,0>: broadcast element 0 of %a (%b is unused).
define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_00:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT: retq
;
; AVX1-LABEL: shuffle_v2i64_00:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2i64_00:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2i64_00:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
  ret <2 x i64> %shuffle
}
; Mask <1,0>: swap the two elements of %a.
define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_10:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_10:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
  ret <2 x i64> %shuffle
}
; Mask <1,1>: broadcast element 1 of %a.
define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_11:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_11:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %shuffle
}
; Mask <2,2>: broadcast element 0 of %b (%a is unused).
define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_22:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
; SSE-NEXT: retq
;
; AVX1-LABEL: shuffle_v2i64_22:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2i64_22:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq %xmm1, %xmm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2i64_22:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm0
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
  ret <2 x i64> %shuffle
}
; Mask <3,2>: swap the two elements of %b.
define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_32:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_32:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
  ret <2 x i64> %shuffle
}
; Mask <3,3>: broadcast element 1 of %b.
define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_33:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_33:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
  ret <2 x i64> %shuffle
}

; Floating-point counterpart of the <0,0> broadcast of %a.
define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_00:
; SSE2: # BB#0:
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2f64_00:
; SSE3: # BB#0:
; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2f64_00:
; SSSE3: # BB#0:
; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2f64_00:
; SSE41: # BB#0:
; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_00:
; AVX: # BB#0:
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}
; Mask <1,0>: swap the two elements of %a.
define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_10:
; SSE: # BB#0:
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_10:
; AVX: # BB#0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: retq

  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
  ret <2 x double> %shuffle
}
; Mask <1,1>: broadcast element 1 of %a.
define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_11:
; SSE: # BB#0:
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_11:
; AVX: # BB#0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %shuffle
}
; Mask <2,2>: broadcast element 0 of %b (%a is unused).
define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_22:
; SSE2: # BB#0:
; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2f64_22:
; SSE3: # BB#0:
; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2f64_22:
; SSSE3: # BB#0:
; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2f64_22:
; SSE41: # BB#0:
; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_22:
; AVX: # BB#0:
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
  ret <2 x double> %shuffle
}
; Mask <3,2>: swap the two elements of %b.
define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_32:
; SSE: # BB#0:
; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_32:
; AVX: # BB#0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
; AVX-NEXT: retq

  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
  ret <2 x double> %shuffle
}
; Mask <3,3>: broadcast element 1 of %b.
define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_33:
; SSE: # BB#0:
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_33:
; AVX: # BB#0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,1]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
  ret <2 x double> %shuffle
}
; Mask <0,3>: low element from %a, high element from %b.
define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_03:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2f64_03:
; SSE3: # BB#0:
; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2f64_03:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2f64_03:
; SSE41: # BB#0:
; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_03:
; AVX: # BB#0:
; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}
; Mask <2,1>: low element from %b, high element from %a.
define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_21:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2f64_21:
; SSE3: # BB#0:
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2f64_21:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2f64_21:
; SSE41: # BB#0:
; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_21:
; AVX: # BB#0:
; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
  ret <2 x double> %shuffle
}


; Mask <0,2>: element 0 of %a followed by element 0 of %b.
define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_02:
; SSE: # BB#0:
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_02:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %shuffle
}
; Same <0,2> mask with an unused leading %nonce argument.
define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_02_copy:
; SSE: # BB#0:
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_02_copy:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %shuffle
}
; Mask <0,3>: low element from %a, high element from %b.
define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_03:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_03:
; SSE3: # BB#0:
; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_03:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_03:
; SSE41: # BB#0:
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v2i64_03:
; AVX1: # BB#0:
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2i64_03:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2i64_03:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}
; Same <0,3> mask with an unused leading %nonce argument.
define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_03_copy:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSE2-NEXT: movapd %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_03_copy:
; SSE3: # BB#0:
; SSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSE3-NEXT: movapd %xmm2, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_03_copy:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSSE3-NEXT: movapd %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_03_copy:
; SSE41: # BB#0:
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v2i64_03_copy:
; AVX1: # BB#0:
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2i64_03_copy:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2i64_03_copy:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}
; Mask <1,2>: element 1 of %a followed by element 0 of %b.
define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_12:
; SSE2: # BB#0:
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_12:
; SSE3: # BB#0:
; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_12:
; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_12:
; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_12:
; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %shuffle
}
; Same <1,2> mask with an unused leading %nonce argument.
define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_12_copy:
; SSE2: # BB#0:
; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_12_copy:
; SSE3: # BB#0:
; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_12_copy:
; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; SSSE3-NEXT: movdqa %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_12_copy:
; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_12_copy:
; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %shuffle
}
; Mask <1,3>: element 1 of %a followed by element 1 of %b.
define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_13:
; SSE: # BB#0:
; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_13:
; AVX: # BB#0:
; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %shuffle
}
; Same <1,3> mask with an unused leading %nonce argument.
define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_13_copy:
; SSE: # BB#0:
; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_13_copy:
; AVX: # BB#0:
; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %shuffle
}
; Mask <2,0>: element 0 of %b followed by element 0 of %a.
define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_20:
; SSE: # BB#0:
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_20:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %shuffle
}
; Same <2,0> mask with an unused leading %nonce argument.
define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_20_copy:
; SSE: # BB#0:
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_20_copy:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %shuffle
}
; Mask <2,1>: low element from %b, high element from %a.
define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_21:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_21:
; SSE3: # BB#0:
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_21:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_21:
; SSE41: # BB#0:
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v2i64_21:
; AVX1: # BB#0:
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2i64_21:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2i64_21:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
  ret <2 x i64> %shuffle
}
; Same <2,1> mask with an unused leading %nonce argument.
define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_21_copy:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_21_copy:
; SSE3: # BB#0:
; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_21_copy:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_21_copy:
; SSE41: # BB#0:
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v2i64_21_copy:
; AVX1: # BB#0:
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2i64_21_copy:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2i64_21_copy:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
  ret <2 x i64> %shuffle
}
; Mask <3,0>: element 1 of %b followed by element 0 of %a.
define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_30:
; SSE2: # BB#0:
; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_30:
; SSE3: # BB#0:
; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_30:
; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_30:
; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_30:
; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
  ret <2 x i64> %shuffle
}
; Same <3,0> mask with an unused leading %nonce argument.
define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_30_copy:
; SSE2: # BB#0:
; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
; SSE2-NEXT: movapd %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_30_copy:
; SSE3: # BB#0:
; SSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
; SSE3-NEXT: movapd %xmm2, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_30_copy:
; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_30_copy:
; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_30_copy:
; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
  ret <2 x i64> %shuffle
}
; Mask <3,1>: element 1 of %b followed by element 1 of %a.
define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_31:
; SSE: # BB#0:
; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_31:
; AVX: # BB#0:
; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
  ret <2 x i64> %shuffle
}
; Same <3,1> mask with an unused leading %nonce argument.
define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_31_copy:
; SSE: # BB#0:
; SSE-NEXT: punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_31_copy:
; AVX: # BB#0:
; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
  ret <2 x i64> %shuffle
}

; Mask <0,3> against a zero vector: keep element 0 of %a, zero the high lane.
define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
; SSE-LABEL: shuffle_v2i64_0z:
; SSE: # BB#0:
; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_0z:
; AVX: # BB#0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}

; Mask <1,3> against a zero vector: element 1 of %a in the low lane, zero above.
define <2 x i64> @shuffle_v2i64_1z(<2 x i64> %a) {
; SSE-LABEL: shuffle_v2i64_1z:
; SSE: # BB#0:
; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_1z:
; AVX: # BB#0:
; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %shuffle
}

; Mask <2,0> against a zero vector: zero low lane, element 0 of %a above.
define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
; SSE-LABEL: shuffle_v2i64_z0:
; SSE: # BB#0:
; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_z0:
; AVX: # BB#0:
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %shuffle
}

; Mask <2,1> against a zero vector: zero low lane, keep element 1 of %a.
define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
; SSE2-LABEL: shuffle_v2i64_z1:
; SSE2: # BB#0:
; SSE2-NEXT: xorpd %xmm1, %xmm1
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_z1:
; SSE3: # BB#0:
; SSE3-NEXT: xorpd %xmm1, %xmm1
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_z1:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorpd %xmm1, %xmm1
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_z1:
; SSE41: # BB#0:
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v2i64_z1:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2i64_z1:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2i64_z1:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
  ret <2 x i64> %shuffle
}

; Mask <0,3> against a zero vector: keep element 0 of %a, zero the high lane.
define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_0z:
; SSE: # BB#0:
; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_0z:
; AVX: # BB#0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}

; Mask <1,3> against a zero vector: element 1 of %a in the low lane, zero above.
define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_1z:
; SSE: # BB#0:
; SSE-NEXT: xorpd %xmm1, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: retq
;
; AVX1-LABEL: shuffle_v2f64_1z:
; AVX1: # BB#0:
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2f64_1z:
; AVX2: # BB#0:
; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2f64_1z:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
  ret <2 x double> %shuffle
}

; Mask <2,0> against a zero vector: zero low lane, element 0 of %a above.
define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_z0:
; SSE: # BB#0:
; SSE-NEXT: xorpd %xmm1, %xmm1
; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: shuffle_v2f64_z0:
; AVX1: # BB#0:
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2f64_z0:
; AVX2: # BB#0:
; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2f64_z0:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
  ret <2 x double> %shuffle
}

; Mask <2,1> against a zero vector: zero low lane, keep element 1 of %a.
define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
; SSE2-LABEL: shuffle_v2f64_z1:
; SSE2: # BB#0:
; SSE2-NEXT: xorpd %xmm1, %xmm1
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2f64_z1:
; SSE3: # BB#0:
; SSE3-NEXT: xorpd %xmm1, %xmm1
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2f64_z1:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorpd %xmm1, %xmm1
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2f64_z1:
; SSE41: # BB#0:
; SSE41-NEXT: xorpd %xmm1, %xmm1
; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v2f64_z1:
; AVX1: # BB#0:
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2f64_z1:
; AVX2: # BB#0:
; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2f64_z1:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
  ret <2 x double> %shuffle
}

; A <2,1> shuffle with zero, bitcast to <4 x float>, then a <2,3,0,1> shuffle
; that swaps the two 64-bit halves; the combined result is [a[1], 0].
define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_bitcast_1z:
; SSE: # BB#0:
; SSE-NEXT: xorpd %xmm1, %xmm1
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE-NEXT: retq
;
; AVX1-LABEL: shuffle_v2f64_bitcast_1z:
; AVX1: # BB#0:
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2f64_bitcast_1z:
; AVX2: # BB#0:
; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2f64_bitcast_1z:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; AVX512VL-NEXT: retq
  %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
  %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
  %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
  ret <2 x double> %bitcast64
}

; Replaces 32-bit lane 0 with the constant 1.0 (mask <4,1,2,3>), then ANDs the
; <2 x i64> view with <-4294967296, -1>, which clears the low 32 bits of the
; first element again; the other three 32-bit lanes of %x pass through.
define <2 x i64> @shuffle_v2i64_bitcast_z123(<2 x i64> %x) {
; SSE2-LABEL: shuffle_v2i64_bitcast_z123:
; SSE2: # BB#0:
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_bitcast_z123:
; SSE3: # BB#0:
; SSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE3-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_bitcast_z123:
; SSSE3: # BB#0:
; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSSE3-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_bitcast_z123:
; SSE41: # BB#0:
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v2i64_bitcast_z123:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2i64_bitcast_z123:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2i64_bitcast_z123:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX512VL-NEXT: retq
  %bitcast32 = bitcast <2 x i64> %x to <4 x float>
  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x i64>
  %and = and <2 x i64> %bitcast64, <i64 -4294967296, i64 -1>
  ret <2 x i64> %and
}

; Scalar i64 argument inserted into lane 0, high lane taken from zero (<0,3>).
define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
; SSE-LABEL: insert_reg_and_zero_v2i64:
; SSE: # BB#0:
; SSE-NEXT: movd %rdi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_reg_and_zero_v2i64:
; AVX: # BB#0:
; AVX-NEXT: vmovq %rdi, %xmm0
; AVX-NEXT: retq
  %v = insertelement <2 x i64> undef, i64 %a, i32 0
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}

; As above, but the i64 scalar is loaded from %ptr.
define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
; SSE-LABEL: insert_mem_and_zero_v2i64:
; SSE: # BB#0:
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: retq
;
; AVX-LABEL: insert_mem_and_zero_v2i64:
; AVX: # BB#0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: retq
  %a = load i64, i64* %ptr
  %v = insertelement <2 x i64> undef, i64 %a, i32 0
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}

; Scalar double argument inserted into lane 0, high lane taken from zero (<0,3>).
define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
; SSE-LABEL: insert_reg_and_zero_v2f64:
; SSE: # BB#0:
; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT: retq
;
; AVX-LABEL: insert_reg_and_zero_v2f64:
; AVX: # BB#0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: retq
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}

; As above, but the double scalar is loaded from %ptr.
define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
; SSE-LABEL: insert_mem_and_zero_v2f64:
; SSE: # BB#0:
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: retq
;
; AVX-LABEL: insert_mem_and_zero_v2f64:
; AVX: # BB#0:
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: retq
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}

; Scalar i64 argument inserted into lane 0, high lane taken from %b (<0,3>).
define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
; SSE2-LABEL: insert_reg_lo_v2i64:
; SSE2: # BB#0:
; SSE2-NEXT: movd %rdi, %xmm1
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_reg_lo_v2i64:
; SSE3: # BB#0:
; SSE3-NEXT: movd %rdi, %xmm1
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_reg_lo_v2i64:
; SSSE3: # BB#0:
; SSSE3-NEXT: movd %rdi, %xmm1
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_reg_lo_v2i64:
; SSE41: # BB#0:
; SSE41-NEXT: movd %rdi, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_reg_lo_v2i64:
; AVX1: # BB#0:
; AVX1-NEXT: vmovq %rdi, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_reg_lo_v2i64:
; AVX2: # BB#0:
; AVX2-NEXT: vmovq %rdi, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: insert_reg_lo_v2i64:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovq %rdi, %xmm1
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX512VL-NEXT: retq
  %v = insertelement <2 x i64> undef, i64 %a, i32 0
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}

define <2 x i64>
@insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) { 1082 ; SSE2-LABEL: insert_mem_lo_v2i64: 1083 ; SSE2: # BB#0: 1084 ; SSE2-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1] 1085 ; SSE2-NEXT: retq 1086 ; 1087 ; SSE3-LABEL: insert_mem_lo_v2i64: 1088 ; SSE3: # BB#0: 1089 ; SSE3-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1] 1090 ; SSE3-NEXT: retq 1091 ; 1092 ; SSSE3-LABEL: insert_mem_lo_v2i64: 1093 ; SSSE3: # BB#0: 1094 ; SSSE3-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1] 1095 ; SSSE3-NEXT: retq 1096 ; 1097 ; SSE41-LABEL: insert_mem_lo_v2i64: 1098 ; SSE41: # BB#0: 1099 ; SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 1100 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] 1101 ; SSE41-NEXT: retq 1102 ; 1103 ; AVX1-LABEL: insert_mem_lo_v2i64: 1104 ; AVX1: # BB#0: 1105 ; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1106 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] 1107 ; AVX1-NEXT: retq 1108 ; 1109 ; AVX2-LABEL: insert_mem_lo_v2i64: 1110 ; AVX2: # BB#0: 1111 ; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1112 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] 1113 ; AVX2-NEXT: retq 1114 ; 1115 ; AVX512VL-LABEL: insert_mem_lo_v2i64: 1116 ; AVX512VL: # BB#0: 1117 ; AVX512VL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1118 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] 1119 ; AVX512VL-NEXT: retq 1120 %a = load i64, i64* %ptr 1121 %v = insertelement <2 x i64> undef, i64 %a, i32 0 1122 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3> 1123 ret <2 x i64> %shuffle 1124 } 1125 1126 define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) { 1127 ; SSE-LABEL: insert_reg_hi_v2i64: 1128 ; SSE: # BB#0: 1129 ; SSE-NEXT: movd %rdi, %xmm1 1130 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1131 ; SSE-NEXT: retq 1132 ; 1133 ; AVX-LABEL: insert_reg_hi_v2i64: 1134 ; AVX: # BB#0: 1135 ; AVX-NEXT: vmovq %rdi, %xmm1 1136 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1137 ; AVX-NEXT: retq 1138 %v = 
insertelement <2 x i64> undef, i64 %a, i32 0 1139 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0> 1140 ret <2 x i64> %shuffle 1141 } 1142 1143 define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) { 1144 ; SSE-LABEL: insert_mem_hi_v2i64: 1145 ; SSE: # BB#0: 1146 ; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 1147 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1148 ; SSE-NEXT: retq 1149 ; 1150 ; AVX-LABEL: insert_mem_hi_v2i64: 1151 ; AVX: # BB#0: 1152 ; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1153 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1154 ; AVX-NEXT: retq 1155 %a = load i64, i64* %ptr 1156 %v = insertelement <2 x i64> undef, i64 %a, i32 0 1157 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0> 1158 ret <2 x i64> %shuffle 1159 } 1160 1161 define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) { 1162 ; SSE-LABEL: insert_reg_lo_v2f64: 1163 ; SSE: # BB#0: 1164 ; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] 1165 ; SSE-NEXT: movapd %xmm1, %xmm0 1166 ; SSE-NEXT: retq 1167 ; 1168 ; AVX-LABEL: insert_reg_lo_v2f64: 1169 ; AVX: # BB#0: 1170 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] 1171 ; AVX-NEXT: retq 1172 %v = insertelement <2 x double> undef, double %a, i32 0 1173 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3> 1174 ret <2 x double> %shuffle 1175 } 1176 1177 define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) { 1178 ; SSE-LABEL: insert_mem_lo_v2f64: 1179 ; SSE: # BB#0: 1180 ; SSE-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1] 1181 ; SSE-NEXT: retq 1182 ; 1183 ; AVX-LABEL: insert_mem_lo_v2f64: 1184 ; AVX: # BB#0: 1185 ; AVX-NEXT: vmovlpd {{.*#+}} xmm0 = mem[0],xmm0[1] 1186 ; AVX-NEXT: retq 1187 %a = load double, double* %ptr 1188 %v = insertelement <2 x double> undef, double %a, i32 0 1189 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3> 1190 ret <2 x double> %shuffle 
1191 } 1192 1193 define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) { 1194 ; SSE-LABEL: insert_reg_hi_v2f64: 1195 ; SSE: # BB#0: 1196 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] 1197 ; SSE-NEXT: movapd %xmm1, %xmm0 1198 ; SSE-NEXT: retq 1199 ; 1200 ; AVX-LABEL: insert_reg_hi_v2f64: 1201 ; AVX: # BB#0: 1202 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1203 ; AVX-NEXT: retq 1204 %v = insertelement <2 x double> undef, double %a, i32 0 1205 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0> 1206 ret <2 x double> %shuffle 1207 } 1208 1209 define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) { 1210 ; SSE-LABEL: insert_mem_hi_v2f64: 1211 ; SSE: # BB#0: 1212 ; SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] 1213 ; SSE-NEXT: retq 1214 ; 1215 ; AVX-LABEL: insert_mem_hi_v2f64: 1216 ; AVX: # BB#0: 1217 ; AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] 1218 ; AVX-NEXT: retq 1219 %a = load double, double* %ptr 1220 %v = insertelement <2 x double> undef, double %a, i32 0 1221 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0> 1222 ret <2 x double> %shuffle 1223 } 1224 1225 define <2 x double> @insert_dup_reg_v2f64(double %a) { 1226 ; SSE2-LABEL: insert_dup_reg_v2f64: 1227 ; SSE2: # BB#0: 1228 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] 1229 ; SSE2-NEXT: retq 1230 ; 1231 ; SSE3-LABEL: insert_dup_reg_v2f64: 1232 ; SSE3: # BB#0: 1233 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] 1234 ; SSE3-NEXT: retq 1235 ; 1236 ; SSSE3-LABEL: insert_dup_reg_v2f64: 1237 ; SSSE3: # BB#0: 1238 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] 1239 ; SSSE3-NEXT: retq 1240 ; 1241 ; SSE41-LABEL: insert_dup_reg_v2f64: 1242 ; SSE41: # BB#0: 1243 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] 1244 ; SSE41-NEXT: retq 1245 ; 1246 ; AVX-LABEL: insert_dup_reg_v2f64: 1247 ; AVX: # BB#0: 1248 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 1249 ; AVX-NEXT: retq 1250 %v = insertelement <2 x 
double> undef, double %a, i32 0 1251 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0> 1252 ret <2 x double> %shuffle 1253 } 1254 1255 define <2 x double> @insert_dup_mem_v2f64(double* %ptr) { 1256 ; SSE2-LABEL: insert_dup_mem_v2f64: 1257 ; SSE2: # BB#0: 1258 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1259 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] 1260 ; SSE2-NEXT: retq 1261 ; 1262 ; SSE3-LABEL: insert_dup_mem_v2f64: 1263 ; SSE3: # BB#0: 1264 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] 1265 ; SSE3-NEXT: retq 1266 ; 1267 ; SSSE3-LABEL: insert_dup_mem_v2f64: 1268 ; SSSE3: # BB#0: 1269 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] 1270 ; SSSE3-NEXT: retq 1271 ; 1272 ; SSE41-LABEL: insert_dup_mem_v2f64: 1273 ; SSE41: # BB#0: 1274 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] 1275 ; SSE41-NEXT: retq 1276 ; 1277 ; AVX-LABEL: insert_dup_mem_v2f64: 1278 ; AVX: # BB#0: 1279 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] 1280 ; AVX-NEXT: retq 1281 %a = load double, double* %ptr 1282 %v = insertelement <2 x double> undef, double %a, i32 0 1283 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0> 1284 ret <2 x double> %shuffle 1285 } 1286 1287 define <2 x double> @insert_dup_mem128_v2f64(<2 x double>* %ptr) nounwind { 1288 ; SSE2-LABEL: insert_dup_mem128_v2f64: 1289 ; SSE2: # BB#0: 1290 ; SSE2-NEXT: movaps (%rdi), %xmm0 1291 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] 1292 ; SSE2-NEXT: retq 1293 ; 1294 ; SSE3-LABEL: insert_dup_mem128_v2f64: 1295 ; SSE3: # BB#0: 1296 ; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] 1297 ; SSE3-NEXT: retq 1298 ; 1299 ; SSSE3-LABEL: insert_dup_mem128_v2f64: 1300 ; SSSE3: # BB#0: 1301 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] 1302 ; SSSE3-NEXT: retq 1303 ; 1304 ; SSE41-LABEL: insert_dup_mem128_v2f64: 1305 ; SSE41: # BB#0: 1306 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] 1307 ; SSE41-NEXT: retq 1308 ; 1309 ; AVX-LABEL: insert_dup_mem128_v2f64: 1310 ; AVX: # 
BB#0: 1311 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] 1312 ; AVX-NEXT: retq 1313 %v = load <2 x double>, <2 x double>* %ptr 1314 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0> 1315 ret <2 x double> %shuffle 1316 } 1317 1318 1319 define <2 x i64> @insert_dup_mem_v2i64(i64* %ptr) { 1320 ; SSE-LABEL: insert_dup_mem_v2i64: 1321 ; SSE: # BB#0: 1322 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 1323 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 1324 ; SSE-NEXT: retq 1325 ; 1326 ; AVX1-LABEL: insert_dup_mem_v2i64: 1327 ; AVX1: # BB#0: 1328 ; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero 1329 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 1330 ; AVX1-NEXT: retq 1331 ; 1332 ; AVX2-LABEL: insert_dup_mem_v2i64: 1333 ; AVX2: # BB#0: 1334 ; AVX2-NEXT: vpbroadcastq (%rdi), %xmm0 1335 ; AVX2-NEXT: retq 1336 ; 1337 ; AVX512VL-LABEL: insert_dup_mem_v2i64: 1338 ; AVX512VL: # BB#0: 1339 ; AVX512VL-NEXT: vpbroadcastq (%rdi), %xmm0 1340 ; AVX512VL-NEXT: retq 1341 %tmp = load i64, i64* %ptr, align 1 1342 %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0 1343 %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <2 x i32> zeroinitializer 1344 ret <2 x i64> %tmp2 1345 } 1346 1347 define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) { 1348 ; SSE-LABEL: shuffle_mem_v2f64_10: 1349 ; SSE: # BB#0: 1350 ; SSE-NEXT: movapd (%rdi), %xmm0 1351 ; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0] 1352 ; SSE-NEXT: retq 1353 ; 1354 ; AVX-LABEL: shuffle_mem_v2f64_10: 1355 ; AVX: # BB#0: 1356 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0] 1357 ; AVX-NEXT: retq 1358 1359 %a = load <2 x double>, <2 x double>* %ptr 1360 %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0> 1361 ret <2 x double> %shuffle 1362 } 1363