; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VL

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_00:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT: retq
;
; AVX1-LABEL: shuffle_v2i64_00:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2i64_00:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2i64_00:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_10:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_10:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_11:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_11:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_22:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
; SSE-NEXT: retq
;
; AVX1-LABEL: shuffle_v2i64_22:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2i64_22:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq %xmm1, %xmm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2i64_22:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm0
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_32:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_32:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_33:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_33:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
  ret <2 x i64> %shuffle
}

define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_00:
; SSE2: # BB#0:
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2f64_00:
; SSE3: # BB#0:
; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2f64_00:
; SSSE3: # BB#0:
; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2f64_00:
; SSE41: # BB#0:
; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_00:
; AVX: # BB#0:
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_10:
; SSE: # BB#0:
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_10:
; AVX: # BB#0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: retq

  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
  ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_11:
; SSE: # BB#0:
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_11:
; AVX: # BB#0:
; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_22:
; SSE2: # BB#0:
; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2f64_22:
; SSE3: # BB#0:
; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2f64_22:
; SSSE3: # BB#0:
; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2f64_22:
; SSE41: # BB#0:
; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_22:
; AVX: # BB#0:
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
  ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_32:
; SSE: # BB#0:
; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_32:
; AVX: # BB#0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
; AVX-NEXT: retq

  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
  ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_33:
; SSE: # BB#0:
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_33:
; AVX: # BB#0:
; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm1[1,1]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
  ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_03:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2f64_03:
; SSE3: # BB#0:
; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2f64_03:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2f64_03:
; SSE41: # BB#0:
; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_03:
; AVX: # BB#0:
; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_21:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2f64_21:
; SSE3: # BB#0:
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2f64_21:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2f64_21:
; SSE41: # BB#0:
; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_21:
; AVX: # BB#0:
; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
  ret <2 x double> %shuffle
}


define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_02:
; SSE: # BB#0:
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_02:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_02_copy:
; SSE: # BB#0:
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_02_copy:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_03:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_03:
; SSE3: # BB#0:
; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_03:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_03:
; SSE41: # BB#0:
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v2i64_03:
; AVX1: # BB#0:
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2i64_03:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2i64_03:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_03_copy:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSE2-NEXT: movapd %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_03_copy:
; SSE3: # BB#0:
; SSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSE3-NEXT: movapd %xmm2, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_03_copy:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSSE3-NEXT: movapd %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_03_copy:
; SSE41: # BB#0:
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v2i64_03_copy:
; AVX1: # BB#0:
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2i64_03_copy:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2i64_03_copy:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_12:
; SSE2: # BB#0:
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_12:
; SSE3: # BB#0:
; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_12:
; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_12:
; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_12:
; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_12_copy:
; SSE2: # BB#0:
; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_12_copy:
; SSE3: # BB#0:
; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_12_copy:
; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; SSSE3-NEXT: movdqa %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_12_copy:
; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_12_copy:
; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_13:
; SSE: # BB#0:
; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_13:
; AVX: # BB#0:
; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_13_copy:
; SSE: # BB#0:
; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_13_copy:
; AVX: # BB#0:
; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_20:
; SSE: # BB#0:
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_20:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_20_copy:
; SSE: # BB#0:
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_20_copy:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_21:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_21:
; SSE3: # BB#0:
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_21:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_21:
; SSE41: # BB#0:
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v2i64_21:
; AVX1: # BB#0:
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2i64_21:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2i64_21:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_21_copy:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_21_copy:
; SSE3: # BB#0:
; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_21_copy:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_21_copy:
; SSE41: # BB#0:
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v2i64_21_copy:
; AVX1: # BB#0:
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2i64_21_copy:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2i64_21_copy:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_30:
; SSE2: # BB#0:
; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_30:
; SSE3: # BB#0:
; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_30:
; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_30:
; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_30:
; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_30_copy:
; SSE2: # BB#0:
; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
; SSE2-NEXT: movapd %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_30_copy:
; SSE3: # BB#0:
; SSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
; SSE3-NEXT: movapd %xmm2, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_30_copy:
; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_30_copy:
; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_30_copy:
; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_31:
; SSE: # BB#0:
; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_31:
; AVX: # BB#0:
; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_31_copy:
; SSE: # BB#0:
; SSE-NEXT: punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_31_copy:
; AVX: # BB#0:
; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
  ret <2 x i64> %shuffle
}

define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
; SSE-LABEL: shuffle_v2i64_0z:
; SSE: # BB#0:
; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_0z:
; AVX: # BB#0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}

define <2 x i64> @shuffle_v2i64_1z(<2 x i64> %a) {
; SSE-LABEL: shuffle_v2i64_1z:
; SSE: # BB#0:
; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_1z:
; AVX: # BB#0:
; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %shuffle
}

define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
; SSE-LABEL: shuffle_v2i64_z0:
; SSE: # BB#0:
; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_z0:
; AVX: # BB#0:
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %shuffle
}

define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
; SSE2-LABEL: shuffle_v2i64_z1:
; SSE2: # BB#0:
; SSE2-NEXT: xorpd %xmm1, %xmm1
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_z1:
; SSE3: # BB#0:
; SSE3-NEXT: xorpd %xmm1, %xmm1
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_z1:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorpd %xmm1, %xmm1
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_z1:
; SSE41: # BB#0:
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v2i64_z1:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2i64_z1:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2i64_z1:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
  ret <2 x i64> %shuffle
}

define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_0z:
; SSE: # BB#0:
; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_0z:
; AVX: # BB#0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}

define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_1z:
; SSE: # BB#0:
; SSE-NEXT: xorpd %xmm1, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: retq
;
; AVX1-LABEL: shuffle_v2f64_1z:
; AVX1: # BB#0:
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2f64_1z:
; AVX2: # BB#0:
; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2f64_1z:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
  ret <2 x double> %shuffle
}

define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_z0:
; SSE: # BB#0:
; SSE-NEXT: xorpd %xmm1, %xmm1
; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: shuffle_v2f64_z0:
; AVX1: # BB#0:
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2f64_z0:
; AVX2: # BB#0:
; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2f64_z0:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
  ret <2 x double> %shuffle
}

define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
; SSE2-LABEL: shuffle_v2f64_z1:
; SSE2: # BB#0:
; SSE2-NEXT: xorpd %xmm1, %xmm1
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2f64_z1:
; SSE3: # BB#0:
; SSE3-NEXT: xorpd %xmm1, %xmm1
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2f64_z1:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorpd %xmm1, %xmm1
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2f64_z1:
; SSE41: # BB#0:
; SSE41-NEXT: xorpd %xmm1, %xmm1
; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_z1:
; AVX: # BB#0:
; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT: retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
  ret <2 x double> %shuffle
}

define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_bitcast_1z:
; SSE: # BB#0:
; SSE-NEXT: xorpd %xmm1, %xmm1
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE-NEXT: retq
;
; AVX1-LABEL: shuffle_v2f64_bitcast_1z:
; AVX1: # BB#0:
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2f64_bitcast_1z:
; AVX2: # BB#0:
; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2f64_bitcast_1z:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; AVX512VL-NEXT: retq
  %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
  %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
  %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
  ret <2 x double> %bitcast64
}

define <2 x i64> @shuffle_v2i64_bitcast_z123(<2 x i64> %x) {
; SSE2-LABEL: shuffle_v2i64_bitcast_z123:
; SSE2: # BB#0:
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_bitcast_z123:
; SSE3: # BB#0:
; SSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE3-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_bitcast_z123:
; SSSE3: # BB#0:
; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSSE3-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_bitcast_z123:
; SSE41: # BB#0:
; SSE41-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE41-NEXT: xorps %xmm1, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v2i64_bitcast_z123:
; AVX1: # BB#0:
; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2i64_bitcast_z123:
; AVX2: # BB#0:
; AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2i64_bitcast_z123:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovss {{.*}}(%rip), %xmm1
; AVX512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX512VL-NEXT: retq
  %bitcast32 = bitcast <2 x i64> %x to <4 x float>
  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x i64>
  %and = and <2 x i64> %bitcast64, <i64 -4294967296, i64 -1>
  ret <2 x i64> %and
}

define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
; SSE-LABEL: insert_reg_and_zero_v2i64:
; SSE: # BB#0:
; SSE-NEXT: movd %rdi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_reg_and_zero_v2i64:
; AVX: # BB#0:
; AVX-NEXT: vmovq %rdi, %xmm0
; AVX-NEXT: retq
  %v = insertelement <2 x i64> undef, i64 %a, i32 0
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}

define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
; SSE-LABEL: insert_mem_and_zero_v2i64:
; SSE: # BB#0:
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: retq
;
; AVX1-LABEL: insert_mem_and_zero_v2i64:
; AVX1: # BB#0:
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_mem_and_zero_v2i64:
; AVX2: # BB#0:
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: insert_mem_and_zero_v2i64:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovq (%rdi), %xmm0
; AVX512VL-NEXT: retq
  %a = load i64, i64* %ptr
  %v = insertelement <2 x i64> undef, i64 %a, i32 0
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}

define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
; SSE-LABEL: insert_reg_and_zero_v2f64:
; SSE: # BB#0:
; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT: retq
;
; AVX-LABEL: insert_reg_and_zero_v2f64:
; AVX: # BB#0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: retq
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}

define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
; SSE-LABEL: insert_mem_and_zero_v2f64:
; SSE: # BB#0:
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: retq
;
; AVX1-LABEL: insert_mem_and_zero_v2f64:
; AVX1: # BB#0:
; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_mem_and_zero_v2f64:
; AVX2: # BB#0:
; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: insert_mem_and_zero_v2f64:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovsd (%rdi), %xmm0
; AVX512VL-NEXT: retq
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}

define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
; SSE2-LABEL: insert_reg_lo_v2i64:
; SSE2: # BB#0:
; SSE2-NEXT: movd %rdi, %xmm1
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_reg_lo_v2i64:
; SSE3: # BB#0:
; SSE3-NEXT: movd %rdi, %xmm1
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_reg_lo_v2i64:
; SSSE3: # BB#0:
; SSSE3-NEXT: movd %rdi, %xmm1
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_reg_lo_v2i64:
; SSE41: # BB#0:
; SSE41-NEXT: movd %rdi, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_reg_lo_v2i64:
; AVX1: # BB#0:
; AVX1-NEXT: vmovq %rdi, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_reg_lo_v2i64:
; AVX2: # BB#0:
; AVX2-NEXT: vmovq %rdi, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: insert_reg_lo_v2i64:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovq %rdi, %xmm1
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX512VL-NEXT: retq
  %v = insertelement <2 x i64> undef, i64 %a, i32 0
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}

define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
; SSE2-LABEL: insert_mem_lo_v2i64:
; SSE2: # BB#0:
; SSE2-NEXT: movlpd (%rdi), %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_mem_lo_v2i64:
; SSE3: # BB#0:
; SSE3-NEXT: movlpd (%rdi), %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_mem_lo_v2i64:
; SSSE3: # BB#0:
; SSSE3-NEXT: movlpd (%rdi), %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_mem_lo_v2i64:
; SSE41: # BB#0:
; SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_mem_lo_v2i64:
; AVX1: # BB#0:
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_mem_lo_v2i64:
; AVX2: # BB#0:
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: insert_mem_lo_v2i64:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovq (%rdi), %xmm1
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX512VL-NEXT: retq
  %a = load i64, i64* %ptr
  %v = insertelement <2 x i64> undef, i64 %a, i32 0
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}

define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
; SSE-LABEL: insert_reg_hi_v2i64:
; SSE: # BB#0:
; SSE-NEXT: movd %rdi, %xmm1
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: insert_reg_hi_v2i64:
; AVX: # BB#0:
; AVX-NEXT: vmovq %rdi, %xmm1
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
  %v = insertelement <2 x i64> undef, i64 %a, i32 0
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %shuffle
}

define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
; SSE-LABEL: insert_mem_hi_v2i64:
; SSE: # BB#0:
; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX1-LABEL: insert_mem_hi_v2i64:
; AVX1: # BB#0:
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_mem_hi_v2i64:
; AVX2: # BB#0:
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: insert_mem_hi_v2i64:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovq (%rdi), %xmm1
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-NEXT: retq
  %a = load i64, i64* %ptr
  %v = insertelement <2 x i64> undef, i64 %a, i32 0
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %shuffle
}

define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
; SSE-LABEL: insert_reg_lo_v2f64:
; SSE: # BB#0:
; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: insert_reg_lo_v2f64:
; AVX1: # BB#0:
; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_reg_lo_v2f64:
; AVX2: # BB#0:
; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: insert_reg_lo_v2f64:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovsd %xmm0, %xmm1, %xmm0
; AVX512VL-NEXT: retq
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}

define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
; SSE-LABEL: insert_mem_lo_v2f64:
; SSE: # BB#0:
; SSE-NEXT: movlpd (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_mem_lo_v2f64:
; AVX: # BB#0:
; AVX-NEXT: vmovlpd (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}

define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) {
; SSE-LABEL: insert_reg_hi_v2f64:
; SSE: # BB#0:
; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_reg_hi_v2f64:
; AVX: # BB#0:
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: retq
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x double> %shuffle
}

define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
; SSE-LABEL: insert_mem_hi_v2f64:
; SSE: # BB#0:
; SSE-NEXT: movhpd (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_mem_hi_v2f64:
; AVX: # BB#0:
; AVX-NEXT: vmovhpd (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x double> %shuffle
}

define <2 x double> @insert_dup_reg_v2f64(double %a) {
; SSE2-LABEL: insert_dup_reg_v2f64:
; SSE2: # BB#0:
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_dup_reg_v2f64:
; SSE3: # BB#0:
; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_dup_reg_v2f64:
; SSSE3: # BB#0:
; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_dup_reg_v2f64:
; SSE41: # BB#0:
; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE41-NEXT: retq
;
; AVX-LABEL: insert_dup_reg_v2f64:
; AVX: # BB#0:
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT: retq
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}

define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
; SSE2-LABEL: insert_dup_mem_v2f64:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_dup_mem_v2f64:
; SSE3: # BB#0:
; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_dup_mem_v2f64:
; SSSE3: # BB#0:
; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_dup_mem_v2f64:
; SSE41: # BB#0:
; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
; SSE41-NEXT: retq
;
; AVX-LABEL: insert_dup_mem_v2f64:
; AVX: # BB#0:
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX-NEXT: retq
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}

define <2 x double> @insert_dup_mem128_v2f64(<2 x double>* %ptr) nounwind {
; SSE2-LABEL: insert_dup_mem128_v2f64:
; SSE2: # BB#0:
; SSE2-NEXT: movaps (%rdi), %xmm0
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_dup_mem128_v2f64:
; SSE3: # BB#0:
; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_dup_mem128_v2f64:
; SSSE3: # BB#0:
; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_dup_mem128_v2f64:
; SSE41: # BB#0:
; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
; SSE41-NEXT: retq
;
; AVX-LABEL: insert_dup_mem128_v2f64:
; AVX: # BB#0:
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX-NEXT: retq
  %v = load <2 x double>, <2 x double>* %ptr
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}


define <2 x i64> @insert_dup_mem_v2i64(i64* %ptr) {
; SSE-LABEL: insert_dup_mem_v2i64:
; SSE: # BB#0:
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT: retq
;
; AVX1-LABEL: insert_dup_mem_v2i64:
; AVX1: # BB#0:
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_dup_mem_v2i64:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq (%rdi), %xmm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: insert_dup_mem_v2i64:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpbroadcastq (%rdi), %xmm0
; AVX512VL-NEXT: retq
  %tmp = load i64, i64* %ptr, align 1
  %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0
  %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %tmp2
}

define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
; SSE-LABEL: shuffle_mem_v2f64_10:
; SSE: # BB#0:
; SSE-NEXT: movapd (%rdi), %xmm0
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_mem_v2f64_10:
; AVX: # BB#0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
; AVX-NEXT: retq

  %a = load <2 x double>, <2 x double>* %ptr
  %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
  ret <2 x double> %shuffle
}