Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
      7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
      8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VL
      9 
     10 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
     11 target triple = "x86_64-unknown-unknown"
     12 
     ; Shuffle mask <0, 0> on <2 x i64> - both result lanes take lane 0 of %a
     ; (%b is unused). The checks below expect pshufd/vpshufd for the SSE levels
     ; and AVX1, and vpbroadcastq for AVX2 and AVX512VL.
     13 define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
     14 ; SSE-LABEL: shuffle_v2i64_00:
     15 ; SSE:       # BB#0:
     16 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
     17 ; SSE-NEXT:    retq
     18 ;
     19 ; AVX1-LABEL: shuffle_v2i64_00:
     20 ; AVX1:       # BB#0:
     21 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
     22 ; AVX1-NEXT:    retq
     23 ;
     24 ; AVX2-LABEL: shuffle_v2i64_00:
     25 ; AVX2:       # BB#0:
     26 ; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
     27 ; AVX2-NEXT:    retq
     28 ;
     29 ; AVX512VL-LABEL: shuffle_v2i64_00:
     30 ; AVX512VL:       # BB#0:
     31 ; AVX512VL-NEXT:    vpbroadcastq %xmm0, %xmm0
     32 ; AVX512VL-NEXT:    retq
     33   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
     34   ret <2 x i64> %shuffle
     35 }
     ; Shuffle mask <1, 0> on <2 x i64> - swaps the two lanes of %a (%b is unused).
     ; The checks expect one pshufd/vpshufd with dword pattern [2,3,0,1].
     36 define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
     37 ; SSE-LABEL: shuffle_v2i64_10:
     38 ; SSE:       # BB#0:
     39 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
     40 ; SSE-NEXT:    retq
     41 ;
     42 ; AVX-LABEL: shuffle_v2i64_10:
     43 ; AVX:       # BB#0:
     44 ; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
     45 ; AVX-NEXT:    retq
     46   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
     47   ret <2 x i64> %shuffle
     48 }
     ; Shuffle mask <1, 1> on <2 x i64> - both result lanes take lane 1 of %a
     ; (%b is unused). The checks expect one pshufd/vpshufd with pattern [2,3,2,3].
     49 define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
     50 ; SSE-LABEL: shuffle_v2i64_11:
     51 ; SSE:       # BB#0:
     52 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
     53 ; SSE-NEXT:    retq
     54 ;
     55 ; AVX-LABEL: shuffle_v2i64_11:
     56 ; AVX:       # BB#0:
     57 ; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
     58 ; AVX-NEXT:    retq
     59   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
     60   ret <2 x i64> %shuffle
     61 }
     ; Shuffle mask <2, 2> on <2 x i64> - both result lanes take lane 0 of %b
     ; (%a is unused). The checks expect pshufd/vpshufd reading xmm1 for the SSE
     ; levels and AVX1, and vpbroadcastq from xmm1 for AVX2 and AVX512VL.
     62 define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
     63 ; SSE-LABEL: shuffle_v2i64_22:
     64 ; SSE:       # BB#0:
     65 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
     66 ; SSE-NEXT:    retq
     67 ;
     68 ; AVX1-LABEL: shuffle_v2i64_22:
     69 ; AVX1:       # BB#0:
     70 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
     71 ; AVX1-NEXT:    retq
     72 ;
     73 ; AVX2-LABEL: shuffle_v2i64_22:
     74 ; AVX2:       # BB#0:
     75 ; AVX2-NEXT:    vpbroadcastq %xmm1, %xmm0
     76 ; AVX2-NEXT:    retq
     77 ;
     78 ; AVX512VL-LABEL: shuffle_v2i64_22:
     79 ; AVX512VL:       # BB#0:
     80 ; AVX512VL-NEXT:    vpbroadcastq %xmm1, %xmm0
     81 ; AVX512VL-NEXT:    retq
     82   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
     83   ret <2 x i64> %shuffle
     84 }
     ; Shuffle mask <3, 2> on <2 x i64> - swaps the two lanes of %b (%a is unused).
     ; The checks expect one pshufd/vpshufd reading xmm1 with pattern [2,3,0,1].
     85 define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
     86 ; SSE-LABEL: shuffle_v2i64_32:
     87 ; SSE:       # BB#0:
     88 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
     89 ; SSE-NEXT:    retq
     90 ;
     91 ; AVX-LABEL: shuffle_v2i64_32:
     92 ; AVX:       # BB#0:
     93 ; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
     94 ; AVX-NEXT:    retq
     95   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
     96   ret <2 x i64> %shuffle
     97 }
     ; Shuffle mask <3, 3> on <2 x i64> - both result lanes take lane 1 of %b
     ; (%a is unused). The checks expect one pshufd/vpshufd reading xmm1 with
     ; pattern [2,3,2,3].
     98 define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
     99 ; SSE-LABEL: shuffle_v2i64_33:
    100 ; SSE:       # BB#0:
    101 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
    102 ; SSE-NEXT:    retq
    103 ;
    104 ; AVX-LABEL: shuffle_v2i64_33:
    105 ; AVX:       # BB#0:
    106 ; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
    107 ; AVX-NEXT:    retq
    108   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
    109   ret <2 x i64> %shuffle
    110 }
    111 
    ; Shuffle mask <0, 0> on <2 x double> - both result lanes take lane 0 of %a
    ; (%b is unused). The checks expect movlhps for plain SSE2, movddup for the
    ; SSE3, SSSE3 and SSE4.1 runs, and vmovddup for the AVX runs.
    112 define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
    113 ; SSE2-LABEL: shuffle_v2f64_00:
    114 ; SSE2:       # BB#0:
    115 ; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
    116 ; SSE2-NEXT:    retq
    117 ;
    118 ; SSE3-LABEL: shuffle_v2f64_00:
    119 ; SSE3:       # BB#0:
    120 ; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
    121 ; SSE3-NEXT:    retq
    122 ;
    123 ; SSSE3-LABEL: shuffle_v2f64_00:
    124 ; SSSE3:       # BB#0:
    125 ; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
    126 ; SSSE3-NEXT:    retq
    127 ;
    128 ; SSE41-LABEL: shuffle_v2f64_00:
    129 ; SSE41:       # BB#0:
    130 ; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
    131 ; SSE41-NEXT:    retq
    132 ;
    133 ; AVX-LABEL: shuffle_v2f64_00:
    134 ; AVX:       # BB#0:
    135 ; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
    136 ; AVX-NEXT:    retq
    137   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
    138   ret <2 x double> %shuffle
    139 }
    ; Shuffle mask <1, 0> on <2 x double> - swaps the two lanes of %a (%b is
    ; unused). The checks expect shufpd for the SSE runs and vpermilpd for the
    ; AVX runs.
    140 define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
    141 ; SSE-LABEL: shuffle_v2f64_10:
    142 ; SSE:       # BB#0:
    143 ; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
    144 ; SSE-NEXT:    retq
    145 ;
    146 ; AVX-LABEL: shuffle_v2f64_10:
    147 ; AVX:       # BB#0:
    148 ; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
    149 ; AVX-NEXT:    retq
    150 
    151   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
    152   ret <2 x double> %shuffle
    153 }
    ; Shuffle mask <1, 1> on <2 x double> - both result lanes take lane 1 of %a
    ; (%b is unused). The checks expect movhlps for the SSE runs and vpermilpd
    ; for the AVX runs.
    154 define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
    155 ; SSE-LABEL: shuffle_v2f64_11:
    156 ; SSE:       # BB#0:
    157 ; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
    158 ; SSE-NEXT:    retq
    159 ;
    160 ; AVX-LABEL: shuffle_v2f64_11:
    161 ; AVX:       # BB#0:
    162 ; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
    163 ; AVX-NEXT:    retq
    164   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
    165   ret <2 x double> %shuffle
    166 }
    ; Shuffle mask <2, 2> on <2 x double> - both result lanes take lane 0 of %b
    ; (%a is unused). Plain SSE2 is expected to splat in xmm1 with movlhps and
    ; then copy to xmm0 with movaps; the SSE3, SSSE3 and SSE4.1 runs expect a
    ; single movddup into xmm0, and the AVX runs a single vmovddup.
    167 define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
    168 ; SSE2-LABEL: shuffle_v2f64_22:
    169 ; SSE2:       # BB#0:
    170 ; SSE2-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0,0]
    171 ; SSE2-NEXT:    movaps %xmm1, %xmm0
    172 ; SSE2-NEXT:    retq
    173 ;
    174 ; SSE3-LABEL: shuffle_v2f64_22:
    175 ; SSE3:       # BB#0:
    176 ; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
    177 ; SSE3-NEXT:    retq
    178 ;
    179 ; SSSE3-LABEL: shuffle_v2f64_22:
    180 ; SSSE3:       # BB#0:
    181 ; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
    182 ; SSSE3-NEXT:    retq
    183 ;
    184 ; SSE41-LABEL: shuffle_v2f64_22:
    185 ; SSE41:       # BB#0:
    186 ; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
    187 ; SSE41-NEXT:    retq
    188 ;
    189 ; AVX-LABEL: shuffle_v2f64_22:
    190 ; AVX:       # BB#0:
    191 ; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm1[0,0]
    192 ; AVX-NEXT:    retq
    193   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
    194   ret <2 x double> %shuffle
    195 }
    ; Shuffle mask <3, 2> on <2 x double> - swaps the two lanes of %b (%a is
    ; unused). The SSE runs expect shufpd in place on xmm1 followed by a movapd
    ; copy into xmm0; the AVX runs expect a single vpermilpd from xmm1 to xmm0.
    196 define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
    197 ; SSE-LABEL: shuffle_v2f64_32:
    198 ; SSE:       # BB#0:
    199 ; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
    200 ; SSE-NEXT:    movapd %xmm1, %xmm0
    201 ; SSE-NEXT:    retq
    202 ;
    203 ; AVX-LABEL: shuffle_v2f64_32:
    204 ; AVX:       # BB#0:
    205 ; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
    206 ; AVX-NEXT:    retq
    207 
    208   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
    209   ret <2 x double> %shuffle
    210 }
    ; Shuffle mask <3, 3> on <2 x double> - both result lanes take lane 1 of %b
    ; (%a is unused). The SSE runs expect movhlps in place on xmm1 followed by a
    ; movaps copy into xmm0; the AVX runs expect a single vpermilpd.
    211 define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
    212 ; SSE-LABEL: shuffle_v2f64_33:
    213 ; SSE:       # BB#0:
    214 ; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
    215 ; SSE-NEXT:    movaps %xmm1, %xmm0
    216 ; SSE-NEXT:    retq
    217 ;
    218 ; AVX-LABEL: shuffle_v2f64_33:
    219 ; AVX:       # BB#0:
    220 ; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm1[1,1]
    221 ; AVX-NEXT:    retq
    222   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
    223   ret <2 x double> %shuffle
    224 }
    ; Shuffle mask <0, 3> on <2 x double> - lane 0 comes from %a and lane 1 from
    ; %b (a lane-wise blend). The SSE2, SSE3 and SSSE3 runs expect movsd into xmm1
    ; plus a movapd copy; SSE4.1 expects blendpd and the AVX runs vblendpd.
    225 define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
    226 ; SSE2-LABEL: shuffle_v2f64_03:
    227 ; SSE2:       # BB#0:
    228 ; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
    229 ; SSE2-NEXT:    movapd %xmm1, %xmm0
    230 ; SSE2-NEXT:    retq
    231 ;
    232 ; SSE3-LABEL: shuffle_v2f64_03:
    233 ; SSE3:       # BB#0:
    234 ; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
    235 ; SSE3-NEXT:    movapd %xmm1, %xmm0
    236 ; SSE3-NEXT:    retq
    237 ;
    238 ; SSSE3-LABEL: shuffle_v2f64_03:
    239 ; SSSE3:       # BB#0:
    240 ; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
    241 ; SSSE3-NEXT:    movapd %xmm1, %xmm0
    242 ; SSSE3-NEXT:    retq
    243 ;
    244 ; SSE41-LABEL: shuffle_v2f64_03:
    245 ; SSE41:       # BB#0:
    246 ; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
    247 ; SSE41-NEXT:    retq
    248 ;
    249 ; AVX-LABEL: shuffle_v2f64_03:
    250 ; AVX:       # BB#0:
    251 ; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
    252 ; AVX-NEXT:    retq
    253   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
    254   ret <2 x double> %shuffle
    255 }
    ; Shuffle mask <2, 1> on <2 x double> - lane 0 comes from %b and lane 1 from
    ; %a. The SSE2, SSE3 and SSSE3 runs expect a single movsd writing xmm0
    ; directly; SSE4.1 expects blendpd and the AVX runs vblendpd.
    256 define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
    257 ; SSE2-LABEL: shuffle_v2f64_21:
    258 ; SSE2:       # BB#0:
    259 ; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    260 ; SSE2-NEXT:    retq
    261 ;
    262 ; SSE3-LABEL: shuffle_v2f64_21:
    263 ; SSE3:       # BB#0:
    264 ; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    265 ; SSE3-NEXT:    retq
    266 ;
    267 ; SSSE3-LABEL: shuffle_v2f64_21:
    268 ; SSSE3:       # BB#0:
    269 ; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    270 ; SSSE3-NEXT:    retq
    271 ;
    272 ; SSE41-LABEL: shuffle_v2f64_21:
    273 ; SSE41:       # BB#0:
    274 ; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    275 ; SSE41-NEXT:    retq
    276 ;
    277 ; AVX-LABEL: shuffle_v2f64_21:
    278 ; AVX:       # BB#0:
    279 ; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    280 ; AVX-NEXT:    retq
    281   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
    282   ret <2 x double> %shuffle
    283 }
    284 
    285 
    ; Shuffle mask <0, 2> on <2 x i64> - lane 0 of %a followed by lane 0 of %b.
    ; The checks expect a single punpcklqdq/vpunpcklqdq.
    286 define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
    287 ; SSE-LABEL: shuffle_v2i64_02:
    288 ; SSE:       # BB#0:
    289 ; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    290 ; SSE-NEXT:    retq
    291 ;
    292 ; AVX-LABEL: shuffle_v2i64_02:
    293 ; AVX:       # BB#0:
    294 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    295 ; AVX-NEXT:    retq
    296   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
    297   ret <2 x i64> %shuffle
    298 }
    ; Shuffle mask <0, 2> with an extra, unused leading argument %nonce, so the
    ; expected code reads %a and %b from xmm1 and xmm2. The SSE runs expect
    ; punpcklqdq in place on xmm1 plus a movdqa copy into xmm0; the AVX runs
    ; expect a single vpunpcklqdq writing xmm0.
    299 define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
    300 ; SSE-LABEL: shuffle_v2i64_02_copy:
    301 ; SSE:       # BB#0:
    302 ; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
    303 ; SSE-NEXT:    movdqa %xmm1, %xmm0
    304 ; SSE-NEXT:    retq
    305 ;
    306 ; AVX-LABEL: shuffle_v2i64_02_copy:
    307 ; AVX:       # BB#0:
    308 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
    309 ; AVX-NEXT:    retq
    310   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
    311   ret <2 x i64> %shuffle
    312 }
    ; Shuffle mask <0, 3> on <2 x i64> - lane 0 comes from %a and lane 1 from %b
    ; (a lane-wise blend). The SSE2, SSE3 and SSSE3 runs expect movsd into xmm1
    ; plus a movapd copy; SSE4.1 and AVX1 expect pblendw/vpblendw; AVX2 and
    ; AVX512VL expect vpblendd.
    313 define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
    314 ; SSE2-LABEL: shuffle_v2i64_03:
    315 ; SSE2:       # BB#0:
    316 ; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
    317 ; SSE2-NEXT:    movapd %xmm1, %xmm0
    318 ; SSE2-NEXT:    retq
    319 ;
    320 ; SSE3-LABEL: shuffle_v2i64_03:
    321 ; SSE3:       # BB#0:
    322 ; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
    323 ; SSE3-NEXT:    movapd %xmm1, %xmm0
    324 ; SSE3-NEXT:    retq
    325 ;
    326 ; SSSE3-LABEL: shuffle_v2i64_03:
    327 ; SSSE3:       # BB#0:
    328 ; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
    329 ; SSSE3-NEXT:    movapd %xmm1, %xmm0
    330 ; SSSE3-NEXT:    retq
    331 ;
    332 ; SSE41-LABEL: shuffle_v2i64_03:
    333 ; SSE41:       # BB#0:
    334 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
    335 ; SSE41-NEXT:    retq
    336 ;
    337 ; AVX1-LABEL: shuffle_v2i64_03:
    338 ; AVX1:       # BB#0:
    339 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
    340 ; AVX1-NEXT:    retq
    341 ;
    342 ; AVX2-LABEL: shuffle_v2i64_03:
    343 ; AVX2:       # BB#0:
    344 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
    345 ; AVX2-NEXT:    retq
    346 ;
    347 ; AVX512VL-LABEL: shuffle_v2i64_03:
    348 ; AVX512VL:       # BB#0:
    349 ; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
    350 ; AVX512VL-NEXT:    retq
    351   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
    352   ret <2 x i64> %shuffle
    353 }
    ; Shuffle mask <0, 3> with an extra, unused leading argument %nonce, so the
    ; expected code reads %a and %b from xmm1 and xmm2. Every SSE-level run
    ; expects the blend to be followed by a register copy into xmm0; the AVX
    ; runs expect a single blend instruction writing xmm0.
    354 define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
    355 ; SSE2-LABEL: shuffle_v2i64_03_copy:
    356 ; SSE2:       # BB#0:
    357 ; SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
    358 ; SSE2-NEXT:    movapd %xmm2, %xmm0
    359 ; SSE2-NEXT:    retq
    360 ;
    361 ; SSE3-LABEL: shuffle_v2i64_03_copy:
    362 ; SSE3:       # BB#0:
    363 ; SSE3-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
    364 ; SSE3-NEXT:    movapd %xmm2, %xmm0
    365 ; SSE3-NEXT:    retq
    366 ;
    367 ; SSSE3-LABEL: shuffle_v2i64_03_copy:
    368 ; SSSE3:       # BB#0:
    369 ; SSSE3-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
    370 ; SSSE3-NEXT:    movapd %xmm2, %xmm0
    371 ; SSSE3-NEXT:    retq
    372 ;
    373 ; SSE41-LABEL: shuffle_v2i64_03_copy:
    374 ; SSE41:       # BB#0:
    375 ; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
    376 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
    377 ; SSE41-NEXT:    retq
    378 ;
    379 ; AVX1-LABEL: shuffle_v2i64_03_copy:
    380 ; AVX1:       # BB#0:
    381 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm2[4,5,6,7]
    382 ; AVX1-NEXT:    retq
    383 ;
    384 ; AVX2-LABEL: shuffle_v2i64_03_copy:
    385 ; AVX2:       # BB#0:
    386 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
    387 ; AVX2-NEXT:    retq
    388 ;
    389 ; AVX512VL-LABEL: shuffle_v2i64_03_copy:
    390 ; AVX512VL:       # BB#0:
    391 ; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
    392 ; AVX512VL-NEXT:    retq
    393   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
    394   ret <2 x i64> %shuffle
    395 }
    ; Shuffle mask <1, 2> on <2 x i64> - lane 1 of %a followed by lane 0 of %b.
    ; The SSE2 and SSE3 runs expect shufpd; the SSSE3 and SSE4.1 runs expect
    ; palignr into xmm1 plus a movdqa copy; the AVX runs expect one vpalignr.
    396 define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
    397 ; SSE2-LABEL: shuffle_v2i64_12:
    398 ; SSE2:       # BB#0:
    399 ; SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
    400 ; SSE2-NEXT:    retq
    401 ;
    402 ; SSE3-LABEL: shuffle_v2i64_12:
    403 ; SSE3:       # BB#0:
    404 ; SSE3-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
    405 ; SSE3-NEXT:    retq
    406 ;
    407 ; SSSE3-LABEL: shuffle_v2i64_12:
    408 ; SSSE3:       # BB#0:
    409 ; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
    410 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
    411 ; SSSE3-NEXT:    retq
    412 ;
    413 ; SSE41-LABEL: shuffle_v2i64_12:
    414 ; SSE41:       # BB#0:
    415 ; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
    416 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
    417 ; SSE41-NEXT:    retq
    418 ;
    419 ; AVX-LABEL: shuffle_v2i64_12:
    420 ; AVX:       # BB#0:
    421 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
    422 ; AVX-NEXT:    retq
    423   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
    424   ret <2 x i64> %shuffle
    425 }
    ; Shuffle mask <1, 2> with an extra, unused leading argument %nonce, so the
    ; expected code reads %a and %b from xmm1 and xmm2. Every SSE-level run
    ; expects shufpd or palignr followed by a register copy into xmm0; the AVX
    ; runs expect a single vpalignr writing xmm0.
    426 define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
    427 ; SSE2-LABEL: shuffle_v2i64_12_copy:
    428 ; SSE2:       # BB#0:
    429 ; SSE2-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
    430 ; SSE2-NEXT:    movapd %xmm1, %xmm0
    431 ; SSE2-NEXT:    retq
    432 ;
    433 ; SSE3-LABEL: shuffle_v2i64_12_copy:
    434 ; SSE3:       # BB#0:
    435 ; SSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
    436 ; SSE3-NEXT:    movapd %xmm1, %xmm0
    437 ; SSE3-NEXT:    retq
    438 ;
    439 ; SSSE3-LABEL: shuffle_v2i64_12_copy:
    440 ; SSSE3:       # BB#0:
    441 ; SSSE3-NEXT:    palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
    442 ; SSSE3-NEXT:    movdqa %xmm2, %xmm0
    443 ; SSSE3-NEXT:    retq
    444 ;
    445 ; SSE41-LABEL: shuffle_v2i64_12_copy:
    446 ; SSE41:       # BB#0:
    447 ; SSE41-NEXT:    palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
    448 ; SSE41-NEXT:    movdqa %xmm2, %xmm0
    449 ; SSE41-NEXT:    retq
    450 ;
    451 ; AVX-LABEL: shuffle_v2i64_12_copy:
    452 ; AVX:       # BB#0:
    453 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
    454 ; AVX-NEXT:    retq
    455   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
    456   ret <2 x i64> %shuffle
    457 }
    ; Shuffle mask <1, 3> on <2 x i64> - lane 1 of %a followed by lane 1 of %b.
    ; The checks expect a single punpckhqdq/vpunpckhqdq.
    458 define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
    459 ; SSE-LABEL: shuffle_v2i64_13:
    460 ; SSE:       # BB#0:
    461 ; SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
    462 ; SSE-NEXT:    retq
    463 ;
    464 ; AVX-LABEL: shuffle_v2i64_13:
    465 ; AVX:       # BB#0:
    466 ; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
    467 ; AVX-NEXT:    retq
    468   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
    469   ret <2 x i64> %shuffle
    470 }
    ; Shuffle mask <1, 3> with an extra, unused leading argument %nonce, so the
    ; expected code reads %a and %b from xmm1 and xmm2. The SSE runs expect
    ; punpckhqdq in place on xmm1 plus a movdqa copy into xmm0; the AVX runs
    ; expect a single vpunpckhqdq writing xmm0.
    471 define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
    472 ; SSE-LABEL: shuffle_v2i64_13_copy:
    473 ; SSE:       # BB#0:
    474 ; SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm2[1]
    475 ; SSE-NEXT:    movdqa %xmm1, %xmm0
    476 ; SSE-NEXT:    retq
    477 ;
    478 ; AVX-LABEL: shuffle_v2i64_13_copy:
    479 ; AVX:       # BB#0:
    480 ; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
    481 ; AVX-NEXT:    retq
    482   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
    483   ret <2 x i64> %shuffle
    484 }
    ; Shuffle mask <2, 0> on <2 x i64> - lane 0 of %b followed by lane 0 of %a.
    ; The SSE runs expect punpcklqdq in place on xmm1 plus a movdqa copy into
    ; xmm0; the AVX runs expect a single vpunpcklqdq with xmm1 as first source.
    485 define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
    486 ; SSE-LABEL: shuffle_v2i64_20:
    487 ; SSE:       # BB#0:
    488 ; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
    489 ; SSE-NEXT:    movdqa %xmm1, %xmm0
    490 ; SSE-NEXT:    retq
    491 ;
    492 ; AVX-LABEL: shuffle_v2i64_20:
    493 ; AVX:       # BB#0:
    494 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
    495 ; AVX-NEXT:    retq
    496   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
    497   ret <2 x i64> %shuffle
    498 }
    ; Shuffle mask <2, 0> with an extra, unused leading argument %nonce, so the
    ; expected code reads %a and %b from xmm1 and xmm2. The SSE runs expect
    ; punpcklqdq in place on xmm2 plus a movdqa copy into xmm0; the AVX runs
    ; expect a single vpunpcklqdq writing xmm0.
    499 define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
    500 ; SSE-LABEL: shuffle_v2i64_20_copy:
    501 ; SSE:       # BB#0:
    502 ; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
    503 ; SSE-NEXT:    movdqa %xmm2, %xmm0
    504 ; SSE-NEXT:    retq
    505 ;
    506 ; AVX-LABEL: shuffle_v2i64_20_copy:
    507 ; AVX:       # BB#0:
    508 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
    509 ; AVX-NEXT:    retq
    510   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
    511   ret <2 x i64> %shuffle
    512 }
    ; Shuffle mask <2, 1> on <2 x i64> - lane 0 comes from %b and lane 1 from %a.
    ; The SSE2, SSE3 and SSSE3 runs expect a single movsd writing xmm0; SSE4.1
    ; and AVX1 expect pblendw/vpblendw; AVX2 and AVX512VL expect vpblendd.
    513 define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
    514 ; SSE2-LABEL: shuffle_v2i64_21:
    515 ; SSE2:       # BB#0:
    516 ; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    517 ; SSE2-NEXT:    retq
    518 ;
    519 ; SSE3-LABEL: shuffle_v2i64_21:
    520 ; SSE3:       # BB#0:
    521 ; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    522 ; SSE3-NEXT:    retq
    523 ;
    524 ; SSSE3-LABEL: shuffle_v2i64_21:
    525 ; SSSE3:       # BB#0:
    526 ; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    527 ; SSSE3-NEXT:    retq
    528 ;
    529 ; SSE41-LABEL: shuffle_v2i64_21:
    530 ; SSE41:       # BB#0:
    531 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
    532 ; SSE41-NEXT:    retq
    533 ;
    534 ; AVX1-LABEL: shuffle_v2i64_21:
    535 ; AVX1:       # BB#0:
    536 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
    537 ; AVX1-NEXT:    retq
    538 ;
    539 ; AVX2-LABEL: shuffle_v2i64_21:
    540 ; AVX2:       # BB#0:
    541 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
    542 ; AVX2-NEXT:    retq
    543 ;
    544 ; AVX512VL-LABEL: shuffle_v2i64_21:
    545 ; AVX512VL:       # BB#0:
    546 ; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
    547 ; AVX512VL-NEXT:    retq
    548   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
    549   ret <2 x i64> %shuffle
    550 }
    ; Shuffle mask <2, 1> with an extra, unused leading argument %nonce, so the
    ; expected code reads %a and %b from xmm1 and xmm2. Every SSE-level run
    ; expects the blend in place on xmm1 followed by a register copy into xmm0;
    ; the AVX runs expect a single blend instruction writing xmm0.
    551 define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
    552 ; SSE2-LABEL: shuffle_v2i64_21_copy:
    553 ; SSE2:       # BB#0:
    554 ; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
    555 ; SSE2-NEXT:    movapd %xmm1, %xmm0
    556 ; SSE2-NEXT:    retq
    557 ;
    558 ; SSE3-LABEL: shuffle_v2i64_21_copy:
    559 ; SSE3:       # BB#0:
    560 ; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
    561 ; SSE3-NEXT:    movapd %xmm1, %xmm0
    562 ; SSE3-NEXT:    retq
    563 ;
    564 ; SSSE3-LABEL: shuffle_v2i64_21_copy:
    565 ; SSSE3:       # BB#0:
    566 ; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
    567 ; SSSE3-NEXT:    movapd %xmm1, %xmm0
    568 ; SSSE3-NEXT:    retq
    569 ;
    570 ; SSE41-LABEL: shuffle_v2i64_21_copy:
    571 ; SSE41:       # BB#0:
    572 ; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
    573 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
    574 ; SSE41-NEXT:    retq
    575 ;
    576 ; AVX1-LABEL: shuffle_v2i64_21_copy:
    577 ; AVX1:       # BB#0:
    578 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm1[4,5,6,7]
    579 ; AVX1-NEXT:    retq
    580 ;
    581 ; AVX2-LABEL: shuffle_v2i64_21_copy:
    582 ; AVX2:       # BB#0:
    583 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
    584 ; AVX2-NEXT:    retq
    585 ;
    586 ; AVX512VL-LABEL: shuffle_v2i64_21_copy:
    587 ; AVX512VL:       # BB#0:
    588 ; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
    589 ; AVX512VL-NEXT:    retq
    590   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
    591   ret <2 x i64> %shuffle
    592 }
    ; Shuffle mask <3, 0> on <2 x i64> - lane 1 of %b followed by lane 0 of %a.
    ; The SSE2 and SSE3 runs expect shufpd in place on xmm1 plus a movapd copy;
    ; the SSSE3 and SSE4.1 runs expect a single palignr writing xmm0; the AVX
    ; runs expect a single vpalignr.
    593 define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
    594 ; SSE2-LABEL: shuffle_v2i64_30:
    595 ; SSE2:       # BB#0:
    596 ; SSE2-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
    597 ; SSE2-NEXT:    movapd %xmm1, %xmm0
    598 ; SSE2-NEXT:    retq
    599 ;
    600 ; SSE3-LABEL: shuffle_v2i64_30:
    601 ; SSE3:       # BB#0:
    602 ; SSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
    603 ; SSE3-NEXT:    movapd %xmm1, %xmm0
    604 ; SSE3-NEXT:    retq
    605 ;
    606 ; SSSE3-LABEL: shuffle_v2i64_30:
    607 ; SSSE3:       # BB#0:
    608 ; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
    609 ; SSSE3-NEXT:    retq
    610 ;
    611 ; SSE41-LABEL: shuffle_v2i64_30:
    612 ; SSE41:       # BB#0:
    613 ; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
    614 ; SSE41-NEXT:    retq
    615 ;
    616 ; AVX-LABEL: shuffle_v2i64_30:
    617 ; AVX:       # BB#0:
    618 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
    619 ; AVX-NEXT:    retq
    620   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
    621   ret <2 x i64> %shuffle
    622 }
    ; Shuffle mask <3, 0> with an extra, unused leading argument %nonce, so the
    ; expected code reads %a and %b from xmm1 and xmm2. Every SSE-level run
    ; expects shufpd or palignr followed by a register copy into xmm0; the AVX
    ; runs expect a single vpalignr writing xmm0.
    623 define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
    624 ; SSE2-LABEL: shuffle_v2i64_30_copy:
    625 ; SSE2:       # BB#0:
    626 ; SSE2-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
    627 ; SSE2-NEXT:    movapd %xmm2, %xmm0
    628 ; SSE2-NEXT:    retq
    629 ;
    630 ; SSE3-LABEL: shuffle_v2i64_30_copy:
    631 ; SSE3:       # BB#0:
    632 ; SSE3-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
    633 ; SSE3-NEXT:    movapd %xmm2, %xmm0
    634 ; SSE3-NEXT:    retq
    635 ;
    636 ; SSSE3-LABEL: shuffle_v2i64_30_copy:
    637 ; SSSE3:       # BB#0:
    638 ; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
    639 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
    640 ; SSSE3-NEXT:    retq
    641 ;
    642 ; SSE41-LABEL: shuffle_v2i64_30_copy:
    643 ; SSE41:       # BB#0:
    644 ; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
    645 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
    646 ; SSE41-NEXT:    retq
    647 ;
    648 ; AVX-LABEL: shuffle_v2i64_30_copy:
    649 ; AVX:       # BB#0:
    650 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
    651 ; AVX-NEXT:    retq
    652   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
    653   ret <2 x i64> %shuffle
    654 }
    ; Shuffle mask <3, 1> on <2 x i64> - lane 1 of %b followed by lane 1 of %a.
    ; The SSE runs expect punpckhqdq in place on xmm1 plus a movdqa copy into
    ; xmm0; the AVX runs expect a single vpunpckhqdq with xmm1 as first source.
    655 define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
    656 ; SSE-LABEL: shuffle_v2i64_31:
    657 ; SSE:       # BB#0:
    658 ; SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
    659 ; SSE-NEXT:    movdqa %xmm1, %xmm0
    660 ; SSE-NEXT:    retq
    661 ;
    662 ; AVX-LABEL: shuffle_v2i64_31:
    663 ; AVX:       # BB#0:
    664 ; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
    665 ; AVX-NEXT:    retq
    666   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
    667   ret <2 x i64> %shuffle
    668 }
    ; Shuffle mask <3, 1> with an extra, unused leading argument %nonce, so the
    ; expected code reads %a and %b from xmm1 and xmm2. The SSE runs expect
    ; punpckhqdq in place on xmm2 plus a movdqa copy into xmm0; the AVX runs
    ; expect a single vpunpckhqdq writing xmm0.
    669 define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
    670 ; SSE-LABEL: shuffle_v2i64_31_copy:
    671 ; SSE:       # BB#0:
    672 ; SSE-NEXT:    punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
    673 ; SSE-NEXT:    movdqa %xmm2, %xmm0
    674 ; SSE-NEXT:    retq
    675 ;
    676 ; AVX-LABEL: shuffle_v2i64_31_copy:
    677 ; AVX:       # BB#0:
    678 ; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1]
    679 ; AVX-NEXT:    retq
    680   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
    681   ret <2 x i64> %shuffle
    682 }
    683 
    ; Shuffle of %a with a zeroinitializer second operand, mask <0, 3> - lane 0
    ; of %a is kept and lane 1 is zero. The checks expect a single movq/vmovq.
    684 define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
    685 ; SSE-LABEL: shuffle_v2i64_0z:
    686 ; SSE:       # BB#0:
    687 ; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
    688 ; SSE-NEXT:    retq
    689 ;
    690 ; AVX-LABEL: shuffle_v2i64_0z:
    691 ; AVX:       # BB#0:
    692 ; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
    693 ; AVX-NEXT:    retq
    694   %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
    695   ret <2 x i64> %shuffle
    696 }
    697 
    ; Shuffle of %a with a zeroinitializer second operand, mask <1, 3> - lane 1
    ; of %a moves to lane 0 and lane 1 is zero. The checks expect a single
    ; psrldq/vpsrldq byte shift.
    698 define <2 x i64> @shuffle_v2i64_1z(<2 x i64> %a) {
    699 ; SSE-LABEL: shuffle_v2i64_1z:
    700 ; SSE:       # BB#0:
    701 ; SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
    702 ; SSE-NEXT:    retq
    703 ;
    704 ; AVX-LABEL: shuffle_v2i64_1z:
    705 ; AVX:       # BB#0:
    706 ; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
    707 ; AVX-NEXT:    retq
    708   %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 3>
    709   ret <2 x i64> %shuffle
    710 }
    711 
    ; Shuffle of %a with a zeroinitializer second operand, mask <2, 0> - lane 0
    ; is zero and lane 0 of %a moves to lane 1. The checks expect a single
    ; pslldq/vpslldq byte shift.
    712 define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
    713 ; SSE-LABEL: shuffle_v2i64_z0:
    714 ; SSE:       # BB#0:
    715 ; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
    716 ; SSE-NEXT:    retq
    717 ;
    718 ; AVX-LABEL: shuffle_v2i64_z0:
    719 ; AVX:       # BB#0:
    720 ; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
    721 ; AVX-NEXT:    retq
    722   %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
    723   ret <2 x i64> %shuffle
    724 }
    725 
    ; Shuffle of %a with a zeroinitializer second operand, mask <2, 1> - lane 0
    ; is zero and lane 1 of %a is kept. Every run expects xmm1 to be zeroed first
    ; and then blended with xmm0 - xorpd + movsd for SSE2, SSE3 and SSSE3,
    ; pxor + pblendw for SSE4.1, vpxor + vpblendw for AVX1, vpxor + vpblendd for
    ; AVX2, and vpxord + vpblendd for AVX512VL.
    726 define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
    727 ; SSE2-LABEL: shuffle_v2i64_z1:
    728 ; SSE2:       # BB#0:
    729 ; SSE2-NEXT:    xorpd %xmm1, %xmm1
    730 ; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    731 ; SSE2-NEXT:    retq
    732 ;
    733 ; SSE3-LABEL: shuffle_v2i64_z1:
    734 ; SSE3:       # BB#0:
    735 ; SSE3-NEXT:    xorpd %xmm1, %xmm1
    736 ; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    737 ; SSE3-NEXT:    retq
    738 ;
    739 ; SSSE3-LABEL: shuffle_v2i64_z1:
    740 ; SSSE3:       # BB#0:
    741 ; SSSE3-NEXT:    xorpd %xmm1, %xmm1
    742 ; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    743 ; SSSE3-NEXT:    retq
    744 ;
    745 ; SSE41-LABEL: shuffle_v2i64_z1:
    746 ; SSE41:       # BB#0:
    747 ; SSE41-NEXT:    pxor %xmm1, %xmm1
    748 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
    749 ; SSE41-NEXT:    retq
    750 ;
    751 ; AVX1-LABEL: shuffle_v2i64_z1:
    752 ; AVX1:       # BB#0:
    753 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    754 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
    755 ; AVX1-NEXT:    retq
    756 ;
    757 ; AVX2-LABEL: shuffle_v2i64_z1:
    758 ; AVX2:       # BB#0:
    759 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    760 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
    761 ; AVX2-NEXT:    retq
    762 ;
    763 ; AVX512VL-LABEL: shuffle_v2i64_z1:
    764 ; AVX512VL:       # BB#0:
    765 ; AVX512VL-NEXT:    vpxord %xmm1, %xmm1, %xmm1
    766 ; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
    767 ; AVX512VL-NEXT:    retq
    768   %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
    769   ret <2 x i64> %shuffle
    770 }
    771 
    ; Shuffle of a <2 x double> %a with a zeroinitializer second operand, mask
    ; <0, 3> - lane 0 of %a is kept and lane 1 is zero. The checks expect a
    ; single movq/vmovq.
    772 define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
    773 ; SSE-LABEL: shuffle_v2f64_0z:
    774 ; SSE:       # BB#0:
    775 ; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
    776 ; SSE-NEXT:    retq
    777 ;
    778 ; AVX-LABEL: shuffle_v2f64_0z:
    779 ; AVX:       # BB#0:
    780 ; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
    781 ; AVX-NEXT:    retq
    782   %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
    783   ret <2 x double> %shuffle
    784 }
    785 
    786 define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
        ; Move the high double of %a into the low lane and zero the high lane:
        ; the result is <a[1], 0.0>.
        ; The checks expect a zeroed scratch register combined through unpckhpd.
    787 ; SSE-LABEL: shuffle_v2f64_1z:
    788 ; SSE:       # BB#0:
    789 ; SSE-NEXT:    xorpd %xmm1, %xmm1
    790 ; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
    791 ; SSE-NEXT:    retq
    792 ;
    793 ; AVX1-LABEL: shuffle_v2f64_1z:
    794 ; AVX1:       # BB#0:
    795 ; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
    796 ; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
    797 ; AVX1-NEXT:    retq
    798 ;
    799 ; AVX2-LABEL: shuffle_v2f64_1z:
    800 ; AVX2:       # BB#0:
    801 ; AVX2-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
    802 ; AVX2-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
    803 ; AVX2-NEXT:    retq
    804 ;
    805 ; AVX512VL-LABEL: shuffle_v2f64_1z:
    806 ; AVX512VL:       # BB#0:
    807 ; AVX512VL-NEXT:    vpxord %xmm1, %xmm1, %xmm1
    808 ; AVX512VL-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
    809 ; AVX512VL-NEXT:    retq
    810   %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
    811   ret <2 x double> %shuffle
    812 }
    813 
    814 define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
        ; Zero the low lane and move the low double of %a into the high lane:
        ; the result is <0.0, a[0]>.
        ; The checks expect a zeroed register combined through unpcklpd. The
        ; two-operand SSE form builds the result in xmm1 and needs an extra movapd
        ; to bring it back to xmm0; the AVX forms write xmm0 directly.
    815 ; SSE-LABEL: shuffle_v2f64_z0:
    816 ; SSE:       # BB#0:
    817 ; SSE-NEXT:    xorpd %xmm1, %xmm1
    818 ; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
    819 ; SSE-NEXT:    movapd %xmm1, %xmm0
    820 ; SSE-NEXT:    retq
    821 ;
    822 ; AVX1-LABEL: shuffle_v2f64_z0:
    823 ; AVX1:       # BB#0:
    824 ; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
    825 ; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
    826 ; AVX1-NEXT:    retq
    827 ;
    828 ; AVX2-LABEL: shuffle_v2f64_z0:
    829 ; AVX2:       # BB#0:
    830 ; AVX2-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
    831 ; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
    832 ; AVX2-NEXT:    retq
    833 ;
    834 ; AVX512VL-LABEL: shuffle_v2f64_z0:
    835 ; AVX512VL:       # BB#0:
    836 ; AVX512VL-NEXT:    vpxord %xmm1, %xmm1, %xmm1
    837 ; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
    838 ; AVX512VL-NEXT:    retq
    839   %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
    840   ret <2 x double> %shuffle
    841 }
    842 
    843 define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
        ; Floating-point counterpart of shuffle_v2i64_z1: the result is <0.0, a[1]>.
        ; The checks expect a zeroed scratch register merged with movsd before
        ; SSE4.1, and with blendpd/vblendpd from SSE4.1 onwards.
    844 ; SSE2-LABEL: shuffle_v2f64_z1:
    845 ; SSE2:       # BB#0:
    846 ; SSE2-NEXT:    xorpd %xmm1, %xmm1
    847 ; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    848 ; SSE2-NEXT:    retq
    849 ;
    850 ; SSE3-LABEL: shuffle_v2f64_z1:
    851 ; SSE3:       # BB#0:
    852 ; SSE3-NEXT:    xorpd %xmm1, %xmm1
    853 ; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    854 ; SSE3-NEXT:    retq
    855 ;
    856 ; SSSE3-LABEL: shuffle_v2f64_z1:
    857 ; SSSE3:       # BB#0:
    858 ; SSSE3-NEXT:    xorpd %xmm1, %xmm1
    859 ; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    860 ; SSSE3-NEXT:    retq
    861 ;
    862 ; SSE41-LABEL: shuffle_v2f64_z1:
    863 ; SSE41:       # BB#0:
    864 ; SSE41-NEXT:    xorpd %xmm1, %xmm1
    865 ; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    866 ; SSE41-NEXT:    retq
    867 ;
    868 ; AVX1-LABEL: shuffle_v2f64_z1:
    869 ; AVX1:       # BB#0:
    870 ; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
    871 ; AVX1-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    872 ; AVX1-NEXT:    retq
    873 ;
    874 ; AVX2-LABEL: shuffle_v2f64_z1:
    875 ; AVX2:       # BB#0:
    876 ; AVX2-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
    877 ; AVX2-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    878 ; AVX2-NEXT:    retq
    879 ;
    880 ; AVX512VL-LABEL: shuffle_v2f64_z1:
    881 ; AVX512VL:       # BB#0:
    882 ; AVX512VL-NEXT:    vpxord %xmm1, %xmm1, %xmm1
    883 ; AVX512VL-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    884 ; AVX512VL-NEXT:    retq
    885   %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
    886   ret <2 x double> %shuffle
    887 }
    888 
    889 define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
        ; Two shuffles separated by bitcasts that together produce <a[1], 0.0>.
        ; The checks expect them to be folded into a single shufpd/vshufpd against
        ; a zeroed register rather than one instruction per shuffle.
    890 ; SSE-LABEL: shuffle_v2f64_bitcast_1z:
    891 ; SSE:       # BB#0:
    892 ; SSE-NEXT:    xorpd %xmm1, %xmm1
    893 ; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
    894 ; SSE-NEXT:    retq
    895 ;
    896 ; AVX1-LABEL: shuffle_v2f64_bitcast_1z:
    897 ; AVX1:       # BB#0:
    898 ; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
    899 ; AVX1-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
    900 ; AVX1-NEXT:    retq
    901 ;
    902 ; AVX2-LABEL: shuffle_v2f64_bitcast_1z:
    903 ; AVX2:       # BB#0:
    904 ; AVX2-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
    905 ; AVX2-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
    906 ; AVX2-NEXT:    retq
    907 ;
    908 ; AVX512VL-LABEL: shuffle_v2f64_bitcast_1z:
    909 ; AVX512VL:       # BB#0:
    910 ; AVX512VL-NEXT:    vpxord %xmm1, %xmm1, %xmm1
    911 ; AVX512VL-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
    912 ; AVX512VL-NEXT:    retq
        ; Step 1: blend with zero in the 64-bit domain, giving <0.0, a[1]>.
    913   %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
    914   %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
        ; Step 2: mask <2,3,0,1> on the 32-bit view swaps the two 64-bit halves.
    915   %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
    916   %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
    917   ret <2 x double> %bitcast64
    918 }
    919 
    920 define <2 x i64> @shuffle_v2i64_bitcast_z123(<2 x i64> %x) {
        ; Inserts the constant 1.0 into 32-bit lane 0 of %x and then masks that same
        ; lane to zero, so the net result is %x with its low 32 bits cleared.
        ; The SSE2, SSE3 and SSSE3 checks still expect the literal sequence (load
        ; the constant, movss merge, andps with a constant-pool mask). From SSE4.1
        ; onwards the checks expect a zeroed register plus a single blend.
    921 ; SSE2-LABEL: shuffle_v2i64_bitcast_z123:
    922 ; SSE2:       # BB#0:
    923 ; SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    924 ; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
    925 ; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
    926 ; SSE2-NEXT:    retq
    927 ;
    928 ; SSE3-LABEL: shuffle_v2i64_bitcast_z123:
    929 ; SSE3:       # BB#0:
    930 ; SSE3-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    931 ; SSE3-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
    932 ; SSE3-NEXT:    andps {{.*}}(%rip), %xmm0
    933 ; SSE3-NEXT:    retq
    934 ;
    935 ; SSSE3-LABEL: shuffle_v2i64_bitcast_z123:
    936 ; SSSE3:       # BB#0:
    937 ; SSSE3-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    938 ; SSSE3-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
    939 ; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
    940 ; SSSE3-NEXT:    retq
    941 ;
    942 ; SSE41-LABEL: shuffle_v2i64_bitcast_z123:
    943 ; SSE41:       # BB#0:
    944 ; SSE41-NEXT:    pxor %xmm1, %xmm1
    945 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
    946 ; SSE41-NEXT:    retq
    947 ;
    948 ; AVX1-LABEL: shuffle_v2i64_bitcast_z123:
    949 ; AVX1:       # BB#0:
    950 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    951 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
    952 ; AVX1-NEXT:    retq
    953 ;
    954 ; AVX2-LABEL: shuffle_v2i64_bitcast_z123:
    955 ; AVX2:       # BB#0:
    956 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    957 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
    958 ; AVX2-NEXT:    retq
    959 ;
    960 ; AVX512VL-LABEL: shuffle_v2i64_bitcast_z123:
    961 ; AVX512VL:       # BB#0:
    962 ; AVX512VL-NEXT:    vpxord %xmm1, %xmm1, %xmm1
    963 ; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
    964 ; AVX512VL-NEXT:    retq
    965   %bitcast32 = bitcast <2 x i64> %x to <4 x float>
        ; Mask index 4 takes lane 0 from the constant operand (1.0); lanes 1-3 stay.
    966   %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
    967   %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x i64>
        ; -4294967296 is 0xFFFFFFFF00000000: clears the low 32 bits of element 0,
        ; i.e. exactly the lane that received 1.0 above. Element 1 is untouched.
    968   %and = and <2 x i64> %bitcast64, <i64 -4294967296, i64 -1>
    969   ret <2 x i64> %and
    970 }
    971 
    972 define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
        ; Builds <a, 0> from a scalar i64 argument: %a is inserted into lane 0 and
        ; the shuffle takes lane 1 from the zeroinitializer operand (mask index 3).
        ; The checks expect a single GPR-to-XMM move from %rdi with no extra zeroing.
    973 ; SSE-LABEL: insert_reg_and_zero_v2i64:
    974 ; SSE:       # BB#0:
    975 ; SSE-NEXT:    movd %rdi, %xmm0
    976 ; SSE-NEXT:    retq
    977 ;
    978 ; AVX-LABEL: insert_reg_and_zero_v2i64:
    979 ; AVX:       # BB#0:
    980 ; AVX-NEXT:    vmovq %rdi, %xmm0
    981 ; AVX-NEXT:    retq
    982   %v = insertelement <2 x i64> undef, i64 %a, i32 0
    983   %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
    984   ret <2 x i64> %shuffle
    985 }
    986 
    987 define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
        ; Same as insert_reg_and_zero_v2i64 but the scalar is loaded from %ptr:
        ; the result is <*ptr, 0>.
        ; The checks expect the load and the zeroing of lane 1 to be done by a
        ; single movq/vmovq with a memory operand.
    988 ; SSE-LABEL: insert_mem_and_zero_v2i64:
    989 ; SSE:       # BB#0:
    990 ; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
    991 ; SSE-NEXT:    retq
    992 ;
    993 ; AVX-LABEL: insert_mem_and_zero_v2i64:
    994 ; AVX:       # BB#0:
    995 ; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
    996 ; AVX-NEXT:    retq
    997   %a = load i64, i64* %ptr
    998   %v = insertelement <2 x i64> undef, i64 %a, i32 0
    999   %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
   1000   ret <2 x i64> %shuffle
   1001 }
   1002 
   1003 define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
        ; Builds <a, 0.0> from a scalar double argument.
        ; The checks expect one movq/vmovq on xmm0 that keeps lane 0 and zeroes
        ; lane 1.
   1004 ; SSE-LABEL: insert_reg_and_zero_v2f64:
   1005 ; SSE:       # BB#0:
   1006 ; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
   1007 ; SSE-NEXT:    retq
   1008 ;
   1009 ; AVX-LABEL: insert_reg_and_zero_v2f64:
   1010 ; AVX:       # BB#0:
   1011 ; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
   1012 ; AVX-NEXT:    retq
   1013   %v = insertelement <2 x double> undef, double %a, i32 0
   1014   %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
   1015   ret <2 x double> %shuffle
   1016 }
   1017 
   1018 define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
        ; Builds <*ptr, 0.0> from a double loaded from memory.
        ; The checks expect a single movsd/vmovsd load whose printed shuffle
        ; comment shows lane 1 as zero.
   1019 ; SSE-LABEL: insert_mem_and_zero_v2f64:
   1020 ; SSE:       # BB#0:
   1021 ; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1022 ; SSE-NEXT:    retq
   1023 ;
   1024 ; AVX-LABEL: insert_mem_and_zero_v2f64:
   1025 ; AVX:       # BB#0:
   1026 ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
   1027 ; AVX-NEXT:    retq
   1028   %a = load double, double* %ptr
   1029   %v = insertelement <2 x double> undef, double %a, i32 0
   1030   %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
   1031   ret <2 x double> %shuffle
   1032 }
   1033 
   1034 define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
        ; Replaces the low element of %b with the scalar %a: the result is
        ; <a, b[1]>.
        ; The checks expect %rdi to be moved into xmm1 and then merged into xmm0
        ; with movsd (SSE2, SSE3, SSSE3), pblendw (SSE4.1, AVX1) or vpblendd
        ; (AVX2, AVX512VL).
   1035 ; SSE2-LABEL: insert_reg_lo_v2i64:
   1036 ; SSE2:       # BB#0:
   1037 ; SSE2-NEXT:    movd %rdi, %xmm1
   1038 ; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
   1039 ; SSE2-NEXT:    retq
   1040 ;
   1041 ; SSE3-LABEL: insert_reg_lo_v2i64:
   1042 ; SSE3:       # BB#0:
   1043 ; SSE3-NEXT:    movd %rdi, %xmm1
   1044 ; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
   1045 ; SSE3-NEXT:    retq
   1046 ;
   1047 ; SSSE3-LABEL: insert_reg_lo_v2i64:
   1048 ; SSSE3:       # BB#0:
   1049 ; SSSE3-NEXT:    movd %rdi, %xmm1
   1050 ; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
   1051 ; SSSE3-NEXT:    retq
   1052 ;
   1053 ; SSE41-LABEL: insert_reg_lo_v2i64:
   1054 ; SSE41:       # BB#0:
   1055 ; SSE41-NEXT:    movd %rdi, %xmm1
   1056 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
   1057 ; SSE41-NEXT:    retq
   1058 ;
   1059 ; AVX1-LABEL: insert_reg_lo_v2i64:
   1060 ; AVX1:       # BB#0:
   1061 ; AVX1-NEXT:    vmovq %rdi, %xmm1
   1062 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
   1063 ; AVX1-NEXT:    retq
   1064 ;
   1065 ; AVX2-LABEL: insert_reg_lo_v2i64:
   1066 ; AVX2:       # BB#0:
   1067 ; AVX2-NEXT:    vmovq %rdi, %xmm1
   1068 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
   1069 ; AVX2-NEXT:    retq
   1070 ;
   1071 ; AVX512VL-LABEL: insert_reg_lo_v2i64:
   1072 ; AVX512VL:       # BB#0:
   1073 ; AVX512VL-NEXT:    vmovq %rdi, %xmm1
   1074 ; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
   1075 ; AVX512VL-NEXT:    retq
   1076   %v = insertelement <2 x i64> undef, i64 %a, i32 0
   1077   %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
   1078   ret <2 x i64> %shuffle
   1079 }
   1080 
   1081 define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
        ; Replaces the low element of %b with an i64 loaded from %ptr: the result
        ; is <*ptr, b[1]>.
        ; Before SSE4.1 the checks expect the load to be folded into one movlpd.
        ; From SSE4.1 onwards they expect a separate movq load into xmm1 followed
        ; by a blend.
   1082 ; SSE2-LABEL: insert_mem_lo_v2i64:
   1083 ; SSE2:       # BB#0:
   1084 ; SSE2-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
   1085 ; SSE2-NEXT:    retq
   1086 ;
   1087 ; SSE3-LABEL: insert_mem_lo_v2i64:
   1088 ; SSE3:       # BB#0:
   1089 ; SSE3-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
   1090 ; SSE3-NEXT:    retq
   1091 ;
   1092 ; SSSE3-LABEL: insert_mem_lo_v2i64:
   1093 ; SSSE3:       # BB#0:
   1094 ; SSSE3-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
   1095 ; SSSE3-NEXT:    retq
   1096 ;
   1097 ; SSE41-LABEL: insert_mem_lo_v2i64:
   1098 ; SSE41:       # BB#0:
   1099 ; SSE41-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
   1100 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
   1101 ; SSE41-NEXT:    retq
   1102 ;
   1103 ; AVX1-LABEL: insert_mem_lo_v2i64:
   1104 ; AVX1:       # BB#0:
   1105 ; AVX1-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
   1106 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
   1107 ; AVX1-NEXT:    retq
   1108 ;
   1109 ; AVX2-LABEL: insert_mem_lo_v2i64:
   1110 ; AVX2:       # BB#0:
   1111 ; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
   1112 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
   1113 ; AVX2-NEXT:    retq
   1114 ;
   1115 ; AVX512VL-LABEL: insert_mem_lo_v2i64:
   1116 ; AVX512VL:       # BB#0:
   1117 ; AVX512VL-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
   1118 ; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
   1119 ; AVX512VL-NEXT:    retq
   1120   %a = load i64, i64* %ptr
   1121   %v = insertelement <2 x i64> undef, i64 %a, i32 0
   1122   %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
   1123   ret <2 x i64> %shuffle
   1124 }
   1125 
   1126 define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
        ; Replaces the high element of %b with the scalar %a: mask <2, 0> yields
        ; <b[0], a>.
        ; The checks expect %rdi to be moved into xmm1 and interleaved with xmm0
        ; via punpcklqdq/vpunpcklqdq.
   1127 ; SSE-LABEL: insert_reg_hi_v2i64:
   1128 ; SSE:       # BB#0:
   1129 ; SSE-NEXT:    movd %rdi, %xmm1
   1130 ; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   1131 ; SSE-NEXT:    retq
   1132 ;
   1133 ; AVX-LABEL: insert_reg_hi_v2i64:
   1134 ; AVX:       # BB#0:
   1135 ; AVX-NEXT:    vmovq %rdi, %xmm1
   1136 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   1137 ; AVX-NEXT:    retq
   1138   %v = insertelement <2 x i64> undef, i64 %a, i32 0
   1139   %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
   1140   ret <2 x i64> %shuffle
   1141 }
   1142 
   1143 define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
        ; Replaces the high element of %b with an i64 loaded from %ptr: the result
        ; is <b[0], *ptr>.
        ; The checks expect a movq/vmovq load into xmm1 followed by
        ; punpcklqdq/vpunpcklqdq; the load is not folded into the unpack.
   1144 ; SSE-LABEL: insert_mem_hi_v2i64:
   1145 ; SSE:       # BB#0:
   1146 ; SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
   1147 ; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   1148 ; SSE-NEXT:    retq
   1149 ;
   1150 ; AVX-LABEL: insert_mem_hi_v2i64:
   1151 ; AVX:       # BB#0:
   1152 ; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
   1153 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   1154 ; AVX-NEXT:    retq
   1155   %a = load i64, i64* %ptr
   1156   %v = insertelement <2 x i64> undef, i64 %a, i32 0
   1157   %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
   1158   ret <2 x i64> %shuffle
   1159 }
   1160 
   1161 define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
        ; Replaces the low element of %b with the scalar %a: the result is
        ; <a, b[1]>.
        ; The checks expect a movsd merge. The SSE form merges into xmm1 and copies
        ; the result back to xmm0 with movapd; the three-operand AVX form writes
        ; xmm0 directly.
   1162 ; SSE-LABEL: insert_reg_lo_v2f64:
   1163 ; SSE:       # BB#0:
   1164 ; SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
   1165 ; SSE-NEXT:    movapd %xmm1, %xmm0
   1166 ; SSE-NEXT:    retq
   1167 ;
   1168 ; AVX-LABEL: insert_reg_lo_v2f64:
   1169 ; AVX:       # BB#0:
   1170 ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
   1171 ; AVX-NEXT:    retq
   1172   %v = insertelement <2 x double> undef, double %a, i32 0
   1173   %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
   1174   ret <2 x double> %shuffle
   1175 }
   1176 
   1177 define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
        ; Replaces the low element of %b with a double loaded from %ptr: the result
        ; is <*ptr, b[1]>.
        ; The checks expect the load to be folded into a single movlpd/vmovlpd.
   1178 ; SSE-LABEL: insert_mem_lo_v2f64:
   1179 ; SSE:       # BB#0:
   1180 ; SSE-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
   1181 ; SSE-NEXT:    retq
   1182 ;
   1183 ; AVX-LABEL: insert_mem_lo_v2f64:
   1184 ; AVX:       # BB#0:
   1185 ; AVX-NEXT:    vmovlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
   1186 ; AVX-NEXT:    retq
   1187   %a = load double, double* %ptr
   1188   %v = insertelement <2 x double> undef, double %a, i32 0
   1189   %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
   1190   ret <2 x double> %shuffle
   1191 }
   1192 
   1193 define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) {
        ; Replaces the high element of %b with the scalar %a: mask <2, 0> yields
        ; <b[0], a>.
        ; The checks expect an unpcklpd of xmm1 with xmm0. The SSE form needs a
        ; trailing movapd to return the result in xmm0; the AVX form does not.
   1194 ; SSE-LABEL: insert_reg_hi_v2f64:
   1195 ; SSE:       # BB#0:
   1196 ; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
   1197 ; SSE-NEXT:    movapd %xmm1, %xmm0
   1198 ; SSE-NEXT:    retq
   1199 ;
   1200 ; AVX-LABEL: insert_reg_hi_v2f64:
   1201 ; AVX:       # BB#0:
   1202 ; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
   1203 ; AVX-NEXT:    retq
   1204   %v = insertelement <2 x double> undef, double %a, i32 0
   1205   %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
   1206   ret <2 x double> %shuffle
   1207 }
   1208 
   1209 define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
        ; Replaces the high element of %b with a double loaded from %ptr: the
        ; result is <b[0], *ptr>.
        ; The checks expect the load to be folded into a single movhpd/vmovhpd.
   1210 ; SSE-LABEL: insert_mem_hi_v2f64:
   1211 ; SSE:       # BB#0:
   1212 ; SSE-NEXT:    movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
   1213 ; SSE-NEXT:    retq
   1214 ;
   1215 ; AVX-LABEL: insert_mem_hi_v2f64:
   1216 ; AVX:       # BB#0:
   1217 ; AVX-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
   1218 ; AVX-NEXT:    retq
   1219   %a = load double, double* %ptr
   1220   %v = insertelement <2 x double> undef, double %a, i32 0
   1221   %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
   1222   ret <2 x double> %shuffle
   1223 }
   1224 
   1225 define <2 x double> @insert_dup_reg_v2f64(double %a) {
        ; Splats a scalar double argument into both lanes: the result is <a, a>.
        ; The SSE2 checks expect movlhps. The SSE3, SSSE3 and SSE4.1 checks expect
        ; movddup, and the AVX checks expect vmovddup.
   1226 ; SSE2-LABEL: insert_dup_reg_v2f64:
   1227 ; SSE2:       # BB#0:
   1228 ; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
   1229 ; SSE2-NEXT:    retq
   1230 ;
   1231 ; SSE3-LABEL: insert_dup_reg_v2f64:
   1232 ; SSE3:       # BB#0:
   1233 ; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
   1234 ; SSE3-NEXT:    retq
   1235 ;
   1236 ; SSSE3-LABEL: insert_dup_reg_v2f64:
   1237 ; SSSE3:       # BB#0:
   1238 ; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
   1239 ; SSSE3-NEXT:    retq
   1240 ;
   1241 ; SSE41-LABEL: insert_dup_reg_v2f64:
   1242 ; SSE41:       # BB#0:
   1243 ; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
   1244 ; SSE41-NEXT:    retq
   1245 ;
   1246 ; AVX-LABEL: insert_dup_reg_v2f64:
   1247 ; AVX:       # BB#0:
   1248 ; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
   1249 ; AVX-NEXT:    retq
   1250   %v = insertelement <2 x double> undef, double %a, i32 0
   1251   %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
   1252   ret <2 x double> %shuffle
   1253 }
   1254 
   1255 define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
        ; Splats a double loaded from %ptr into both lanes: the result is
        ; <*ptr, *ptr>.
        ; The SSE2 checks expect a movsd load followed by movlhps. With SSE3 and
        ; later the checks expect the load to be folded into movddup/vmovddup.
   1256 ; SSE2-LABEL: insert_dup_mem_v2f64:
   1257 ; SSE2:       # BB#0:
   1258 ; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1259 ; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
   1260 ; SSE2-NEXT:    retq
   1261 ;
   1262 ; SSE3-LABEL: insert_dup_mem_v2f64:
   1263 ; SSE3:       # BB#0:
   1264 ; SSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
   1265 ; SSE3-NEXT:    retq
   1266 ;
   1267 ; SSSE3-LABEL: insert_dup_mem_v2f64:
   1268 ; SSSE3:       # BB#0:
   1269 ; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
   1270 ; SSSE3-NEXT:    retq
   1271 ;
   1272 ; SSE41-LABEL: insert_dup_mem_v2f64:
   1273 ; SSE41:       # BB#0:
   1274 ; SSE41-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
   1275 ; SSE41-NEXT:    retq
   1276 ;
   1277 ; AVX-LABEL: insert_dup_mem_v2f64:
   1278 ; AVX:       # BB#0:
   1279 ; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
   1280 ; AVX-NEXT:    retq
   1281   %a = load double, double* %ptr
   1282   %v = insertelement <2 x double> undef, double %a, i32 0
   1283   %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
   1284   ret <2 x double> %shuffle
   1285 }
   1286 
   1287 define <2 x double> @insert_dup_mem128_v2f64(<2 x double>* %ptr) nounwind {
        ; Loads a whole <2 x double> vector and splats its element 0. Unlike
        ; insert_dup_mem_v2f64, the IR load here is the full 128-bit vector.
        ; The SSE2 checks expect a full movaps load followed by movlhps. With SSE3
        ; and later the checks expect a single movddup/vmovddup with a memory
        ; operand.
   1288 ; SSE2-LABEL: insert_dup_mem128_v2f64:
   1289 ; SSE2:       # BB#0:
   1290 ; SSE2-NEXT:    movaps (%rdi), %xmm0
   1291 ; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
   1292 ; SSE2-NEXT:    retq
   1293 ;
   1294 ; SSE3-LABEL: insert_dup_mem128_v2f64:
   1295 ; SSE3:       # BB#0:
   1296 ; SSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
   1297 ; SSE3-NEXT:    retq
   1298 ;
   1299 ; SSSE3-LABEL: insert_dup_mem128_v2f64:
   1300 ; SSSE3:       # BB#0:
   1301 ; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
   1302 ; SSSE3-NEXT:    retq
   1303 ;
   1304 ; SSE41-LABEL: insert_dup_mem128_v2f64:
   1305 ; SSE41:       # BB#0:
   1306 ; SSE41-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
   1307 ; SSE41-NEXT:    retq
   1308 ;
   1309 ; AVX-LABEL: insert_dup_mem128_v2f64:
   1310 ; AVX:       # BB#0:
   1311 ; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
   1312 ; AVX-NEXT:    retq
   1313   %v = load  <2 x double>,  <2 x double>* %ptr
   1314   %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
   1315   ret <2 x double> %shuffle
   1316 }
   1317 
   1318 
   1319 define <2 x i64> @insert_dup_mem_v2i64(i64* %ptr) {
        ; Splats an i64 loaded from %ptr into both lanes: the result is
        ; <*ptr, *ptr>.
        ; The SSE and AVX1 checks expect a movq load followed by pshufd [0,1,0,1].
        ; The AVX2 and AVX512VL checks expect one vpbroadcastq from memory.
   1320 ; SSE-LABEL: insert_dup_mem_v2i64:
   1321 ; SSE:       # BB#0:
   1322 ; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
   1323 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
   1324 ; SSE-NEXT:    retq
   1325 ;
   1326 ; AVX1-LABEL: insert_dup_mem_v2i64:
   1327 ; AVX1:       # BB#0:
   1328 ; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
   1329 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
   1330 ; AVX1-NEXT:    retq
   1331 ;
   1332 ; AVX2-LABEL: insert_dup_mem_v2i64:
   1333 ; AVX2:       # BB#0:
   1334 ; AVX2-NEXT:    vpbroadcastq (%rdi), %xmm0
   1335 ; AVX2-NEXT:    retq
   1336 ;
   1337 ; AVX512VL-LABEL: insert_dup_mem_v2i64:
   1338 ; AVX512VL:       # BB#0:
   1339 ; AVX512VL-NEXT:    vpbroadcastq (%rdi), %xmm0
   1340 ; AVX512VL-NEXT:    retq
        ; The scalar load is explicitly marked align 1 (no alignment guarantee).
   1341   %tmp = load i64, i64* %ptr, align 1
   1342   %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0
        ; A zeroinitializer mask is <0, 0>, i.e. both lanes take element 0.
   1343   %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <2 x i32> zeroinitializer
   1344   ret <2 x i64> %tmp2
   1345 }
   1346 
   1347 define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
        ; Loads a <2 x double> vector and swaps its two elements: the result is
        ; <v[1], v[0]>.
        ; The SSE checks expect a separate movapd load followed by shufpd. The AVX
        ; checks expect the load to be folded into vpermilpd.
   1348 ; SSE-LABEL: shuffle_mem_v2f64_10:
   1349 ; SSE:       # BB#0:
   1350 ; SSE-NEXT:    movapd (%rdi), %xmm0
   1351 ; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
   1352 ; SSE-NEXT:    retq
   1353 ;
   1354 ; AVX-LABEL: shuffle_mem_v2f64_10:
   1355 ; AVX:       # BB#0:
   1356 ; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = mem[1,0]
   1357 ; AVX-NEXT:    retq
   1358 
   1359   %a = load <2 x double>, <2 x double>* %ptr
   1360   %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
   1361   ret <2 x double> %shuffle
   1362 }
   1363