Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
      7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
      8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VL
      9 
     10 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
     11 target triple = "x86_64-unknown-unknown"
     12 
     13 define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
        ; Mask <0,0> splats element 0 of %a. The checks expect pshufd/vpshufd,
        ; or vpbroadcastq when AVX2 or AVX512VL is enabled.
     14 ; SSE-LABEL: shuffle_v2i64_00:
     15 ; SSE:       # BB#0:
     16 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
     17 ; SSE-NEXT:    retq
     18 ;
     19 ; AVX1-LABEL: shuffle_v2i64_00:
     20 ; AVX1:       # BB#0:
     21 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
     22 ; AVX1-NEXT:    retq
     23 ;
     24 ; AVX2-LABEL: shuffle_v2i64_00:
     25 ; AVX2:       # BB#0:
     26 ; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
     27 ; AVX2-NEXT:    retq
     28 ;
     29 ; AVX512VL-LABEL: shuffle_v2i64_00:
     30 ; AVX512VL:       # BB#0:
     31 ; AVX512VL-NEXT:    vpbroadcastq %xmm0, %xmm0
     32 ; AVX512VL-NEXT:    retq
     33   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
     34   ret <2 x i64> %shuffle
     35 }
     36 define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
        ; Mask <1,0> swaps the two elements of %a. The checks expect a single
        ; pshufd/vpshufd on xmm0.
     37 ; SSE-LABEL: shuffle_v2i64_10:
     38 ; SSE:       # BB#0:
     39 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
     40 ; SSE-NEXT:    retq
     41 ;
     42 ; AVX-LABEL: shuffle_v2i64_10:
     43 ; AVX:       # BB#0:
     44 ; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
     45 ; AVX-NEXT:    retq
     46   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
     47   ret <2 x i64> %shuffle
     48 }
     49 define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
        ; Mask <1,1> splats element 1 of %a. The checks expect a single
        ; pshufd/vpshufd on xmm0.
     50 ; SSE-LABEL: shuffle_v2i64_11:
     51 ; SSE:       # BB#0:
     52 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
     53 ; SSE-NEXT:    retq
     54 ;
     55 ; AVX-LABEL: shuffle_v2i64_11:
     56 ; AVX:       # BB#0:
     57 ; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
     58 ; AVX-NEXT:    retq
     59   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
     60   ret <2 x i64> %shuffle
     61 }
     62 define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
        ; Mask <2,2> splats element 0 of %b (xmm1 in the checks). Expected is
        ; pshufd/vpshufd, or vpbroadcastq when AVX2 or AVX512VL is enabled.
     63 ; SSE-LABEL: shuffle_v2i64_22:
     64 ; SSE:       # BB#0:
     65 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
     66 ; SSE-NEXT:    retq
     67 ;
     68 ; AVX1-LABEL: shuffle_v2i64_22:
     69 ; AVX1:       # BB#0:
     70 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
     71 ; AVX1-NEXT:    retq
     72 ;
     73 ; AVX2-LABEL: shuffle_v2i64_22:
     74 ; AVX2:       # BB#0:
     75 ; AVX2-NEXT:    vpbroadcastq %xmm1, %xmm0
     76 ; AVX2-NEXT:    retq
     77 ;
     78 ; AVX512VL-LABEL: shuffle_v2i64_22:
     79 ; AVX512VL:       # BB#0:
     80 ; AVX512VL-NEXT:    vpbroadcastq %xmm1, %xmm0
     81 ; AVX512VL-NEXT:    retq
     82   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
     83   ret <2 x i64> %shuffle
     84 }
     85 define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
        ; Mask <3,2> swaps the two elements of %b. The checks expect one
        ; pshufd/vpshufd that reads xmm1 and writes xmm0.
     86 ; SSE-LABEL: shuffle_v2i64_32:
     87 ; SSE:       # BB#0:
     88 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
     89 ; SSE-NEXT:    retq
     90 ;
     91 ; AVX-LABEL: shuffle_v2i64_32:
     92 ; AVX:       # BB#0:
     93 ; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
     94 ; AVX-NEXT:    retq
     95   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
     96   ret <2 x i64> %shuffle
     97 }
     98 define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
        ; Mask <3,3> splats element 1 of %b. The checks expect one
        ; pshufd/vpshufd that reads xmm1 and writes xmm0.
     99 ; SSE-LABEL: shuffle_v2i64_33:
    100 ; SSE:       # BB#0:
    101 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
    102 ; SSE-NEXT:    retq
    103 ;
    104 ; AVX-LABEL: shuffle_v2i64_33:
    105 ; AVX:       # BB#0:
    106 ; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
    107 ; AVX-NEXT:    retq
    108   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
    109   ret <2 x i64> %shuffle
    110 }
    111 
    112 define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
        ; Mask <0,0> splats element 0 of %a. The checks expect movlhps on plain
        ; SSE2 and movddup/vmovddup on every other configuration.
    113 ; SSE2-LABEL: shuffle_v2f64_00:
    114 ; SSE2:       # BB#0:
    115 ; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
    116 ; SSE2-NEXT:    retq
    117 ;
    118 ; SSE3-LABEL: shuffle_v2f64_00:
    119 ; SSE3:       # BB#0:
    120 ; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
    121 ; SSE3-NEXT:    retq
    122 ;
    123 ; SSSE3-LABEL: shuffle_v2f64_00:
    124 ; SSSE3:       # BB#0:
    125 ; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
    126 ; SSSE3-NEXT:    retq
    127 ;
    128 ; SSE41-LABEL: shuffle_v2f64_00:
    129 ; SSE41:       # BB#0:
    130 ; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
    131 ; SSE41-NEXT:    retq
    132 ;
    133 ; AVX-LABEL: shuffle_v2f64_00:
    134 ; AVX:       # BB#0:
    135 ; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
    136 ; AVX-NEXT:    retq
    137   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
    138   ret <2 x double> %shuffle
    139 }
    140 define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
        ; Mask <1,0> swaps the two elements of %a. The checks expect shufpd for
        ; the SSE runs and vpermilpd for the AVX runs.
    141 ; SSE-LABEL: shuffle_v2f64_10:
    142 ; SSE:       # BB#0:
    143 ; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
    144 ; SSE-NEXT:    retq
    145 ;
    146 ; AVX-LABEL: shuffle_v2f64_10:
    147 ; AVX:       # BB#0:
    148 ; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
    149 ; AVX-NEXT:    retq
    150 
    151   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
    152   ret <2 x double> %shuffle
    153 }
    154 define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
        ; Mask <1,1> splats element 1 of %a. The checks expect a single
        ; movhlps/vmovhlps on xmm0.
    155 ; SSE-LABEL: shuffle_v2f64_11:
    156 ; SSE:       # BB#0:
    157 ; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
    158 ; SSE-NEXT:    retq
    159 ;
    160 ; AVX-LABEL: shuffle_v2f64_11:
    161 ; AVX:       # BB#0:
    162 ; AVX-NEXT:    vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
    163 ; AVX-NEXT:    retq
    164   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
    165   ret <2 x double> %shuffle
    166 }
    167 define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
        ; Mask <2,2> splats element 0 of %b (xmm1 in the checks). Plain SSE2
        ; shuffles xmm1 in place and then copies it to xmm0; the other runs
        ; expect one movddup/vmovddup from xmm1 into xmm0.
    168 ; SSE2-LABEL: shuffle_v2f64_22:
    169 ; SSE2:       # BB#0:
    170 ; SSE2-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0,0]
    171 ; SSE2-NEXT:    movaps %xmm1, %xmm0
    172 ; SSE2-NEXT:    retq
    173 ;
    174 ; SSE3-LABEL: shuffle_v2f64_22:
    175 ; SSE3:       # BB#0:
    176 ; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
    177 ; SSE3-NEXT:    retq
    178 ;
    179 ; SSSE3-LABEL: shuffle_v2f64_22:
    180 ; SSSE3:       # BB#0:
    181 ; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
    182 ; SSSE3-NEXT:    retq
    183 ;
    184 ; SSE41-LABEL: shuffle_v2f64_22:
    185 ; SSE41:       # BB#0:
    186 ; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
    187 ; SSE41-NEXT:    retq
    188 ;
    189 ; AVX-LABEL: shuffle_v2f64_22:
    190 ; AVX:       # BB#0:
    191 ; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm1[0,0]
    192 ; AVX-NEXT:    retq
    193   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
    194   ret <2 x double> %shuffle
    195 }
    196 define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
        ; Mask <3,2> swaps the two elements of %b. The SSE runs shuffle xmm1 in
        ; place and copy it to xmm0; the AVX runs expect a single vpermilpd.
    197 ; SSE-LABEL: shuffle_v2f64_32:
    198 ; SSE:       # BB#0:
    199 ; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
    200 ; SSE-NEXT:    movapd %xmm1, %xmm0
    201 ; SSE-NEXT:    retq
    202 ;
    203 ; AVX-LABEL: shuffle_v2f64_32:
    204 ; AVX:       # BB#0:
    205 ; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
    206 ; AVX-NEXT:    retq
    207 
    208   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
    209   ret <2 x double> %shuffle
    210 }
    211 define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
        ; Mask <3,3> splats element 1 of %b. The SSE runs use movhlps on xmm1
        ; followed by a copy to xmm0; the AVX runs expect a single vmovhlps.
    212 ; SSE-LABEL: shuffle_v2f64_33:
    213 ; SSE:       # BB#0:
    214 ; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
    215 ; SSE-NEXT:    movaps %xmm1, %xmm0
    216 ; SSE-NEXT:    retq
    217 ;
    218 ; AVX-LABEL: shuffle_v2f64_33:
    219 ; AVX:       # BB#0:
    220 ; AVX-NEXT:    vmovhlps {{.*#+}} xmm0 = xmm1[1,1]
    221 ; AVX-NEXT:    retq
    222   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
    223   ret <2 x double> %shuffle
    224 }
    225 define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
        ; Mask <0,3> blends element 0 of %a with element 1 of %b. Before SSE4.1
        ; the checks expect movsd into xmm1 plus a copy to xmm0; SSE4.1 and AVX
        ; expect a single blendpd/vblendpd.
    226 ; SSE2-LABEL: shuffle_v2f64_03:
    227 ; SSE2:       # BB#0:
    228 ; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
    229 ; SSE2-NEXT:    movapd %xmm1, %xmm0
    230 ; SSE2-NEXT:    retq
    231 ;
    232 ; SSE3-LABEL: shuffle_v2f64_03:
    233 ; SSE3:       # BB#0:
    234 ; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
    235 ; SSE3-NEXT:    movapd %xmm1, %xmm0
    236 ; SSE3-NEXT:    retq
    237 ;
    238 ; SSSE3-LABEL: shuffle_v2f64_03:
    239 ; SSSE3:       # BB#0:
    240 ; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
    241 ; SSSE3-NEXT:    movapd %xmm1, %xmm0
    242 ; SSSE3-NEXT:    retq
    243 ;
    244 ; SSE41-LABEL: shuffle_v2f64_03:
    245 ; SSE41:       # BB#0:
    246 ; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
    247 ; SSE41-NEXT:    retq
    248 ;
    249 ; AVX-LABEL: shuffle_v2f64_03:
    250 ; AVX:       # BB#0:
    251 ; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
    252 ; AVX-NEXT:    retq
    253   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
    254   ret <2 x double> %shuffle
    255 }
    256 define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
        ; Mask <2,1> blends element 0 of %b with element 1 of %a. Before SSE4.1
        ; the checks expect a single movsd into xmm0; SSE4.1 and AVX expect
        ; blendpd/vblendpd.
    257 ; SSE2-LABEL: shuffle_v2f64_21:
    258 ; SSE2:       # BB#0:
    259 ; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    260 ; SSE2-NEXT:    retq
    261 ;
    262 ; SSE3-LABEL: shuffle_v2f64_21:
    263 ; SSE3:       # BB#0:
    264 ; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    265 ; SSE3-NEXT:    retq
    266 ;
    267 ; SSSE3-LABEL: shuffle_v2f64_21:
    268 ; SSSE3:       # BB#0:
    269 ; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    270 ; SSSE3-NEXT:    retq
    271 ;
    272 ; SSE41-LABEL: shuffle_v2f64_21:
    273 ; SSE41:       # BB#0:
    274 ; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    275 ; SSE41-NEXT:    retq
    276 ;
    277 ; AVX-LABEL: shuffle_v2f64_21:
    278 ; AVX:       # BB#0:
    279 ; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    280 ; AVX-NEXT:    retq
    281   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
    282   ret <2 x double> %shuffle
    283 }
    284 
    285 
    286 define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
        ; Mask <0,2> pairs element 0 of %a with element 0 of %b. The checks
        ; expect a single punpcklqdq/vpunpcklqdq.
    287 ; SSE-LABEL: shuffle_v2i64_02:
    288 ; SSE:       # BB#0:
    289 ; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    290 ; SSE-NEXT:    retq
    291 ;
    292 ; AVX-LABEL: shuffle_v2i64_02:
    293 ; AVX:       # BB#0:
    294 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    295 ; AVX-NEXT:    retq
    296   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
    297   ret <2 x i64> %shuffle
    298 }
    299 define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
        ; Mask <0,2> with an unused leading %nonce argument, so %a and %b show
        ; up as xmm1/xmm2 in the checks. SSE needs an extra movdqa into xmm0;
        ; AVX writes xmm0 directly.
    300 ; SSE-LABEL: shuffle_v2i64_02_copy:
    301 ; SSE:       # BB#0:
    302 ; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
    303 ; SSE-NEXT:    movdqa %xmm1, %xmm0
    304 ; SSE-NEXT:    retq
    305 ;
    306 ; AVX-LABEL: shuffle_v2i64_02_copy:
    307 ; AVX:       # BB#0:
    308 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
    309 ; AVX-NEXT:    retq
    310   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
    311   ret <2 x i64> %shuffle
    312 }
    313 define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
        ; Mask <0,3> blends element 0 of %a with element 1 of %b. Before SSE4.1
        ; the checks expect movsd plus a copy to xmm0; SSE4.1 and AVX1 expect
        ; pblendw/vpblendw; AVX2 and AVX512VL expect vpblendd.
    314 ; SSE2-LABEL: shuffle_v2i64_03:
    315 ; SSE2:       # BB#0:
    316 ; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
    317 ; SSE2-NEXT:    movapd %xmm1, %xmm0
    318 ; SSE2-NEXT:    retq
    319 ;
    320 ; SSE3-LABEL: shuffle_v2i64_03:
    321 ; SSE3:       # BB#0:
    322 ; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
    323 ; SSE3-NEXT:    movapd %xmm1, %xmm0
    324 ; SSE3-NEXT:    retq
    325 ;
    326 ; SSSE3-LABEL: shuffle_v2i64_03:
    327 ; SSSE3:       # BB#0:
    328 ; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
    329 ; SSSE3-NEXT:    movapd %xmm1, %xmm0
    330 ; SSSE3-NEXT:    retq
    331 ;
    332 ; SSE41-LABEL: shuffle_v2i64_03:
    333 ; SSE41:       # BB#0:
    334 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
    335 ; SSE41-NEXT:    retq
    336 ;
    337 ; AVX1-LABEL: shuffle_v2i64_03:
    338 ; AVX1:       # BB#0:
    339 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
    340 ; AVX1-NEXT:    retq
    341 ;
    342 ; AVX2-LABEL: shuffle_v2i64_03:
    343 ; AVX2:       # BB#0:
    344 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
    345 ; AVX2-NEXT:    retq
    346 ;
    347 ; AVX512VL-LABEL: shuffle_v2i64_03:
    348 ; AVX512VL:       # BB#0:
    349 ; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
    350 ; AVX512VL-NEXT:    retq
    351   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
    352   ret <2 x i64> %shuffle
    353 }
    354 define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
        ; Mask <0,3> with an unused leading %nonce argument, so %a and %b show
        ; up as xmm1/xmm2 in the checks. Every SSE run ends with a copy into
        ; xmm0; the AVX runs blend straight into xmm0.
    355 ; SSE2-LABEL: shuffle_v2i64_03_copy:
    356 ; SSE2:       # BB#0:
    357 ; SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
    358 ; SSE2-NEXT:    movapd %xmm2, %xmm0
    359 ; SSE2-NEXT:    retq
    360 ;
    361 ; SSE3-LABEL: shuffle_v2i64_03_copy:
    362 ; SSE3:       # BB#0:
    363 ; SSE3-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
    364 ; SSE3-NEXT:    movapd %xmm2, %xmm0
    365 ; SSE3-NEXT:    retq
    366 ;
    367 ; SSSE3-LABEL: shuffle_v2i64_03_copy:
    368 ; SSSE3:       # BB#0:
    369 ; SSSE3-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
    370 ; SSSE3-NEXT:    movapd %xmm2, %xmm0
    371 ; SSSE3-NEXT:    retq
    372 ;
    373 ; SSE41-LABEL: shuffle_v2i64_03_copy:
    374 ; SSE41:       # BB#0:
    375 ; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
    376 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
    377 ; SSE41-NEXT:    retq
    378 ;
    379 ; AVX1-LABEL: shuffle_v2i64_03_copy:
    380 ; AVX1:       # BB#0:
    381 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm2[4,5,6,7]
    382 ; AVX1-NEXT:    retq
    383 ;
    384 ; AVX2-LABEL: shuffle_v2i64_03_copy:
    385 ; AVX2:       # BB#0:
    386 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
    387 ; AVX2-NEXT:    retq
    388 ;
    389 ; AVX512VL-LABEL: shuffle_v2i64_03_copy:
    390 ; AVX512VL:       # BB#0:
    391 ; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
    392 ; AVX512VL-NEXT:    retq
    393   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
    394   ret <2 x i64> %shuffle
    395 }
    396 define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
        ; Mask <1,2> yields element 1 of %a followed by element 0 of %b. The
        ; checks expect shufpd on SSE2 and SSE3, palignr plus a copy to xmm0 on
        ; SSSE3 and SSE4.1, and a single vpalignr on AVX.
    397 ; SSE2-LABEL: shuffle_v2i64_12:
    398 ; SSE2:       # BB#0:
    399 ; SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
    400 ; SSE2-NEXT:    retq
    401 ;
    402 ; SSE3-LABEL: shuffle_v2i64_12:
    403 ; SSE3:       # BB#0:
    404 ; SSE3-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
    405 ; SSE3-NEXT:    retq
    406 ;
    407 ; SSSE3-LABEL: shuffle_v2i64_12:
    408 ; SSSE3:       # BB#0:
    409 ; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
    410 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
    411 ; SSSE3-NEXT:    retq
    412 ;
    413 ; SSE41-LABEL: shuffle_v2i64_12:
    414 ; SSE41:       # BB#0:
    415 ; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
    416 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
    417 ; SSE41-NEXT:    retq
    418 ;
    419 ; AVX-LABEL: shuffle_v2i64_12:
    420 ; AVX:       # BB#0:
    421 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
    422 ; AVX-NEXT:    retq
    423   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
    424   ret <2 x i64> %shuffle
    425 }
    426 define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
        ; Mask <1,2> with an unused leading %nonce argument, so %a and %b show
        ; up as xmm1/xmm2 in the checks. Every SSE run ends with a copy into
        ; xmm0; AVX expects a single vpalignr writing xmm0.
    427 ; SSE2-LABEL: shuffle_v2i64_12_copy:
    428 ; SSE2:       # BB#0:
    429 ; SSE2-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
    430 ; SSE2-NEXT:    movapd %xmm1, %xmm0
    431 ; SSE2-NEXT:    retq
    432 ;
    433 ; SSE3-LABEL: shuffle_v2i64_12_copy:
    434 ; SSE3:       # BB#0:
    435 ; SSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
    436 ; SSE3-NEXT:    movapd %xmm1, %xmm0
    437 ; SSE3-NEXT:    retq
    438 ;
    439 ; SSSE3-LABEL: shuffle_v2i64_12_copy:
    440 ; SSSE3:       # BB#0:
    441 ; SSSE3-NEXT:    palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
    442 ; SSSE3-NEXT:    movdqa %xmm2, %xmm0
    443 ; SSSE3-NEXT:    retq
    444 ;
    445 ; SSE41-LABEL: shuffle_v2i64_12_copy:
    446 ; SSE41:       # BB#0:
    447 ; SSE41-NEXT:    palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
    448 ; SSE41-NEXT:    movdqa %xmm2, %xmm0
    449 ; SSE41-NEXT:    retq
    450 ;
    451 ; AVX-LABEL: shuffle_v2i64_12_copy:
    452 ; AVX:       # BB#0:
    453 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
    454 ; AVX-NEXT:    retq
    455   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
    456   ret <2 x i64> %shuffle
    457 }
    458 define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
        ; Mask <1,3> pairs element 1 of %a with element 1 of %b. The checks
        ; expect a single punpckhqdq/vpunpckhqdq.
    459 ; SSE-LABEL: shuffle_v2i64_13:
    460 ; SSE:       # BB#0:
    461 ; SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
    462 ; SSE-NEXT:    retq
    463 ;
    464 ; AVX-LABEL: shuffle_v2i64_13:
    465 ; AVX:       # BB#0:
    466 ; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
    467 ; AVX-NEXT:    retq
    468   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
    469   ret <2 x i64> %shuffle
    470 }
    471 define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
        ; Mask <1,3> with an unused leading %nonce argument, so %a and %b show
        ; up as xmm1/xmm2 in the checks. SSE needs an extra movdqa into xmm0;
        ; AVX writes xmm0 directly.
    472 ; SSE-LABEL: shuffle_v2i64_13_copy:
    473 ; SSE:       # BB#0:
    474 ; SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm2[1]
    475 ; SSE-NEXT:    movdqa %xmm1, %xmm0
    476 ; SSE-NEXT:    retq
    477 ;
    478 ; AVX-LABEL: shuffle_v2i64_13_copy:
    479 ; AVX:       # BB#0:
    480 ; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
    481 ; AVX-NEXT:    retq
    482   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
    483   ret <2 x i64> %shuffle
    484 }
    485 define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
        ; Mask <2,0> pairs element 0 of %b with element 0 of %a. The checks
        ; expect punpcklqdq with xmm1 as the first source; SSE then copies the
        ; result to xmm0, while AVX writes xmm0 directly.
    486 ; SSE-LABEL: shuffle_v2i64_20:
    487 ; SSE:       # BB#0:
    488 ; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
    489 ; SSE-NEXT:    movdqa %xmm1, %xmm0
    490 ; SSE-NEXT:    retq
    491 ;
    492 ; AVX-LABEL: shuffle_v2i64_20:
    493 ; AVX:       # BB#0:
    494 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
    495 ; AVX-NEXT:    retq
    496   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
    497   ret <2 x i64> %shuffle
    498 }
    499 define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
        ; Mask <2,0> with an unused leading %nonce argument, so %a and %b show
        ; up as xmm1/xmm2 in the checks. SSE unpacks into xmm2 and copies to
        ; xmm0; AVX writes xmm0 directly.
    500 ; SSE-LABEL: shuffle_v2i64_20_copy:
    501 ; SSE:       # BB#0:
    502 ; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
    503 ; SSE-NEXT:    movdqa %xmm2, %xmm0
    504 ; SSE-NEXT:    retq
    505 ;
    506 ; AVX-LABEL: shuffle_v2i64_20_copy:
    507 ; AVX:       # BB#0:
    508 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
    509 ; AVX-NEXT:    retq
    510   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
    511   ret <2 x i64> %shuffle
    512 }
    513 define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
        ; Mask <2,1> blends element 0 of %b with element 1 of %a. Before SSE4.1
        ; the checks expect a single movsd into xmm0; SSE4.1 and AVX1 expect
        ; pblendw/vpblendw; AVX2 and AVX512VL expect vpblendd.
    514 ; SSE2-LABEL: shuffle_v2i64_21:
    515 ; SSE2:       # BB#0:
    516 ; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    517 ; SSE2-NEXT:    retq
    518 ;
    519 ; SSE3-LABEL: shuffle_v2i64_21:
    520 ; SSE3:       # BB#0:
    521 ; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    522 ; SSE3-NEXT:    retq
    523 ;
    524 ; SSSE3-LABEL: shuffle_v2i64_21:
    525 ; SSSE3:       # BB#0:
    526 ; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    527 ; SSSE3-NEXT:    retq
    528 ;
    529 ; SSE41-LABEL: shuffle_v2i64_21:
    530 ; SSE41:       # BB#0:
    531 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
    532 ; SSE41-NEXT:    retq
    533 ;
    534 ; AVX1-LABEL: shuffle_v2i64_21:
    535 ; AVX1:       # BB#0:
    536 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
    537 ; AVX1-NEXT:    retq
    538 ;
    539 ; AVX2-LABEL: shuffle_v2i64_21:
    540 ; AVX2:       # BB#0:
    541 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
    542 ; AVX2-NEXT:    retq
    543 ;
    544 ; AVX512VL-LABEL: shuffle_v2i64_21:
    545 ; AVX512VL:       # BB#0:
    546 ; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
    547 ; AVX512VL-NEXT:    retq
    548   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
    549   ret <2 x i64> %shuffle
    550 }
    551 define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
        ; Mask <2,1> with an unused leading %nonce argument, so %a and %b show
        ; up as xmm1/xmm2 in the checks. Every SSE run ends with a copy into
        ; xmm0; the AVX runs blend straight into xmm0.
    552 ; SSE2-LABEL: shuffle_v2i64_21_copy:
    553 ; SSE2:       # BB#0:
    554 ; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
    555 ; SSE2-NEXT:    movapd %xmm1, %xmm0
    556 ; SSE2-NEXT:    retq
    557 ;
    558 ; SSE3-LABEL: shuffle_v2i64_21_copy:
    559 ; SSE3:       # BB#0:
    560 ; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
    561 ; SSE3-NEXT:    movapd %xmm1, %xmm0
    562 ; SSE3-NEXT:    retq
    563 ;
    564 ; SSSE3-LABEL: shuffle_v2i64_21_copy:
    565 ; SSSE3:       # BB#0:
    566 ; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
    567 ; SSSE3-NEXT:    movapd %xmm1, %xmm0
    568 ; SSSE3-NEXT:    retq
    569 ;
    570 ; SSE41-LABEL: shuffle_v2i64_21_copy:
    571 ; SSE41:       # BB#0:
    572 ; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
    573 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
    574 ; SSE41-NEXT:    retq
    575 ;
    576 ; AVX1-LABEL: shuffle_v2i64_21_copy:
    577 ; AVX1:       # BB#0:
    578 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm1[4,5,6,7]
    579 ; AVX1-NEXT:    retq
    580 ;
    581 ; AVX2-LABEL: shuffle_v2i64_21_copy:
    582 ; AVX2:       # BB#0:
    583 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
    584 ; AVX2-NEXT:    retq
    585 ;
    586 ; AVX512VL-LABEL: shuffle_v2i64_21_copy:
    587 ; AVX512VL:       # BB#0:
    588 ; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
    589 ; AVX512VL-NEXT:    retq
    590   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
    591   ret <2 x i64> %shuffle
    592 }
    593 define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
        ; Mask <3,0> yields element 1 of %b followed by element 0 of %a. The
        ; checks expect shufpd plus a copy to xmm0 on SSE2 and SSE3, and a
        ; single palignr/vpalignr into xmm0 from SSSE3 onwards.
    594 ; SSE2-LABEL: shuffle_v2i64_30:
    595 ; SSE2:       # BB#0:
    596 ; SSE2-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
    597 ; SSE2-NEXT:    movapd %xmm1, %xmm0
    598 ; SSE2-NEXT:    retq
    599 ;
    600 ; SSE3-LABEL: shuffle_v2i64_30:
    601 ; SSE3:       # BB#0:
    602 ; SSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
    603 ; SSE3-NEXT:    movapd %xmm1, %xmm0
    604 ; SSE3-NEXT:    retq
    605 ;
    606 ; SSSE3-LABEL: shuffle_v2i64_30:
    607 ; SSSE3:       # BB#0:
    608 ; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
    609 ; SSSE3-NEXT:    retq
    610 ;
    611 ; SSE41-LABEL: shuffle_v2i64_30:
    612 ; SSE41:       # BB#0:
    613 ; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
    614 ; SSE41-NEXT:    retq
    615 ;
    616 ; AVX-LABEL: shuffle_v2i64_30:
    617 ; AVX:       # BB#0:
    618 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
    619 ; AVX-NEXT:    retq
    620   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
    621   ret <2 x i64> %shuffle
    622 }
    623 define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
        ; Mask <3,0> with an unused leading %nonce argument, so %a and %b show
        ; up as xmm1/xmm2 in the checks. Every SSE run ends with a copy into
        ; xmm0; AVX expects a single vpalignr writing xmm0.
    624 ; SSE2-LABEL: shuffle_v2i64_30_copy:
    625 ; SSE2:       # BB#0:
    626 ; SSE2-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
    627 ; SSE2-NEXT:    movapd %xmm2, %xmm0
    628 ; SSE2-NEXT:    retq
    629 ;
    630 ; SSE3-LABEL: shuffle_v2i64_30_copy:
    631 ; SSE3:       # BB#0:
    632 ; SSE3-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
    633 ; SSE3-NEXT:    movapd %xmm2, %xmm0
    634 ; SSE3-NEXT:    retq
    635 ;
    636 ; SSSE3-LABEL: shuffle_v2i64_30_copy:
    637 ; SSSE3:       # BB#0:
    638 ; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
    639 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
    640 ; SSSE3-NEXT:    retq
    641 ;
    642 ; SSE41-LABEL: shuffle_v2i64_30_copy:
    643 ; SSE41:       # BB#0:
    644 ; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
    645 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
    646 ; SSE41-NEXT:    retq
    647 ;
    648 ; AVX-LABEL: shuffle_v2i64_30_copy:
    649 ; AVX:       # BB#0:
    650 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
    651 ; AVX-NEXT:    retq
    652   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
    653   ret <2 x i64> %shuffle
    654 }
    655 define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
        ; Mask <3,1> pairs element 1 of %b with element 1 of %a. The checks
        ; expect punpckhqdq with xmm1 as the first source; SSE then copies the
        ; result to xmm0, while AVX writes xmm0 directly.
    656 ; SSE-LABEL: shuffle_v2i64_31:
    657 ; SSE:       # BB#0:
    658 ; SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
    659 ; SSE-NEXT:    movdqa %xmm1, %xmm0
    660 ; SSE-NEXT:    retq
    661 ;
    662 ; AVX-LABEL: shuffle_v2i64_31:
    663 ; AVX:       # BB#0:
    664 ; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
    665 ; AVX-NEXT:    retq
    666   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
    667   ret <2 x i64> %shuffle
    668 }
    669 define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
        ; Mask <3,1> with an unused leading %nonce argument, so %a and %b show
        ; up as xmm1/xmm2 in the checks. SSE unpacks into xmm2 and copies to
        ; xmm0; AVX writes xmm0 directly.
    670 ; SSE-LABEL: shuffle_v2i64_31_copy:
    671 ; SSE:       # BB#0:
    672 ; SSE-NEXT:    punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
    673 ; SSE-NEXT:    movdqa %xmm2, %xmm0
    674 ; SSE-NEXT:    retq
    675 ;
    676 ; AVX-LABEL: shuffle_v2i64_31_copy:
    677 ; AVX:       # BB#0:
    678 ; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1]
    679 ; AVX-NEXT:    retq
    680   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
    681   ret <2 x i64> %shuffle
    682 }
    683 
    684 define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
        ; Mask <0,3> against a zero vector keeps element 0 of %a and zeroes the
        ; high element. The checks expect a single movq/vmovq.
    685 ; SSE-LABEL: shuffle_v2i64_0z:
    686 ; SSE:       # BB#0:
    687 ; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
    688 ; SSE-NEXT:    retq
    689 ;
    690 ; AVX-LABEL: shuffle_v2i64_0z:
    691 ; AVX:       # BB#0:
    692 ; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
    693 ; AVX-NEXT:    retq
    694   %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
    695   ret <2 x i64> %shuffle
    696 }
    697 
    698 define <2 x i64> @shuffle_v2i64_1z(<2 x i64> %a) {
        ; Mask <1,3> against a zero vector moves element 1 of %a to the low
        ; element and zeroes the high one. The checks expect psrldq/vpsrldq.
    699 ; SSE-LABEL: shuffle_v2i64_1z:
    700 ; SSE:       # BB#0:
    701 ; SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
    702 ; SSE-NEXT:    retq
    703 ;
    704 ; AVX-LABEL: shuffle_v2i64_1z:
    705 ; AVX:       # BB#0:
    706 ; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
    707 ; AVX-NEXT:    retq
    708   %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 3>
    709   ret <2 x i64> %shuffle
    710 }
    711 
    712 define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
        ; Mask <2,0> against a zero vector puts zero in the low element and
        ; element 0 of %a in the high one. The checks expect pslldq/vpslldq.
    713 ; SSE-LABEL: shuffle_v2i64_z0:
    714 ; SSE:       # BB#0:
    715 ; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
    716 ; SSE-NEXT:    retq
    717 ;
    718 ; AVX-LABEL: shuffle_v2i64_z0:
    719 ; AVX:       # BB#0:
    720 ; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
    721 ; AVX-NEXT:    retq
    722   %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
    723   ret <2 x i64> %shuffle
    724 }
    725 
    726 define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
        ; Mask <2,1> against a zero vector zeroes the low element and keeps
        ; element 1 of %a. Every run first zeroes xmm1 and then blends it with
        ; xmm0 (movsd, pblendw/vpblendw or vpblendd depending on the features).
    727 ; SSE2-LABEL: shuffle_v2i64_z1:
    728 ; SSE2:       # BB#0:
    729 ; SSE2-NEXT:    xorpd %xmm1, %xmm1
    730 ; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    731 ; SSE2-NEXT:    retq
    732 ;
    733 ; SSE3-LABEL: shuffle_v2i64_z1:
    734 ; SSE3:       # BB#0:
    735 ; SSE3-NEXT:    xorpd %xmm1, %xmm1
    736 ; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    737 ; SSE3-NEXT:    retq
    738 ;
    739 ; SSSE3-LABEL: shuffle_v2i64_z1:
    740 ; SSSE3:       # BB#0:
    741 ; SSSE3-NEXT:    xorpd %xmm1, %xmm1
    742 ; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    743 ; SSSE3-NEXT:    retq
    744 ;
    745 ; SSE41-LABEL: shuffle_v2i64_z1:
    746 ; SSE41:       # BB#0:
    747 ; SSE41-NEXT:    pxor %xmm1, %xmm1
    748 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
    749 ; SSE41-NEXT:    retq
    750 ;
    751 ; AVX1-LABEL: shuffle_v2i64_z1:
    752 ; AVX1:       # BB#0:
    753 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    754 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
    755 ; AVX1-NEXT:    retq
    756 ;
    757 ; AVX2-LABEL: shuffle_v2i64_z1:
    758 ; AVX2:       # BB#0:
    759 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    760 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
    761 ; AVX2-NEXT:    retq
    762 ;
    763 ; AVX512VL-LABEL: shuffle_v2i64_z1:
    764 ; AVX512VL:       # BB#0:
    765 ; AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    766 ; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
    767 ; AVX512VL-NEXT:    retq
    768   %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
    769   ret <2 x i64> %shuffle
    770 }
    771 
    772 define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
        ; Mask <0,3> against a zero vector keeps element 0 of %a and zeroes the
        ; high element. The checks expect a single movq/vmovq.
    773 ; SSE-LABEL: shuffle_v2f64_0z:
    774 ; SSE:       # BB#0:
    775 ; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
    776 ; SSE-NEXT:    retq
    777 ;
    778 ; AVX-LABEL: shuffle_v2f64_0z:
    779 ; AVX:       # BB#0:
    780 ; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
    781 ; AVX-NEXT:    retq
    782   %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
    783   ret <2 x double> %shuffle
    784 }
    785 
    786 define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
        ; Mask <1, 3> moves %a[1] into lane 0 and fills lane 1 from the zero vector.
        ; Expected lowering is a zeroed xmm1 followed by unpckhpd/vunpckhpd of xmm0
        ; with xmm1. Only the zeroing instruction differs between runs (xorpd,
        ; vxorpd or vxorps).
    787 ; SSE-LABEL: shuffle_v2f64_1z:
    788 ; SSE:       # BB#0:
    789 ; SSE-NEXT:    xorpd %xmm1, %xmm1
    790 ; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
    791 ; SSE-NEXT:    retq
    792 ;
    793 ; AVX1-LABEL: shuffle_v2f64_1z:
    794 ; AVX1:       # BB#0:
    795 ; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
    796 ; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
    797 ; AVX1-NEXT:    retq
    798 ;
    799 ; AVX2-LABEL: shuffle_v2f64_1z:
    800 ; AVX2:       # BB#0:
    801 ; AVX2-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
    802 ; AVX2-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
    803 ; AVX2-NEXT:    retq
    804 ;
    805 ; AVX512VL-LABEL: shuffle_v2f64_1z:
    806 ; AVX512VL:       # BB#0:
    807 ; AVX512VL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    808 ; AVX512VL-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
    809 ; AVX512VL-NEXT:    retq
    810   %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
    811   ret <2 x double> %shuffle
    812 }
    813 
    814 define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
        ; Mask <2, 0> puts a zero in lane 0 and %a[0] in lane 1.
        ; Expected lowering is a zeroed xmm1 followed by unpcklpd/vunpcklpd with the
        ; zero register as first source. In the non-AVX runs the result lands in
        ; xmm1, so an extra movapd into xmm0 is expected there.
    815 ; SSE-LABEL: shuffle_v2f64_z0:
    816 ; SSE:       # BB#0:
    817 ; SSE-NEXT:    xorpd %xmm1, %xmm1
    818 ; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
    819 ; SSE-NEXT:    movapd %xmm1, %xmm0
    820 ; SSE-NEXT:    retq
    821 ;
    822 ; AVX1-LABEL: shuffle_v2f64_z0:
    823 ; AVX1:       # BB#0:
    824 ; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
    825 ; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
    826 ; AVX1-NEXT:    retq
    827 ;
    828 ; AVX2-LABEL: shuffle_v2f64_z0:
    829 ; AVX2:       # BB#0:
    830 ; AVX2-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
    831 ; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
    832 ; AVX2-NEXT:    retq
    833 ;
    834 ; AVX512VL-LABEL: shuffle_v2f64_z0:
    835 ; AVX512VL:       # BB#0:
    836 ; AVX512VL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    837 ; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
    838 ; AVX512VL-NEXT:    retq
    839   %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
    840   ret <2 x double> %shuffle
    841 }
    842 
    843 define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
        ; Mask <2, 1> takes lane 0 from the zero vector and keeps %a[1] in lane 1.
        ; After zeroing xmm1 the expected merge instruction is movsd for the
        ; SSE2, SSE3 and SSSE3 runs, blendpd for the SSE4.1 run and vblendpd for
        ; every AVX run.
    844 ; SSE2-LABEL: shuffle_v2f64_z1:
    845 ; SSE2:       # BB#0:
    846 ; SSE2-NEXT:    xorpd %xmm1, %xmm1
    847 ; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    848 ; SSE2-NEXT:    retq
    849 ;
    850 ; SSE3-LABEL: shuffle_v2f64_z1:
    851 ; SSE3:       # BB#0:
    852 ; SSE3-NEXT:    xorpd %xmm1, %xmm1
    853 ; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    854 ; SSE3-NEXT:    retq
    855 ;
    856 ; SSSE3-LABEL: shuffle_v2f64_z1:
    857 ; SSSE3:       # BB#0:
    858 ; SSSE3-NEXT:    xorpd %xmm1, %xmm1
    859 ; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    860 ; SSSE3-NEXT:    retq
    861 ;
    862 ; SSE41-LABEL: shuffle_v2f64_z1:
    863 ; SSE41:       # BB#0:
    864 ; SSE41-NEXT:    xorpd %xmm1, %xmm1
    865 ; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    866 ; SSE41-NEXT:    retq
    867 ;
    868 ; AVX-LABEL: shuffle_v2f64_z1:
    869 ; AVX:       # BB#0:
    870 ; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
    871 ; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    872 ; AVX-NEXT:    retq
    873   %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
    874   ret <2 x double> %shuffle
    875 }
    876 
    877 define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
        ; Two shuffles chained through bitcasts. The 64-bit shuffle <2, 1> yields
        ; (zero, %a[1]); the <4 x float> shuffle <2, 3, 0, 1> then swaps the two
        ; 64-bit halves, so the net result is (%a[1], zero). The check lines expect
        ; one zeroing instruction plus a single shufpd/vshufpd.
    878 ; SSE-LABEL: shuffle_v2f64_bitcast_1z:
    879 ; SSE:       # BB#0:
    880 ; SSE-NEXT:    xorpd %xmm1, %xmm1
    881 ; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
    882 ; SSE-NEXT:    retq
    883 ;
    884 ; AVX1-LABEL: shuffle_v2f64_bitcast_1z:
    885 ; AVX1:       # BB#0:
    886 ; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
    887 ; AVX1-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
    888 ; AVX1-NEXT:    retq
    889 ;
    890 ; AVX2-LABEL: shuffle_v2f64_bitcast_1z:
    891 ; AVX2:       # BB#0:
    892 ; AVX2-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
    893 ; AVX2-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
    894 ; AVX2-NEXT:    retq
    895 ;
    896 ; AVX512VL-LABEL: shuffle_v2f64_bitcast_1z:
    897 ; AVX512VL:       # BB#0:
    898 ; AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    899 ; AVX512VL-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
    900 ; AVX512VL-NEXT:    retq
    901   %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
    902   %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
    903   %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
    904   %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
    905   ret <2 x double> %bitcast64
    906 }
    907 
    908 define <2 x i64> @shuffle_v2i64_bitcast_z123(<2 x i64> %x) {
        ; Views %x as <4 x float>, replaces float lane 0 with lane 0 of the constant
        ; operand (1.0, selected by mask index 4), then ANDs the result with
        ; <i64 -4294967296, i64 -1>, which clears the low 32 bits of element 0.
        ; The check lines record that the scalar load and blend are still emitted
        ; before lane 0 is zeroed: by andps with a RIP-relative operand in the
        ; SSE2, SSE3 and SSSE3 runs, and by a blend with a zeroed xmm1 in the
        ; SSE4.1 and AVX runs.
    909 ; SSE2-LABEL: shuffle_v2i64_bitcast_z123:
    910 ; SSE2:       # BB#0:
    911 ; SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    912 ; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
    913 ; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
    914 ; SSE2-NEXT:    retq
    915 ;
    916 ; SSE3-LABEL: shuffle_v2i64_bitcast_z123:
    917 ; SSE3:       # BB#0:
    918 ; SSE3-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    919 ; SSE3-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
    920 ; SSE3-NEXT:    andps {{.*}}(%rip), %xmm0
    921 ; SSE3-NEXT:    retq
    922 ;
    923 ; SSSE3-LABEL: shuffle_v2i64_bitcast_z123:
    924 ; SSSE3:       # BB#0:
    925 ; SSSE3-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    926 ; SSSE3-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
    927 ; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
    928 ; SSSE3-NEXT:    retq
    929 ;
    930 ; SSE41-LABEL: shuffle_v2i64_bitcast_z123:
    931 ; SSE41:       # BB#0:
    932 ; SSE41-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    933 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
    934 ; SSE41-NEXT:    xorps %xmm1, %xmm1
    935 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
    936 ; SSE41-NEXT:    retq
    937 ;
    938 ; AVX1-LABEL: shuffle_v2i64_bitcast_z123:
    939 ; AVX1:       # BB#0:
    940 ; AVX1-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    941 ; AVX1-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
    942 ; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    943 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
    944 ; AVX1-NEXT:    retq
    945 ;
    946 ; AVX2-LABEL: shuffle_v2i64_bitcast_z123:
    947 ; AVX2:       # BB#0:
    948 ; AVX2-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    949 ; AVX2-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
    950 ; AVX2-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    951 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
    952 ; AVX2-NEXT:    retq
    953 ;
    954 ; AVX512VL-LABEL: shuffle_v2i64_bitcast_z123:
    955 ; AVX512VL:       # BB#0:
    956 ; AVX512VL-NEXT:    vmovss {{.*}}(%rip), %xmm1
    957 ; AVX512VL-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
    958 ; AVX512VL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    959 ; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
    960 ; AVX512VL-NEXT:    retq
    961   %bitcast32 = bitcast <2 x i64> %x to <4 x float>
    962   %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
    963   %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x i64>
    964   %and = and <2 x i64> %bitcast64, <i64 -4294967296, i64 -1>
    965   ret <2 x i64> %and
    966 }
    967 
    968 define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
        ; Inserts scalar %a into lane 0 of an undef vector, then shuffles with mask
        ; <0, 3> so lane 1 comes from the zero vector. Expected lowering is a single
        ; move from %rdi into xmm0 (printed as movd in the non-AVX runs and as vmovq
        ; in the AVX runs).
    969 ; SSE-LABEL: insert_reg_and_zero_v2i64:
    970 ; SSE:       # BB#0:
    971 ; SSE-NEXT:    movd %rdi, %xmm0
    972 ; SSE-NEXT:    retq
    973 ;
    974 ; AVX-LABEL: insert_reg_and_zero_v2i64:
    975 ; AVX:       # BB#0:
    976 ; AVX-NEXT:    vmovq %rdi, %xmm0
    977 ; AVX-NEXT:    retq
    978   %v = insertelement <2 x i64> undef, i64 %a, i32 0
    979   %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
    980   ret <2 x i64> %shuffle
    981 }
    982 
    983 define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
        ; Loads an i64 from %ptr, inserts it into lane 0 and takes lane 1 from the
        ; zero vector (mask <0, 3>). Every run expects the load and the zeroing of
        ; the upper lane to be covered by one movq/vmovq from memory.
    984 ; SSE-LABEL: insert_mem_and_zero_v2i64:
    985 ; SSE:       # BB#0:
    986 ; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
    987 ; SSE-NEXT:    retq
    988 ;
    989 ; AVX1-LABEL: insert_mem_and_zero_v2i64:
    990 ; AVX1:       # BB#0:
    991 ; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
    992 ; AVX1-NEXT:    retq
    993 ;
    994 ; AVX2-LABEL: insert_mem_and_zero_v2i64:
    995 ; AVX2:       # BB#0:
    996 ; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
    997 ; AVX2-NEXT:    retq
    998 ;
    999 ; AVX512VL-LABEL: insert_mem_and_zero_v2i64:
   1000 ; AVX512VL:       # BB#0:
   1001 ; AVX512VL-NEXT:    vmovq (%rdi), %xmm0
   1002 ; AVX512VL-NEXT:    retq
   1003   %a = load i64, i64* %ptr
   1004   %v = insertelement <2 x i64> undef, i64 %a, i32 0
   1005   %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
   1006   ret <2 x i64> %shuffle
   1007 }
   1008 
   1009 define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
        ; Inserts double %a into lane 0 of an undef vector and takes lane 1 from the
        ; zero vector (mask <0, 3>). Every run expects a single movq/vmovq, rendered
        ; by the check lines as xmm0 = xmm0[0],zero.
   1010 ; SSE-LABEL: insert_reg_and_zero_v2f64:
   1011 ; SSE:       # BB#0:
   1012 ; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
   1013 ; SSE-NEXT:    retq
   1014 ;
   1015 ; AVX-LABEL: insert_reg_and_zero_v2f64:
   1016 ; AVX:       # BB#0:
   1017 ; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
   1018 ; AVX-NEXT:    retq
   1019   %v = insertelement <2 x double> undef, double %a, i32 0
   1020   %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
   1021   ret <2 x double> %shuffle
   1022 }
   1023 
   1024 define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
        ; Loads a double from %ptr, inserts it into lane 0 and takes lane 1 from the
        ; zero vector (mask <0, 3>). Every run expects a single movsd/vmovsd load
        ; from memory.
   1025 ; SSE-LABEL: insert_mem_and_zero_v2f64:
   1026 ; SSE:       # BB#0:
   1027 ; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1028 ; SSE-NEXT:    retq
   1029 ;
   1030 ; AVX1-LABEL: insert_mem_and_zero_v2f64:
   1031 ; AVX1:       # BB#0:
   1032 ; AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
   1033 ; AVX1-NEXT:    retq
   1034 ;
   1035 ; AVX2-LABEL: insert_mem_and_zero_v2f64:
   1036 ; AVX2:       # BB#0:
   1037 ; AVX2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
   1038 ; AVX2-NEXT:    retq
   1039 ;
   1040 ; AVX512VL-LABEL: insert_mem_and_zero_v2f64:
   1041 ; AVX512VL:       # BB#0:
   1042 ; AVX512VL-NEXT:    vmovsd (%rdi), %xmm0
   1043 ; AVX512VL-NEXT:    retq
   1044   %a = load double, double* %ptr
   1045   %v = insertelement <2 x double> undef, double %a, i32 0
   1046   %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
   1047   ret <2 x double> %shuffle
   1048 }
   1049 
   1050 define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
        ; Replaces the low lane of %b with scalar %a: mask <0, 3> selects %v[0]
        ; (which holds %a) and %b[1]. Expected lowering moves %rdi into xmm1 and
        ; merges it into xmm0 with movsd (SSE2, SSE3, SSSE3 runs), pblendw/vpblendw
        ; (SSE4.1 and AVX1 runs) or vpblendd (AVX2 and AVX512VL runs).
   1051 ; SSE2-LABEL: insert_reg_lo_v2i64:
   1052 ; SSE2:       # BB#0:
   1053 ; SSE2-NEXT:    movd %rdi, %xmm1
   1054 ; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
   1055 ; SSE2-NEXT:    retq
   1056 ;
   1057 ; SSE3-LABEL: insert_reg_lo_v2i64:
   1058 ; SSE3:       # BB#0:
   1059 ; SSE3-NEXT:    movd %rdi, %xmm1
   1060 ; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
   1061 ; SSE3-NEXT:    retq
   1062 ;
   1063 ; SSSE3-LABEL: insert_reg_lo_v2i64:
   1064 ; SSSE3:       # BB#0:
   1065 ; SSSE3-NEXT:    movd %rdi, %xmm1
   1066 ; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
   1067 ; SSSE3-NEXT:    retq
   1068 ;
   1069 ; SSE41-LABEL: insert_reg_lo_v2i64:
   1070 ; SSE41:       # BB#0:
   1071 ; SSE41-NEXT:    movd %rdi, %xmm1
   1072 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
   1073 ; SSE41-NEXT:    retq
   1074 ;
   1075 ; AVX1-LABEL: insert_reg_lo_v2i64:
   1076 ; AVX1:       # BB#0:
   1077 ; AVX1-NEXT:    vmovq %rdi, %xmm1
   1078 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
   1079 ; AVX1-NEXT:    retq
   1080 ;
   1081 ; AVX2-LABEL: insert_reg_lo_v2i64:
   1082 ; AVX2:       # BB#0:
   1083 ; AVX2-NEXT:    vmovq %rdi, %xmm1
   1084 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
   1085 ; AVX2-NEXT:    retq
   1086 ;
   1087 ; AVX512VL-LABEL: insert_reg_lo_v2i64:
   1088 ; AVX512VL:       # BB#0:
   1089 ; AVX512VL-NEXT:    vmovq %rdi, %xmm1
   1090 ; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
   1091 ; AVX512VL-NEXT:    retq
   1092   %v = insertelement <2 x i64> undef, i64 %a, i32 0
   1093   %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
   1094   ret <2 x i64> %shuffle
   1095 }
   1096 
   1097 define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
        ; Replaces the low lane of %b with an i64 loaded from %ptr (mask <0, 3>
        ; selects %v[0] and %b[1]). The SSE2, SSE3 and SSSE3 runs expect a single
        ; movlpd from memory; the SSE4.1 and AVX runs expect a movq/vmovq load into
        ; xmm1 followed by a blend into xmm0.
   1098 ; SSE2-LABEL: insert_mem_lo_v2i64:
   1099 ; SSE2:       # BB#0:
   1100 ; SSE2-NEXT:    movlpd (%rdi), %xmm0
   1101 ; SSE2-NEXT:    retq
   1102 ;
   1103 ; SSE3-LABEL: insert_mem_lo_v2i64:
   1104 ; SSE3:       # BB#0:
   1105 ; SSE3-NEXT:    movlpd (%rdi), %xmm0
   1106 ; SSE3-NEXT:    retq
   1107 ;
   1108 ; SSSE3-LABEL: insert_mem_lo_v2i64:
   1109 ; SSSE3:       # BB#0:
   1110 ; SSSE3-NEXT:    movlpd (%rdi), %xmm0
   1111 ; SSSE3-NEXT:    retq
   1112 ;
   1113 ; SSE41-LABEL: insert_mem_lo_v2i64:
   1114 ; SSE41:       # BB#0:
   1115 ; SSE41-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
   1116 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
   1117 ; SSE41-NEXT:    retq
   1118 ;
   1119 ; AVX1-LABEL: insert_mem_lo_v2i64:
   1120 ; AVX1:       # BB#0:
   1121 ; AVX1-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
   1122 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
   1123 ; AVX1-NEXT:    retq
   1124 ;
   1125 ; AVX2-LABEL: insert_mem_lo_v2i64:
   1126 ; AVX2:       # BB#0:
   1127 ; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
   1128 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
   1129 ; AVX2-NEXT:    retq
   1130 ;
   1131 ; AVX512VL-LABEL: insert_mem_lo_v2i64:
   1132 ; AVX512VL:       # BB#0:
   1133 ; AVX512VL-NEXT:    vmovq (%rdi), %xmm1
   1134 ; AVX512VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
   1135 ; AVX512VL-NEXT:    retq
   1136   %a = load i64, i64* %ptr
   1137   %v = insertelement <2 x i64> undef, i64 %a, i32 0
   1138   %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
   1139   ret <2 x i64> %shuffle
   1140 }
   1141 
   1142 define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
        ; Replaces the high lane of %b with scalar %a: mask <2, 0> selects %b[0] for
        ; lane 0 and %v[0] (which holds %a) for lane 1. Expected lowering moves %rdi
        ; into xmm1 and combines with punpcklqdq/vpunpcklqdq.
   1143 ; SSE-LABEL: insert_reg_hi_v2i64:
   1144 ; SSE:       # BB#0:
   1145 ; SSE-NEXT:    movd %rdi, %xmm1
   1146 ; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   1147 ; SSE-NEXT:    retq
   1148 ;
   1149 ; AVX-LABEL: insert_reg_hi_v2i64:
   1150 ; AVX:       # BB#0:
   1151 ; AVX-NEXT:    vmovq %rdi, %xmm1
   1152 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   1153 ; AVX-NEXT:    retq
   1154   %v = insertelement <2 x i64> undef, i64 %a, i32 0
   1155   %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
   1156   ret <2 x i64> %shuffle
   1157 }
   1158 
   1159 define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
        ; Replaces the high lane of %b with an i64 loaded from %ptr: mask <2, 0>
        ; selects %b[0] for lane 0 and %v[0] (the loaded value) for lane 1.
        ; Expected lowering is a movq/vmovq load into xmm1 followed by
        ; punpcklqdq/vpunpcklqdq.
   1160 ; SSE-LABEL: insert_mem_hi_v2i64:
   1161 ; SSE:       # BB#0:
   1162 ; SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
   1163 ; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   1164 ; SSE-NEXT:    retq
   1165 ;
   1166 ; AVX1-LABEL: insert_mem_hi_v2i64:
   1167 ; AVX1:       # BB#0:
   1168 ; AVX1-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
   1169 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   1170 ; AVX1-NEXT:    retq
   1171 ;
   1172 ; AVX2-LABEL: insert_mem_hi_v2i64:
   1173 ; AVX2:       # BB#0:
   1174 ; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
   1175 ; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   1176 ; AVX2-NEXT:    retq
   1177 ;
   1178 ; AVX512VL-LABEL: insert_mem_hi_v2i64:
   1179 ; AVX512VL:       # BB#0:
   1180 ; AVX512VL-NEXT:    vmovq (%rdi), %xmm1
   1181 ; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   1182 ; AVX512VL-NEXT:    retq
   1183   %a = load i64, i64* %ptr
   1184   %v = insertelement <2 x i64> undef, i64 %a, i32 0
   1185   %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
   1186   ret <2 x i64> %shuffle
   1187 }
   1188 
   1189 define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
        ; Replaces the low lane of %b with double %a: mask <0, 3> selects %v[0]
        ; (which holds %a) and %b[1]. Expected lowering is one movsd/vmovsd merge.
        ; In the non-AVX runs the merge writes xmm1, so an extra movapd into xmm0
        ; is expected there.
   1190 ; SSE-LABEL: insert_reg_lo_v2f64:
   1191 ; SSE:       # BB#0:
   1192 ; SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
   1193 ; SSE-NEXT:    movapd %xmm1, %xmm0
   1194 ; SSE-NEXT:    retq
   1195 ;
   1196 ; AVX1-LABEL: insert_reg_lo_v2f64:
   1197 ; AVX1:       # BB#0:
   1198 ; AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
   1199 ; AVX1-NEXT:    retq
   1200 ;
   1201 ; AVX2-LABEL: insert_reg_lo_v2f64:
   1202 ; AVX2:       # BB#0:
   1203 ; AVX2-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
   1204 ; AVX2-NEXT:    retq
   1205 ;
   1206 ; AVX512VL-LABEL: insert_reg_lo_v2f64:
   1207 ; AVX512VL:       # BB#0:
   1208 ; AVX512VL-NEXT:    vmovsd %xmm0, %xmm1, %xmm0
   1209 ; AVX512VL-NEXT:    retq
   1210   %v = insertelement <2 x double> undef, double %a, i32 0
   1211   %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
   1212   ret <2 x double> %shuffle
   1213 }
   1214 
   1215 define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
        ; Replaces the low lane of %b with a double loaded from %ptr (mask <0, 3>
        ; selects %v[0] and %b[1]). Every run expects the load and insert to be
        ; covered by a single movlpd/vmovlpd from (%rdi).
   1216 ; SSE-LABEL: insert_mem_lo_v2f64:
   1217 ; SSE:       # BB#0:
   1218 ; SSE-NEXT:    movlpd (%rdi), %xmm0
   1219 ; SSE-NEXT:    retq
   1220 ;
   1221 ; AVX-LABEL: insert_mem_lo_v2f64:
   1222 ; AVX:       # BB#0:
   1223 ; AVX-NEXT:    vmovlpd (%rdi), %xmm0, %xmm0
   1224 ; AVX-NEXT:    retq
   1225   %a = load double, double* %ptr
   1226   %v = insertelement <2 x double> undef, double %a, i32 0
   1227   %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
   1228   ret <2 x double> %shuffle
   1229 }
   1230 
   1231 define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) {
        ; Replaces the high lane of %b with double %a: mask <2, 0> selects %b[0] for
        ; lane 0 and %v[0] (which holds %a) for lane 1. Expected lowering is one
        ; unpcklpd/vunpcklpd. In the non-AVX runs the result lands in xmm1, so an
        ; extra movapd into xmm0 is expected there.
   1232 ; SSE-LABEL: insert_reg_hi_v2f64:
   1233 ; SSE:       # BB#0:
   1234 ; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
   1235 ; SSE-NEXT:    movapd %xmm1, %xmm0
   1236 ; SSE-NEXT:    retq
   1237 ;
   1238 ; AVX-LABEL: insert_reg_hi_v2f64:
   1239 ; AVX:       # BB#0:
   1240 ; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
   1241 ; AVX-NEXT:    retq
   1242   %v = insertelement <2 x double> undef, double %a, i32 0
   1243   %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
   1244   ret <2 x double> %shuffle
   1245 }
   1246 
   1247 define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
        ; Replaces the high lane of %b with a double loaded from %ptr: mask <2, 0>
        ; selects %b[0] for lane 0 and %v[0] (the loaded value) for lane 1. Every
        ; run expects a single movhpd/vmovhpd from (%rdi).
   1248 ; SSE-LABEL: insert_mem_hi_v2f64:
   1249 ; SSE:       # BB#0:
   1250 ; SSE-NEXT:    movhpd (%rdi), %xmm0
   1251 ; SSE-NEXT:    retq
   1252 ;
   1253 ; AVX-LABEL: insert_mem_hi_v2f64:
   1254 ; AVX:       # BB#0:
   1255 ; AVX-NEXT:    vmovhpd (%rdi), %xmm0, %xmm0
   1256 ; AVX-NEXT:    retq
   1257   %a = load double, double* %ptr
   1258   %v = insertelement <2 x double> undef, double %a, i32 0
   1259   %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
   1260   ret <2 x double> %shuffle
   1261 }
   1262 
   1263 define <2 x double> @insert_dup_reg_v2f64(double %a) {
        ; Splat test: %a is inserted into lane 0 and mask <0, 0> copies that lane to
        ; both result lanes. The SSE2 run expects movlhps; the SSE3, SSSE3, SSE4.1
        ; and AVX runs expect a single movddup/vmovddup.
   1264 ; SSE2-LABEL: insert_dup_reg_v2f64:
   1265 ; SSE2:       # BB#0:
   1266 ; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
   1267 ; SSE2-NEXT:    retq
   1268 ;
   1269 ; SSE3-LABEL: insert_dup_reg_v2f64:
   1270 ; SSE3:       # BB#0:
   1271 ; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
   1272 ; SSE3-NEXT:    retq
   1273 ;
   1274 ; SSSE3-LABEL: insert_dup_reg_v2f64:
   1275 ; SSSE3:       # BB#0:
   1276 ; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
   1277 ; SSSE3-NEXT:    retq
   1278 ;
   1279 ; SSE41-LABEL: insert_dup_reg_v2f64:
   1280 ; SSE41:       # BB#0:
   1281 ; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
   1282 ; SSE41-NEXT:    retq
   1283 ;
   1284 ; AVX-LABEL: insert_dup_reg_v2f64:
   1285 ; AVX:       # BB#0:
   1286 ; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
   1287 ; AVX-NEXT:    retq
   1288   %v = insertelement <2 x double> undef, double %a, i32 0
   1289   %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
   1290   ret <2 x double> %shuffle
   1291 }
   1292 
   1293 define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
        ; Splat of a double loaded from %ptr: the value is inserted into lane 0 and
        ; mask <0, 0> copies it to both lanes. The SSE2 run expects a movsd load
        ; followed by movlhps; the SSE3, SSSE3, SSE4.1 and AVX runs expect a single
        ; movddup/vmovddup reading from memory.
   1294 ; SSE2-LABEL: insert_dup_mem_v2f64:
   1295 ; SSE2:       # BB#0:
   1296 ; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1297 ; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
   1298 ; SSE2-NEXT:    retq
   1299 ;
   1300 ; SSE3-LABEL: insert_dup_mem_v2f64:
   1301 ; SSE3:       # BB#0:
   1302 ; SSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
   1303 ; SSE3-NEXT:    retq
   1304 ;
   1305 ; SSSE3-LABEL: insert_dup_mem_v2f64:
   1306 ; SSSE3:       # BB#0:
   1307 ; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
   1308 ; SSSE3-NEXT:    retq
   1309 ;
   1310 ; SSE41-LABEL: insert_dup_mem_v2f64:
   1311 ; SSE41:       # BB#0:
   1312 ; SSE41-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
   1313 ; SSE41-NEXT:    retq
   1314 ;
   1315 ; AVX-LABEL: insert_dup_mem_v2f64:
   1316 ; AVX:       # BB#0:
   1317 ; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
   1318 ; AVX-NEXT:    retq
   1319   %a = load double, double* %ptr
   1320   %v = insertelement <2 x double> undef, double %a, i32 0
   1321   %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
   1322   ret <2 x double> %shuffle
   1323 }
   1324 
   1325 define <2 x double> @insert_dup_mem128_v2f64(<2 x double>* %ptr) nounwind {
        ; Loads a full <2 x double> from %ptr and splats its lane 0 (mask <0, 0>).
        ; The SSE2 run expects a movaps load followed by movlhps; the SSE3, SSSE3,
        ; SSE4.1 and AVX runs expect a single movddup/vmovddup with a memory source,
        ; rendered by the check lines as mem[0,0].
   1326 ; SSE2-LABEL: insert_dup_mem128_v2f64:
   1327 ; SSE2:       # BB#0:
   1328 ; SSE2-NEXT:    movaps (%rdi), %xmm0
   1329 ; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
   1330 ; SSE2-NEXT:    retq
   1331 ;
   1332 ; SSE3-LABEL: insert_dup_mem128_v2f64:
   1333 ; SSE3:       # BB#0:
   1334 ; SSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
   1335 ; SSE3-NEXT:    retq
   1336 ;
   1337 ; SSSE3-LABEL: insert_dup_mem128_v2f64:
   1338 ; SSSE3:       # BB#0:
   1339 ; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
   1340 ; SSSE3-NEXT:    retq
   1341 ;
   1342 ; SSE41-LABEL: insert_dup_mem128_v2f64:
   1343 ; SSE41:       # BB#0:
   1344 ; SSE41-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
   1345 ; SSE41-NEXT:    retq
   1346 ;
   1347 ; AVX-LABEL: insert_dup_mem128_v2f64:
   1348 ; AVX:       # BB#0:
   1349 ; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
   1350 ; AVX-NEXT:    retq
   1351   %v = load  <2 x double>,  <2 x double>* %ptr
   1352   %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
   1353   ret <2 x double> %shuffle
   1354 }
   1355 
   1356 
   1357 define <2 x i64> @insert_dup_mem_v2i64(i64* %ptr) {
        ; Splat of an i64 loaded from %ptr with align 1: the value is inserted into
        ; lane 0 and the zeroinitializer mask selects lane 0 for both result lanes.
        ; The non-AVX and AVX1 runs expect a movq/vmovq load followed by
        ; pshufd/vpshufd [0,1,0,1]; the AVX2 and AVX512VL runs expect a single
        ; vpbroadcastq from memory.
   1358 ; SSE-LABEL: insert_dup_mem_v2i64:
   1359 ; SSE:       # BB#0:
   1360 ; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
   1361 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
   1362 ; SSE-NEXT:    retq
   1363 ;
   1364 ; AVX1-LABEL: insert_dup_mem_v2i64:
   1365 ; AVX1:       # BB#0:
   1366 ; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
   1367 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
   1368 ; AVX1-NEXT:    retq
   1369 ;
   1370 ; AVX2-LABEL: insert_dup_mem_v2i64:
   1371 ; AVX2:       # BB#0:
   1372 ; AVX2-NEXT:    vpbroadcastq (%rdi), %xmm0
   1373 ; AVX2-NEXT:    retq
   1374 ;
   1375 ; AVX512VL-LABEL: insert_dup_mem_v2i64:
   1376 ; AVX512VL:       # BB#0:
   1377 ; AVX512VL-NEXT:    vpbroadcastq (%rdi), %xmm0
   1378 ; AVX512VL-NEXT:    retq
   1379   %tmp = load i64, i64* %ptr, align 1
   1380   %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0
   1381   %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <2 x i32> zeroinitializer
   1382   ret <2 x i64> %tmp2
   1383 }
   1384 
   1385 define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
        ; Loads a <2 x double> from %ptr and swaps its two lanes (mask <1, 0>).
        ; The non-AVX runs expect a movapd load followed by shufpd; the AVX runs
        ; expect a single vpermilpd with a memory source, rendered by the check
        ; lines as mem[1,0].
   1386 ; SSE-LABEL: shuffle_mem_v2f64_10:
   1387 ; SSE:       # BB#0:
   1388 ; SSE-NEXT:    movapd (%rdi), %xmm0
   1389 ; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
   1390 ; SSE-NEXT:    retq
   1391 ;
   1392 ; AVX-LABEL: shuffle_mem_v2f64_10:
   1393 ; AVX:       # BB#0:
   1394 ; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = mem[1,0]
   1395 ; AVX-NEXT:    retq
   1396 
   1397   %a = load <2 x double>, <2 x double>* %ptr
   1398   %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
   1399   ret <2 x double> %shuffle
   1400 }
   1401