Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
      7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
      8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VL
      9 
     10 define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) { ; returns <%a[0], %a[0]>
     11 ; SSE-LABEL: shuffle_v2i64_00:
     12 ; SSE:       # %bb.0:
     13 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
     14 ; SSE-NEXT:    retq
     15 ;
     16 ; AVX1-LABEL: shuffle_v2i64_00:
     17 ; AVX1:       # %bb.0:
     18 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
     19 ; AVX1-NEXT:    retq
     20 ;
     21 ; AVX2-LABEL: shuffle_v2i64_00:
     22 ; AVX2:       # %bb.0:
     23 ; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
     24 ; AVX2-NEXT:    retq
     25 ;
     26 ; AVX512VL-LABEL: shuffle_v2i64_00:
     27 ; AVX512VL:       # %bb.0:
     28 ; AVX512VL-NEXT:    vpbroadcastq %xmm0, %xmm0
     29 ; AVX512VL-NEXT:    retq
     30   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0> ; mask indices 0-1 select from %a, 2-3 from %b
     31   ret <2 x i64> %shuffle
     32 }
     33 define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) { ; returns <%a[1], %a[0]>
     34 ; SSE-LABEL: shuffle_v2i64_10:
     35 ; SSE:       # %bb.0:
     36 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
     37 ; SSE-NEXT:    retq
     38 ;
     39 ; AVX-LABEL: shuffle_v2i64_10:
     40 ; AVX:       # %bb.0:
     41 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
     42 ; AVX-NEXT:    retq
     43   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0> ; swaps the two elements of %a; %b is not selected
     44   ret <2 x i64> %shuffle
     45 }
     46 define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) { ; returns <%a[1], %a[1]>
     47 ; SSE-LABEL: shuffle_v2i64_11:
     48 ; SSE:       # %bb.0:
     49 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
     50 ; SSE-NEXT:    retq
     51 ;
     52 ; AVX-LABEL: shuffle_v2i64_11:
     53 ; AVX:       # %bb.0:
     54 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
     55 ; AVX-NEXT:    retq
     56   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1> ; duplicates element 1 of %a; %b is not selected
     57   ret <2 x i64> %shuffle
     58 }
     59 define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) { ; returns <%b[0], %b[0]>
     60 ; SSE-LABEL: shuffle_v2i64_22:
     61 ; SSE:       # %bb.0:
     62 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
     63 ; SSE-NEXT:    retq
     64 ;
     65 ; AVX1-LABEL: shuffle_v2i64_22:
     66 ; AVX1:       # %bb.0:
     67 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm1[0,1,0,1]
     68 ; AVX1-NEXT:    retq
     69 ;
     70 ; AVX2-LABEL: shuffle_v2i64_22:
     71 ; AVX2:       # %bb.0:
     72 ; AVX2-NEXT:    vpbroadcastq %xmm1, %xmm0
     73 ; AVX2-NEXT:    retq
     74 ;
     75 ; AVX512VL-LABEL: shuffle_v2i64_22:
     76 ; AVX512VL:       # %bb.0:
     77 ; AVX512VL-NEXT:    vpbroadcastq %xmm1, %xmm0
     78 ; AVX512VL-NEXT:    retq
     79   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2> ; index 2 is element 0 of %b; %a is not selected
     80   ret <2 x i64> %shuffle
     81 }
     82 define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) { ; returns <%b[1], %b[0]>
     83 ; SSE-LABEL: shuffle_v2i64_32:
     84 ; SSE:       # %bb.0:
     85 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
     86 ; SSE-NEXT:    retq
     87 ;
     88 ; AVX-LABEL: shuffle_v2i64_32:
     89 ; AVX:       # %bb.0:
     90 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm1[2,3,0,1]
     91 ; AVX-NEXT:    retq
     92   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2> ; swaps the two elements of %b; %a is not selected
     93   ret <2 x i64> %shuffle
     94 }
     95 define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) { ; returns <%b[1], %b[1]>
     96 ; SSE-LABEL: shuffle_v2i64_33:
     97 ; SSE:       # %bb.0:
     98 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
     99 ; SSE-NEXT:    retq
    100 ;
    101 ; AVX-LABEL: shuffle_v2i64_33:
    102 ; AVX:       # %bb.0:
    103 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm1[2,3,2,3]
    104 ; AVX-NEXT:    retq
    105   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3> ; index 3 is element 1 of %b; %a is not selected
    106   ret <2 x i64> %shuffle
    107 }
    108 
    109 define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) { ; returns <%a[0], %a[0]>
    110 ; SSE2-LABEL: shuffle_v2f64_00:
    111 ; SSE2:       # %bb.0:
    112 ; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
    113 ; SSE2-NEXT:    retq
    114 ;
    115 ; SSE3-LABEL: shuffle_v2f64_00:
    116 ; SSE3:       # %bb.0:
    117 ; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
    118 ; SSE3-NEXT:    retq
    119 ;
    120 ; SSSE3-LABEL: shuffle_v2f64_00:
    121 ; SSSE3:       # %bb.0:
    122 ; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
    123 ; SSSE3-NEXT:    retq
    124 ;
    125 ; SSE41-LABEL: shuffle_v2f64_00:
    126 ; SSE41:       # %bb.0:
    127 ; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
    128 ; SSE41-NEXT:    retq
    129 ;
    130 ; AVX-LABEL: shuffle_v2f64_00:
    131 ; AVX:       # %bb.0:
    132 ; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
    133 ; AVX-NEXT:    retq
    134   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0> ; duplicates element 0 of %a; %b is not selected
    135   ret <2 x double> %shuffle
    136 }
    137 define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) { ; returns <%a[1], %a[0]>
    138 ; SSE-LABEL: shuffle_v2f64_10:
    139 ; SSE:       # %bb.0:
    140 ; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
    141 ; SSE-NEXT:    retq
    142 ;
    143 ; AVX-LABEL: shuffle_v2f64_10:
    144 ; AVX:       # %bb.0:
    145 ; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
    146 ; AVX-NEXT:    retq
    147 
    148   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0> ; swaps the two elements of %a; %b is not selected
    149   ret <2 x double> %shuffle
    150 }
    151 define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) { ; returns <%a[1], %a[1]>
    152 ; SSE-LABEL: shuffle_v2f64_11:
    153 ; SSE:       # %bb.0:
    154 ; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
    155 ; SSE-NEXT:    retq
    156 ;
    157 ; AVX-LABEL: shuffle_v2f64_11:
    158 ; AVX:       # %bb.0:
    159 ; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
    160 ; AVX-NEXT:    retq
    161   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1> ; duplicates element 1 of %a; %b is not selected
    162   ret <2 x double> %shuffle
    163 }
    164 define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) { ; returns <%b[0], %b[0]>
    165 ; SSE2-LABEL: shuffle_v2f64_22:
    166 ; SSE2:       # %bb.0:
    167 ; SSE2-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0,0]
    168 ; SSE2-NEXT:    movaps %xmm1, %xmm0
    169 ; SSE2-NEXT:    retq
    170 ;
    171 ; SSE3-LABEL: shuffle_v2f64_22:
    172 ; SSE3:       # %bb.0:
    173 ; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
    174 ; SSE3-NEXT:    retq
    175 ;
    176 ; SSSE3-LABEL: shuffle_v2f64_22:
    177 ; SSSE3:       # %bb.0:
    178 ; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
    179 ; SSSE3-NEXT:    retq
    180 ;
    181 ; SSE41-LABEL: shuffle_v2f64_22:
    182 ; SSE41:       # %bb.0:
    183 ; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
    184 ; SSE41-NEXT:    retq
    185 ;
    186 ; AVX-LABEL: shuffle_v2f64_22:
    187 ; AVX:       # %bb.0:
    188 ; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm1[0,0]
    189 ; AVX-NEXT:    retq
    190   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2> ; index 2 is element 0 of %b; %a is not selected
    191   ret <2 x double> %shuffle
    192 }
    193 define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) { ; returns <%b[1], %b[0]>
    194 ; SSE-LABEL: shuffle_v2f64_32:
    195 ; SSE:       # %bb.0:
    196 ; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
    197 ; SSE-NEXT:    movapd %xmm1, %xmm0
    198 ; SSE-NEXT:    retq
    199 ;
    200 ; AVX-LABEL: shuffle_v2f64_32:
    201 ; AVX:       # %bb.0:
    202 ; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
    203 ; AVX-NEXT:    retq
    204 
    205   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2> ; swaps the two elements of %b; %a is not selected
    206   ret <2 x double> %shuffle
    207 }
    208 define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) { ; returns <%b[1], %b[1]>
    209 ; SSE-LABEL: shuffle_v2f64_33:
    210 ; SSE:       # %bb.0:
    211 ; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
    212 ; SSE-NEXT:    movaps %xmm1, %xmm0
    213 ; SSE-NEXT:    retq
    214 ;
    215 ; AVX-LABEL: shuffle_v2f64_33:
    216 ; AVX:       # %bb.0:
    217 ; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm1[1,1]
    218 ; AVX-NEXT:    retq
    219   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3> ; index 3 is element 1 of %b; %a is not selected
    220   ret <2 x double> %shuffle
    221 }
    222 define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) { ; returns <%a[0], %b[1]>
    223 ; SSE2-LABEL: shuffle_v2f64_03:
    224 ; SSE2:       # %bb.0:
    225 ; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
    226 ; SSE2-NEXT:    movapd %xmm1, %xmm0
    227 ; SSE2-NEXT:    retq
    228 ;
    229 ; SSE3-LABEL: shuffle_v2f64_03:
    230 ; SSE3:       # %bb.0:
    231 ; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
    232 ; SSE3-NEXT:    movapd %xmm1, %xmm0
    233 ; SSE3-NEXT:    retq
    234 ;
    235 ; SSSE3-LABEL: shuffle_v2f64_03:
    236 ; SSSE3:       # %bb.0:
    237 ; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
    238 ; SSSE3-NEXT:    movapd %xmm1, %xmm0
    239 ; SSSE3-NEXT:    retq
    240 ;
    241 ; SSE41-LABEL: shuffle_v2f64_03:
    242 ; SSE41:       # %bb.0:
    243 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
    244 ; SSE41-NEXT:    retq
    245 ;
    246 ; AVX-LABEL: shuffle_v2f64_03:
    247 ; AVX:       # %bb.0:
    248 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
    249 ; AVX-NEXT:    retq
    250   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3> ; each lane keeps its position: low from %a, high from %b
    251   ret <2 x double> %shuffle
    252 }
    253 define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) { ; returns <%b[0], %a[1]>
    254 ; SSE2-LABEL: shuffle_v2f64_21:
    255 ; SSE2:       # %bb.0:
    256 ; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    257 ; SSE2-NEXT:    retq
    258 ;
    259 ; SSE3-LABEL: shuffle_v2f64_21:
    260 ; SSE3:       # %bb.0:
    261 ; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    262 ; SSE3-NEXT:    retq
    263 ;
    264 ; SSSE3-LABEL: shuffle_v2f64_21:
    265 ; SSSE3:       # %bb.0:
    266 ; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    267 ; SSSE3-NEXT:    retq
    268 ;
    269 ; SSE41-LABEL: shuffle_v2f64_21:
    270 ; SSE41:       # %bb.0:
    271 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
    272 ; SSE41-NEXT:    retq
    273 ;
    274 ; AVX-LABEL: shuffle_v2f64_21:
    275 ; AVX:       # %bb.0:
    276 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
    277 ; AVX-NEXT:    retq
    278   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1> ; each lane keeps its position: low from %b, high from %a
    279   ret <2 x double> %shuffle
    280 }
    281 define <2 x double> @shuffle_v2f64_u2(<2 x double> %a, <2 x double> %b) { ; returns <undef, %b[0]>
    282 ; SSE2-LABEL: shuffle_v2f64_u2:
    283 ; SSE2:       # %bb.0:
    284 ; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    285 ; SSE2-NEXT:    retq
    286 ;
    287 ; SSE3-LABEL: shuffle_v2f64_u2:
    288 ; SSE3:       # %bb.0:
    289 ; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
    290 ; SSE3-NEXT:    retq
    291 ;
    292 ; SSSE3-LABEL: shuffle_v2f64_u2:
    293 ; SSSE3:       # %bb.0:
    294 ; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
    295 ; SSSE3-NEXT:    retq
    296 ;
    297 ; SSE41-LABEL: shuffle_v2f64_u2:
    298 ; SSE41:       # %bb.0:
    299 ; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
    300 ; SSE41-NEXT:    retq
    301 ;
    302 ; AVX-LABEL: shuffle_v2f64_u2:
    303 ; AVX:       # %bb.0:
    304 ; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm1[0,0]
    305 ; AVX-NEXT:    retq
    306   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 undef, i32 2> ; lane 0 is undef in the mask; only lane 1 (%b[0]) is constrained
    307   ret <2 x double> %shuffle
    308 }
    309 define <2 x double> @shuffle_v2f64_3u(<2 x double> %a, <2 x double> %b) { ; returns <%b[1], undef>
    310 ; SSE-LABEL: shuffle_v2f64_3u:
    311 ; SSE:       # %bb.0:
    312 ; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
    313 ; SSE-NEXT:    movaps %xmm1, %xmm0
    314 ; SSE-NEXT:    retq
    315 ;
    316 ; AVX-LABEL: shuffle_v2f64_3u:
    317 ; AVX:       # %bb.0:
    318 ; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
    319 ; AVX-NEXT:    retq
    320   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 undef> ; lane 1 is undef in the mask; only lane 0 (%b[1]) is constrained
    321   ret <2 x double> %shuffle
    322 }
    323 
    324 define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) { ; returns <%a[0], %b[0]>
    325 ; SSE-LABEL: shuffle_v2i64_02:
    326 ; SSE:       # %bb.0:
    327 ; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    328 ; SSE-NEXT:    retq
    329 ;
    330 ; AVX-LABEL: shuffle_v2i64_02:
    331 ; AVX:       # %bb.0:
    332 ; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    333 ; AVX-NEXT:    retq
    334   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2> ; takes element 0 of each input, %a first
    335   ret <2 x i64> %shuffle
    336 }
    337 define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; returns <%a[0], %b[0]>
    338 ; SSE-LABEL: shuffle_v2i64_02_copy:
    339 ; SSE:       # %bb.0:
    340 ; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
    341 ; SSE-NEXT:    movaps %xmm1, %xmm0
    342 ; SSE-NEXT:    retq
    343 ;
    344 ; AVX-LABEL: shuffle_v2i64_02_copy:
    345 ; AVX:       # %bb.0:
    346 ; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm2[0]
    347 ; AVX-NEXT:    retq
    348   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2> ; %nonce is never read; presumably it keeps %a and %b out of the return register - confirm
    349   ret <2 x i64> %shuffle
    350 }
    351 define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) { ; returns <%a[0], %b[1]>
    352 ; SSE2-LABEL: shuffle_v2i64_03:
    353 ; SSE2:       # %bb.0:
    354 ; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
    355 ; SSE2-NEXT:    movapd %xmm1, %xmm0
    356 ; SSE2-NEXT:    retq
    357 ;
    358 ; SSE3-LABEL: shuffle_v2i64_03:
    359 ; SSE3:       # %bb.0:
    360 ; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
    361 ; SSE3-NEXT:    movapd %xmm1, %xmm0
    362 ; SSE3-NEXT:    retq
    363 ;
    364 ; SSSE3-LABEL: shuffle_v2i64_03:
    365 ; SSSE3:       # %bb.0:
    366 ; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
    367 ; SSSE3-NEXT:    movapd %xmm1, %xmm0
    368 ; SSSE3-NEXT:    retq
    369 ;
    370 ; SSE41-LABEL: shuffle_v2i64_03:
    371 ; SSE41:       # %bb.0:
    372 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
    373 ; SSE41-NEXT:    retq
    374 ;
    375 ; AVX-LABEL: shuffle_v2i64_03:
    376 ; AVX:       # %bb.0:
    377 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
    378 ; AVX-NEXT:    retq
    379   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3> ; each lane keeps its position: low from %a, high from %b
    380   ret <2 x i64> %shuffle
    381 }
    382 define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; returns <%a[0], %b[1]>
    383 ; SSE2-LABEL: shuffle_v2i64_03_copy:
    384 ; SSE2:       # %bb.0:
    385 ; SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
    386 ; SSE2-NEXT:    movapd %xmm2, %xmm0
    387 ; SSE2-NEXT:    retq
    388 ;
    389 ; SSE3-LABEL: shuffle_v2i64_03_copy:
    390 ; SSE3:       # %bb.0:
    391 ; SSE3-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
    392 ; SSE3-NEXT:    movapd %xmm2, %xmm0
    393 ; SSE3-NEXT:    retq
    394 ;
    395 ; SSSE3-LABEL: shuffle_v2i64_03_copy:
    396 ; SSSE3:       # %bb.0:
    397 ; SSSE3-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
    398 ; SSSE3-NEXT:    movapd %xmm2, %xmm0
    399 ; SSSE3-NEXT:    retq
    400 ;
    401 ; SSE41-LABEL: shuffle_v2i64_03_copy:
    402 ; SSE41:       # %bb.0:
    403 ; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3]
    404 ; SSE41-NEXT:    movaps %xmm1, %xmm0
    405 ; SSE41-NEXT:    retq
    406 ;
    407 ; AVX-LABEL: shuffle_v2i64_03_copy:
    408 ; AVX:       # %bb.0:
    409 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
    410 ; AVX-NEXT:    retq
    411   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3> ; %nonce is never read; presumably it keeps %a and %b out of the return register - confirm
    412   ret <2 x i64> %shuffle
    413 }
    414 define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) { ; returns <%a[1], %b[0]>
    415 ; SSE2-LABEL: shuffle_v2i64_12:
    416 ; SSE2:       # %bb.0:
    417 ; SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
    418 ; SSE2-NEXT:    retq
    419 ;
    420 ; SSE3-LABEL: shuffle_v2i64_12:
    421 ; SSE3:       # %bb.0:
    422 ; SSE3-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
    423 ; SSE3-NEXT:    retq
    424 ;
    425 ; SSSE3-LABEL: shuffle_v2i64_12:
    426 ; SSSE3:       # %bb.0:
    427 ; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
    428 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
    429 ; SSSE3-NEXT:    retq
    430 ;
    431 ; SSE41-LABEL: shuffle_v2i64_12:
    432 ; SSE41:       # %bb.0:
    433 ; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
    434 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
    435 ; SSE41-NEXT:    retq
    436 ;
    437 ; AVX-LABEL: shuffle_v2i64_12:
    438 ; AVX:       # %bb.0:
    439 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
    440 ; AVX-NEXT:    retq
    441   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2> ; high element of %a followed by low element of %b
    442   ret <2 x i64> %shuffle
    443 }
    444 define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; returns <%a[1], %b[0]>
    445 ; SSE2-LABEL: shuffle_v2i64_12_copy:
    446 ; SSE2:       # %bb.0:
    447 ; SSE2-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
    448 ; SSE2-NEXT:    movapd %xmm1, %xmm0
    449 ; SSE2-NEXT:    retq
    450 ;
    451 ; SSE3-LABEL: shuffle_v2i64_12_copy:
    452 ; SSE3:       # %bb.0:
    453 ; SSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
    454 ; SSE3-NEXT:    movapd %xmm1, %xmm0
    455 ; SSE3-NEXT:    retq
    456 ;
    457 ; SSSE3-LABEL: shuffle_v2i64_12_copy:
    458 ; SSSE3:       # %bb.0:
    459 ; SSSE3-NEXT:    palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
    460 ; SSSE3-NEXT:    movdqa %xmm2, %xmm0
    461 ; SSSE3-NEXT:    retq
    462 ;
    463 ; SSE41-LABEL: shuffle_v2i64_12_copy:
    464 ; SSE41:       # %bb.0:
    465 ; SSE41-NEXT:    palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
    466 ; SSE41-NEXT:    movdqa %xmm2, %xmm0
    467 ; SSE41-NEXT:    retq
    468 ;
    469 ; AVX-LABEL: shuffle_v2i64_12_copy:
    470 ; AVX:       # %bb.0:
    471 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
    472 ; AVX-NEXT:    retq
    473   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2> ; %nonce is never read; presumably it keeps %a and %b out of the return register - confirm
    474   ret <2 x i64> %shuffle
    475 }
    476 define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) { ; returns <%a[1], %b[1]>
    477 ; SSE-LABEL: shuffle_v2i64_13:
    478 ; SSE:       # %bb.0:
    479 ; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
    480 ; SSE-NEXT:    retq
    481 ;
    482 ; AVX-LABEL: shuffle_v2i64_13:
    483 ; AVX:       # %bb.0:
    484 ; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
    485 ; AVX-NEXT:    retq
    486   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3> ; takes element 1 of each input, %a first
    487   ret <2 x i64> %shuffle
    488 }
    489 define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; returns <%a[1], %b[1]>
    490 ; SSE-LABEL: shuffle_v2i64_13_copy:
    491 ; SSE:       # %bb.0:
    492 ; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
    493 ; SSE-NEXT:    movaps %xmm1, %xmm0
    494 ; SSE-NEXT:    retq
    495 ;
    496 ; AVX-LABEL: shuffle_v2i64_13_copy:
    497 ; AVX:       # %bb.0:
    498 ; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm2[1]
    499 ; AVX-NEXT:    retq
    500   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3> ; %nonce is never read; presumably it keeps %a and %b out of the return register - confirm
    501   ret <2 x i64> %shuffle
    502 }
    503 define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) { ; returns <%b[0], %a[0]>
    504 ; SSE-LABEL: shuffle_v2i64_20:
    505 ; SSE:       # %bb.0:
    506 ; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
    507 ; SSE-NEXT:    movaps %xmm1, %xmm0
    508 ; SSE-NEXT:    retq
    509 ;
    510 ; AVX-LABEL: shuffle_v2i64_20:
    511 ; AVX:       # %bb.0:
    512 ; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
    513 ; AVX-NEXT:    retq
    514   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0> ; takes element 0 of each input, %b first
    515   ret <2 x i64> %shuffle
    516 }
    517 define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; returns <%b[0], %a[0]>
    518 ; SSE-LABEL: shuffle_v2i64_20_copy:
    519 ; SSE:       # %bb.0:
    520 ; SSE-NEXT:    movlhps {{.*#+}} xmm2 = xmm2[0],xmm1[0]
    521 ; SSE-NEXT:    movaps %xmm2, %xmm0
    522 ; SSE-NEXT:    retq
    523 ;
    524 ; AVX-LABEL: shuffle_v2i64_20_copy:
    525 ; AVX:       # %bb.0:
    526 ; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm2[0],xmm1[0]
    527 ; AVX-NEXT:    retq
    528   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0> ; %nonce is never read; presumably it keeps %a and %b out of the return register - confirm
    529   ret <2 x i64> %shuffle
    530 }
    531 define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) { ; returns <%b[0], %a[1]>
    532 ; SSE2-LABEL: shuffle_v2i64_21:
    533 ; SSE2:       # %bb.0:
    534 ; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    535 ; SSE2-NEXT:    retq
    536 ;
    537 ; SSE3-LABEL: shuffle_v2i64_21:
    538 ; SSE3:       # %bb.0:
    539 ; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    540 ; SSE3-NEXT:    retq
    541 ;
    542 ; SSSE3-LABEL: shuffle_v2i64_21:
    543 ; SSSE3:       # %bb.0:
    544 ; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    545 ; SSSE3-NEXT:    retq
    546 ;
    547 ; SSE41-LABEL: shuffle_v2i64_21:
    548 ; SSE41:       # %bb.0:
    549 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
    550 ; SSE41-NEXT:    retq
    551 ;
    552 ; AVX-LABEL: shuffle_v2i64_21:
    553 ; AVX:       # %bb.0:
    554 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
    555 ; AVX-NEXT:    retq
    556   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1> ; each lane keeps its position: low from %b, high from %a
    557   ret <2 x i64> %shuffle
    558 }
    559 define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; returns <%b[0], %a[1]>
    560 ; SSE2-LABEL: shuffle_v2i64_21_copy:
    561 ; SSE2:       # %bb.0:
    562 ; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
    563 ; SSE2-NEXT:    movapd %xmm1, %xmm0
    564 ; SSE2-NEXT:    retq
    565 ;
    566 ; SSE3-LABEL: shuffle_v2i64_21_copy:
    567 ; SSE3:       # %bb.0:
    568 ; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
    569 ; SSE3-NEXT:    movapd %xmm1, %xmm0
    570 ; SSE3-NEXT:    retq
    571 ;
    572 ; SSSE3-LABEL: shuffle_v2i64_21_copy:
    573 ; SSSE3:       # %bb.0:
    574 ; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
    575 ; SSSE3-NEXT:    movapd %xmm1, %xmm0
    576 ; SSSE3-NEXT:    retq
    577 ;
    578 ; SSE41-LABEL: shuffle_v2i64_21_copy:
    579 ; SSE41:       # %bb.0:
    580 ; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3]
    581 ; SSE41-NEXT:    movaps %xmm1, %xmm0
    582 ; SSE41-NEXT:    retq
    583 ;
    584 ; AVX-LABEL: shuffle_v2i64_21_copy:
    585 ; AVX:       # %bb.0:
    586 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
    587 ; AVX-NEXT:    retq
    588   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1> ; %nonce is never read; presumably it keeps %a and %b out of the return register - confirm
    589   ret <2 x i64> %shuffle
    590 }
    591 define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) { ; returns <%b[1], %a[0]>
    592 ; SSE2-LABEL: shuffle_v2i64_30:
    593 ; SSE2:       # %bb.0:
    594 ; SSE2-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
    595 ; SSE2-NEXT:    movapd %xmm1, %xmm0
    596 ; SSE2-NEXT:    retq
    597 ;
    598 ; SSE3-LABEL: shuffle_v2i64_30:
    599 ; SSE3:       # %bb.0:
    600 ; SSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
    601 ; SSE3-NEXT:    movapd %xmm1, %xmm0
    602 ; SSE3-NEXT:    retq
    603 ;
    604 ; SSSE3-LABEL: shuffle_v2i64_30:
    605 ; SSSE3:       # %bb.0:
    606 ; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
    607 ; SSSE3-NEXT:    retq
    608 ;
    609 ; SSE41-LABEL: shuffle_v2i64_30:
    610 ; SSE41:       # %bb.0:
    611 ; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
    612 ; SSE41-NEXT:    retq
    613 ;
    614 ; AVX-LABEL: shuffle_v2i64_30:
    615 ; AVX:       # %bb.0:
    616 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
    617 ; AVX-NEXT:    retq
    618   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0> ; high element of %b followed by low element of %a
    619   ret <2 x i64> %shuffle
    620 }
    621 define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; returns <%b[1], %a[0]>
    622 ; SSE2-LABEL: shuffle_v2i64_30_copy:
    623 ; SSE2:       # %bb.0:
    624 ; SSE2-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
    625 ; SSE2-NEXT:    movapd %xmm2, %xmm0
    626 ; SSE2-NEXT:    retq
    627 ;
    628 ; SSE3-LABEL: shuffle_v2i64_30_copy:
    629 ; SSE3:       # %bb.0:
    630 ; SSE3-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
    631 ; SSE3-NEXT:    movapd %xmm2, %xmm0
    632 ; SSE3-NEXT:    retq
    633 ;
    634 ; SSSE3-LABEL: shuffle_v2i64_30_copy:
    635 ; SSSE3:       # %bb.0:
    636 ; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
    637 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
    638 ; SSSE3-NEXT:    retq
    639 ;
    640 ; SSE41-LABEL: shuffle_v2i64_30_copy:
    641 ; SSE41:       # %bb.0:
    642 ; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
    643 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
    644 ; SSE41-NEXT:    retq
    645 ;
    646 ; AVX-LABEL: shuffle_v2i64_30_copy:
    647 ; AVX:       # %bb.0:
    648 ; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
    649 ; AVX-NEXT:    retq
    650   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0> ; %nonce is never read; presumably it keeps %a and %b out of the return register - confirm
    651   ret <2 x i64> %shuffle
    652 }
    653 define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) { ; returns <%b[1], %a[1]>
    654 ; SSE-LABEL: shuffle_v2i64_31:
    655 ; SSE:       # %bb.0:
    656 ; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
    657 ; SSE-NEXT:    movaps %xmm1, %xmm0
    658 ; SSE-NEXT:    retq
    659 ;
    660 ; AVX-LABEL: shuffle_v2i64_31:
    661 ; AVX:       # %bb.0:
    662 ; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
    663 ; AVX-NEXT:    retq
    664   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1> ; takes element 1 of each input, %b first
    665   ret <2 x i64> %shuffle
    666 }
    667 define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; returns <%b[1], %a[1]>
    668 ; SSE-LABEL: shuffle_v2i64_31_copy:
    669 ; SSE:       # %bb.0:
    670 ; SSE-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
    671 ; SSE-NEXT:    movaps %xmm2, %xmm0
    672 ; SSE-NEXT:    retq
    673 ;
    674 ; AVX-LABEL: shuffle_v2i64_31_copy:
    675 ; AVX:       # %bb.0:
    676 ; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm2[1],xmm1[1]
    677 ; AVX-NEXT:    retq
    678   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1> ; %nonce is never read; presumably it keeps %a and %b out of the return register - confirm
    679   ret <2 x i64> %shuffle
    680 }
    681 
    682 define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) { ; returns <%a[0], 0>
    683 ; SSE-LABEL: shuffle_v2i64_0z:
    684 ; SSE:       # %bb.0:
    685 ; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
    686 ; SSE-NEXT:    retq
    687 ;
    688 ; AVX-LABEL: shuffle_v2i64_0z:
    689 ; AVX:       # %bb.0:
    690 ; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
    691 ; AVX-NEXT:    retq
    692   %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3> ; second operand is all zeros, so index 3 yields 0
    693   ret <2 x i64> %shuffle
    694 }
    695 
    696 define <2 x i64> @shuffle_v2i64_1z(<2 x i64> %a) { ; returns <%a[1], 0>
    697 ; SSE-LABEL: shuffle_v2i64_1z:
    698 ; SSE:       # %bb.0:
    699 ; SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
    700 ; SSE-NEXT:    retq
    701 ;
    702 ; AVX-LABEL: shuffle_v2i64_1z:
    703 ; AVX:       # %bb.0:
    704 ; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
    705 ; AVX-NEXT:    retq
    706   %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 3> ; second operand is all zeros, so index 3 yields 0
    707   ret <2 x i64> %shuffle
    708 }
    709 
    710 define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) { ; returns <0, %a[0]>
    711 ; SSE-LABEL: shuffle_v2i64_z0:
    712 ; SSE:       # %bb.0:
    713 ; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
    714 ; SSE-NEXT:    retq
    715 ;
    716 ; AVX-LABEL: shuffle_v2i64_z0:
    717 ; AVX:       # %bb.0:
    718 ; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
    719 ; AVX-NEXT:    retq
    720   %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0> ; second operand is all zeros, so index 2 yields 0
    721   ret <2 x i64> %shuffle
    722 }
    723 
    724 define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) { ; returns <0, %a[1]>
    725 ; SSE2-LABEL: shuffle_v2i64_z1:
    726 ; SSE2:       # %bb.0:
    727 ; SSE2-NEXT:    xorpd %xmm1, %xmm1
    728 ; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    729 ; SSE2-NEXT:    retq
    730 ;
    731 ; SSE3-LABEL: shuffle_v2i64_z1:
    732 ; SSE3:       # %bb.0:
    733 ; SSE3-NEXT:    xorpd %xmm1, %xmm1
    734 ; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    735 ; SSE3-NEXT:    retq
    736 ;
    737 ; SSSE3-LABEL: shuffle_v2i64_z1:
    738 ; SSSE3:       # %bb.0:
    739 ; SSSE3-NEXT:    xorpd %xmm1, %xmm1
    740 ; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    741 ; SSSE3-NEXT:    retq
    742 ;
    743 ; SSE41-LABEL: shuffle_v2i64_z1:
    744 ; SSE41:       # %bb.0:
    745 ; SSE41-NEXT:    xorps %xmm1, %xmm1
    746 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
    747 ; SSE41-NEXT:    retq
    748 ;
    749 ; AVX-LABEL: shuffle_v2i64_z1:
    750 ; AVX:       # %bb.0:
    751 ; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    752 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
    753 ; AVX-NEXT:    retq
    754   %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1> ; second operand is all zeros, so index 2 yields 0
    755   ret <2 x i64> %shuffle
    756 }
    757 
    758 define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) { ; returns <%a[0], 0.0>
    759 ; SSE-LABEL: shuffle_v2f64_0z:
    760 ; SSE:       # %bb.0:
    761 ; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
    762 ; SSE-NEXT:    retq
    763 ;
    764 ; AVX-LABEL: shuffle_v2f64_0z:
    765 ; AVX:       # %bb.0:
    766 ; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
    767 ; AVX-NEXT:    retq
    768   %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3> ; second operand is all zeros, so index 3 yields 0.0
    769   ret <2 x double> %shuffle
    770 }
    771 
    772 define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) { ; returns <%a[1], 0.0>
    773 ; SSE-LABEL: shuffle_v2f64_1z:
    774 ; SSE:       # %bb.0:
    775 ; SSE-NEXT:    xorps %xmm1, %xmm1
    776 ; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
    777 ; SSE-NEXT:    retq
    778 ;
    779 ; AVX-LABEL: shuffle_v2f64_1z:
    780 ; AVX:       # %bb.0:
    781 ; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    782 ; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
    783 ; AVX-NEXT:    retq
    784   %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3> ; second operand is all zeros, so index 3 yields 0.0
    785   ret <2 x double> %shuffle
    786 }
    787 
    788 define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
        ; Zero in lane 0, element 0 of %a in lane 1 (mask <2, 0>).
        ; The SSE assertions build the result in the zeroed xmm1 with movlhps
        ; and then copy it back to xmm0; the AVX assertions expect the
        ; three-operand vmovlhps to write xmm0 directly, with no extra copy.
    789 ; SSE-LABEL: shuffle_v2f64_z0:
    790 ; SSE:       # %bb.0:
    791 ; SSE-NEXT:    xorps %xmm1, %xmm1
    792 ; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
    793 ; SSE-NEXT:    movaps %xmm1, %xmm0
    794 ; SSE-NEXT:    retq
    795 ;
    796 ; AVX-LABEL: shuffle_v2f64_z0:
    797 ; AVX:       # %bb.0:
    798 ; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    799 ; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
    800 ; AVX-NEXT:    retq
    801   %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
    802   ret <2 x double> %shuffle
    803 }
    804 
    805 define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
        ; Floating-point blend of %a with a zero vector, mask <2, 1>. Lane 0
        ; comes from the zero operand, lane 1 is element 1 of %a.
        ; The assertions expect a zeroed register plus movsd before SSE4.1 and
        ; a zeroed register plus blendps/vblendps from SSE4.1 onwards.
    806 ; SSE2-LABEL: shuffle_v2f64_z1:
    807 ; SSE2:       # %bb.0:
    808 ; SSE2-NEXT:    xorpd %xmm1, %xmm1
    809 ; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    810 ; SSE2-NEXT:    retq
    811 ;
    812 ; SSE3-LABEL: shuffle_v2f64_z1:
    813 ; SSE3:       # %bb.0:
    814 ; SSE3-NEXT:    xorpd %xmm1, %xmm1
    815 ; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    816 ; SSE3-NEXT:    retq
    817 ;
    818 ; SSSE3-LABEL: shuffle_v2f64_z1:
    819 ; SSSE3:       # %bb.0:
    820 ; SSSE3-NEXT:    xorpd %xmm1, %xmm1
    821 ; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    822 ; SSSE3-NEXT:    retq
    823 ;
    824 ; SSE41-LABEL: shuffle_v2f64_z1:
    825 ; SSE41:       # %bb.0:
    826 ; SSE41-NEXT:    xorps %xmm1, %xmm1
    827 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
    828 ; SSE41-NEXT:    retq
    829 ;
    830 ; AVX-LABEL: shuffle_v2f64_z1:
    831 ; AVX:       # %bb.0:
    832 ; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    833 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
    834 ; AVX-NEXT:    retq
    835   %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
    836   ret <2 x double> %shuffle
    837 }
    838 
    839 define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
        ; Two shuffles separated by bitcasts whose net effect is
        ; [element 1 of %a, zero]. The assertions expect them to be combined
        ; into one unpckhpd/vunpckhpd against a zeroed register.
    840 ; SSE-LABEL: shuffle_v2f64_bitcast_1z:
    841 ; SSE:       # %bb.0:
    842 ; SSE-NEXT:    xorps %xmm1, %xmm1
    843 ; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
    844 ; SSE-NEXT:    retq
    845 ;
    846 ; AVX-LABEL: shuffle_v2f64_bitcast_1z:
    847 ; AVX:       # %bb.0:
    848 ; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    849 ; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
    850 ; AVX-NEXT:    retq
        ; Step 1 (64-bit lanes): mask <2, 1> yields [zero, element 1 of %a].
    851   %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
    852   %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
        ; Step 2 (32-bit lanes): mask <2, 3, 0, 1> swaps the two 64-bit halves.
    853   %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
    854   %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
    855   ret <2 x double> %bitcast64
    856 }
    857 
    858 define <2 x i64> @shuffle_v2i64_bitcast_z123(<2 x i64> %x) {
        ; A 32-bit-lane shuffle followed by an AND that clears the lane the
        ; shuffle just replaced; the net result is %x with its lowest 32 bits
        ; zeroed. Before SSE4.1 the assertions expect a single andps with a
        ; RIP-relative constant; from SSE4.1 onwards they expect a blend with
        ; a zeroed register.
    859 ; SSE2-LABEL: shuffle_v2i64_bitcast_z123:
    860 ; SSE2:       # %bb.0:
    861 ; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
    862 ; SSE2-NEXT:    retq
    863 ;
    864 ; SSE3-LABEL: shuffle_v2i64_bitcast_z123:
    865 ; SSE3:       # %bb.0:
    866 ; SSE3-NEXT:    andps {{.*}}(%rip), %xmm0
    867 ; SSE3-NEXT:    retq
    868 ;
    869 ; SSSE3-LABEL: shuffle_v2i64_bitcast_z123:
    870 ; SSSE3:       # %bb.0:
    871 ; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
    872 ; SSSE3-NEXT:    retq
    873 ;
    874 ; SSE41-LABEL: shuffle_v2i64_bitcast_z123:
    875 ; SSE41:       # %bb.0:
    876 ; SSE41-NEXT:    xorps %xmm1, %xmm1
    877 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
    878 ; SSE41-NEXT:    retq
    879 ;
    880 ; AVX-LABEL: shuffle_v2i64_bitcast_z123:
    881 ; AVX:       # %bb.0:
    882 ; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    883 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
    884 ; AVX-NEXT:    retq
    885   %bitcast32 = bitcast <2 x i64> %x to <4 x float>
        ; Mask <4, 1, 2, 3> replaces float lane 0 with the constant 1.0 and
        ; keeps float lanes 1-3 of %x.
    886   %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
    887   %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x i64>
        ; -4294967296 is 0xFFFFFFFF00000000, so the AND clears the low 32 bits
        ; of i64 lane 0 (the inserted 1.0) and leaves everything else intact.
    888   %and = and <2 x i64> %bitcast64, <i64 -4294967296, i64 -1>
    889   ret <2 x i64> %and
    890 }
    891 
    892 define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
        ; Places the scalar %a in lane 0 and zero in lane 1 (mask <0, 3>
        ; against a zero vector). The assertions expect a single movq/vmovq
        ; from %rdi into xmm0 and no separate zeroing instruction.
    893 ; SSE-LABEL: insert_reg_and_zero_v2i64:
    894 ; SSE:       # %bb.0:
    895 ; SSE-NEXT:    movq %rdi, %xmm0
    896 ; SSE-NEXT:    retq
    897 ;
    898 ; AVX-LABEL: insert_reg_and_zero_v2i64:
    899 ; AVX:       # %bb.0:
    900 ; AVX-NEXT:    vmovq %rdi, %xmm0
    901 ; AVX-NEXT:    retq
    902   %v = insertelement <2 x i64> undef, i64 %a, i32 0
    903   %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
    904   ret <2 x i64> %shuffle
    905 }
    906 
    907 define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
        ; Loads an i64 from %ptr into lane 0 and zeroes lane 1 (mask <0, 3>
        ; against a zero vector). The assertions expect the load and the
        ; zeroing to fold into one movsd/vmovsd, annotated "mem[0],zero".
    908 ; SSE-LABEL: insert_mem_and_zero_v2i64:
    909 ; SSE:       # %bb.0:
    910 ; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    911 ; SSE-NEXT:    retq
    912 ;
    913 ; AVX-LABEL: insert_mem_and_zero_v2i64:
    914 ; AVX:       # %bb.0:
    915 ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    916 ; AVX-NEXT:    retq
    917   %a = load i64, i64* %ptr
    918   %v = insertelement <2 x i64> undef, i64 %a, i32 0
    919   %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
    920   ret <2 x i64> %shuffle
    921 }
    922 
    923 define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
        ; Places the scalar double %a in lane 0 and zero in lane 1 (mask
        ; <0, 3> against a zero vector). The assertions read the scalar from
        ; xmm0 and expect a single movq/vmovq that clears the upper lane.
    924 ; SSE-LABEL: insert_reg_and_zero_v2f64:
    925 ; SSE:       # %bb.0:
    926 ; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
    927 ; SSE-NEXT:    retq
    928 ;
    929 ; AVX-LABEL: insert_reg_and_zero_v2f64:
    930 ; AVX:       # %bb.0:
    931 ; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
    932 ; AVX-NEXT:    retq
    933   %v = insertelement <2 x double> undef, double %a, i32 0
    934   %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
    935   ret <2 x double> %shuffle
    936 }
    937 
    938 define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
        ; Loads a double from %ptr into lane 0 and zeroes lane 1 (mask <0, 3>
        ; against a zero vector). The assertions expect the load and the
        ; zeroing to fold into one movsd/vmovsd, annotated "mem[0],zero".
    939 ; SSE-LABEL: insert_mem_and_zero_v2f64:
    940 ; SSE:       # %bb.0:
    941 ; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    942 ; SSE-NEXT:    retq
    943 ;
    944 ; AVX-LABEL: insert_mem_and_zero_v2f64:
    945 ; AVX:       # %bb.0:
    946 ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    947 ; AVX-NEXT:    retq
    948   %a = load double, double* %ptr
    949   %v = insertelement <2 x double> undef, double %a, i32 0
    950   %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
    951   ret <2 x double> %shuffle
    952 }
    953 
    954 define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
        ; Replaces lane 0 of %b with the scalar %a and keeps element 1 of %b
        ; (mask <0, 3>). Before SSE4.1 the assertions expect a movq into xmm1
        ; followed by movsd; from SSE4.1 onwards a single pinsrq/vpinsrq with
        ; lane index 0.
    955 ; SSE2-LABEL: insert_reg_lo_v2i64:
    956 ; SSE2:       # %bb.0:
    957 ; SSE2-NEXT:    movq %rdi, %xmm1
    958 ; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    959 ; SSE2-NEXT:    retq
    960 ;
    961 ; SSE3-LABEL: insert_reg_lo_v2i64:
    962 ; SSE3:       # %bb.0:
    963 ; SSE3-NEXT:    movq %rdi, %xmm1
    964 ; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    965 ; SSE3-NEXT:    retq
    966 ;
    967 ; SSSE3-LABEL: insert_reg_lo_v2i64:
    968 ; SSSE3:       # %bb.0:
    969 ; SSSE3-NEXT:    movq %rdi, %xmm1
    970 ; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    971 ; SSSE3-NEXT:    retq
    972 ;
    973 ; SSE41-LABEL: insert_reg_lo_v2i64:
    974 ; SSE41:       # %bb.0:
    975 ; SSE41-NEXT:    pinsrq $0, %rdi, %xmm0
    976 ; SSE41-NEXT:    retq
    977 ;
    978 ; AVX-LABEL: insert_reg_lo_v2i64:
    979 ; AVX:       # %bb.0:
    980 ; AVX-NEXT:    vpinsrq $0, %rdi, %xmm0, %xmm0
    981 ; AVX-NEXT:    retq
    982   %v = insertelement <2 x i64> undef, i64 %a, i32 0
    983   %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
    984   ret <2 x i64> %shuffle
    985 }
    986 
    987 define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
        ; Replaces lane 0 of %b with an i64 loaded from %ptr and keeps element
        ; 1 of %b (mask <0, 3>). Before SSE4.1 the assertions expect the load
        ; folded into movlpd; from SSE4.1 onwards a pinsrq/vpinsrq with a
        ; (%rdi) memory operand and lane index 0.
    988 ; SSE2-LABEL: insert_mem_lo_v2i64:
    989 ; SSE2:       # %bb.0:
    990 ; SSE2-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
    991 ; SSE2-NEXT:    retq
    992 ;
    993 ; SSE3-LABEL: insert_mem_lo_v2i64:
    994 ; SSE3:       # %bb.0:
    995 ; SSE3-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
    996 ; SSE3-NEXT:    retq
    997 ;
    998 ; SSSE3-LABEL: insert_mem_lo_v2i64:
    999 ; SSSE3:       # %bb.0:
   1000 ; SSSE3-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
   1001 ; SSSE3-NEXT:    retq
   1002 ;
   1003 ; SSE41-LABEL: insert_mem_lo_v2i64:
   1004 ; SSE41:       # %bb.0:
   1005 ; SSE41-NEXT:    pinsrq $0, (%rdi), %xmm0
   1006 ; SSE41-NEXT:    retq
   1007 ;
   1008 ; AVX-LABEL: insert_mem_lo_v2i64:
   1009 ; AVX:       # %bb.0:
   1010 ; AVX-NEXT:    vpinsrq $0, (%rdi), %xmm0, %xmm0
   1011 ; AVX-NEXT:    retq
   1012   %a = load i64, i64* %ptr
   1013   %v = insertelement <2 x i64> undef, i64 %a, i32 0
   1014   %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
   1015   ret <2 x i64> %shuffle
   1016 }
   1017 
   1018 define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
        ; Keeps element 0 of %b in lane 0 and places the scalar %a in lane 1
        ; (mask <2, 0>). Before SSE4.1 the assertions expect a movq into xmm1
        ; followed by punpcklqdq; from SSE4.1 onwards a single pinsrq/vpinsrq
        ; with lane index 1.
   1019 ; SSE2-LABEL: insert_reg_hi_v2i64:
   1020 ; SSE2:       # %bb.0:
   1021 ; SSE2-NEXT:    movq %rdi, %xmm1
   1022 ; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   1023 ; SSE2-NEXT:    retq
   1024 ;
   1025 ; SSE3-LABEL: insert_reg_hi_v2i64:
   1026 ; SSE3:       # %bb.0:
   1027 ; SSE3-NEXT:    movq %rdi, %xmm1
   1028 ; SSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   1029 ; SSE3-NEXT:    retq
   1030 ;
   1031 ; SSSE3-LABEL: insert_reg_hi_v2i64:
   1032 ; SSSE3:       # %bb.0:
   1033 ; SSSE3-NEXT:    movq %rdi, %xmm1
   1034 ; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   1035 ; SSSE3-NEXT:    retq
   1036 ;
   1037 ; SSE41-LABEL: insert_reg_hi_v2i64:
   1038 ; SSE41:       # %bb.0:
   1039 ; SSE41-NEXT:    pinsrq $1, %rdi, %xmm0
   1040 ; SSE41-NEXT:    retq
   1041 ;
   1042 ; AVX-LABEL: insert_reg_hi_v2i64:
   1043 ; AVX:       # %bb.0:
   1044 ; AVX-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm0
   1045 ; AVX-NEXT:    retq
   1046   %v = insertelement <2 x i64> undef, i64 %a, i32 0
   1047   %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
   1048   ret <2 x i64> %shuffle
   1049 }
   1050 
   1051 define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
        ; Keeps element 0 of %b in lane 0 and places an i64 loaded from %ptr
        ; in lane 1 (mask <2, 0>). Before SSE4.1 the assertions expect a movsd
        ; load into xmm1 followed by movlhps; from SSE4.1 onwards a
        ; pinsrq/vpinsrq with a (%rdi) memory operand and lane index 1.
   1052 ; SSE2-LABEL: insert_mem_hi_v2i64:
   1053 ; SSE2:       # %bb.0:
   1054 ; SSE2-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
   1055 ; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   1056 ; SSE2-NEXT:    retq
   1057 ;
   1058 ; SSE3-LABEL: insert_mem_hi_v2i64:
   1059 ; SSE3:       # %bb.0:
   1060 ; SSE3-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
   1061 ; SSE3-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   1062 ; SSE3-NEXT:    retq
   1063 ;
   1064 ; SSSE3-LABEL: insert_mem_hi_v2i64:
   1065 ; SSSE3:       # %bb.0:
   1066 ; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
   1067 ; SSSE3-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
   1068 ; SSSE3-NEXT:    retq
   1069 ;
   1070 ; SSE41-LABEL: insert_mem_hi_v2i64:
   1071 ; SSE41:       # %bb.0:
   1072 ; SSE41-NEXT:    pinsrq $1, (%rdi), %xmm0
   1073 ; SSE41-NEXT:    retq
   1074 ;
   1075 ; AVX-LABEL: insert_mem_hi_v2i64:
   1076 ; AVX:       # %bb.0:
   1077 ; AVX-NEXT:    vpinsrq $1, (%rdi), %xmm0, %xmm0
   1078 ; AVX-NEXT:    retq
   1079   %a = load i64, i64* %ptr
   1080   %v = insertelement <2 x i64> undef, i64 %a, i32 0
   1081   %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
   1082   ret <2 x i64> %shuffle
   1083 }
   1084 
   1085 define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
        ; Replaces lane 0 of %b with the scalar double %a and keeps element 1
        ; of %b (mask <0, 3>). In the assertions the scalar is in xmm0 and %b
        ; in xmm1. Before SSE4.1 they expect movsd into xmm1 plus a movapd
        ; copy back to xmm0; from SSE4.1 onwards a single blendps/vblendps
        ; that writes xmm0 directly.
   1086 ; SSE2-LABEL: insert_reg_lo_v2f64:
   1087 ; SSE2:       # %bb.0:
   1088 ; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
   1089 ; SSE2-NEXT:    movapd %xmm1, %xmm0
   1090 ; SSE2-NEXT:    retq
   1091 ;
   1092 ; SSE3-LABEL: insert_reg_lo_v2f64:
   1093 ; SSE3:       # %bb.0:
   1094 ; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
   1095 ; SSE3-NEXT:    movapd %xmm1, %xmm0
   1096 ; SSE3-NEXT:    retq
   1097 ;
   1098 ; SSSE3-LABEL: insert_reg_lo_v2f64:
   1099 ; SSSE3:       # %bb.0:
   1100 ; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
   1101 ; SSSE3-NEXT:    movapd %xmm1, %xmm0
   1102 ; SSSE3-NEXT:    retq
   1103 ;
   1104 ; SSE41-LABEL: insert_reg_lo_v2f64:
   1105 ; SSE41:       # %bb.0:
   1106 ; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
   1107 ; SSE41-NEXT:    retq
   1108 ;
   1109 ; AVX-LABEL: insert_reg_lo_v2f64:
   1110 ; AVX:       # %bb.0:
   1111 ; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
   1112 ; AVX-NEXT:    retq
   1113   %v = insertelement <2 x double> undef, double %a, i32 0
   1114   %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
   1115   ret <2 x double> %shuffle
   1116 }
   1117 
   1118 define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
        ; Replaces lane 0 of %b with a double loaded from %ptr and keeps
        ; element 1 of %b (mask <0, 3>). The assertions expect the load to be
        ; folded into a single movlpd/vmovlpd on every tested subtarget.
   1119 ; SSE-LABEL: insert_mem_lo_v2f64:
   1120 ; SSE:       # %bb.0:
   1121 ; SSE-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
   1122 ; SSE-NEXT:    retq
   1123 ;
   1124 ; AVX-LABEL: insert_mem_lo_v2f64:
   1125 ; AVX:       # %bb.0:
   1126 ; AVX-NEXT:    vmovlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
   1127 ; AVX-NEXT:    retq
   1128   %a = load double, double* %ptr
   1129   %v = insertelement <2 x double> undef, double %a, i32 0
   1130   %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
   1131   ret <2 x double> %shuffle
   1132 }
   1133 
   1134 define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) {
        ; Keeps element 0 of %b in lane 0 and places the scalar double %a in
        ; lane 1 (mask <2, 0>). In the assertions the scalar is in xmm0 and %b
        ; in xmm1. The SSE assertions build the result in xmm1 with movlhps
        ; and copy it back to xmm0; the AVX assertions expect vmovlhps to
        ; write xmm0 directly.
   1135 ; SSE-LABEL: insert_reg_hi_v2f64:
   1136 ; SSE:       # %bb.0:
   1137 ; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
   1138 ; SSE-NEXT:    movaps %xmm1, %xmm0
   1139 ; SSE-NEXT:    retq
   1140 ;
   1141 ; AVX-LABEL: insert_reg_hi_v2f64:
   1142 ; AVX:       # %bb.0:
   1143 ; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
   1144 ; AVX-NEXT:    retq
   1145   %v = insertelement <2 x double> undef, double %a, i32 0
   1146   %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
   1147   ret <2 x double> %shuffle
   1148 }
   1149 
   1150 define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
        ; Keeps element 0 of %b in lane 0 and places a double loaded from %ptr
        ; in lane 1 (mask <2, 0>). The assertions expect the load to be folded
        ; into a single movhpd/vmovhpd on every tested subtarget.
   1151 ; SSE-LABEL: insert_mem_hi_v2f64:
   1152 ; SSE:       # %bb.0:
   1153 ; SSE-NEXT:    movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
   1154 ; SSE-NEXT:    retq
   1155 ;
   1156 ; AVX-LABEL: insert_mem_hi_v2f64:
   1157 ; AVX:       # %bb.0:
   1158 ; AVX-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
   1159 ; AVX-NEXT:    retq
   1160   %a = load double, double* %ptr
   1161   %v = insertelement <2 x double> undef, double %a, i32 0
   1162   %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
   1163   ret <2 x double> %shuffle
   1164 }
   1165 
   1166 define <2 x double> @insert_dup_reg_v2f64(double %a) {
        ; Splats the scalar double %a into both lanes (insert into lane 0,
        ; then mask <0, 0>). The plain SSE2 run expects movlhps; every run
        ; with SSE3 or later expects movddup/vmovddup.
   1167 ; SSE2-LABEL: insert_dup_reg_v2f64:
   1168 ; SSE2:       # %bb.0:
   1169 ; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
   1170 ; SSE2-NEXT:    retq
   1171 ;
   1172 ; SSE3-LABEL: insert_dup_reg_v2f64:
   1173 ; SSE3:       # %bb.0:
   1174 ; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
   1175 ; SSE3-NEXT:    retq
   1176 ;
   1177 ; SSSE3-LABEL: insert_dup_reg_v2f64:
   1178 ; SSSE3:       # %bb.0:
   1179 ; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
   1180 ; SSSE3-NEXT:    retq
   1181 ;
   1182 ; SSE41-LABEL: insert_dup_reg_v2f64:
   1183 ; SSE41:       # %bb.0:
   1184 ; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
   1185 ; SSE41-NEXT:    retq
   1186 ;
   1187 ; AVX-LABEL: insert_dup_reg_v2f64:
   1188 ; AVX:       # %bb.0:
   1189 ; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
   1190 ; AVX-NEXT:    retq
   1191   %v = insertelement <2 x double> undef, double %a, i32 0
   1192   %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
   1193   ret <2 x double> %shuffle
   1194 }
   1195 
   1196 define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
        ; Loads a double from %ptr and splats it into both lanes (mask
        ; <0, 0>). The plain SSE2 run expects a movsd load followed by
        ; movlhps; every run with SSE3 or later expects the load folded into
        ; a single movddup/vmovddup.
   1197 ; SSE2-LABEL: insert_dup_mem_v2f64:
   1198 ; SSE2:       # %bb.0:
   1199 ; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
   1200 ; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
   1201 ; SSE2-NEXT:    retq
   1202 ;
   1203 ; SSE3-LABEL: insert_dup_mem_v2f64:
   1204 ; SSE3:       # %bb.0:
   1205 ; SSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
   1206 ; SSE3-NEXT:    retq
   1207 ;
   1208 ; SSSE3-LABEL: insert_dup_mem_v2f64:
   1209 ; SSSE3:       # %bb.0:
   1210 ; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
   1211 ; SSSE3-NEXT:    retq
   1212 ;
   1213 ; SSE41-LABEL: insert_dup_mem_v2f64:
   1214 ; SSE41:       # %bb.0:
   1215 ; SSE41-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
   1216 ; SSE41-NEXT:    retq
   1217 ;
   1218 ; AVX-LABEL: insert_dup_mem_v2f64:
   1219 ; AVX:       # %bb.0:
   1220 ; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
   1221 ; AVX-NEXT:    retq
   1222   %a = load double, double* %ptr
   1223   %v = insertelement <2 x double> undef, double %a, i32 0
   1224   %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
   1225   ret <2 x double> %shuffle
   1226 }
   1227 
   1228 define <2 x double> @insert_dup_mem128_v2f64(<2 x double>* %ptr) nounwind {
        ; Loads a full <2 x double> from %ptr and splats its element 0 into
        ; both lanes (mask <0, 0>). The plain SSE2 run expects a full movaps
        ; load followed by movlhps; every run with SSE3 or later expects a
        ; single movddup/vmovddup with a memory operand.
   1229 ; SSE2-LABEL: insert_dup_mem128_v2f64:
   1230 ; SSE2:       # %bb.0:
   1231 ; SSE2-NEXT:    movaps (%rdi), %xmm0
   1232 ; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
   1233 ; SSE2-NEXT:    retq
   1234 ;
   1235 ; SSE3-LABEL: insert_dup_mem128_v2f64:
   1236 ; SSE3:       # %bb.0:
   1237 ; SSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
   1238 ; SSE3-NEXT:    retq
   1239 ;
   1240 ; SSSE3-LABEL: insert_dup_mem128_v2f64:
   1241 ; SSSE3:       # %bb.0:
   1242 ; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
   1243 ; SSSE3-NEXT:    retq
   1244 ;
   1245 ; SSE41-LABEL: insert_dup_mem128_v2f64:
   1246 ; SSE41:       # %bb.0:
   1247 ; SSE41-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
   1248 ; SSE41-NEXT:    retq
   1249 ;
   1250 ; AVX-LABEL: insert_dup_mem128_v2f64:
   1251 ; AVX:       # %bb.0:
   1252 ; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
   1253 ; AVX-NEXT:    retq
   1254   %v = load  <2 x double>,  <2 x double>* %ptr
   1255   %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
   1256   ret <2 x double> %shuffle
   1257 }
   1258 
   1259 
   1260 define <2 x i64> @insert_dup_mem_v2i64(i64* %ptr) {
        ; Loads an i64 from %ptr (declared "align 1") and splats it into both
        ; lanes; the zeroinitializer mask selects element 0 for every lane.
        ; The SSE runs expect a movq load plus pshufd, the AVX1 run a vmovsd
        ; load plus vpermilps, and the AVX2 and AVX512VL runs a single
        ; vpbroadcastq with a (%rdi) memory operand.
   1261 ; SSE-LABEL: insert_dup_mem_v2i64:
   1262 ; SSE:       # %bb.0:
   1263 ; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
   1264 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
   1265 ; SSE-NEXT:    retq
   1266 ;
   1267 ; AVX1-LABEL: insert_dup_mem_v2i64:
   1268 ; AVX1:       # %bb.0:
   1269 ; AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
   1270 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
   1271 ; AVX1-NEXT:    retq
   1272 ;
   1273 ; AVX2-LABEL: insert_dup_mem_v2i64:
   1274 ; AVX2:       # %bb.0:
   1275 ; AVX2-NEXT:    vpbroadcastq (%rdi), %xmm0
   1276 ; AVX2-NEXT:    retq
   1277 ;
   1278 ; AVX512VL-LABEL: insert_dup_mem_v2i64:
   1279 ; AVX512VL:       # %bb.0:
   1280 ; AVX512VL-NEXT:    vpbroadcastq (%rdi), %xmm0
   1281 ; AVX512VL-NEXT:    retq
   1282   %tmp = load i64, i64* %ptr, align 1
   1283   %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0
   1284   %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <2 x i32> zeroinitializer
   1285   ret <2 x i64> %tmp2
   1286 }
   1287 
   1288 define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
        ; Loads a <2 x double> from %ptr and swaps its two lanes (mask
        ; <1, 0>). The SSE assertions expect a separate movapd load followed
        ; by shufpd; the AVX assertions expect the load folded into vpermilpd
        ; as a memory operand.
   1289 ; SSE-LABEL: shuffle_mem_v2f64_10:
   1290 ; SSE:       # %bb.0:
   1291 ; SSE-NEXT:    movapd (%rdi), %xmm0
   1292 ; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
   1293 ; SSE-NEXT:    retq
   1294 ;
   1295 ; AVX-LABEL: shuffle_mem_v2f64_10:
   1296 ; AVX:       # %bb.0:
   1297 ; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = mem[1,0]
   1298 ; AVX-NEXT:    retq
   1299 
   1300   %a = load <2 x double>, <2 x double>* %ptr
   1301   %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
   1302   ret <2 x double> %shuffle
   1303 }
   1304 
   1305 define <2 x double> @shuffle_mem_v2f64_31(<2 x double> %a, <2 x double>* %b) {
        ; Two-input shuffle with one operand loaded from memory. Mask <3, 1>
        ; puts element 1 of the loaded vector %c in lane 0 and keeps element 1
        ; of %a in lane 1. The assertions expect a single movlps/vmovlps with
        ; a memory operand.
        ; NOTE(review): the address operand is swallowed by the {{.*#+}}
        ; wildcard, so these assertions do not pin the load offset.
   1306 ; SSE-LABEL: shuffle_mem_v2f64_31:
   1307 ; SSE:       # %bb.0:
   1308 ; SSE-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
   1309 ; SSE-NEXT:    retq
   1310 ;
   1311 ; AVX-LABEL: shuffle_mem_v2f64_31:
   1312 ; AVX:       # %bb.0:
   1313 ; AVX-NEXT:    vmovlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
   1314 ; AVX-NEXT:    retq
   1315   %c = load <2 x double>, <2 x double>* %b
   1316   %f = shufflevector <2 x double> %a, <2 x double> %c, <2 x i32> <i32 3, i32 1>
   1317   ret <2 x double> %f
   1318 }
   1319