Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=AMD10H
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=BTVER1
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=BTVER2
      5 
      6 ;
      7 ; EXTRQI
      8 ;
      9 
     10 ; A length of zero is equivalent to a bit length of 64.
     ; Calls the extrqi intrinsic with length 0 and index 0. Every run expects
     ; no extrq instruction to be emitted, only the return.
     11 define <2 x i64> @extrqi_len0_idx0(<2 x i64> %a) {
     12 ; ALL-LABEL: extrqi_len0_idx0:
     13 ; ALL:       # %bb.0:
     14 ; ALL-NEXT:    retq
     15   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a, i8 0, i8 0)
     16   ret <2 x i64> %1
     17 }
     18 
     ; Calls the extrqi intrinsic with length 8 and index 16. Every run expects
     ; an extrq whose low 8 result bytes are byte 2 of xmm0 followed by zeros.
     19 define <2 x i64> @extrqi_len8_idx16(<2 x i64> %a) {
     20 ; ALL-LABEL: extrqi_len8_idx16:
     21 ; ALL:       # %bb.0:
     22 ; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
     23 ; ALL-NEXT:    retq
     24   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a, i8 8, i8 16)
     25   ret <2 x i64> %1
     26 }
     27 
     28 ; If the length + index exceeds the bottom 64 bits the result is undefined.
     ; Calls the extrqi intrinsic with length 32 and index 48. The expected
     ; extrq shuffle comment marks all 16 result bytes as undefined (u).
     29 define <2 x i64> @extrqi_len32_idx48(<2 x i64> %a) {
     30 ; ALL-LABEL: extrqi_len32_idx48:
     31 ; ALL:       # %bb.0:
     32 ; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
     33 ; ALL-NEXT:    retq
     34   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a, i8 32, i8 48)
     35   ret <2 x i64> %1
     36 }
     37 
     ; Byte shuffle: element 0 of %a0, then three zero bytes (index 16 selects
     ; from the zero vector), remaining lanes undef. The AMD10H and BTVER1 runs
     ; expect extrq; the BTVER2 run expects vpmovzxbq instead.
     38 define <16 x i8> @shuf_0zzzuuuuuuuuuuuu(<16 x i8> %a0) {
     39 ; AMD10H-LABEL: shuf_0zzzuuuuuuuuuuuu:
     40 ; AMD10H:       # %bb.0:
     41 ; AMD10H-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
     42 ; AMD10H-NEXT:    retq
     43 ;
     44 ; BTVER1-LABEL: shuf_0zzzuuuuuuuuuuuu:
     45 ; BTVER1:       # %bb.0:
     46 ; BTVER1-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
     47 ; BTVER1-NEXT:    retq
     48 ;
     49 ; BTVER2-LABEL: shuf_0zzzuuuuuuuuuuuu:
     50 ; BTVER2:       # %bb.0:
     51 ; BTVER2-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
     52 ; BTVER2-NEXT:    retq
     53   %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
     54   ret <16 x i8> %s
     55 }
     56 
     ; Byte shuffle placing bytes 0 and 1 of %a0 at the bottom of each 64-bit
     ; half with zeros above them. Expected lowering differs per run: two extrq
     ; plus punpcklqdq (AMD10H), one pshufb (BTVER1), one vpmovzxbq (BTVER2).
     57 define <16 x i8> @shuf_0zzzzzzz1zzzzzzz(<16 x i8> %a0) {
     58 ; AMD10H-LABEL: shuf_0zzzzzzz1zzzzzzz:
     59 ; AMD10H:       # %bb.0:
     60 ; AMD10H-NEXT:    movdqa %xmm0, %xmm1
     61 ; AMD10H-NEXT:    extrq {{.*#+}} xmm1 = xmm1[1],zero,zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
     62 ; AMD10H-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
     63 ; AMD10H-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
     64 ; AMD10H-NEXT:    retq
     65 ;
     66 ; BTVER1-LABEL: shuf_0zzzzzzz1zzzzzzz:
     67 ; BTVER1:       # %bb.0:
     68 ; BTVER1-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
     69 ; BTVER1-NEXT:    retq
     70 ;
     71 ; BTVER2-LABEL: shuf_0zzzzzzz1zzzzzzz:
     72 ; BTVER2:       # %bb.0:
     73 ; BTVER2-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
     74 ; BTVER2-NEXT:    retq
     75   %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
     76   ret <16 x i8> %s
     77 }
     78 
     ; Same pattern as a byte-to-quadword zero extension, but sourcing bytes 2
     ; and 3 of %a0. The BTVER2 run expects a vpsrld by 16 bits ahead of
     ; vpmovzxbq; AMD10H expects two extrq plus punpcklqdq; BTVER1 one pshufb.
     79 define <16 x i8> @shuf_2zzzzzzz3zzzzzzz(<16 x i8> %a0) {
     80 ; AMD10H-LABEL: shuf_2zzzzzzz3zzzzzzz:
     81 ; AMD10H:       # %bb.0:
     82 ; AMD10H-NEXT:    movdqa %xmm0, %xmm1
     83 ; AMD10H-NEXT:    extrq {{.*#+}} xmm1 = xmm1[3],zero,zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
     84 ; AMD10H-NEXT:    extrq {{.*#+}} xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
     85 ; AMD10H-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
     86 ; AMD10H-NEXT:    retq
     87 ;
     88 ; BTVER1-LABEL: shuf_2zzzzzzz3zzzzzzz:
     89 ; BTVER1:       # %bb.0:
     90 ; BTVER1-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
     91 ; BTVER1-NEXT:    retq
     92 ;
     93 ; BTVER2-LABEL: shuf_2zzzzzzz3zzzzzzz:
     94 ; BTVER2:       # %bb.0:
     95 ; BTVER2-NEXT:    vpsrld $16, %xmm0, %xmm0
     96 ; BTVER2-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
     97 ; BTVER2-NEXT:    retq
     98   %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 2, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
     99   ret <16 x i8> %s
    100 }
    101 
    ; Byte shuffle: bytes 0 and 1 of %a0, then two zero bytes, remaining lanes
    ; undef. The AMD10H and BTVER1 runs expect extrq; the BTVER2 run expects
    ; vpmovzxwq.
    102 define <16 x i8> @shuf_01zzuuuuuuuuuuuu(<16 x i8> %a0) {
    103 ; AMD10H-LABEL: shuf_01zzuuuuuuuuuuuu:
    104 ; AMD10H:       # %bb.0:
    105 ; AMD10H-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
    106 ; AMD10H-NEXT:    retq
    107 ;
    108 ; BTVER1-LABEL: shuf_01zzuuuuuuuuuuuu:
    109 ; BTVER1:       # %bb.0:
    110 ; BTVER1-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
    111 ; BTVER1-NEXT:    retq
    112 ;
    113 ; BTVER2-LABEL: shuf_01zzuuuuuuuuuuuu:
    114 ; BTVER2:       # %bb.0:
    115 ; BTVER2-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
    116 ; BTVER2-NEXT:    retq
    117   %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    118   ret <16 x i8> %s
    119 }
    120 
    ; Byte shuffle placing byte pairs (0,1) and (2,3) of %a0 at the bottom of
    ; each 64-bit half with zeros above. Expected lowering: two extrq plus
    ; punpcklqdq (AMD10H), one pshufb (BTVER1), one vpmovzxwq (BTVER2).
    121 define <16 x i8> @shuf_01zzzzzz23zzzzzz(<16 x i8> %a0) {
    122 ; AMD10H-LABEL: shuf_01zzzzzz23zzzzzz:
    123 ; AMD10H:       # %bb.0:
    124 ; AMD10H-NEXT:    movdqa %xmm0, %xmm1
    125 ; AMD10H-NEXT:    extrq {{.*#+}} xmm1 = xmm1[2,3],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
    126 ; AMD10H-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
    127 ; AMD10H-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    128 ; AMD10H-NEXT:    retq
    129 ;
    130 ; BTVER1-LABEL: shuf_01zzzzzz23zzzzzz:
    131 ; BTVER1:       # %bb.0:
    132 ; BTVER1-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[2,3],zero,zero,zero,zero,zero,zero
    133 ; BTVER1-NEXT:    retq
    134 ;
    135 ; BTVER2-LABEL: shuf_01zzzzzz23zzzzzz:
    136 ; BTVER2:       # %bb.0:
    137 ; BTVER2-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
    138 ; BTVER2-NEXT:    retq
    139   %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 2, i32 3, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
    140   ret <16 x i8> %s
    141 }
    142 
    ; Byte shuffle: byte 1 of %a0, then three zero bytes, remaining lanes
    ; undef. Every run expects a single extrq.
    143 define <16 x i8> @shuf_1zzzuuuuuuuuuuuu(<16 x i8> %a0) {
    144 ; ALL-LABEL: shuf_1zzzuuuuuuuuuuuu:
    145 ; ALL:       # %bb.0:
    146 ; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
    147 ; ALL-NEXT:    retq
    148   %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    149   ret <16 x i8> %s
    150 }
    151 
    ; i16 shuffle: element 1 of %a0, then three zero elements (index 8 selects
    ; from the zero vector), upper half undef. Every run expects an extrq that
    ; keeps bytes 2-3.
    152 define <8 x i16> @shuf_1zzzuuuu(<8 x i16> %a0) {
    153 ; ALL-LABEL: shuf_1zzzuuuu:
    154 ; ALL:       # %bb.0:
    155 ; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
    156 ; ALL-NEXT:    retq
    157   %s = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 8, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
    158   ret <8 x i16> %s
    159 }
    160 
    ; i16 shuffle: elements 1 and 2 of %a0, then two zero elements, upper half
    ; undef. Every run expects an extrq that keeps bytes 2-5.
    161 define <8 x i16> @shuf_12zzuuuu(<8 x i16> %a0) {
    162 ; ALL-LABEL: shuf_12zzuuuu:
    163 ; ALL:       # %bb.0:
    164 ; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[2,3,4,5],zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
    165 ; ALL-NEXT:    retq
    166   %s = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
    167   ret <8 x i16> %s
    168 }
    169 
    ; i16 shuffle: elements 0-2 of %a0, then one zero element, upper half
    ; undef. The AMD10H and BTVER1 runs expect extrq; the BTVER2 run expects a
    ; blend against a zeroed register (vpxor + vpblendw).
    170 define <8 x i16> @shuf_012zuuuu(<8 x i16> %a0) {
    171 ; AMD10H-LABEL: shuf_012zuuuu:
    172 ; AMD10H:       # %bb.0:
    173 ; AMD10H-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
    174 ; AMD10H-NEXT:    retq
    175 ;
    176 ; BTVER1-LABEL: shuf_012zuuuu:
    177 ; BTVER1:       # %bb.0:
    178 ; BTVER1-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
    179 ; BTVER1-NEXT:    retq
    180 ;
    181 ; BTVER2-LABEL: shuf_012zuuuu:
    182 ; BTVER2:       # %bb.0:
    183 ; BTVER2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    184 ; BTVER2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
    185 ; BTVER2-NEXT:    retq
    186   %s = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
    187   ret <8 x i16> %s
    188 }
    189 
    ; i16 shuffle placing elements 0 and 1 of %a0 at the bottom of each 64-bit
    ; half with zeros above. Expected lowering: two extrq plus punpcklqdq
    ; (AMD10H), one pshufb (BTVER1), one vpmovzxwq (BTVER2).
    190 define <8 x i16> @shuf_0zzz1zzz(<8 x i16> %a0) {
    191 ; AMD10H-LABEL: shuf_0zzz1zzz:
    192 ; AMD10H:       # %bb.0:
    193 ; AMD10H-NEXT:    movdqa %xmm0, %xmm1
    194 ; AMD10H-NEXT:    extrq {{.*#+}} xmm1 = xmm1[2,3],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
    195 ; AMD10H-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
    196 ; AMD10H-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    197 ; AMD10H-NEXT:    retq
    198 ;
    199 ; BTVER1-LABEL: shuf_0zzz1zzz:
    200 ; BTVER1:       # %bb.0:
    201 ; BTVER1-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[2,3],zero,zero,zero,zero,zero,zero
    202 ; BTVER1-NEXT:    retq
    203 ;
    204 ; BTVER2-LABEL: shuf_0zzz1zzz:
    205 ; BTVER2:       # %bb.0:
    206 ; BTVER2-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
    207 ; BTVER2-NEXT:    retq
    208   %s = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8>
    209   ret <8 x i16> %s
    210 }
    211 
    ; i32 shuffle interleaving elements 0 and 1 of %a0 with zeros (index 4
    ; selects from the zero vector). No run expects extrq here: AMD10H and
    ; BTVER1 expect xorps + unpcklps, BTVER2 expects vpmovzxdq.
    212 define <4 x i32> @shuf_0z1z(<4 x i32> %a0) {
    213 ; AMD10H-LABEL: shuf_0z1z:
    214 ; AMD10H:       # %bb.0:
    215 ; AMD10H-NEXT:    xorps %xmm1, %xmm1
    216 ; AMD10H-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    217 ; AMD10H-NEXT:    retq
    218 ;
    219 ; BTVER1-LABEL: shuf_0z1z:
    220 ; BTVER1:       # %bb.0:
    221 ; BTVER1-NEXT:    xorps %xmm1, %xmm1
    222 ; BTVER1-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    223 ; BTVER1-NEXT:    retq
    224 ;
    225 ; BTVER2-LABEL: shuf_0z1z:
    226 ; BTVER2:       # %bb.0:
    227 ; BTVER2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
    228 ; BTVER2-NEXT:    retq
    229   %s = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 1, i32 4>
    230   ret <4 x i32> %s
    231 }
    232 
    233 ;
    234 ; INSERTQI
    235 ;
    236 
    237 ; A length of zero is equivalent to a bit length of 64.
    ; Calls the insertqi intrinsic with length 0 and index 0. No run expects an
    ; insertq instruction; the expected code is a plain register move of xmm1
    ; into xmm0 (movaps, or vmovaps for the AVX run).
    238 define <2 x i64> @insertqi_len0_idx0(<2 x i64> %a, <2 x i64> %b) {
    239 ; AMD10H-LABEL: insertqi_len0_idx0:
    240 ; AMD10H:       # %bb.0:
    241 ; AMD10H-NEXT:    movaps %xmm1, %xmm0
    242 ; AMD10H-NEXT:    retq
    243 ;
    244 ; BTVER1-LABEL: insertqi_len0_idx0:
    245 ; BTVER1:       # %bb.0:
    246 ; BTVER1-NEXT:    movaps %xmm1, %xmm0
    247 ; BTVER1-NEXT:    retq
    248 ;
    249 ; BTVER2-LABEL: insertqi_len0_idx0:
    250 ; BTVER2:       # %bb.0:
    251 ; BTVER2-NEXT:    vmovaps %xmm1, %xmm0
    252 ; BTVER2-NEXT:    retq
    253   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a, <2 x i64> %b, i8 0, i8 0)
    254   ret <2 x i64> %1
    255 }
    256 
    ; Calls the insertqi intrinsic with length 8 and index 16. Every run
    ; expects an insertq that places byte 0 of xmm1 at byte 2 of xmm0 and
    ; leaves the upper 8 bytes undefined.
    257 define <2 x i64> @insertqi_len8_idx16(<2 x i64> %a, <2 x i64> %b) {
    258 ; ALL-LABEL: insertqi_len8_idx16:
    259 ; ALL:       # %bb.0:
    260 ; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
    261 ; ALL-NEXT:    retq
    262   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a, <2 x i64> %b, i8 8, i8 16)
    263   ret <2 x i64> %1
    264 }
    265 
    266 ; If the length + index exceeds the bottom 64 bits the result is undefined.
    ; Calls the insertqi intrinsic with length 32 and index 48. The expected
    ; insertq shuffle comment marks all 16 result bytes as undefined (u).
    267 define <2 x i64> @insertqi_len32_idx48(<2 x i64> %a, <2 x i64> %b) {
    268 ; ALL-LABEL: insertqi_len32_idx48:
    269 ; ALL:       # %bb.0:
    270 ; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
    271 ; ALL-NEXT:    retq
    272   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a, <2 x i64> %b, i8 32, i8 48)
    273   ret <2 x i64> %1
    274 }
    275 
    ; Byte shuffle whose mask only references %a0 (byte 0 duplicated into
    ; position 1; %a1 is unused). Every run expects an insertq of xmm0 into
    ; itself.
    276 define <16 x i8> @shuf_0_0_2_3_uuuu_uuuu_uuuu(<16 x i8> %a0, <16 x i8> %a1) {
    277 ; ALL-LABEL: shuf_0_0_2_3_uuuu_uuuu_uuuu:
    278 ; ALL:       # %bb.0:
    279 ; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7,u,u,u,u,u,u,u,u]
    280 ; ALL-NEXT:    retq
    281   %s = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 0, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    282   ret <16 x i8> %s
    283 }
    284 
    ; Byte shuffle inserting byte 0 of %a1 (mask index 16) at position 1 of
    ; %a0. Every run expects a single insertq.
    285 define <16 x i8> @shuf_0_16_2_3_uuuu_uuuu_uuuu(<16 x i8> %a0, <16 x i8> %a1) {
    286 ; ALL-LABEL: shuf_0_16_2_3_uuuu_uuuu_uuuu:
    287 ; ALL:       # %bb.0:
    288 ; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3,4,5,6,7,u,u,u,u,u,u,u,u]
    289 ; ALL-NEXT:    retq
    290   %s = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    291   ret <16 x i8> %s
    292 }
    293 
    ; Byte shuffle inserting byte 0 of %a1 (mask index 16) at position 0 of
    ; %a0. Every run expects a single insertq.
    294 define <16 x i8> @shuf_16_1_2_3_uuuu_uuuu_uuuu(<16 x i8> %a0, <16 x i8> %a1) {
    295 ; ALL-LABEL: shuf_16_1_2_3_uuuu_uuuu_uuuu:
    296 ; ALL:       # %bb.0:
    297 ; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7,u,u,u,u,u,u,u,u]
    298 ; ALL-NEXT:    retq
    299   %s = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    300   ret <16 x i8> %s
    301 }
    302 
    ; i16 shuffle inserting element 0 of %a1 (mask index 8) at position 1 of
    ; %a0. Every run expects a single insertq.
    303 define <8 x i16> @shuf_0823uuuu(<8 x i16> %a0, <8 x i16> %a1) {
    304 ; ALL-LABEL: shuf_0823uuuu:
    305 ; ALL:       # %bb.0:
    306 ; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1],xmm0[4,5,6,7,u,u,u,u,u,u,u,u]
    307 ; ALL-NEXT:    retq
    308   %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
    309   ret <8 x i16> %s
    310 }
    311 
    ; i16 shuffle inserting element 0 of %a1 (mask index 8) at position 2 of
    ; %a0. Every run expects a single insertq.
    312 define <8 x i16> @shuf_0183uuuu(<8 x i16> %a0, <8 x i16> %a1) {
    313 ; ALL-LABEL: shuf_0183uuuu:
    314 ; ALL:       # %bb.0:
    315 ; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[0,1],xmm0[6,7,u,u,u,u,u,u,u,u]
    316 ; ALL-NEXT:    retq
    317   %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 1, i32 8, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
    318   ret <8 x i16> %s
    319 }
    320 
    ; i16 shuffle inserting element 0 of %a1 (mask index 8) at position 3 of
    ; %a0. Every run expects a single insertq.
    321 define <8 x i16> @shuf_0128uuuu(<8 x i16> %a0, <8 x i16> %a1) {
    322 ; ALL-LABEL: shuf_0128uuuu:
    323 ; ALL:       # %bb.0:
    324 ; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[0,1],xmm0[u,u,u,u,u,u,u,u]
    325 ; ALL-NEXT:    retq
    326   %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
    327   ret <8 x i16> %s
    328 }
    329 
    ; i16 shuffle inserting elements 0-1 of %a1 (mask indices 8 and 9) at
    ; positions 1-2 of %a0. Every run expects a single insertq.
    330 define <8 x i16> @shuf_0893uuuu(<8 x i16> %a0, <8 x i16> %a1) {
    331 ; ALL-LABEL: shuf_0893uuuu:
    332 ; ALL:       # %bb.0:
    333 ; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1,2,3],xmm0[6,7,u,u,u,u,u,u,u,u]
    334 ; ALL-NEXT:    retq
    335   %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 9, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
    336   ret <8 x i16> %s
    337 }
    338 
    ; i16 shuffle inserting elements 0-2 of %a1 (mask indices 8, 9 and 10) at
    ; positions 1-3 of %a0. Every run expects a single insertq.
    339 define <8 x i16> @shuf_089Auuuu(<8 x i16> %a0, <8 x i16> %a1) {
    340 ; ALL-LABEL: shuf_089Auuuu:
    341 ; ALL:       # %bb.0:
    342 ; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1,2,3,4,5],xmm0[u,u,u,u,u,u,u,u]
    343 ; ALL-NEXT:    retq
    344   %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 undef, i32 undef, i32 undef, i32 undef>
    345   ret <8 x i16> %s
    346 }
    347 
    ; i16 shuffle inserting elements 0-1 of %a1 (mask indices 8 and 9) at
    ; positions 1-2 of %a0, with position 3 left undef. The expected insertq
    ; fills that undef lane from xmm0.
    348 define <8 x i16> @shuf_089uuuuu(<8 x i16> %a0, <8 x i16> %a1) {
    349 ; ALL-LABEL: shuf_089uuuuu:
    350 ; ALL:       # %bb.0:
    351 ; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1,2,3],xmm0[6,7,u,u,u,u,u,u,u,u]
    352 ; ALL-NEXT:    retq
    353   %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    354   ret <8 x i16> %s
    355 }
    356 
    357 ;
    358 ; Special Cases
    359 ;
    360 
    361 ; Out of range: the mask reads byte 8 of %a, which lies outside the low 64 bits.
    ; Two-source byte shuffle taking byte 8 of %a and byte 2 of %b (mask index
    ; 18), remaining lanes undef. No run expects extrq or insertq; each run
    ; expects a different sequence of generic shuffle instructions.
    362 define <16 x i8> @shuffle_8_18_uuuuuuuuuuuuuu(<16 x i8> %a, <16 x i8> %b) {
    363 ; AMD10H-LABEL: shuffle_8_18_uuuuuuuuuuuuuu:
    364 ; AMD10H:       # %bb.0:
    365 ; AMD10H-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    366 ; AMD10H-NEXT:    andpd {{.*}}(%rip), %xmm0
    367 ; AMD10H-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    368 ; AMD10H-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7]
    369 ; AMD10H-NEXT:    packuswb %xmm0, %xmm0
    370 ; AMD10H-NEXT:    retq
    371 ;
    372 ; BTVER1-LABEL: shuffle_8_18_uuuuuuuuuuuuuu:
    373 ; BTVER1:       # %bb.0:
    374 ; BTVER1-NEXT:    psrld $16, %xmm1
    375 ; BTVER1-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
    376 ; BTVER1-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    377 ; BTVER1-NEXT:    retq
    378 ;
    379 ; BTVER2-LABEL: shuffle_8_18_uuuuuuuuuuuuuu:
    380 ; BTVER2:       # %bb.0:
    381 ; BTVER2-NEXT:    vpsrld $16, %xmm1, %xmm1
    382 ; BTVER2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
    383 ; BTVER2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    384 ; BTVER2-NEXT:    retq
    385   %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    386   ret <16 x i8> %1
    387 }
    388 
    ; Byte shuffle where only positions 1 and 2 are defined, taking bytes 0 and
    ; 5 of %v (no zero lanes are selected). No run expects extrq: AMD10H
    ; expects punpcklbw/pshufd/pshuflw, the other runs a single (v)pshufb.
    389 define <16 x i8> @shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) {
    390 ; AMD10H-LABEL: shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
    391 ; AMD10H:       # %bb.0:
    392 ; AMD10H-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    393 ; AMD10H-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    394 ; AMD10H-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
    395 ; AMD10H-NEXT:    retq
    396 ;
    397 ; BTVER1-LABEL: shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
    398 ; BTVER1:       # %bb.0:
    399 ; BTVER1-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,5,5,4,4,5,5,4,4,5,5,6,6,7,7]
    400 ; BTVER1-NEXT:    retq
    401 ;
    402 ; BTVER2-LABEL: shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
    403 ; BTVER2:       # %bb.0:
    404 ; BTVER2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,5,5,4,4,5,5,4,4,5,5,6,6,7,7]
    405 ; BTVER2-NEXT:    retq
    406   %1 = shufflevector <16 x i8> %v, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 0, i32 5, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    407   ret <16 x i8> %1
    408 }
    409 
    ; Byte shuffle where positions 1 and 3 are zero (mask index 16), position 2
    ; is byte 4 of %v, and everything else is undef. No run expects extrq:
    ; AMD10H expects psrlq + pand, the other runs a single (v)pshufb.
    410 define <16 x i8> @shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) {
    411 ; AMD10H-LABEL: shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
    412 ; AMD10H:       # %bb.0:
    413 ; AMD10H-NEXT:    psrlq $16, %xmm0
    414 ; AMD10H-NEXT:    pand {{.*}}(%rip), %xmm0
    415 ; AMD10H-NEXT:    retq
    416 ;
    417 ; BTVER1-LABEL: shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
    418 ; BTVER1:       # %bb.0:
    419 ; BTVER1-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u],zero,xmm0[4],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]
    420 ; BTVER1-NEXT:    retq
    421 ;
    422 ; BTVER2-LABEL: shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
    423 ; BTVER2:       # %bb.0:
    424 ; BTVER2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[u],zero,xmm0[4],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]
    425 ; BTVER2-NEXT:    retq
    426   %1 = shufflevector <16 x i8> %v, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 16, i32 4, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    427   ret <16 x i8> %1
    428 }
    429 
    ; Byte shuffle where position 2 is byte 4 of %v, position 3 is zero (mask
    ; index 16), and everything else is undef. Every run expects an extrq that
    ; keeps bytes 2-4 and zeroes the rest of the low 8 bytes.
    430 define <16 x i8> @shuffle_uu_uu_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) {
    431 ; ALL-LABEL: shuffle_uu_uu_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
    432 ; ALL:       # %bb.0:
    433 ; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[2,3,4],zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
    434 ; ALL-NEXT:    retq
    435   %1 = shufflevector <16 x i8> %v, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 undef, i32 4, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    436   ret <16 x i8> %1
    437 }
    438 
    ; Declarations of the SSE4A intrinsics called by the extrqi_* and
    ; insertqi_* tests in this file. The two trailing i8 operands are passed as
    ; (length, index) by those tests.
    439 declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind
    440 declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind
    441