Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=BTVER1
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=BTVER2
      4 
      5 ;
      6 ; EXTRQI
      7 ;
      8 
      9 ; A length of zero is equivalent to a bit length of 64.
     10 define <2 x i64> @extrqi_len0_idx0(<2 x i64> %a) {
        ; Calls the extrqi intrinsic with both immediates set to 0.
        ; Both run lines expect a single extrq whose result keeps bytes 0-7 of
        ; xmm0 and leaves bytes 8-15 undefined.
     11 ; ALL-LABEL: extrqi_len0_idx0:
     12 ; ALL:       # BB#0:
     13 ; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,u,u,u,u,u,u,u,u]
     14 ; ALL-NEXT:    retq
     15   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a, i8 0, i8 0)
     16   ret <2 x i64> %1
     17 }
     18 
     19 define <2 x i64> @extrqi_len8_idx16(<2 x i64> %a) {
        ; Calls the extrqi intrinsic with immediates 8 and 16 (bit length and
        ; bit index, going by the function name).
        ; Both run lines expect an extrq that moves byte 2 into byte 0, zeroes
        ; bytes 1-7 and leaves bytes 8-15 undefined.
     20 ; ALL-LABEL: extrqi_len8_idx16:
     21 ; ALL:       # BB#0:
     22 ; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
     23 ; ALL-NEXT:    retq
     24   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a, i8 8, i8 16)
     25   ret <2 x i64> %1
     26 }
     27 
     28 ; If the length + index exceeds the bottom 64 bits the result is undefined.
     29 define <2 x i64> @extrqi_len32_idx48(<2 x i64> %a) {
        ; Calls the extrqi intrinsic with immediates 32 and 48, whose sum (80)
        ; is larger than 64.
        ; Both run lines still expect an extrq, annotated with all sixteen
        ; result bytes undefined.
     30 ; ALL-LABEL: extrqi_len32_idx48:
     31 ; ALL:       # BB#0:
     32 ; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
     33 ; ALL-NEXT:    retq
     34   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a, i8 32, i8 48)
     35   ret <2 x i64> %1
     36 }
     37 
     38 define <16 x i8> @shuf_0zzzuuuuuuuuuuuu(<16 x i8> %a0) {
        ; Shuffle mask: byte 0 of %a0, then three bytes from the zeroinitializer
        ; operand (mask index 16), then twelve undef lanes.
        ; The +ssse3,+sse4a run expects a single extrq; the +avx,+sse4a run
        ; expects vpmovzxbq instead.
     39 ; BTVER1-LABEL: shuf_0zzzuuuuuuuuuuuu:
     40 ; BTVER1:       # BB#0:
     41 ; BTVER1-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
     42 ; BTVER1-NEXT:    retq
     43 ;
     44 ; BTVER2-LABEL: shuf_0zzzuuuuuuuuuuuu:
     45 ; BTVER2:       # BB#0:
     46 ; BTVER2-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
     47 ; BTVER2-NEXT:    retq
     48   %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
     49   ret <16 x i8> %s
     50 }
     51 
     52 define <16 x i8> @shuf_0zzzzzzz1zzzzzzz(<16 x i8> %a0) {
        ; Shuffle mask: byte 0 of %a0 followed by seven zero bytes, then byte 1
        ; followed by seven zero bytes (mask index 16 selects a zero byte).
        ; The +ssse3,+sse4a run expects a register copy, two extrq and a
        ; punpcklqdq; the +avx,+sse4a run expects a single vpmovzxbq.
     53 ; BTVER1-LABEL: shuf_0zzzzzzz1zzzzzzz:
     54 ; BTVER1:       # BB#0:
     55 ; BTVER1-NEXT:    movaps %xmm0, %xmm1
     56 ; BTVER1-NEXT:    extrq {{.*#+}} xmm1 = xmm1[1],zero,zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
     57 ; BTVER1-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
     58 ; BTVER1-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
     59 ; BTVER1-NEXT:    retq
     60 ;
     61 ; BTVER2-LABEL: shuf_0zzzzzzz1zzzzzzz:
     62 ; BTVER2:       # BB#0:
     63 ; BTVER2-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
     64 ; BTVER2-NEXT:    retq
     65   %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
     66   ret <16 x i8> %s
     67 }
     68 
     69 define <16 x i8> @shuf_2zzzzzzz3zzzzzzz(<16 x i8> %a0) {
        ; Shuffle mask: byte 2 of %a0 followed by seven zero bytes, then byte 3
        ; followed by seven zero bytes (mask index 16 selects a zero byte).
        ; The +ssse3,+sse4a run expects a register copy, two extrq and a
        ; punpcklqdq; the +avx,+sse4a run expects vpsrld $16 then vpmovzxbq.
     70 ; BTVER1-LABEL: shuf_2zzzzzzz3zzzzzzz:
     71 ; BTVER1:       # BB#0:
     72 ; BTVER1-NEXT:    movaps %xmm0, %xmm1
     73 ; BTVER1-NEXT:    extrq {{.*#+}} xmm1 = xmm1[3],zero,zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
     74 ; BTVER1-NEXT:    extrq {{.*#+}} xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
     75 ; BTVER1-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
     76 ; BTVER1-NEXT:    retq
     77 ;
     78 ; BTVER2-LABEL: shuf_2zzzzzzz3zzzzzzz:
     79 ; BTVER2:       # BB#0:
     80 ; BTVER2-NEXT:    vpsrld $16, %xmm0, %xmm0
     81 ; BTVER2-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
     82 ; BTVER2-NEXT:    retq
     83   %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 2, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
     84   ret <16 x i8> %s
     85 }
     86 
     87 define <16 x i8> @shuf_01zzuuuuuuuuuuuu(<16 x i8> %a0) {
        ; Shuffle mask: bytes 0-1 of %a0, then two bytes from the zeroinitializer
        ; operand (mask index 16), then twelve undef lanes.
        ; The +ssse3,+sse4a run expects a single extrq; the +avx,+sse4a run
        ; expects vpmovzxwq instead.
     88 ; BTVER1-LABEL: shuf_01zzuuuuuuuuuuuu:
     89 ; BTVER1:       # BB#0:
     90 ; BTVER1-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
     91 ; BTVER1-NEXT:    retq
     92 ;
     93 ; BTVER2-LABEL: shuf_01zzuuuuuuuuuuuu:
     94 ; BTVER2:       # BB#0:
     95 ; BTVER2-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
     96 ; BTVER2-NEXT:    retq
     97   %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
     98   ret <16 x i8> %s
     99 }
    100 
    101 define <16 x i8> @shuf_01zzzzzz23zzzzzz(<16 x i8> %a0) {
        ; Shuffle mask: bytes 0-1 of %a0 followed by six zero bytes, then bytes
        ; 2-3 followed by six zero bytes (mask index 16 selects a zero byte).
        ; The +ssse3,+sse4a run expects a register copy, two extrq and a
        ; punpcklqdq; the +avx,+sse4a run expects a single vpmovzxwq.
    102 ; BTVER1-LABEL: shuf_01zzzzzz23zzzzzz:
    103 ; BTVER1:       # BB#0:
    104 ; BTVER1-NEXT:    movaps %xmm0, %xmm1
    105 ; BTVER1-NEXT:    extrq {{.*#+}} xmm1 = xmm1[2,3],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
    106 ; BTVER1-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
    107 ; BTVER1-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    108 ; BTVER1-NEXT:    retq
    109 ;
    110 ; BTVER2-LABEL: shuf_01zzzzzz23zzzzzz:
    111 ; BTVER2:       # BB#0:
    112 ; BTVER2-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
    113 ; BTVER2-NEXT:    retq
    114   %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 2, i32 3, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
    115   ret <16 x i8> %s
    116 }
    117 
    118 define <16 x i8> @shuf_1zzzuuuuuuuuuuuu(<16 x i8> %a0) {
        ; Shuffle mask: byte 1 of %a0, then three bytes from the zeroinitializer
        ; operand (mask index 16), then twelve undef lanes.
        ; Both run lines expect a single extrq that moves byte 1 into byte 0
        ; and zeroes bytes 1-7.
    119 ; ALL-LABEL: shuf_1zzzuuuuuuuuuuuu:
    120 ; ALL:       # BB#0:
    121 ; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
    122 ; ALL-NEXT:    retq
    123   %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    124   ret <16 x i8> %s
    125 }
    126 
    127 define <8 x i16> @shuf_1zzzuuuu(<8 x i16> %a0) {
        ; Shuffle mask on i16 lanes: word 1 of %a0, then three words from the
        ; zeroinitializer operand (mask index 8), then four undef lanes.
        ; Both run lines expect a single extrq that moves bytes 2-3 to the
        ; bottom and zeroes bytes 2-7.
    128 ; ALL-LABEL: shuf_1zzzuuuu:
    129 ; ALL:       # BB#0:
    130 ; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
    131 ; ALL-NEXT:    retq
    132   %s = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 8, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
    133   ret <8 x i16> %s
    134 }
    135 
    136 define <8 x i16> @shuf_12zzuuuu(<8 x i16> %a0) {
        ; Shuffle mask on i16 lanes: words 1-2 of %a0, then two words from the
        ; zeroinitializer operand (mask index 8), then four undef lanes.
        ; Both run lines expect a single extrq that moves bytes 2-5 to the
        ; bottom and zeroes bytes 4-7.
    137 ; ALL-LABEL: shuf_12zzuuuu:
    138 ; ALL:       # BB#0:
    139 ; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[2,3,4,5],zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
    140 ; ALL-NEXT:    retq
    141   %s = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
    142   ret <8 x i16> %s
    143 }
    144 
    145 define <8 x i16> @shuf_012zuuuu(<8 x i16> %a0) {
        ; Shuffle mask on i16 lanes: words 0-2 of %a0, then one word from the
        ; zeroinitializer operand (mask index 8), then four undef lanes.
        ; Both run lines expect a single extrq that keeps bytes 0-5 and zeroes
        ; bytes 6-7.
    146 ; ALL-LABEL: shuf_012zuuuu:
    147 ; ALL:       # BB#0:
    148 ; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
    149 ; ALL-NEXT:    retq
    150   %s = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
    151   ret <8 x i16> %s
    152 }
    153 
    154 define <8 x i16> @shuf_0zzz1zzz(<8 x i16> %a0) {
        ; Shuffle mask on i16 lanes: word 0 of %a0 followed by three zero words,
        ; then word 1 followed by three zero words (mask index 8 selects zero).
        ; The +ssse3,+sse4a run expects a register copy, two extrq and a
        ; punpcklqdq; the +avx,+sse4a run expects a single vpmovzxwq.
    155 ; BTVER1-LABEL: shuf_0zzz1zzz:
    156 ; BTVER1:       # BB#0:
    157 ; BTVER1-NEXT:    movaps %xmm0, %xmm1
    158 ; BTVER1-NEXT:    extrq {{.*#+}} xmm1 = xmm1[2,3],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
    159 ; BTVER1-NEXT:    extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
    160 ; BTVER1-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    161 ; BTVER1-NEXT:    retq
    162 ;
    163 ; BTVER2-LABEL: shuf_0zzz1zzz:
    164 ; BTVER2:       # BB#0:
    165 ; BTVER2-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
    166 ; BTVER2-NEXT:    retq
    167   %s = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8>
    168   ret <8 x i16> %s
    169 }
    170 
    171 define <4 x i32> @shuf_0z1z(<4 x i32> %a0) {
        ; Shuffle mask on i32 lanes: dword 0 of %a0, zero, dword 1, zero (mask
        ; index 4 selects from the zeroinitializer operand).
        ; Neither run expects extrq here: the +ssse3,+sse4a run expects pxor
        ; plus punpckldq, and the +avx,+sse4a run expects vpmovzxdq.
    172 ; BTVER1-LABEL: shuf_0z1z:
    173 ; BTVER1:       # BB#0:
    174 ; BTVER1-NEXT:    pxor %xmm1, %xmm1
    175 ; BTVER1-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    176 ; BTVER1-NEXT:    retq
    177 ;
    178 ; BTVER2-LABEL: shuf_0z1z:
    179 ; BTVER2:       # BB#0:
    180 ; BTVER2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
    181 ; BTVER2-NEXT:    retq
    182   %s = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 1, i32 4>
    183   ret <4 x i32> %s
    184 }
    185 
    186 ;
    187 ; INSERTQI
    188 ;
    189 
    190 ; A length of zero is equivalent to a bit length of 64.
    191 define <2 x i64> @insertqi_len0_idx0(<2 x i64> %a, <2 x i64> %b) {
        ; Calls the insertqi intrinsic with both immediates set to 0.
        ; Both run lines expect a single insertq whose result takes bytes 0-7
        ; from xmm1 and leaves bytes 8-15 undefined.
    192 ; ALL-LABEL: insertqi_len0_idx0:
    193 ; ALL:       # BB#0:
    194 ; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6,7],xmm0[u,u,u,u,u,u,u,u]
    195 ; ALL-NEXT:    retq
    196   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a, <2 x i64> %b, i8 0, i8 0)
    197   ret <2 x i64> %1
    198 }
    199 
    200 define <2 x i64> @insertqi_len8_idx16(<2 x i64> %a, <2 x i64> %b) {
        ; Calls the insertqi intrinsic with immediates 8 and 16 (bit length and
        ; bit index, going by the function name).
        ; Both run lines expect an insertq that replaces byte 2 of xmm0 with
        ; byte 0 of xmm1, keeps the other low bytes, and leaves bytes 8-15
        ; undefined.
    201 ; ALL-LABEL: insertqi_len8_idx16:
    202 ; ALL:       # BB#0:
    203 ; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
    204 ; ALL-NEXT:    retq
    205   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a, <2 x i64> %b, i8 8, i8 16)
    206   ret <2 x i64> %1
    207 }
    208 
    209 ; If the length + index exceeds the bottom 64 bits the result is undefined.
    210 define <2 x i64> @insertqi_len32_idx48(<2 x i64> %a, <2 x i64> %b) {
        ; Calls the insertqi intrinsic with immediates 32 and 48, whose sum (80)
        ; is larger than 64.
        ; Both run lines still expect an insertq, annotated with all sixteen
        ; result bytes undefined.
    211 ; ALL-LABEL: insertqi_len32_idx48:
    212 ; ALL:       # BB#0:
    213 ; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
    214 ; ALL-NEXT:    retq
    215   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a, <2 x i64> %b, i8 32, i8 48)
    216   ret <2 x i64> %1
    217 }
    218 
    219 define <16 x i8> @shuf_0_0_2_3_uuuu_uuuu_uuuu(<16 x i8> %a0, <16 x i8> %a1) {
        ; Shuffle mask: bytes 0, 0, 2, 3 of %a0, then twelve undef lanes; no
        ; mask index selects from %a1.
        ; Both run lines expect a single insertq that reads only xmm0 and
        ; duplicates byte 0 into byte 1.
    220 ; ALL-LABEL: shuf_0_0_2_3_uuuu_uuuu_uuuu:
    221 ; ALL:       # BB#0:
    222 ; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7,u,u,u,u,u,u,u,u]
    223 ; ALL-NEXT:    retq
    224   %s = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 0, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    225   ret <16 x i8> %s
    226 }
    227 
    228 define <16 x i8> @shuf_0_16_2_3_uuuu_uuuu_uuuu(<16 x i8> %a0, <16 x i8> %a1) {
        ; Shuffle mask: bytes 0, 2, 3 of %a0 with lane 1 taken from byte 0 of
        ; %a1 (mask index 16); the remaining twelve lanes are undef.
        ; Both run lines expect a single insertq that places byte 0 of xmm1
        ; into byte 1 of xmm0.
    229 ; ALL-LABEL: shuf_0_16_2_3_uuuu_uuuu_uuuu:
    230 ; ALL:       # BB#0:
    231 ; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3,4,5,6,7,u,u,u,u,u,u,u,u]
    232 ; ALL-NEXT:    retq
    233   %s = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    234   ret <16 x i8> %s
    235 }
    236 
    237 define <16 x i8> @shuf_16_1_2_3_uuuu_uuuu_uuuu(<16 x i8> %a0, <16 x i8> %a1) {
        ; Shuffle mask: lane 0 taken from byte 0 of %a1 (mask index 16), then
        ; bytes 1-3 of %a0; the remaining twelve lanes are undef.
        ; Both run lines expect a single insertq that places byte 0 of xmm1
        ; into byte 0 of xmm0.
    238 ; ALL-LABEL: shuf_16_1_2_3_uuuu_uuuu_uuuu:
    239 ; ALL:       # BB#0:
    240 ; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7,u,u,u,u,u,u,u,u]
    241 ; ALL-NEXT:    retq
    242   %s = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    243   ret <16 x i8> %s
    244 }
    245 
    246 define <8 x i16> @shuf_0823uuuu(<8 x i16> %a0, <8 x i16> %a1) {
        ; Shuffle mask on i16 lanes: words 0, 2, 3 of %a0 with lane 1 taken
        ; from word 0 of %a1 (mask index 8); the upper four lanes are undef.
        ; Both run lines expect a single insertq that places bytes 0-1 of xmm1
        ; into bytes 2-3 of xmm0.
    247 ; ALL-LABEL: shuf_0823uuuu:
    248 ; ALL:       # BB#0:
    249 ; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1],xmm0[4,5,6,7,u,u,u,u,u,u,u,u]
    250 ; ALL-NEXT:    retq
    251   %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
    252   ret <8 x i16> %s
    253 }
    254 
    255 define <8 x i16> @shuf_0183uuuu(<8 x i16> %a0, <8 x i16> %a1) {
        ; Shuffle mask on i16 lanes: words 0, 1, 3 of %a0 with lane 2 taken
        ; from word 0 of %a1 (mask index 8); the upper four lanes are undef.
        ; Both run lines expect a single insertq that places bytes 0-1 of xmm1
        ; into bytes 4-5 of xmm0.
    256 ; ALL-LABEL: shuf_0183uuuu:
    257 ; ALL:       # BB#0:
    258 ; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[0,1],xmm0[6,7,u,u,u,u,u,u,u,u]
    259 ; ALL-NEXT:    retq
    260   %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 1, i32 8, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
    261   ret <8 x i16> %s
    262 }
    263 
    264 define <8 x i16> @shuf_0128uuuu(<8 x i16> %a0, <8 x i16> %a1) {
        ; Shuffle mask on i16 lanes: words 0-2 of %a0 with lane 3 taken from
        ; word 0 of %a1 (mask index 8); the upper four lanes are undef.
        ; Both run lines expect a single insertq that places bytes 0-1 of xmm1
        ; into bytes 6-7 of xmm0.
    265 ; ALL-LABEL: shuf_0128uuuu:
    266 ; ALL:       # BB#0:
    267 ; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[0,1],xmm0[u,u,u,u,u,u,u,u]
    268 ; ALL-NEXT:    retq
    269   %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
    270   ret <8 x i16> %s
    271 }
    272 
    273 define <8 x i16> @shuf_0893uuuu(<8 x i16> %a0, <8 x i16> %a1) {
        ; Shuffle mask on i16 lanes: words 0 and 3 of %a0 with lanes 1-2 taken
        ; from words 0-1 of %a1 (mask indices 8, 9); upper four lanes undef.
        ; Both run lines expect a single insertq that places bytes 0-3 of xmm1
        ; into bytes 2-5 of xmm0.
    274 ; ALL-LABEL: shuf_0893uuuu:
    275 ; ALL:       # BB#0:
    276 ; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1,2,3],xmm0[6,7,u,u,u,u,u,u,u,u]
    277 ; ALL-NEXT:    retq
    278   %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 9, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
    279   ret <8 x i16> %s
    280 }
    281 
    282 define <8 x i16> @shuf_089Auuuu(<8 x i16> %a0, <8 x i16> %a1) {
        ; Shuffle mask on i16 lanes: word 0 of %a0 with lanes 1-3 taken from
        ; words 0-2 of %a1 (mask indices 8, 9, 10; the "A" in the name is
        ; hexadecimal 10); the upper four lanes are undef.
        ; Both run lines expect a single insertq that places bytes 0-5 of xmm1
        ; into bytes 2-7 of xmm0.
    283 ; ALL-LABEL: shuf_089Auuuu:
    284 ; ALL:       # BB#0:
    285 ; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1,2,3,4,5],xmm0[u,u,u,u,u,u,u,u]
    286 ; ALL-NEXT:    retq
    287   %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 undef, i32 undef, i32 undef, i32 undef>
    288   ret <8 x i16> %s
    289 }
    290 
    291 define <8 x i16> @shuf_089uuuuu(<8 x i16> %a0, <8 x i16> %a1) {
        ; Shuffle mask on i16 lanes: word 0 of %a0, then words 0-1 of %a1 (mask
        ; indices 8, 9); lane 3 and the upper four lanes are undef.
        ; Both run lines expect a single insertq that places bytes 0-3 of xmm1
        ; into bytes 2-5 of xmm0; the undef lane 3 ends up holding bytes 6-7
        ; of xmm0.
    292 ; ALL-LABEL: shuf_089uuuuu:
    293 ; ALL:       # BB#0:
    294 ; ALL-NEXT:    insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1,2,3],xmm0[6,7,u,u,u,u,u,u,u,u]
    295 ; ALL-NEXT:    retq
    296   %s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    297   ret <8 x i16> %s
    298 }
    299 
    300 ;
    301 ; Special Cases
    302 ;
    303 
    304 ; Out of range: mask index 8 selects a byte from the upper 64 bits of %a.
    305 define <16 x i8> @shuffle_8_18_uuuuuuuuuuuuuu(<16 x i8> %a, <16 x i8> %b) {
        ; Shuffle mask: byte 8 of %a, then byte 2 of %b (mask index 18), then
        ; fourteen undef lanes.
        ; Neither run expects extrq or insertq: both expect a 16-bit right
        ; shift of xmm1, a dword shuffle of xmm0 and a byte unpack (the
        ; +avx,+sse4a run uses the v-prefixed forms).
    306 ; BTVER1-LABEL: shuffle_8_18_uuuuuuuuuuuuuu:
    307 ; BTVER1:       # BB#0:
    308 ; BTVER1-NEXT:    psrld $16, %xmm1
    309 ; BTVER1-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
    310 ; BTVER1-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    311 ; BTVER1-NEXT:    retq
    312 ;
    313 ; BTVER2-LABEL: shuffle_8_18_uuuuuuuuuuuuuu:
    314 ; BTVER2:       # BB#0:
    315 ; BTVER2-NEXT:    vpsrld $16, %xmm1, %xmm1
    316 ; BTVER2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
    317 ; BTVER2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    318 ; BTVER2-NEXT:    retq
    319   %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    320   ret <16 x i8> %1
    321 }
    322 
    323 define <16 x i8> @shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) {
        ; Shuffle mask: lane 0 undef, lane 1 = byte 0 of %v, lane 2 = byte 5 of
        ; %v, remaining thirteen lanes undef; no lane selects a zero byte.
        ; Neither run expects extrq or insertq: both expect a single byte
        ; shuffle (pshufb, or vpshufb in the +avx,+sse4a run).
    324 ; BTVER1-LABEL: shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
    325 ; BTVER1:       # BB#0:
    326 ; BTVER1-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,5,5,4,4,5,5,4,4,5,5,6,6,7,7]
    327 ; BTVER1-NEXT:    retq
    328 ;
    329 ; BTVER2-LABEL: shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
    330 ; BTVER2:       # BB#0:
    331 ; BTVER2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,5,5,4,4,5,5,4,4,5,5,6,6,7,7]
    332 ; BTVER2-NEXT:    retq
    333   %1 = shufflevector <16 x i8> %v, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 0, i32 5, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    334   ret <16 x i8> %1
    335 }
    336 
    337 define <16 x i8> @shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) {
        ; Shuffle mask: lane 0 undef, lane 1 zero (mask index 16), lane 2 =
        ; byte 4 of %v, lane 3 zero, remaining twelve lanes undef.
        ; Neither run expects extrq or insertq: both expect a single byte
        ; shuffle (pshufb, or vpshufb in the +avx,+sse4a run).
    338 ; BTVER1-LABEL: shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
    339 ; BTVER1:       # BB#0:
    340 ; BTVER1-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u],zero,xmm0[4],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]
    341 ; BTVER1-NEXT:    retq
    342 ;
    343 ; BTVER2-LABEL: shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
    344 ; BTVER2:       # BB#0:
    345 ; BTVER2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[u],zero,xmm0[4],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]
    346 ; BTVER2-NEXT:    retq
    347   %1 = shufflevector <16 x i8> %v, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 16, i32 4, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    348   ret <16 x i8> %1
    349 }
    350 
    351 define <16 x i8> @shuffle_uu_uu_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) {
        ; Shuffle mask: lanes 0-1 undef, lane 2 = byte 4 of %v, lane 3 zero
        ; (mask index 16), remaining twelve lanes undef.
        ; Both run lines expect a single extrq that moves bytes 2-4 to the
        ; bottom and zeroes bytes 3-7, so the undef lanes 0-1 receive bytes
        ; 2-3 and byte 4 lands in lane 2.
    352 ; ALL-LABEL: shuffle_uu_uu_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
    353 ; ALL:       # BB#0:
    354 ; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[2,3,4],zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
    355 ; ALL-NEXT:    retq
    356   %1 = shufflevector <16 x i8> %v, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 undef, i32 4, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    357   ret <16 x i8> %1
    358 }
    359 
    360 declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind
    361 declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind
    362