Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefix=X86
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefix=X64
      4 
      5 ; These are tests for SSE3 codegen.
      6 
      7 ; Test for v8xi16 lowering where we extract the first element of the vector and
      8 ; place it in the second element of the result.
        ; Result element 0 comes from the constant second operand (mask index 8, the
        ; value 1); result elements 2-7 are undef in the mask.
      9 
     10 define void @t0(<8 x i16>* %dest, <8 x i16>* %old) nounwind {
     11 ; X86-LABEL: t0:
     12 ; X86:       # %bb.0: # %entry
     13 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
     14 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
     15 ; X86-NEXT:    movl $1, %edx
     16 ; X86-NEXT:    movd %edx, %xmm0
     17 ; X86-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
     18 ; X86-NEXT:    movdqa %xmm0, (%eax)
     19 ; X86-NEXT:    retl
     20 ;
     21 ; X64-LABEL: t0:
     22 ; X64:       # %bb.0: # %entry
     23 ; X64-NEXT:    movl $1, %eax
     24 ; X64-NEXT:    movd %eax, %xmm0
     25 ; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
     26 ; X64-NEXT:    movdqa %xmm0, (%rdi)
     27 ; X64-NEXT:    retq
     28 entry:
     29 	%tmp3 = load <8 x i16>, <8 x i16>* %old
     30 	%tmp6 = shufflevector <8 x i16> %tmp3,
     31                 <8 x i16> < i16 1, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef >,
     32                 <8 x i32> < i32 8, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef  >
     33 	store <8 x i16> %tmp6, <8 x i16>* %dest
     34 	ret void
     35 }
     36 
        ; t1: two-input v8i16 shuffle of vectors loaded from memory. Result element 0
        ; is B[0] (mask index 8); elements 1-7 are A[1..7]. The expected lowering is
        ; an and/andn/or blend against a constant mask.
     37 define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) nounwind {
     38 ; X86-LABEL: t1:
     39 ; X86:       # %bb.0:
     40 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
     41 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
     42 ; X86-NEXT:    movaps {{.*#+}} xmm0 = [0,65535,65535,65535,65535,65535,65535,65535]
     43 ; X86-NEXT:    movaps %xmm0, %xmm1
     44 ; X86-NEXT:    andnps (%ecx), %xmm1
     45 ; X86-NEXT:    andps (%eax), %xmm0
     46 ; X86-NEXT:    orps %xmm1, %xmm0
     47 ; X86-NEXT:    retl
     48 ;
     49 ; X64-LABEL: t1:
     50 ; X64:       # %bb.0:
     51 ; X64-NEXT:    movaps {{.*#+}} xmm0 = [0,65535,65535,65535,65535,65535,65535,65535]
     52 ; X64-NEXT:    movaps %xmm0, %xmm1
     53 ; X64-NEXT:    andnps (%rsi), %xmm1
     54 ; X64-NEXT:    andps (%rdi), %xmm0
     55 ; X64-NEXT:    orps %xmm1, %xmm0
     56 ; X64-NEXT:    retq
     57 	%tmp1 = load <8 x i16>, <8 x i16>* %A
     58 	%tmp2 = load <8 x i16>, <8 x i16>* %B
     59 	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
     60 	ret <8 x i16> %tmp3
     61 
     62 }
     63 
        ; t2: two-input v8i16 shuffle on register operands. Result elements 0 and 3
        ; are both B[1] (mask index 9); all other elements stay in place from A.
        ; The expected lowering is pshuflw on B followed by a pand/pandn/por blend.
     64 define <8 x i16> @t2(<8 x i16> %A, <8 x i16> %B) nounwind {
     65 ; X86-LABEL: t2:
     66 ; X86:       # %bb.0:
     67 ; X86-NEXT:    movdqa {{.*#+}} xmm2 = [0,65535,65535,0,65535,65535,65535,65535]
     68 ; X86-NEXT:    pand %xmm2, %xmm0
     69 ; X86-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[1,1,2,1,4,5,6,7]
     70 ; X86-NEXT:    pandn %xmm1, %xmm2
     71 ; X86-NEXT:    por %xmm2, %xmm0
     72 ; X86-NEXT:    retl
     73 ;
     74 ; X64-LABEL: t2:
     75 ; X64:       # %bb.0:
     76 ; X64-NEXT:    movdqa {{.*#+}} xmm2 = [0,65535,65535,0,65535,65535,65535,65535]
     77 ; X64-NEXT:    pand %xmm2, %xmm0
     78 ; X64-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[1,1,2,1,4,5,6,7]
     79 ; X64-NEXT:    pandn %xmm1, %xmm2
     80 ; X64-NEXT:    por %xmm2, %xmm0
     81 ; X64-NEXT:    retq
     82 	%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7 >
     83 	ret <8 x i16> %tmp
     84 }
     85 
        ; t3: v8i16 shuffle whose two operands are both %A (%B is unused), so mask
        ; indices 8 and 13 select A[0] and A[5]. The result is
        ; A[0],A[3],A[2],A[5],A[7],A[6],A[5],A[4], and the expected lowering is a
        ; single-register pshufd/pshufhw/pshuflw sequence.
     86 define <8 x i16> @t3(<8 x i16> %A, <8 x i16> %B) nounwind {
     87 ; X86-LABEL: t3:
     88 ; X86:       # %bb.0:
     89 ; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
     90 ; X86-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,5]
     91 ; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
     92 ; X86-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
     93 ; X86-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
     94 ; X86-NEXT:    retl
     95 ;
     96 ; X64-LABEL: t3:
     97 ; X64:       # %bb.0:
     98 ; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
     99 ; X64-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,5]
    100 ; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
    101 ; X64-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
    102 ; X64-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
    103 ; X64-NEXT:    retq
    104 	%tmp = shufflevector <8 x i16> %A, <8 x i16> %A, <8 x i32> < i32 8, i32 3, i32 2, i32 13, i32 7, i32 6, i32 5, i32 4 >
    105 	ret <8 x i16> %tmp
    106 }
    107 
        ; t4: v8i16 shuffle whose mask only uses indices below 8, so every result
        ; element comes from %A even though %B is passed as the second operand.
        ; The mask moves elements between the low and high 64-bit halves (A[7] into
        ; slot 1, A[1] into slot 4); the expected lowering is pshufd/pshufhw only.
    108 define <8 x i16> @t4(<8 x i16> %A, <8 x i16> %B) nounwind {
    109 ; X86-LABEL: t4:
    110 ; X86:       # %bb.0:
    111 ; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
    112 ; X86-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
    113 ; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
    114 ; X86-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,7,4,7]
    115 ; X86-NEXT:    retl
    116 ;
    117 ; X64-LABEL: t4:
    118 ; X64:       # %bb.0:
    119 ; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
    120 ; X64-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
    121 ; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
    122 ; X64-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,7,4,7]
    123 ; X64-NEXT:    retq
    124 	%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 7, i32 2, i32 3, i32 1, i32 5, i32 6, i32 5 >
    125 	ret <8 x i16> %tmp
    126 }
    127 
        ; t5: v8i16 shuffle that interleaves adjacent element pairs from the low
        ; halves of both inputs: B[0..1], A[0..1], B[2..3], A[2..3]. The expected
        ; lowering is a single unpcklps with B as the first operand.
    128 define <8 x i16> @t5(<8 x i16> %A, <8 x i16> %B) nounwind {
    129 ; X86-LABEL: t5:
    130 ; X86:       # %bb.0:
    131 ; X86-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
    132 ; X86-NEXT:    movaps %xmm1, %xmm0
    133 ; X86-NEXT:    retl
    134 ;
    135 ; X64-LABEL: t5:
    136 ; X64:       # %bb.0:
    137 ; X64-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
    138 ; X64-NEXT:    movaps %xmm1, %xmm0
    139 ; X64-NEXT:    retq
    140 	%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 0, i32 1, i32 10, i32 11, i32 2, i32 3 >
    141 	ret <8 x i16> %tmp
    142 }
    143 
        ; t6: v8i16 shuffle that takes the first two elements from B (mask indices
        ; 8 and 9) and leaves elements 2-7 of A in place. The expected lowering is
        ; a single movss.
    144 define <8 x i16> @t6(<8 x i16> %A, <8 x i16> %B) nounwind {
    145 ; X86-LABEL: t6:
    146 ; X86:       # %bb.0:
    147 ; X86-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
    148 ; X86-NEXT:    retl
    149 ;
    150 ; X64-LABEL: t6:
    151 ; X64:       # %bb.0:
    152 ; X64-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
    153 ; X64-NEXT:    retq
    154 	%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
    155 	ret <8 x i16> %tmp
    156 }
    157 
        ; t7: v8i16 shuffle that only reads %A and never moves an element between
        ; the low four and high four lanes. The expected lowering is one pshuflw
        ; followed by one pshufhw.
    158 define <8 x i16> @t7(<8 x i16> %A, <8 x i16> %B) nounwind {
    159 ; X86-LABEL: t7:
    160 ; X86:       # %bb.0:
    161 ; X86-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
    162 ; X86-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7]
    163 ; X86-NEXT:    retl
    164 ;
    165 ; X64-LABEL: t7:
    166 ; X64:       # %bb.0:
    167 ; X64-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
    168 ; X64-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7]
    169 ; X64-NEXT:    retq
    170 	%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 0, i32 3, i32 2, i32 4, i32 6, i32 4, i32 7 >
    171 	ret <8 x i16> %tmp
    172 }
    173 
        ; t8: a permutation written as scalar extractelement/insertelement chains
        ; instead of a shufflevector. The <2 x i64> loaded from %A is viewed as
        ; <8 x i16>, rebuilt in element order 2,1,0,3,6,5,4,7, and stored to %res.
        ; The expected lowering is pshuflw (from memory) followed by pshufhw.
    174 define void @t8(<2 x i64>* %res, <2 x i64>* %A) nounwind {
    175 ; X86-LABEL: t8:
    176 ; X86:       # %bb.0:
    177 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
    178 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    179 ; X86-NEXT:    pshuflw {{.*#+}} xmm0 = mem[2,1,0,3,4,5,6,7]
    180 ; X86-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
    181 ; X86-NEXT:    movdqa %xmm0, (%eax)
    182 ; X86-NEXT:    retl
    183 ;
    184 ; X64-LABEL: t8:
    185 ; X64:       # %bb.0:
    186 ; X64-NEXT:    pshuflw {{.*#+}} xmm0 = mem[2,1,0,3,4,5,6,7]
    187 ; X64-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
    188 ; X64-NEXT:    movdqa %xmm0, (%rdi)
    189 ; X64-NEXT:    retq
    190 	%tmp = load <2 x i64>, <2 x i64>* %A
    191 	%tmp.upgrd.1 = bitcast <2 x i64> %tmp to <8 x i16>
    192 	%tmp0 = extractelement <8 x i16> %tmp.upgrd.1, i32 0
    193 	%tmp1 = extractelement <8 x i16> %tmp.upgrd.1, i32 1
    194 	%tmp2 = extractelement <8 x i16> %tmp.upgrd.1, i32 2
    195 	%tmp3 = extractelement <8 x i16> %tmp.upgrd.1, i32 3
    196 	%tmp4 = extractelement <8 x i16> %tmp.upgrd.1, i32 4
    197 	%tmp5 = extractelement <8 x i16> %tmp.upgrd.1, i32 5
    198 	%tmp6 = extractelement <8 x i16> %tmp.upgrd.1, i32 6
    199 	%tmp7 = extractelement <8 x i16> %tmp.upgrd.1, i32 7
    200 	%tmp8 = insertelement <8 x i16> undef, i16 %tmp2, i32 0
    201 	%tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 1
    202 	%tmp10 = insertelement <8 x i16> %tmp9, i16 %tmp0, i32 2
    203 	%tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 3
    204 	%tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp6, i32 4
    205 	%tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 5
    206 	%tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp4, i32 6
    207 	%tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 7
    208 	%tmp15.upgrd.2 = bitcast <8 x i16> %tmp15 to <2 x i64>
    209 	store <2 x i64> %tmp15.upgrd.2, <2 x i64>* %res
    210 	ret void
    211 }
    212 
        ; t9: read-modify-write of the <4 x float> at %r. Floats 0 and 1 are kept,
        ; and floats 2 and 3 are replaced by the two float halves of a double
        ; loaded through %A. The expected lowering loads the vector, merges the
        ; 64-bit memory operand into the high half with movhpd, and stores it back.
    213 define void @t9(<4 x float>* %r, <2 x i32>* %A) nounwind {
    214 ; X86-LABEL: t9:
    215 ; X86:       # %bb.0:
    216 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
    217 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    218 ; X86-NEXT:    movapd (%ecx), %xmm0
    219 ; X86-NEXT:    movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
    220 ; X86-NEXT:    movapd %xmm0, (%ecx)
    221 ; X86-NEXT:    retl
    222 ;
    223 ; X64-LABEL: t9:
    224 ; X64:       # %bb.0:
    225 ; X64-NEXT:    movapd (%rdi), %xmm0
    226 ; X64-NEXT:    movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
    227 ; X64-NEXT:    movapd %xmm0, (%rdi)
    228 ; X64-NEXT:    retq
    229 	%tmp = load <4 x float>, <4 x float>* %r
    230 	%tmp.upgrd.3 = bitcast <2 x i32>* %A to double*
    231 	%tmp.upgrd.4 = load double, double* %tmp.upgrd.3
    232 	%tmp.upgrd.5 = insertelement <2 x double> undef, double %tmp.upgrd.4, i32 0
    233 	%tmp5 = insertelement <2 x double> %tmp.upgrd.5, double undef, i32 1
    234 	%tmp6 = bitcast <2 x double> %tmp5 to <4 x float>
    235 	%tmp.upgrd.6 = extractelement <4 x float> %tmp, i32 0
    236 	%tmp7 = extractelement <4 x float> %tmp, i32 1
    237 	%tmp8 = extractelement <4 x float> %tmp6, i32 0
    238 	%tmp9 = extractelement <4 x float> %tmp6, i32 1
    239 	%tmp10 = insertelement <4 x float> undef, float %tmp.upgrd.6, i32 0
    240 	%tmp11 = insertelement <4 x float> %tmp10, float %tmp7, i32 1
    241 	%tmp12 = insertelement <4 x float> %tmp11, float %tmp8, i32 2
    242 	%tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3
    243 	store <4 x float> %tmp13, <4 x float>* %r
    244 	ret void
    245 }
    246 
    247 
    248 
    249 ; FIXME: This testcase produces icky code. It can be made much better!
    250 ; PR2585
        ; t10 loads the <4 x i32> at @g1, views it as <8 x i16>, gathers the
        ; even-numbered elements (0, 2, 4, 6) into the low four lanes, and stores
        ; the low 64 bits to @g2 as <4 x i16>. The instructions use implicitly
        ; numbered values (%1 through %6).
    251 
    252 @g1 = external constant <4 x i32>
    253 @g2 = external constant <4 x i16>
    254 
    255 define void @t10() nounwind {
    256 ; X86-LABEL: t10:
    257 ; X86:       # %bb.0:
    258 ; X86-NEXT:    pshuflw {{.*#+}} xmm0 = mem[0,2,2,3,4,5,6,7]
    259 ; X86-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
    260 ; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    261 ; X86-NEXT:    movq %xmm0, g2
    262 ; X86-NEXT:    retl
    263 ;
    264 ; X64-LABEL: t10:
    265 ; X64:       # %bb.0:
    266 ; X64-NEXT:    pshuflw {{.*#+}} xmm0 = mem[0,2,2,3,4,5,6,7]
    267 ; X64-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
    268 ; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    269 ; X64-NEXT:    movq %xmm0, {{.*}}(%rip)
    270 ; X64-NEXT:    retq
    271   load <4 x i32>, <4 x i32>* @g1, align 16
    272   bitcast <4 x i32> %1 to <8 x i16>
    273   shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> < i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef >
    274   bitcast <8 x i16> %3 to <2 x i64>
    275   extractelement <2 x i64> %4, i32 0
    276   bitcast i64 %5 to <4 x i16>
    277   store <4 x i16> %6, <4 x i16>* @g2, align 8
    278   ret void
    279 }
    280 
    281 ; Pack various elements via shuffles.
        ; t11: only result elements 0 and 1 are defined, as T0[1] and T1[0]; the
        ; rest of the mask is undef. The expected lowering shifts T0 right by 16
        ; bits per dword and then interleaves it with T1 via punpcklwd.
    282 define <8 x i16> @t11(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
    283 ; X86-LABEL: t11:
    284 ; X86:       # %bb.0: # %entry
    285 ; X86-NEXT:    psrld $16, %xmm0
    286 ; X86-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    287 ; X86-NEXT:    retl
    288 ;
    289 ; X64-LABEL: t11:
    290 ; X64:       # %bb.0: # %entry
    291 ; X64-NEXT:    psrld $16, %xmm0
    292 ; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    293 ; X64-NEXT:    retq
    294 entry:
    295 	%tmp7 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 1, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
    296 	ret <8 x i16> %tmp7
    297 
    298 }
    299 
        ; t12: partially-undef v8i16 shuffle. The defined result elements are
        ; 0 = T0[0], 1 = T0[1], 4 = T0[3] and 5 = T1[3]; elements 2, 3, 6 and 7 are
        ; undef. The expected lowering is punpcklwd, then pshuflw, then pshufd.
    300 define <8 x i16> @t12(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
    301 ; X86-LABEL: t12:
    302 ; X86:       # %bb.0: # %entry
    303 ; X86-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    304 ; X86-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
    305 ; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,3,3]
    306 ; X86-NEXT:    retl
    307 ;
    308 ; X64-LABEL: t12:
    309 ; X64:       # %bb.0: # %entry
    310 ; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    311 ; X64-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
    312 ; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,3,3]
    313 ; X64-NEXT:    retq
    314 entry:
    315 	%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 0, i32 1, i32 undef, i32 undef, i32 3, i32 11, i32 undef , i32 undef >
    316 	ret <8 x i16> %tmp9
    317 
    318 }
    319 
        ; t13: the t12 mask with the roles of the two inputs swapped. The defined
        ; result elements are 0 = T1[0], 1 = T1[1], 4 = T1[3] and 5 = T0[3]. The
        ; expected lowering matches t12 with the punpcklwd operands exchanged.
    320 define <8 x i16> @t13(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
    321 ; X86-LABEL: t13:
    322 ; X86:       # %bb.0: # %entry
    323 ; X86-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
    324 ; X86-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
    325 ; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,3,3]
    326 ; X86-NEXT:    retl
    327 ;
    328 ; X64-LABEL: t13:
    329 ; X64:       # %bb.0: # %entry
    330 ; X64-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
    331 ; X64-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
    332 ; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,3,3]
    333 ; X64-NEXT:    retq
    334 entry:
    335 	%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 8, i32 9, i32 undef, i32 undef, i32 11, i32 3, i32 undef , i32 undef >
    336 	ret <8 x i16> %tmp9
    337 }
    338 
        ; t14: partially-undef v8i16 shuffle. The defined result elements are
        ; 0 = T1[0], 1 = T1[1] and 5 = T0[2]; all others are undef. The expected
        ; lowering shifts T0 right by 16 bits per qword and then uses punpcklqdq to
        ; combine the low 64 bits of T1 with the low 64 bits of the shifted T0.
    339 define <8 x i16> @t14(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
    340 ; X86-LABEL: t14:
    341 ; X86:       # %bb.0: # %entry
    342 ; X86-NEXT:    psrlq $16, %xmm0
    343 ; X86-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
    344 ; X86-NEXT:    movdqa %xmm1, %xmm0
    345 ; X86-NEXT:    retl
    346 ;
    347 ; X64-LABEL: t14:
    348 ; X64:       # %bb.0: # %entry
    349 ; X64-NEXT:    psrlq $16, %xmm0
    350 ; X64-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
    351 ; X64-NEXT:    movdqa %xmm1, %xmm0
    352 ; X64-NEXT:    retq
    353 entry:
    354 	%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 2, i32 undef , i32 undef >
    355 	ret <8 x i16> %tmp9
    356 }
    357 
    358 ; FIXME: t15 is worse off from disabling of scheduler 2-address hack.
        ; The defined result elements are 2 = T0[7], 3 = T0[2] and 4 = T1[0]; all
        ; others are undef. The expected lowering is pshufd and pshuflw on T0,
        ; followed by punpcklqdq with T1.
    359 define <8 x i16> @t15(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
    360 ; X86-LABEL: t15:
    361 ; X86:       # %bb.0: # %entry
    362 ; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
    363 ; X86-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,2,4,5,6,7]
    364 ; X86-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    365 ; X86-NEXT:    retl
    366 ;
    367 ; X64-LABEL: t15:
    368 ; X64:       # %bb.0: # %entry
    369 ; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
    370 ; X64-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,2,4,5,6,7]
    371 ; X64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    372 ; X64-NEXT:    retq
    373 entry:
    374   %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
    375   ret <8 x i16> %tmp8
    376 }
    377 
    378 ; Test yonah where we convert a shuffle to pextrw and pinsrw
        ; Two chained v16i8 shuffles leave bytes 0 and 1 as the constant zeros and
        ; place T0[0] and T0[1] in bytes 2 and 3; every other byte is undef.
        ; NOTE(review): this file's RUN lines select -mattr=+sse3 rather than a
        ; yonah CPU, and the expected output below is a single pslld $16 with no
        ; pextrw/pinsrw, so the description above looks stale -- confirm.
    379 define <16 x i8> @t16(<16 x i8> %T0) nounwind readnone {
    380 ; X86-LABEL: t16:
    381 ; X86:       # %bb.0: # %entry
    382 ; X86-NEXT:    pslld $16, %xmm0
    383 ; X86-NEXT:    retl
    384 ;
    385 ; X64-LABEL: t16:
    386 ; X64:       # %bb.0: # %entry
    387 ; X64-NEXT:    pslld $16, %xmm0
    388 ; X64-NEXT:    retq
    389 entry:
    390   %tmp8 = shufflevector <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 1, i8 1, i8 1, i8 1, i8 0, i8 0, i8 0, i8 0,  i8 0, i8 0, i8 0, i8 0>, <16 x i8> %T0, <16 x i32> < i32 0, i32 1, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
    391   %tmp9 = shufflevector <16 x i8> %tmp8, <16 x i8> %T0,  <16 x i32> < i32 0, i32 1, i32 2, i32 17,  i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
    392   ret <16 x i8> %tmp9
    393 }
    394 
    395 ; rdar://8520311
        ; t17 loads through undef pointers. Only %tmp3 feeds the returned value:
        ; it is bitcast to <4 x i32>, its elements 0 and 1 are moved into lanes 2
        ; and 3, and the result is ANDed with <undef, undef, -1, 0>. %tmp2 and
        ; %tmp4 are computed but never used. The expected lowering is movddup from
        ; memory followed by andpd with a constant-pool operand.
    396 define <4 x i32> @t17() nounwind {
    397 ; X86-LABEL: t17:
    398 ; X86:       # %bb.0: # %entry
    399 ; X86-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
    400 ; X86-NEXT:    andpd {{\.LCPI.*}}, %xmm0
    401 ; X86-NEXT:    retl
    402 ;
    403 ; X64-LABEL: t17:
    404 ; X64:       # %bb.0: # %entry
    405 ; X64-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
    406 ; X64-NEXT:    andpd {{.*}}(%rip), %xmm0
    407 ; X64-NEXT:    retq
    408 entry:
    409   %tmp1 = load <4 x float>, <4 x float>* undef, align 16
    410   %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
    411   %tmp3 = load <4 x float>, <4 x float>* undef, align 16
    412   %tmp4 = shufflevector <4 x float> %tmp2, <4 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
    413   %tmp5 = bitcast <4 x float> %tmp3 to <4 x i32>
    414   %tmp6 = shufflevector <4 x i32> %tmp5, <4 x i32> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
    415   %tmp7 = and <4 x i32> %tmp6, <i32 undef, i32 undef, i32 -1, i32 0>
    416   ret <4 x i32> %tmp7
    417 }
    418