Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; Tests for SSE2 and below, without SSE3+.
      3 ; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=pentium4 -O3 | FileCheck %s
      4 
      5 define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind  {
        ; Replaces lane 0 of the <2 x double> loaded from %A with the scalar %B and
        ; stores the result to %r. Shuffle mask <2, 1>: index 2 is lane 0 of %tmp7
        ; (which holds %B), index 1 is lane 1 of %tmp3.
        ; The assertions expect the insert to be a single movlpd from the stack.
      6 ; CHECK-LABEL: test1:
      7 ; CHECK:       ## BB#0:
      8 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
      9 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
     10 ; CHECK-NEXT:    movapd (%ecx), %xmm0
     11 ; CHECK-NEXT:    movlpd {{[0-9]+}}(%esp), %xmm0
     12 ; CHECK-NEXT:    movapd %xmm0, (%eax)
     13 ; CHECK-NEXT:    retl
     14 	%tmp3 = load <2 x double>, <2 x double>* %A, align 16
     15 	%tmp7 = insertelement <2 x double> undef, double %B, i32 0
     16 	%tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 >
     17 	store <2 x double> %tmp9, <2 x double>* %r, align 16
     18 	ret void
     19 }
     20 
     21 define void @test2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind  {
        ; Replaces lane 1 of the <2 x double> loaded from %A with the scalar %B and
        ; stores the result to %r. Shuffle mask <0, 2>: index 0 is lane 0 of %tmp3,
        ; index 2 is lane 0 of %tmp7 (which holds %B).
        ; The assertions expect the insert to be a single movhpd from the stack.
     22 ; CHECK-LABEL: test2:
     23 ; CHECK:       ## BB#0:
     24 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
     25 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
     26 ; CHECK-NEXT:    movapd (%ecx), %xmm0
     27 ; CHECK-NEXT:    movhpd {{[0-9]+}}(%esp), %xmm0
     28 ; CHECK-NEXT:    movapd %xmm0, (%eax)
     29 ; CHECK-NEXT:    retl
     30 	%tmp3 = load <2 x double>, <2 x double>* %A, align 16
     31 	%tmp7 = insertelement <2 x double> undef, double %B, i32 0
     32 	%tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 0, i32 2 >
     33 	store <2 x double> %tmp9, <2 x double>* %r, align 16
     34 	ret void
     35 }
     36 
     37 
     38 define void @test3(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) nounwind {
        ; Builds <A[0], B[0], A[1], B[1]> one scalar at a time (extractelement
        ; followed by insertelement) and stores it to %res.
        ; The assertions expect the scalar chain to be recognised as a single
        ; unpcklps with a memory operand.
     39 ; CHECK-LABEL: test3:
     40 ; CHECK:       ## BB#0:
     41 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
     42 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
     43 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
     44 ; CHECK-NEXT:    movaps (%edx), %xmm0
     45 ; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
     46 ; CHECK-NEXT:    movaps %xmm0, (%eax)
     47 ; CHECK-NEXT:    retl
     48 	%tmp = load <4 x float>, <4 x float>* %B		; <<4 x float>> [#uses=2]
     49 	%tmp3 = load <4 x float>, <4 x float>* %A		; <<4 x float>> [#uses=2]
     50 	%tmp.upgrd.1 = extractelement <4 x float> %tmp3, i32 0		; <float> [#uses=1]
     51 	%tmp7 = extractelement <4 x float> %tmp, i32 0		; <float> [#uses=1]
     52 	%tmp8 = extractelement <4 x float> %tmp3, i32 1		; <float> [#uses=1]
     53 	%tmp9 = extractelement <4 x float> %tmp, i32 1		; <float> [#uses=1]
     54 	%tmp10 = insertelement <4 x float> undef, float %tmp.upgrd.1, i32 0		; <<4 x float>> [#uses=1]
     55 	%tmp11 = insertelement <4 x float> %tmp10, float %tmp7, i32 1		; <<4 x float>> [#uses=1]
     56 	%tmp12 = insertelement <4 x float> %tmp11, float %tmp8, i32 2		; <<4 x float>> [#uses=1]
     57 	%tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3		; <<4 x float>> [#uses=1]
     58 	store <4 x float> %tmp13, <4 x float>* %res
     59 	ret void
     60 }
     61 
     62 define void @test4(<4 x float> %X, <4 x float>* %res) nounwind {
        ; Shuffles %X against an undef vector with mask <2, 6, 3, 7> and stores the
        ; result to %res. Indices 6 and 7 select from the undef operand, so only
        ; result lanes 0 and 2 (X[2] and X[3]) are defined.
        ; The assertions expect a single in-register shufps.
     63 ; CHECK-LABEL: test4:
     64 ; CHECK:       ## BB#0:
     65 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
     66 ; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,1,3,3]
     67 ; CHECK-NEXT:    movaps %xmm0, (%eax)
     68 ; CHECK-NEXT:    retl
     69 	%tmp5 = shufflevector <4 x float> %X, <4 x float> undef, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >		; <<4 x float>> [#uses=1]
     70 	store <4 x float> %tmp5, <4 x float>* %res
     71 	ret void
     72 }
     73 
     74 define <4 x i32> @test5(i8** %ptr) nounwind {
        ; Loads one float through the pointer stored at %ptr, places it in lane 0
        ; of a <4 x float> whose other lanes are 0.0, then widens it with two
        ; interleaving shuffles against zero vectors:
        ;   1. as <16 x i8>: low 8 bytes interleaved with zero bytes;
        ;   2. as <8 x i16>: zero words interleaved with the low 4 words of step 1
        ;      (the zero vector is the FIRST shuffle operand here).
        ; The assertions expect movss + pxor + punpcklbw + punpcklwd.
     75 ; CHECK-LABEL: test5:
     76 ; CHECK:       ## BB#0:
     77 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
     78 ; CHECK-NEXT:    movl (%eax), %eax
     79 ; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
     80 ; CHECK-NEXT:    pxor %xmm0, %xmm0
     81 ; CHECK-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
     82 ; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
     83 ; CHECK-NEXT:    retl
     84 	%tmp = load i8*, i8** %ptr		; <i8*> [#uses=1]
     85 	%tmp.upgrd.1 = bitcast i8* %tmp to float*		; <float*> [#uses=1]
     86 	%tmp.upgrd.2 = load float, float* %tmp.upgrd.1		; <float> [#uses=1]
     87 	%tmp.upgrd.3 = insertelement <4 x float> undef, float %tmp.upgrd.2, i32 0		; <<4 x float>> [#uses=1]
     88 	%tmp9 = insertelement <4 x float> %tmp.upgrd.3, float 0.000000e+00, i32 1		; <<4 x float>> [#uses=1]
     89 	%tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 2		; <<4 x float>> [#uses=1]
     90 	%tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 3		; <<4 x float>> [#uses=1]
     91 	%tmp21 = bitcast <4 x float> %tmp11 to <16 x i8>		; <<16 x i8>> [#uses=1]
     92 	%tmp22 = shufflevector <16 x i8> %tmp21, <16 x i8> zeroinitializer, <16 x i32> < i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23 >		; <<16 x i8>> [#uses=1]
     93 	%tmp31 = bitcast <16 x i8> %tmp22 to <8 x i16>		; <<8 x i16>> [#uses=1]
     94 	%tmp.upgrd.4 = shufflevector <8 x i16> zeroinitializer, <8 x i16> %tmp31, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 >		; <<8 x i16>> [#uses=1]
     95 	%tmp36 = bitcast <8 x i16> %tmp.upgrd.4 to <4 x i32>		; <<4 x i32>> [#uses=1]
     96 	ret <4 x i32> %tmp36
     97 }
     98 
     99 define void @test6(<4 x float>* %res, <4 x float>* %A) nounwind {
        ; Shuffles the vector loaded from %A against undef with mask <0, 5, 6, 7>
        ; and stores the result to %res. Only index 0 selects from the loaded
        ; vector; indices 5-7 select from the undef operand.
        ; The assertions expect no shuffle instruction at all: just a movaps load
        ; followed by a movaps store.
    100 ; CHECK-LABEL: test6:
    101 ; CHECK:       ## BB#0:
    102 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
    103 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    104 ; CHECK-NEXT:    movaps (%ecx), %xmm0
    105 ; CHECK-NEXT:    movaps %xmm0, (%eax)
    106 ; CHECK-NEXT:    retl
    107   %tmp1 = load <4 x float>, <4 x float>* %A            ; <<4 x float>> [#uses=1]
    108   %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >          ; <<4 x float>> [#uses=1]
    109   store <4 x float> %tmp2, <4 x float>* %res
    110   ret void
    111 }
    112 
    113 define void @test7() nounwind {
        ; Shuffles two all-zero vectors with an all-zero mask and stores the result
        ; to the null pointer. The two instructions below are unnamed; they are
        ; referenced as %1 and %2 (see the ":1" / ":2" markers in their comments).
        ; The assertions expect the whole computation to collapse to an xorps
        ; zeroing idiom followed by a movaps store to absolute address 0.
    114 ; CHECK-LABEL: test7:
    115 ; CHECK:       ## BB#0:
    116 ; CHECK-NEXT:    xorps %xmm0, %xmm0
    117 ; CHECK-NEXT:    movaps %xmm0, 0
    118 ; CHECK-NEXT:    retl
    119   bitcast <4 x i32> zeroinitializer to <4 x float>                ; <<4 x float>>:1 [#uses=1]
    120   shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> zeroinitializer         ; <<4 x float>>:2 [#uses=1]
    121   store <4 x float> %2, <4 x float>* null
    122   ret void
    123 }
    124 
    125 @x = external global [4 x i32] ; array whose four elements are loaded one by one in @test8
    126 
    127 define <2 x i64> @test8() nounwind {
        ; Loads the four i32 elements of @x with separate scalar loads, inserts
        ; them into lanes 0-3 of a <4 x i32>, and returns it bitcast to <2 x i64>.
        ; The assertions expect the four loads to be merged into one movups
        ; through the non-lazy pointer for @x.
    128 ; CHECK-LABEL: test8:
    129 ; CHECK:       ## BB#0:
    130 ; CHECK-NEXT:    movl L_x$non_lazy_ptr, %eax
    131 ; CHECK-NEXT:    movups (%eax), %xmm0
    132 ; CHECK-NEXT:    retl
    133 	%tmp = load i32, i32* getelementptr ([4 x i32], [4 x i32]* @x, i32 0, i32 0)		; <i32> [#uses=1]
    134 	%tmp3 = load i32, i32* getelementptr ([4 x i32], [4 x i32]* @x, i32 0, i32 1)		; <i32> [#uses=1]
    135 	%tmp5 = load i32, i32* getelementptr ([4 x i32], [4 x i32]* @x, i32 0, i32 2)		; <i32> [#uses=1]
    136 	%tmp7 = load i32, i32* getelementptr ([4 x i32], [4 x i32]* @x, i32 0, i32 3)		; <i32> [#uses=1]
    137 	%tmp.upgrd.1 = insertelement <4 x i32> undef, i32 %tmp, i32 0		; <<4 x i32>> [#uses=1]
    138 	%tmp13 = insertelement <4 x i32> %tmp.upgrd.1, i32 %tmp3, i32 1		; <<4 x i32>> [#uses=1]
    139 	%tmp14 = insertelement <4 x i32> %tmp13, i32 %tmp5, i32 2		; <<4 x i32>> [#uses=1]
    140 	%tmp15 = insertelement <4 x i32> %tmp14, i32 %tmp7, i32 3		; <<4 x i32>> [#uses=1]
    141 	%tmp16 = bitcast <4 x i32> %tmp15 to <2 x i64>		; <<2 x i64>> [#uses=1]
    142 	ret <2 x i64> %tmp16
    143 }
    144 
    145 define <4 x float> @test9(i32 %dummy, float %a, float %b, float %c, float %d) nounwind {
        ; Builds <a, b, c, d> from four float arguments that follow an unused i32
        ; argument. The assertions expect one unaligned vector load (movups) from
        ; the stack. The leading %dummy is the only IR difference from @test10,
        ; which expects movaps instead.
        ; NOTE(review): presumably %dummy shifts the floats off a 16-byte boundary
        ; in the incoming argument area; confirm against the i386 Darwin ABI.
    146 ; CHECK-LABEL: test9:
    147 ; CHECK:       ## BB#0:
    148 ; CHECK-NEXT:    movups {{[0-9]+}}(%esp), %xmm0
    149 ; CHECK-NEXT:    retl
    150 	%tmp = insertelement <4 x float> undef, float %a, i32 0		; <<4 x float>> [#uses=1]
    151 	%tmp11 = insertelement <4 x float> %tmp, float %b, i32 1		; <<4 x float>> [#uses=1]
    152 	%tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2		; <<4 x float>> [#uses=1]
    153 	%tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3		; <<4 x float>> [#uses=1]
    154 	ret <4 x float> %tmp13
    155 }
    156 
    157 define <4 x float> @test10(float %a, float %b, float %c, float %d) nounwind {
        ; Builds <a, b, c, d> from the four float arguments. The assertions expect
        ; one aligned vector load (movaps) from the stack. Compare @test9, which
        ; has an extra leading i32 argument and expects movups.
        ; NOTE(review): presumably the first argument slot is 16-byte aligned on
        ; entry for this triple; confirm against the i386 Darwin ABI.
    158 ; CHECK-LABEL: test10:
    159 ; CHECK:       ## BB#0:
    160 ; CHECK-NEXT:    movaps {{[0-9]+}}(%esp), %xmm0
    161 ; CHECK-NEXT:    retl
    162 	%tmp = insertelement <4 x float> undef, float %a, i32 0		; <<4 x float>> [#uses=1]
    163 	%tmp11 = insertelement <4 x float> %tmp, float %b, i32 1		; <<4 x float>> [#uses=1]
    164 	%tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2		; <<4 x float>> [#uses=1]
    165 	%tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3		; <<4 x float>> [#uses=1]
    166 	ret <4 x float> %tmp13
    167 }
    168 
    169 define <2 x double> @test11(double %a, double %b) nounwind {
        ; Builds <a, b> from the two double arguments. The assertions expect both
        ; inserts to become one aligned vector load (movaps) from the stack.
    170 ; CHECK-LABEL: test11:
    171 ; CHECK:       ## BB#0:
    172 ; CHECK-NEXT:    movaps {{[0-9]+}}(%esp), %xmm0
    173 ; CHECK-NEXT:    retl
    174 	%tmp = insertelement <2 x double> undef, double %a, i32 0		; <<2 x double>> [#uses=1]
    175 	%tmp7 = insertelement <2 x double> %tmp, double %b, i32 1		; <<2 x double>> [#uses=1]
    176 	ret <2 x double> %tmp7
    177 }
    178 
    179 define void @test12() nounwind {
        ; Loads a <4 x float> from the null pointer and forms two shuffles of it:
        ;   %tmp2 = lanes 0-1 of the load, then lanes 2-3 of a splat of 1.0
        ;           (mask <0, 1, 6, 7>);
        ;   %tmp3 = lanes 2-3 of the load, then lanes 2-3 of a zero vector
        ;           (mask <2, 3, 6, 7>).
        ; Their sum is stored back to null.
        ; The assertions expect 64-bit-granularity instructions for the two
        ; shuffles (movsd and unpckhpd) around a single addps.
    180 ; CHECK-LABEL: test12:
    181 ; CHECK:       ## BB#0:
    182 ; CHECK-NEXT:    movapd 0, %xmm0
    183 ; CHECK-NEXT:    movapd {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
    184 ; CHECK-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
    185 ; CHECK-NEXT:    xorpd %xmm2, %xmm2
    186 ; CHECK-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]
    187 ; CHECK-NEXT:    addps %xmm1, %xmm0
    188 ; CHECK-NEXT:    movaps %xmm0, 0
    189 ; CHECK-NEXT:    retl
    190   %tmp1 = load <4 x float>, <4 x float>* null          ; <<4 x float>> [#uses=2]
    191   %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 6, i32 7 >             ; <<4 x float>> [#uses=1]
    192   %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 6, i32 7 >                ; <<4 x float>> [#uses=1]
    193   %tmp4 = fadd <4 x float> %tmp2, %tmp3            ; <<4 x float>> [#uses=1]
    194   store <4 x float> %tmp4, <4 x float>* null
    195   ret void
    196 }
    197 
    198 define void @test13(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
        ; Stores <B[1], C[0], B[1], C[1]> to %res (shuffle mask <1, 4, 1, 5> over
        ; the vectors loaded from %B and %C). %A is not used by the body.
        ; The assertions expect two shufps: one combining the register with the
        ; memory operand, then one reordering lanes to [0,2,1,3].
    199 ; CHECK-LABEL: test13:
    200 ; CHECK:       ## BB#0:
    201 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
    202 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    203 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
    204 ; CHECK-NEXT:    movaps (%edx), %xmm0
    205 ; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1],mem[0,1]
    206 ; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
    207 ; CHECK-NEXT:    movaps %xmm0, (%eax)
    208 ; CHECK-NEXT:    retl
    209   %tmp3 = load <4 x float>, <4 x float>* %B            ; <<4 x float>> [#uses=1]
    210   %tmp5 = load <4 x float>, <4 x float>* %C            ; <<4 x float>> [#uses=1]
    211   %tmp11 = shufflevector <4 x float> %tmp3, <4 x float> %tmp5, <4 x i32> < i32 1, i32 4, i32 1, i32 5 >         ; <<4 x float>> [#uses=1]
    212   store <4 x float> %tmp11, <4 x float>* %res
    213   ret void
    214 }
    215 
    216 define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind {
        ; Computes x + y and x - y on the loaded vectors and returns lanes 0-1 of
        ; the sum followed by lanes 0-1 of the difference (mask <0, 1, 4, 5>).
        ; The assertions expect the final shuffle to be a single unpcklpd of the
        ; two arithmetic results.
    217 ; CHECK-LABEL: test14:
    218 ; CHECK:       ## BB#0:
    219 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
    220 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    221 ; CHECK-NEXT:    movaps (%ecx), %xmm1
    222 ; CHECK-NEXT:    movaps (%eax), %xmm2
    223 ; CHECK-NEXT:    movaps %xmm2, %xmm0
    224 ; CHECK-NEXT:    addps %xmm1, %xmm0
    225 ; CHECK-NEXT:    subps %xmm1, %xmm2
    226 ; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
    227 ; CHECK-NEXT:    retl
    228   %tmp = load <4 x float>, <4 x float>* %y             ; <<4 x float>> [#uses=2]
    229   %tmp5 = load <4 x float>, <4 x float>* %x            ; <<4 x float>> [#uses=2]
    230   %tmp9 = fadd <4 x float> %tmp5, %tmp             ; <<4 x float>> [#uses=1]
    231   %tmp21 = fsub <4 x float> %tmp5, %tmp            ; <<4 x float>> [#uses=1]
    232   %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 >                ; <<4 x float>> [#uses=1]
    233   ret <4 x float> %tmp27
    234 }
    235 
    236 define <4 x float> @test15(<4 x float>* %x, <4 x float>* %y) nounwind {
        ; Returns lanes 2-3 of the vector at %x followed by lanes 2-3 of the vector
        ; at %y (shuffle mask <2, 3, 6, 7>).
        ; The assertions expect one load plus an unpckhpd that takes its second
        ; operand directly from memory. The block label "entry" appears in the
        ; expected "## %entry" comment, so it must not be renamed.
    237 ; CHECK-LABEL: test15:
    238 ; CHECK:       ## BB#0: ## %entry
    239 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
    240 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    241 ; CHECK-NEXT:    movapd (%ecx), %xmm0
    242 ; CHECK-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1]
    243 ; CHECK-NEXT:    retl
    244 entry:
    245   %tmp = load <4 x float>, <4 x float>* %y             ; <<4 x float>> [#uses=1]
    246   %tmp3 = load <4 x float>, <4 x float>* %x            ; <<4 x float>> [#uses=1]
    247   %tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x i32> < i32 2, i32 3, i32 6, i32 7 >           ; <<4 x float>> [#uses=1]
    248   ret <4 x float> %tmp4
    249 }
    250 
    251 ; PR8900
    252 
    253 define  <2 x double> @test16(<4 x double> * nocapture %srcA, <2 x double>* nocapture %dst) {
        ; Loads the <4 x double> at element index 3 of %srcA and returns its lanes
        ; 0 and 2 as a <2 x double> (shuffle mask <0, 2>). %dst is not used by the
        ; body.
        ; The assertions expect a 16-byte load at offset 96 from %srcA followed by
        ; an unpcklpd whose second operand comes from memory.
    254 ; CHECK-LABEL: test16:
    255 ; CHECK:       ## BB#0:
    256 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
    257 ; CHECK-NEXT:    movapd 96(%eax), %xmm0
    258 ; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
    259 ; CHECK-NEXT:    retl
    260   %i5 = getelementptr inbounds <4 x double>, <4 x double>* %srcA, i32 3
    261   %i6 = load <4 x double>, <4 x double>* %i5, align 32
    262   %i7 = shufflevector <4 x double> %i6, <4 x double> undef, <2 x i32> <i32 0, i32 2>
    263   ret <2 x double> %i7
    264 }
    265 
    266 ; PR9009
    267 define fastcc void @test17() nounwind {
        ; Shuffles the constant <undef, undef, 32768, 32768> with a fully undef
        ; vector (%0) using mask <4, 5, 2, 3>: lanes 0-1 come from %0, lanes 2-3
        ; from the constant. The result is stored through an undef pointer.
        ; The assertions expect the shuffle to fold to a constant-pool load of
        ; <u,u,32768,32768> and a store through %eax, with no shuffle instruction.
    268 ; CHECK-LABEL: test17:
    269 ; CHECK:       ## BB#0: ## %entry
    270 ; CHECK-NEXT:    movaps {{.*#+}} xmm0 = <u,u,32768,32768>
    271 ; CHECK-NEXT:    movaps %xmm0, (%eax)
    272 ; CHECK-NEXT:    retl
    273 entry:
    274   %0 = insertelement <4 x i32> undef, i32 undef, i32 1
    275   %1 = shufflevector <4 x i32> <i32 undef, i32 undef, i32 32768, i32 32768>, <4 x i32> %0, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
    276   %2 = bitcast <4 x i32> %1 to <4 x float>
    277   store <4 x float> %2, <4 x float> * undef
    278   ret void
    279 }
    280 
    281 ; PR9210
    282 define <4 x float> @f(<4 x double>) nounwind {
        ; Truncates the unnamed <4 x double> argument (referenced as %0) to
        ; <4 x float>.
        ; The assertions expect the argument to arrive split across xmm0 and xmm1,
        ; each half converted with cvtpd2ps and the two results joined with
        ; unpcklpd.
    283 ; CHECK-LABEL: f:
    284 ; CHECK:       ## BB#0: ## %entry
    285 ; CHECK-NEXT:    cvtpd2ps %xmm1, %xmm1
    286 ; CHECK-NEXT:    cvtpd2ps %xmm0, %xmm0
    287 ; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    288 ; CHECK-NEXT:    retl
    289 entry:
    290  %double2float.i = fptrunc <4 x double> %0 to <4 x float>
    291  ret <4 x float> %double2float.i
    292 }
    293 
    294 define <2 x i64> @test_insert_64_zext(<2 x i64> %i) {
        ; Keeps lane 0 of %i and replaces lane 1 with the constant 0 (shuffle mask
        ; <0, 2>; index 2 is lane 0 of the constant vector <0, undef>).
        ; The assertions expect a single register-to-register movq, which zeroes
        ; the upper lane.
    295 ; CHECK-LABEL: test_insert_64_zext:
    296 ; CHECK:       ## BB#0:
    297 ; CHECK-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
    298 ; CHECK-NEXT:    retl
    299   %1 = shufflevector <2 x i64> %i, <2 x i64> <i64 0, i64 undef>, <2 x i32> <i32 0, i32 2>
    300   ret <2 x i64> %1
    301 }
    302 
    303 define <4 x i32> @PR19721(<4 x i32> %i) {
        ; Reinterprets %i as an i128, clears its low 32 bits (the mask
        ; -4294967296 has every bit set except the low 32), and reinterprets the
        ; result as <4 x i32> again.
        ; The assertions expect a single andps against a constant-pool mask; the
        ; "19" in LCPI19_0 is tied to this function's position in the file.
    304 ; CHECK-LABEL: PR19721:
    305 ; CHECK:       ## BB#0:
    306 ; CHECK-NEXT:    andps LCPI19_0, %xmm0
    307 ; CHECK-NEXT:    retl
    308   %bc = bitcast <4 x i32> %i to i128
    309   %insert = and i128 %bc, -4294967296
    310   %bc2 = bitcast i128 %insert to <4 x i32>
    311   ret <4 x i32> %bc2
    312 }
    313 
    314 define <4 x i32> @test_mul(<4 x i32> %x, <4 x i32> %y) {
        ; Element-wise 32-bit multiply of %x and %y.
        ; The assertions expect no single packed-multiply instruction; instead:
        ;   - pmuludq on the inputs as-is, and again on copies shuffled with
        ;     [1,1,3,3] (odd lanes moved into even positions);
        ;   - pshufd [0,2,2,3] on each product to gather the low 32-bit halves;
        ;   - punpckldq to interleave the two sets of results.
        ; NOTE(review): presumably this expansion is needed because the pentium4
        ; target named in the file's llc command line has no packed 32-bit
        ; low multiply; confirm.
    315 ; CHECK-LABEL: test_mul:
    316 ; CHECK:       ## BB#0:
    317 ; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
    318 ; CHECK-NEXT:    pmuludq %xmm1, %xmm0
    319 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    320 ; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
    321 ; CHECK-NEXT:    pmuludq %xmm2, %xmm1
    322 ; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
    323 ; CHECK-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    324 ; CHECK-NEXT:    retl
    325   %m = mul <4 x i32> %x, %y
    326   ret <4 x i32> %m
    327 }
    328