Home | History | Annotate | Download | only in X86
      1 ; Tests for SSE2 and below, without SSE3+.
      2 ; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=pentium4 -O3 | FileCheck %s
      3 
      4 define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind  {
        ; Replaces element 0 of the vector loaded from %A with the scalar %B
        ; (shuffle mask <2, 1> picks lane 0 of the inserted vector and lane 1 of
        ; the load) and stores the result to %r. The checks expect the scalar to
        ; be merged in with a single movlpd from the stack.
      5 ; CHECK-LABEL: test1:
      6 ; CHECK:       ## BB#0:
      7 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
      8 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
      9 ; CHECK-NEXT:    movapd (%ecx), %xmm0
     10 ; CHECK-NEXT:    movlpd {{[0-9]+}}(%esp), %xmm0
     11 ; CHECK-NEXT:    movapd %xmm0, (%eax)
     12 ; CHECK-NEXT:    retl
     13 	%tmp3 = load <2 x double>, <2 x double>* %A, align 16
     14 	%tmp7 = insertelement <2 x double> undef, double %B, i32 0
     15 	%tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 >
     16 	store <2 x double> %tmp9, <2 x double>* %r, align 16
     17 	ret void
     18 }
     19 
     20 define void @test2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind  {
        ; Replaces element 1 of the vector loaded from %A with the scalar %B
        ; (shuffle mask <0, 2> keeps lane 0 of the load and takes lane 0 of the
        ; inserted vector) and stores the result to %r. The checks expect the
        ; scalar to be merged in with a single movhpd from the stack.
     21 ; CHECK-LABEL: test2:
     22 ; CHECK:       ## BB#0:
     23 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
     24 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
     25 ; CHECK-NEXT:    movapd (%ecx), %xmm0
     26 ; CHECK-NEXT:    movhpd {{[0-9]+}}(%esp), %xmm0
     27 ; CHECK-NEXT:    movapd %xmm0, (%eax)
     28 ; CHECK-NEXT:    retl
     29 	%tmp3 = load <2 x double>, <2 x double>* %A, align 16
     30 	%tmp7 = insertelement <2 x double> undef, double %B, i32 0
     31 	%tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 0, i32 2 >
     32 	store <2 x double> %tmp9, <2 x double>* %r, align 16
     33 	ret void
     34 }
     35 
     36 
     37 define void @test3(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) nounwind {
        ; Builds <A[0], B[0], A[1], B[1]> one element at a time with
        ; extractelement/insertelement and stores it to %res. The checks expect
        ; the whole chain to collapse into one unpcklps with a memory operand.
     38 ; CHECK-LABEL: test3:
     39 ; CHECK:       ## BB#0:
     40 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
     41 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
     42 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
     43 ; CHECK-NEXT:    movaps (%edx), %xmm0
     44 ; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
     45 ; CHECK-NEXT:    movaps %xmm0, (%eax)
     46 ; CHECK-NEXT:    retl
     47 	%tmp = load <4 x float>, <4 x float>* %B		; <<4 x float>> [#uses=2]
     48 	%tmp3 = load <4 x float>, <4 x float>* %A		; <<4 x float>> [#uses=2]
     49 	%tmp.upgrd.1 = extractelement <4 x float> %tmp3, i32 0		; <float> [#uses=1]
     50 	%tmp7 = extractelement <4 x float> %tmp, i32 0		; <float> [#uses=1]
     51 	%tmp8 = extractelement <4 x float> %tmp3, i32 1		; <float> [#uses=1]
     52 	%tmp9 = extractelement <4 x float> %tmp, i32 1		; <float> [#uses=1]
     53 	%tmp10 = insertelement <4 x float> undef, float %tmp.upgrd.1, i32 0		; <<4 x float>> [#uses=1]
     54 	%tmp11 = insertelement <4 x float> %tmp10, float %tmp7, i32 1		; <<4 x float>> [#uses=1]
     55 	%tmp12 = insertelement <4 x float> %tmp11, float %tmp8, i32 2		; <<4 x float>> [#uses=1]
     56 	%tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3		; <<4 x float>> [#uses=1]
     57 	store <4 x float> %tmp13, <4 x float>* %res
     58 	ret void
     59 }
     60 
     61 define void @test4(<4 x float> %X, <4 x float>* %res) nounwind {
        ; Shuffles %X against an undef second operand with mask <2, 6, 3, 7>;
        ; indices 6 and 7 select from the undef operand, so only lanes 0 and 2 of
        ; the result are defined. The checks expect a single in-register shufps
        ; followed by the store to %res.
     62 ; CHECK-LABEL: test4:
     63 ; CHECK:       ## BB#0:
     64 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
     65 ; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,1,3,3]
     66 ; CHECK-NEXT:    movaps %xmm0, (%eax)
     67 ; CHECK-NEXT:    retl
     68 	%tmp5 = shufflevector <4 x float> %X, <4 x float> undef, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >		; <<4 x float>> [#uses=1]
     69 	store <4 x float> %tmp5, <4 x float>* %res
     70 	ret void
     71 }
     72 
     73 define <4 x i32> @test5(i8** %ptr) nounwind {
        ; Loads a float through the pointer stored at %ptr, places it in lane 0 of
        ; a vector whose other lanes are 0.0, then reinterprets it as bytes and
        ; interleaves with zero twice: first the low 8 bytes with zero bytes,
        ; then a zero vector with the low 4 words of that result. The checks
        ; expect a movss load, a pxor to materialize zero, and one punpcklbw plus
        ; one punpcklwd.
     74 ; CHECK-LABEL: test5:
     75 ; CHECK:       ## BB#0:
     76 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
     77 ; CHECK-NEXT:    movl (%eax), %eax
     78 ; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
     79 ; CHECK-NEXT:    pxor %xmm0, %xmm0
     80 ; CHECK-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
     81 ; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
     82 ; CHECK-NEXT:    retl
     83 	%tmp = load i8*, i8** %ptr		; <i8*> [#uses=1]
     84 	%tmp.upgrd.1 = bitcast i8* %tmp to float*		; <float*> [#uses=1]
     85 	%tmp.upgrd.2 = load float, float* %tmp.upgrd.1		; <float> [#uses=1]
     86 	%tmp.upgrd.3 = insertelement <4 x float> undef, float %tmp.upgrd.2, i32 0		; <<4 x float>> [#uses=1]
     87 	%tmp9 = insertelement <4 x float> %tmp.upgrd.3, float 0.000000e+00, i32 1		; <<4 x float>> [#uses=1]
     88 	%tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 2		; <<4 x float>> [#uses=1]
     89 	%tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 3		; <<4 x float>> [#uses=1]
     90 	%tmp21 = bitcast <4 x float> %tmp11 to <16 x i8>		; <<16 x i8>> [#uses=1]
     91 	%tmp22 = shufflevector <16 x i8> %tmp21, <16 x i8> zeroinitializer, <16 x i32> < i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23 >		; <<16 x i8>> [#uses=1]
     92 	%tmp31 = bitcast <16 x i8> %tmp22 to <8 x i16>		; <<8 x i16>> [#uses=1]
     93 	%tmp.upgrd.4 = shufflevector <8 x i16> zeroinitializer, <8 x i16> %tmp31, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 >		; <<8 x i16>> [#uses=1]
     94 	%tmp36 = bitcast <8 x i16> %tmp.upgrd.4 to <4 x i32>		; <<4 x i32>> [#uses=1]
     95 	ret <4 x i32> %tmp36
     96 }
     97 
     98 define void @test6(<4 x float>* %res, <4 x float>* %A) nounwind {
        ; Shuffles the vector loaded from %A against undef with mask <0, 5, 6, 7>;
        ; only lane 0 comes from the load, the other three indices select from
        ; the undef operand. The checks expect no shuffle instruction at all,
        ; just an aligned load and an aligned store to %res.
     99 ; CHECK-LABEL: test6:
    100 ; CHECK:       ## BB#0:
    101 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
    102 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    103 ; CHECK-NEXT:    movaps (%ecx), %xmm0
    104 ; CHECK-NEXT:    movaps %xmm0, (%eax)
    105 ; CHECK-NEXT:    retl
    106   %tmp1 = load <4 x float>, <4 x float>* %A            ; <<4 x float>> [#uses=1]
    107   %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >          ; <<4 x float>> [#uses=1]
    108   store <4 x float> %tmp2, <4 x float>* %res
    109   ret void
    110 }
    111 
    112 define void @test7() nounwind {
        ; Splats lane 0 of an all-zero vector (shuffle mask zeroinitializer) and
        ; stores the result to the null address. The two instructions below are
        ; unnamed, so they are implicitly numbered %1 and %2. The checks expect
        ; the shuffle to fold to a zero register (xorps) stored to address 0.
    113 ; CHECK-LABEL: test7:
    114 ; CHECK:       ## BB#0:
    115 ; CHECK-NEXT:    xorps %xmm0, %xmm0
    116 ; CHECK-NEXT:    movaps %xmm0, 0
    117 ; CHECK-NEXT:    retl
    118   bitcast <4 x i32> zeroinitializer to <4 x float>                ; <<4 x float>>:1 [#uses=1]
    119   shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> zeroinitializer         ; <<4 x float>>:2 [#uses=1]
    120   store <4 x float> %2, <4 x float>* null
    121   ret void
    122 }
    123 
    124 @x = external global [4 x i32]
    125 
    126 define <2 x i64> @test8() nounwind {
        ; Loads the four i32 elements of the external global @x individually,
        ; assembles them in order into a <4 x i32>, and returns it bitcast to
        ; <2 x i64>. The checks expect the four scalar loads to be merged into a
        ; single unaligned 16-byte load (movups) through the non-lazy pointer.
    127 ; CHECK-LABEL: test8:
    128 ; CHECK:       ## BB#0:
    129 ; CHECK-NEXT:    movl L_x$non_lazy_ptr, %eax
    130 ; CHECK-NEXT:    movups (%eax), %xmm0
    131 ; CHECK-NEXT:    retl
    132 	%tmp = load i32, i32* getelementptr ([4 x i32], [4 x i32]* @x, i32 0, i32 0)		; <i32> [#uses=1]
    133 	%tmp3 = load i32, i32* getelementptr ([4 x i32], [4 x i32]* @x, i32 0, i32 1)		; <i32> [#uses=1]
    134 	%tmp5 = load i32, i32* getelementptr ([4 x i32], [4 x i32]* @x, i32 0, i32 2)		; <i32> [#uses=1]
    135 	%tmp7 = load i32, i32* getelementptr ([4 x i32], [4 x i32]* @x, i32 0, i32 3)		; <i32> [#uses=1]
    136 	%tmp.upgrd.1 = insertelement <4 x i32> undef, i32 %tmp, i32 0		; <<4 x i32>> [#uses=1]
    137 	%tmp13 = insertelement <4 x i32> %tmp.upgrd.1, i32 %tmp3, i32 1		; <<4 x i32>> [#uses=1]
    138 	%tmp14 = insertelement <4 x i32> %tmp13, i32 %tmp5, i32 2		; <<4 x i32>> [#uses=1]
    139 	%tmp15 = insertelement <4 x i32> %tmp14, i32 %tmp7, i32 3		; <<4 x i32>> [#uses=1]
    140 	%tmp16 = bitcast <4 x i32> %tmp15 to <2 x i64>		; <<2 x i64>> [#uses=1]
    141 	ret <2 x i64> %tmp16
    142 }
    143 
    144 define <4 x float> @test9(i32 %dummy, float %a, float %b, float %c, float %d) nounwind {
        ; Builds <a, b, c, d> from four float arguments that follow an unused i32
        ; argument. The checks expect one unaligned 16-byte load (movups) straight
        ; from the stack.
        ; NOTE(review): presumably movups rather than movaps because %dummy shifts
        ; the floats off a 16-byte boundary -- confirm against the i386 Darwin ABI.
    145 ; CHECK-LABEL: test9:
    146 ; CHECK:       ## BB#0:
    147 ; CHECK-NEXT:    movups {{[0-9]+}}(%esp), %xmm0
    148 ; CHECK-NEXT:    retl
    149 	%tmp = insertelement <4 x float> undef, float %a, i32 0		; <<4 x float>> [#uses=1]
    150 	%tmp11 = insertelement <4 x float> %tmp, float %b, i32 1		; <<4 x float>> [#uses=1]
    151 	%tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2		; <<4 x float>> [#uses=1]
    152 	%tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3		; <<4 x float>> [#uses=1]
    153 	ret <4 x float> %tmp13
    154 }
    155 
    156 define <4 x float> @test10(float %a, float %b, float %c, float %d) nounwind {
        ; Builds <a, b, c, d> from the four float arguments. The checks expect a
        ; single aligned 16-byte load (movaps) straight from the stack.
        ; NOTE(review): presumably relies on the first stack argument being
        ; 16-byte aligned on i386 Darwin -- confirm against the ABI.
    157 ; CHECK-LABEL: test10:
    158 ; CHECK:       ## BB#0:
    159 ; CHECK-NEXT:    movaps {{[0-9]+}}(%esp), %xmm0
    160 ; CHECK-NEXT:    retl
    161 	%tmp = insertelement <4 x float> undef, float %a, i32 0		; <<4 x float>> [#uses=1]
    162 	%tmp11 = insertelement <4 x float> %tmp, float %b, i32 1		; <<4 x float>> [#uses=1]
    163 	%tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2		; <<4 x float>> [#uses=1]
    164 	%tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3		; <<4 x float>> [#uses=1]
    165 	ret <4 x float> %tmp13
    166 }
    167 
    168 define <2 x double> @test11(double %a, double %b) nounwind {
        ; Builds <a, b> from the two double arguments. The checks expect a single
        ; aligned 16-byte load (movaps) straight from the stack.
    169 ; CHECK-LABEL: test11:
    170 ; CHECK:       ## BB#0:
    171 ; CHECK-NEXT:    movaps {{[0-9]+}}(%esp), %xmm0
    172 ; CHECK-NEXT:    retl
    173 	%tmp = insertelement <2 x double> undef, double %a, i32 0		; <<2 x double>> [#uses=1]
    174 	%tmp7 = insertelement <2 x double> %tmp, double %b, i32 1		; <<2 x double>> [#uses=1]
    175 	ret <2 x double> %tmp7
    176 }
    177 
    178 define void @test12() nounwind {
        ; Loads a <4 x float> t from the null address and forms two shuffles:
        ;   %tmp2 = <t[0], t[1], 1.0, 1.0>   (low half of t, high half of ones)
        ;   %tmp3 = <t[2], t[3], 0.0, 0.0>   (high half of t, high half of zero)
        ; then adds them and stores the sum back to null. The checks expect both
        ; shuffles to be done as 64-bit-granularity moves (movsd and unpckhpd).
    179 ; CHECK-LABEL: test12:
    180 ; CHECK:       ## BB#0:
    181 ; CHECK-NEXT:    movapd 0, %xmm0
    182 ; CHECK-NEXT:    movapd {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
    183 ; CHECK-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
    184 ; CHECK-NEXT:    xorpd %xmm2, %xmm2
    185 ; CHECK-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]
    186 ; CHECK-NEXT:    addps %xmm1, %xmm0
    187 ; CHECK-NEXT:    movaps %xmm0, 0
    188 ; CHECK-NEXT:    retl
    189   %tmp1 = load <4 x float>, <4 x float>* null          ; <<4 x float>> [#uses=2]
    190   %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 6, i32 7 >             ; <<4 x float>> [#uses=1]
    191   %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 6, i32 7 >                ; <<4 x float>> [#uses=1]
    192   %tmp4 = fadd <4 x float> %tmp2, %tmp3            ; <<4 x float>> [#uses=1]
    193   store <4 x float> %tmp4, <4 x float>* null
    194   ret void
    195 }
    196 
    197 define void @test13(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
        ; Shuffles the vectors loaded from %B and %C with mask <1, 4, 1, 5>,
        ; giving <B[1], C[0], B[1], C[1]>, and stores the result to %res. %A is
        ; not referenced. The checks expect a two-shufps sequence: one that
        ; gathers the needed lanes and one that puts them in order.
    198 ; CHECK-LABEL: test13:
    199 ; CHECK:       ## BB#0:
    200 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
    201 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    202 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
    203 ; CHECK-NEXT:    movaps (%edx), %xmm0
    204 ; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1],mem[0,1]
    205 ; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
    206 ; CHECK-NEXT:    movaps %xmm0, (%eax)
    207 ; CHECK-NEXT:    retl
    208   %tmp3 = load <4 x float>, <4 x float>* %B            ; <<4 x float>> [#uses=1]
    209   %tmp5 = load <4 x float>, <4 x float>* %C            ; <<4 x float>> [#uses=1]
    210   %tmp11 = shufflevector <4 x float> %tmp3, <4 x float> %tmp5, <4 x i32> < i32 1, i32 4, i32 1, i32 5 >         ; <<4 x float>> [#uses=1]
    211   store <4 x float> %tmp11, <4 x float>* %res
    212   ret void
    213 }
    214 
    215 define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind {
        ; Computes x+y and x-y on the loaded vectors and returns the low halves of
        ; both concatenated (mask <0, 1, 4, 5>): <sum[0], sum[1], diff[0], diff[1]>.
        ; The checks expect the final shuffle to be a single unpcklpd.
    216 ; CHECK-LABEL: test14:
    217 ; CHECK:       ## BB#0:
    218 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
    219 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    220 ; CHECK-NEXT:    movaps (%ecx), %xmm1
    221 ; CHECK-NEXT:    movaps (%eax), %xmm2
    222 ; CHECK-NEXT:    movaps %xmm2, %xmm0
    223 ; CHECK-NEXT:    addps %xmm1, %xmm0
    224 ; CHECK-NEXT:    subps %xmm1, %xmm2
    225 ; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
    226 ; CHECK-NEXT:    retl
    227   %tmp = load <4 x float>, <4 x float>* %y             ; <<4 x float>> [#uses=2]
    228   %tmp5 = load <4 x float>, <4 x float>* %x            ; <<4 x float>> [#uses=2]
    229   %tmp9 = fadd <4 x float> %tmp5, %tmp             ; <<4 x float>> [#uses=1]
    230   %tmp21 = fsub <4 x float> %tmp5, %tmp            ; <<4 x float>> [#uses=1]
    231   %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 >                ; <<4 x float>> [#uses=1]
    232   ret <4 x float> %tmp27
    233 }
    234 
    235 define <4 x float> @test15(<4 x float>* %x, <4 x float>* %y) nounwind {
        ; Returns the high halves of the vectors loaded from %x and %y
        ; concatenated (mask <2, 3, 6, 7>): <x[2], x[3], y[2], y[3]>. The checks
        ; expect a single unpckhpd with the second vector as a memory operand.
    236 ; CHECK-LABEL: test15:
    237 ; CHECK:       ## BB#0: ## %entry
    238 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
    239 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    240 ; CHECK-NEXT:    movapd (%ecx), %xmm0
    241 ; CHECK-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1]
    242 ; CHECK-NEXT:    retl
    243 entry:
    244   %tmp = load <4 x float>, <4 x float>* %y             ; <<4 x float>> [#uses=1]
    245   %tmp3 = load <4 x float>, <4 x float>* %x            ; <<4 x float>> [#uses=1]
    246   %tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x i32> < i32 2, i32 3, i32 6, i32 7 >           ; <<4 x float>> [#uses=1]
    247   ret <4 x float> %tmp4
    248 }
    249 
    250 ; PR8900
    251 
    252 define  <2 x double> @test16(<4 x double> * nocapture %srcA, <2 x double>* nocapture %dst) {
        ; Loads the <4 x double> at index 3 from %srcA and returns its elements 0
        ; and 2 as a <2 x double>. %dst is not referenced. The wide vector does
        ; not fit one XMM register, so the checks expect a 16-byte load at byte
        ; offset 96 and an unpcklpd whose memory operand supplies element 2.
    253 ; CHECK-LABEL: test16:
    254 ; CHECK:       ## BB#0:
    255 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
    256 ; CHECK-NEXT:    movapd 96(%eax), %xmm0
    257 ; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
    258 ; CHECK-NEXT:    retl
    259   %i5 = getelementptr inbounds <4 x double>, <4 x double>* %srcA, i32 3
    260   %i6 = load <4 x double>, <4 x double>* %i5, align 32
    261   %i7 = shufflevector <4 x double> %i6, <4 x double> undef, <2 x i32> <i32 0, i32 2>
    262   ret <2 x double> %i7
    263 }
    264 
    265 ; PR9009
    266 define fastcc void @test17() nounwind {
        ; Shuffles the constant <undef, undef, 32768, 32768> with a vector built
        ; only from undef values (mask <4, 5, 2, 3>), so lanes 0-1 are undefined
        ; and lanes 2-3 are 32768; the result is stored through an undef pointer.
        ; The checks expect the shuffle to fold to a constant-pool load of
        ; <u,u,32768,32768> followed by the store.
    267 ; CHECK-LABEL: test17:
    268 ; CHECK:       ## BB#0: ## %entry
    269 ; CHECK-NEXT:    movaps {{.*#+}} xmm0 = <u,u,32768,32768>
    270 ; CHECK-NEXT:    movaps %xmm0, (%eax)
    271 ; CHECK-NEXT:    retl
    272 entry:
    273   %0 = insertelement <4 x i32> undef, i32 undef, i32 1
    274   %1 = shufflevector <4 x i32> <i32 undef, i32 undef, i32 32768, i32 32768>, <4 x i32> %0, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
    275   %2 = bitcast <4 x i32> %1 to <4 x float>
    276   store <4 x float> %2, <4 x float> * undef
    277   ret void
    278 }
    279 
    280 ; PR9210
    281 define <4 x float> @f(<4 x double>) nounwind {
        ; Truncates the unnamed <4 x double> argument (%0) to <4 x float>. The
        ; checks expect each of the two input registers to be converted with
        ; cvtpd2ps and the two low halves then joined with unpcklpd.
    282 ; CHECK-LABEL: f:
    283 ; CHECK:       ## BB#0: ## %entry
    284 ; CHECK-NEXT:    cvtpd2ps %xmm1, %xmm1
    285 ; CHECK-NEXT:    cvtpd2ps %xmm0, %xmm0
    286 ; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    287 ; CHECK-NEXT:    retl
    288 entry:
    289  %double2float.i = fptrunc <4 x double> %0 to <4 x float>
    290  ret <4 x float> %double2float.i
    291 }
    292 
    293 define <2 x i64> @test_insert_64_zext(<2 x i64> %i) {
        ; Keeps element 0 of %i and takes element 1 from lane 0 of the constant
        ; <0, undef> (mask <0, 2>), i.e. zeroes the upper 64 bits. The checks
        ; expect this to be a single zero-extending movq.
    294 ; CHECK-LABEL: test_insert_64_zext:
    295 ; CHECK:       ## BB#0:
    296 ; CHECK-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
    297 ; CHECK-NEXT:    retl
    298   %1 = shufflevector <2 x i64> %i, <2 x i64> <i64 0, i64 undef>, <2 x i32> <i32 0, i32 2>
    299   ret <2 x i64> %1
    300 }
    301 
    302 define <4 x i32> @PR19721(<4 x i32> %i) {
        ; Reinterprets %i as an i128, clears its low 32 bits by ANDing with
        ; -4294967296, and reinterprets the result as <4 x i32> again. The checks
        ; expect a single andps against a constant-pool mask.
    303 ; CHECK-LABEL: PR19721:
    304 ; CHECK:       ## BB#0:
    305 ; CHECK-NEXT:    andps LCPI19_0, %xmm0
    306 ; CHECK-NEXT:    retl
    307   %bc = bitcast <4 x i32> %i to i128
    308   %insert = and i128 %bc, -4294967296
    309   %bc2 = bitcast i128 %insert to <4 x i32>
    310   ret <4 x i32> %bc2
    311 }
    312 
    313 define <4 x i32> @test_mul(<4 x i32> %x, <4 x i32> %y) {
        ; Element-wise <4 x i32> multiply. The checks expect it to be expanded
        ; into two pmuludq operations: one on the even lanes directly, one on the
        ; odd lanes after moving them into even positions with pshufd. The low
        ; 32 bits of each product are then recombined with pshufd and punpckldq.
    314 ; CHECK-LABEL: test_mul:
    315 ; CHECK:       ## BB#0:
    316 ; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
    317 ; CHECK-NEXT:    pmuludq %xmm1, %xmm0
    318 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
    319 ; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
    320 ; CHECK-NEXT:    pmuludq %xmm2, %xmm1
    321 ; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
    322 ; CHECK-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    323 ; CHECK-NEXT:    retl
    324   %m = mul <4 x i32> %x, %y
    325   ret <4 x i32> %m
    326 }
    327