Home | History | Annotate | Download | only in X86
      1 ; Tests for SSE2 and below, without SSE3+.
      2 ; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=pentium4 -O3 | FileCheck %s
      3 
      4 define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind  {
        ; Replaces element 0 of the vector loaded from %A with the scalar %B and
        ; stores the result to %r. Mask <2, 1> picks element 0 of %tmp7 (which
        ; holds %B) and element 1 of %tmp3. The expected output below merges the
        ; low half straight from the argument's stack slot with movlpd.
      5 	%tmp3 = load <2 x double>* %A, align 16
      6 	%tmp7 = insertelement <2 x double> undef, double %B, i32 0
      7 	%tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 >
      8 	store <2 x double> %tmp9, <2 x double>* %r, align 16
      9 	ret void
     10         
     11 ; CHECK: test1:
     12 ; CHECK: 	movl	8(%esp), %eax
     13 ; CHECK-NEXT: 	movapd	(%eax), %xmm0
     14 ; CHECK-NEXT: 	movlpd	12(%esp), %xmm0
     15 ; CHECK-NEXT: 	movl	4(%esp), %eax
     16 ; CHECK-NEXT: 	movapd	%xmm0, (%eax)
     17 ; CHECK-NEXT: 	ret
     18 }
     19 
     20 define void @test2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind  {
        ; Replaces element 1 of the vector loaded from %A with the scalar %B and
        ; stores the result to %r. Mask <0, 2> keeps element 0 of %tmp3 and takes
        ; element 0 of %tmp7 (which holds %B) as the new element 1. The expected
        ; output below merges the high half from the stack slot with movhpd.
     21 	%tmp3 = load <2 x double>* %A, align 16
     22 	%tmp7 = insertelement <2 x double> undef, double %B, i32 0
     23 	%tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 0, i32 2 >
     24 	store <2 x double> %tmp9, <2 x double>* %r, align 16
     25 	ret void
     26         
     27 ; CHECK: test2:
     28 ; CHECK: 	movl	8(%esp), %eax
     29 ; CHECK-NEXT: 	movapd	(%eax), %xmm0
     30 ; CHECK-NEXT: 	movhpd	12(%esp), %xmm0
     31 ; CHECK-NEXT: 	movl	4(%esp), %eax
     32 ; CHECK-NEXT: 	movapd	%xmm0, (%eax)
     33 ; CHECK-NEXT: 	ret
     34 }
     35 
     36 
     37 define void @test3(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) nounwind {
        ; Builds the vector { A[0], B[0], A[1], B[1] } one element at a time with
        ; extractelement/insertelement and stores it to %res. The expected output
        ; below contains an unpcklps for this interleave of the two low halves.
     38 	%tmp = load <4 x float>* %B		; <<4 x float>> [#uses=2]
     39 	%tmp3 = load <4 x float>* %A		; <<4 x float>> [#uses=2]
     40 	%tmp.upgrd.1 = extractelement <4 x float> %tmp3, i32 0		; <float> [#uses=1]
     41 	%tmp7 = extractelement <4 x float> %tmp, i32 0		; <float> [#uses=1]
     42 	%tmp8 = extractelement <4 x float> %tmp3, i32 1		; <float> [#uses=1]
     43 	%tmp9 = extractelement <4 x float> %tmp, i32 1		; <float> [#uses=1]
     44 	%tmp10 = insertelement <4 x float> undef, float %tmp.upgrd.1, i32 0		; <<4 x float>> [#uses=1]
     45 	%tmp11 = insertelement <4 x float> %tmp10, float %tmp7, i32 1		; <<4 x float>> [#uses=1]
     46 	%tmp12 = insertelement <4 x float> %tmp11, float %tmp8, i32 2		; <<4 x float>> [#uses=1]
     47 	%tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3		; <<4 x float>> [#uses=1]
     48 	store <4 x float> %tmp13, <4 x float>* %res
     49 	ret void
     50 ; CHECK: @test3
     51 ; CHECK: 	unpcklps	
     52 }
     53 
     54 define void @test4(<4 x float> %X, <4 x float>* %res) nounwind {
        ; Shuffles %X against an undef second operand with mask <2, 6, 3, 7> and
        ; stores the result to %res. Indices 6 and 7 refer to the undef operand,
        ; so only result elements 0 and 2 (X[2] and X[3]) are defined. The
        ; expected output below is a single-source pshufd with immediate 50.
     55 	%tmp5 = shufflevector <4 x float> %X, <4 x float> undef, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >		; <<4 x float>> [#uses=1]
     56 	store <4 x float> %tmp5, <4 x float>* %res
     57 	ret void
     58 ; CHECK: @test4
     59 ; CHECK: 	pshufd	$50, %xmm0, %xmm0
     60 }
     61 
     62 define <4 x i32> @test5(i8** %ptr) nounwind {
        ; Loads a float through the pointer stored at %ptr, places it in element 0
        ; of a vector whose other elements are 0.0, then widens it in two steps:
        ; the first shuffle interleaves the low eight bytes with zero bytes, the
        ; second interleaves zero words with the low four words of that result.
        ; The expected output is, in order, pxor, punpcklbw and punpcklwd.
     63 ; CHECK: test5:
     64 ; CHECK: pxor
     65 ; CHECK: punpcklbw
     66 ; CHECK: punpcklwd
     67 
     68 	%tmp = load i8** %ptr		; <i8*> [#uses=1]
     69 	%tmp.upgrd.1 = bitcast i8* %tmp to float*		; <float*> [#uses=1]
     70 	%tmp.upgrd.2 = load float* %tmp.upgrd.1		; <float> [#uses=1]
     71 	%tmp.upgrd.3 = insertelement <4 x float> undef, float %tmp.upgrd.2, i32 0		; <<4 x float>> [#uses=1]
     72 	%tmp9 = insertelement <4 x float> %tmp.upgrd.3, float 0.000000e+00, i32 1		; <<4 x float>> [#uses=1]
     73 	%tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 2		; <<4 x float>> [#uses=1]
     74 	%tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 3		; <<4 x float>> [#uses=1]
     75 	%tmp21 = bitcast <4 x float> %tmp11 to <16 x i8>		; <<16 x i8>> [#uses=1]
     76 	%tmp22 = shufflevector <16 x i8> %tmp21, <16 x i8> zeroinitializer, <16 x i32> < i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23 >		; <<16 x i8>> [#uses=1]
     77 	%tmp31 = bitcast <16 x i8> %tmp22 to <8 x i16>		; <<8 x i16>> [#uses=1]
     78 	%tmp.upgrd.4 = shufflevector <8 x i16> zeroinitializer, <8 x i16> %tmp31, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 >		; <<8 x i16>> [#uses=1]
     79 	%tmp36 = bitcast <8 x i16> %tmp.upgrd.4 to <4 x i32>		; <<4 x i32>> [#uses=1]
     80 	ret <4 x i32> %tmp36
     81 }
     82 
     83 define void @test6(<4 x float>* %res, <4 x float>* %A) nounwind {
        ; Shuffles the vector loaded from %A against an undef second operand with
        ; mask <0, 5, 6, 7>; indices 5-7 refer to the undef operand, so only
        ; element 0 of the result is defined. The expected output below is an
        ; aligned load followed by an aligned store.
     84         %tmp1 = load <4 x float>* %A            ; <<4 x float>> [#uses=1]
     85         %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >          ; <<4 x float>> [#uses=1]
     86         store <4 x float> %tmp2, <4 x float>* %res
     87         ret void
     88         
     89 ; CHECK: test6:
     90 ; CHECK: 	movaps	(%eax), %xmm0
     91 ; CHECK:	movaps	%xmm0, (%eax)
     92 }
     93 
     94 define void @test7() nounwind {
        ; Uses unnamed values: %1 is an all-zero <4 x i32> reinterpreted as
        ; <4 x float>, and %2 splats element 0 of %1 (the mask is all zeros).
        ; The result is stored to the null pointer. The expected output below
        ; zeroes a register with xorps and stores it to absolute address 0.
     95         bitcast <4 x i32> zeroinitializer to <4 x float>                ; <<4 x float>>:1 [#uses=1]
     96         shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> zeroinitializer         ; <<4 x float>>:2 [#uses=1]
     97         store <4 x float> %2, <4 x float>* null
     98         ret void
     99         
    100 ; CHECK: test7:
    101 ; CHECK:	xorps	%xmm0, %xmm0
    102 ; CHECK:	movaps	%xmm0, 0
    103 }
    104 
    105 @x = external global [4 x i32]
        ; @x is only declared here (external); test8 below reads all four elements.
    106 
    107 define <2 x i64> @test8() nounwind {
        ; Loads the four i32 elements of @x individually, assembles them in order
        ; into a <4 x i32>, and returns it reinterpreted as <2 x i64>. No alignment
        ; is stated for @x or the loads; the expected output below is a movups
        ; load from (%eax).
    108 	%tmp = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 0)		; <i32> [#uses=1]
    109 	%tmp3 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 1)		; <i32> [#uses=1]
    110 	%tmp5 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 2)		; <i32> [#uses=1]
    111 	%tmp7 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 3)		; <i32> [#uses=1]
    112 	%tmp.upgrd.1 = insertelement <4 x i32> undef, i32 %tmp, i32 0		; <<4 x i32>> [#uses=1]
    113 	%tmp13 = insertelement <4 x i32> %tmp.upgrd.1, i32 %tmp3, i32 1		; <<4 x i32>> [#uses=1]
    114 	%tmp14 = insertelement <4 x i32> %tmp13, i32 %tmp5, i32 2		; <<4 x i32>> [#uses=1]
    115 	%tmp15 = insertelement <4 x i32> %tmp14, i32 %tmp7, i32 3		; <<4 x i32>> [#uses=1]
    116 	%tmp16 = bitcast <4 x i32> %tmp15 to <2 x i64>		; <<2 x i64>> [#uses=1]
    117 	ret <2 x i64> %tmp16
    118 ; CHECK: test8:
    119 ; CHECK: movups	(%eax), %xmm0
    120 }
    121 
    122 define <4 x float> @test9(i32 %dummy, float %a, float %b, float %c, float %d) nounwind {
        ; Builds { %a, %b, %c, %d } from four float arguments that follow an
        ; unused i32 argument. The expected output below is a movups load of
        ; all four floats from 8(%esp).
        ; NOTE(review): presumably %dummy exists to push the floats off 16-byte
        ; alignment (compare test10, which expects movaps) - confirm.
    123 	%tmp = insertelement <4 x float> undef, float %a, i32 0		; <<4 x float>> [#uses=1]
    124 	%tmp11 = insertelement <4 x float> %tmp, float %b, i32 1		; <<4 x float>> [#uses=1]
    125 	%tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2		; <<4 x float>> [#uses=1]
    126 	%tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3		; <<4 x float>> [#uses=1]
    127 	ret <4 x float> %tmp13
    128 ; CHECK: test9:
    129 ; CHECK: movups	8(%esp), %xmm0
    130 }
    131 
    132 define <4 x float> @test10(float %a, float %b, float %c, float %d) nounwind {
        ; Builds { %a, %b, %c, %d } from the four float arguments and returns it.
        ; The expected output below is a movaps load of all four floats from
        ; 4(%esp), i.e. the argument area is treated as 16-byte aligned there.
    133 	%tmp = insertelement <4 x float> undef, float %a, i32 0		; <<4 x float>> [#uses=1]
    134 	%tmp11 = insertelement <4 x float> %tmp, float %b, i32 1		; <<4 x float>> [#uses=1]
    135 	%tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2		; <<4 x float>> [#uses=1]
    136 	%tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3		; <<4 x float>> [#uses=1]
    137 	ret <4 x float> %tmp13
    138 ; CHECK: test10:
    139 ; CHECK: movaps	4(%esp), %xmm0
    140 }
    141 
    142 define <2 x double> @test11(double %a, double %b) nounwind {
        ; Builds { %a, %b } from the two double arguments and returns it. The
        ; expected output below is a movaps load of both doubles from 4(%esp).
    143 	%tmp = insertelement <2 x double> undef, double %a, i32 0		; <<2 x double>> [#uses=1]
    144 	%tmp7 = insertelement <2 x double> %tmp, double %b, i32 1		; <<2 x double>> [#uses=1]
    145 	ret <2 x double> %tmp7
    146 ; CHECK: test11:
    147 ; CHECK: movaps	4(%esp), %xmm0
    148 }
    149 
    150 define void @test12() nounwind {
        ; Loads a vector t from the null pointer and forms two shuffles of it:
        ; %tmp2 = { t[0], t[1], 1.0, 1.0 } and %tmp3 = { t[2], t[3], 0.0, 0.0 }.
        ; Their sum is stored back to null. The expected output below contains
        ; a movhlps followed by a shufps.
    151         %tmp1 = load <4 x float>* null          ; <<4 x float>> [#uses=2]
    152         %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 6, i32 7 >             ; <<4 x float>> [#uses=1]
    153         %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 6, i32 7 >                ; <<4 x float>> [#uses=1]
    154         %tmp4 = fadd <4 x float> %tmp2, %tmp3            ; <<4 x float>> [#uses=1]
    155         store <4 x float> %tmp4, <4 x float>* null
    156         ret void
    157 ; CHECK: test12:
    158 ; CHECK: movhlps
    159 ; CHECK: shufps
    160 }
    161 
    162 define void @test13(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
        ; Stores { B[1], C[0], B[1], C[1] } to %res (mask <1, 4, 1, 5> over the
        ; vectors loaded from %B and %C). %A is not used in the body. The
        ; expected output below is a shufps with immediate 69 and a memory
        ; operand, followed by a pshufd with immediate -40.
    163         %tmp3 = load <4 x float>* %B            ; <<4 x float>> [#uses=1]
    164         %tmp5 = load <4 x float>* %C            ; <<4 x float>> [#uses=1]
    165         %tmp11 = shufflevector <4 x float> %tmp3, <4 x float> %tmp5, <4 x i32> < i32 1, i32 4, i32 1, i32 5 >         ; <<4 x float>> [#uses=1]
    166         store <4 x float> %tmp11, <4 x float>* %res
    167         ret void
    168 ; CHECK: test13
    169 ; CHECK: shufps	$69, (%eax), %xmm0
    170 ; CHECK: pshufd	$-40, %xmm0, %xmm0
    171 }
    172 
    173 define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind {
        ; Computes x + y and x - y on the loaded vectors and returns the low two
        ; elements of the sum followed by the low two elements of the difference
        ; (mask <0, 1, 4, 5>). The expected output below uses register captures
        ; to require that subps and addps share a source register and that
        ; movlhps combines the subps result into the addps result.
    174         %tmp = load <4 x float>* %y             ; <<4 x float>> [#uses=2]
    175         %tmp5 = load <4 x float>* %x            ; <<4 x float>> [#uses=2]
    176         %tmp9 = fadd <4 x float> %tmp5, %tmp             ; <<4 x float>> [#uses=1]
    177         %tmp21 = fsub <4 x float> %tmp5, %tmp            ; <<4 x float>> [#uses=1]
    178         %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 >                ; <<4 x float>> [#uses=1]
    179         ret <4 x float> %tmp27
    180 ; CHECK: test14:
    181 ; CHECK: 	subps	[[X1:%xmm[0-9]+]], [[X2:%xmm[0-9]+]]
    182 ; CHECK: 	addps	[[X1]], [[X0:%xmm[0-9]+]]
    183 ; CHECK: 	movlhps	[[X2]], [[X0]]
    184 }
    185 
    186 define <4 x float> @test15(<4 x float>* %x, <4 x float>* %y) nounwind {
        ; Returns { x[2], x[3], y[2], y[3] }: the high halves of the vectors
        ; loaded from %x and %y (mask <2, 3, 6, 7>). The expected output below
        ; is a movhlps from %xmm1 into %xmm0.
    187 entry:
    188         %tmp = load <4 x float>* %y             ; <<4 x float>> [#uses=1]
    189         %tmp3 = load <4 x float>* %x            ; <<4 x float>> [#uses=1]
    190         %tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x i32> < i32 2, i32 3, i32 6, i32 7 >           ; <<4 x float>> [#uses=1]
    191         ret <4 x float> %tmp4
    192 ; CHECK: test15:
    193 ; CHECK: 	movhlps	%xmm1, %xmm0
    194 }
    195 
    196 ; PR8900
    197 ; CHECK: test16:
    198 ; CHECK: unpcklpd
    199 ; CHECK: ret
    200 
    201 define  <2 x double> @test16(<4 x double> * nocapture %srcA, <2 x double>* nocapture %dst) {
        ; Loads the <4 x double> at index 3 from %srcA (32-byte aligned load) and
        ; returns its elements 0 and 2 as a <2 x double>. %dst is not used in the
        ; body. The expected output (listed above the function) is an unpcklpd
        ; before the return.
    202   %i5 = getelementptr inbounds <4 x double>* %srcA, i32 3
    203   %i6 = load <4 x double>* %i5, align 32
    204   %i7 = shufflevector <4 x double> %i6, <4 x double> undef, <2 x i32> <i32 0, i32 2>
    205   ret <2 x double> %i7
    206 }
    207 
    208 ; PR9009
    209 define fastcc void @test17() nounwind {
        ; Shuffles a partly-undef constant vector with a fully-undef inserted
        ; vector: mask <4, 5, 2, 3> takes elements 0-1 from %0 and keeps the two
        ; 32768 constants in elements 2-3. The result is stored through an undef
        ; pointer. No expected-output directives accompany this function in the
        ; visible source, so presumably it only has to compile without crashing.
    210 entry:
    211   %0 = insertelement <4 x i32> undef, i32 undef, i32 1
    212   %1 = shufflevector <4 x i32> <i32 undef, i32 undef, i32 32768, i32 32768>, <4 x i32> %0, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
    213   %2 = bitcast <4 x i32> %1 to <4 x float>
    214   store <4 x float> %2, <4 x float> * undef
    215   ret void
    216 }
    217 
    218 ; PR9210
    219 define <4 x float> @f(<4 x double>) nounwind {
        ; Truncates the unnamed <4 x double> argument (%0) to <4 x float> and
        ; returns it. No expected-output directives accompany this function in
        ; the visible source, so presumably it only has to compile without
        ; crashing.
    220 entry:
    221  %double2float.i = fptrunc <4 x double> %0 to <4 x float>
    222  ret <4 x float> %double2float.i
    223 }
    224 
    225