Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -march=x86 -mattr=+sse,+sse2,+sse41 | FileCheck %s
      2 
      3 target datalayout = "e-p:32:32"	; 'e' = little-endian; 'p:32:32' = 32-bit pointers with 32-bit ABI alignment
      4 target triple = "i686-apple-darwin8.7.2"	; 32-bit x86 Darwin target; the CHECK lines in this file use LCPI<n>_ labels and (%eax) operands
      5 
      6 define i16 @test1(float %f) nounwind {	; test1: chain of scalar SSE intrinsics whose second operands are constant vectors; the CHECK lines expect subss/mulss/minss to each reference an LCPI0_ label
      7 	%tmp = insertelement <4 x float> undef, float %f, i32 0		; lane 0 = %f
      8 	%tmp10 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 1		; lane 1 = 0.0
      9 	%tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 2		; lane 2 = 0.0
     10 	%tmp12 = insertelement <4 x float> %tmp11, float 0.000000e+00, i32 3		; lane 3 = 0.0, so the vector is <%f, 0, 0, 0>
     11 	%tmp28 = tail call <4 x float> @llvm.x86.sse.sub.ss( <4 x float> %tmp12, <4 x float> < float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )		; scalar subtract; the constant's lane 0 is 1.0
     12 	%tmp37 = tail call <4 x float> @llvm.x86.sse.mul.ss( <4 x float> %tmp28, <4 x float> < float 5.000000e-01, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )		; scalar multiply; the constant's lane 0 is 0.5
     13 	%tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp37, <4 x float> < float 6.553500e+04, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )		; scalar min against 65535.0 (upper bound)
     14 	%tmp59 = tail call <4 x float> @llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> zeroinitializer )		; scalar max against an all-zero vector (lower bound); no CHECK line covers this call
     15 	%tmp.upgrd.1 = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 )		; vector -> i32 conversion intrinsic; no CHECK line covers this call
     16 	%tmp69 = trunc i32 %tmp.upgrd.1 to i16		; narrow the i32 result to the i16 return type
     17 	ret i16 %tmp69
     18 ; CHECK: test1:
     19 ; CHECK: subss	LCPI0_
     20 ; CHECK: mulss	LCPI0_
     21 ; CHECK: minss	LCPI0_
     22 }
     23 
     24 define i16 @test2(float %f) nounwind {	; test2: same computation as test1, but the subtract/multiply are plain IR fsub/fmul and the min/max constant vectors leave lanes 1-3 undef; the CHECK lines expect LCPI1_ operands
     25 	%tmp28 = fsub float %f, 1.000000e+00		; %f - 1.0; the CHECK below expects addss, presumably because the subtract is emitted as an add of -1.0 -- TODO confirm
     26 	%tmp37 = fmul float %tmp28, 5.000000e-01		; multiply by 0.5
     27 	%tmp375 = insertelement <4 x float> undef, float %tmp37, i32 0		; lane 0 = scalar result; lanes 1-3 stay undef
     28 	%tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp375, <4 x float> < float 6.553500e+04, float undef, float undef, float undef > )		; scalar min against 65535.0 (upper bound); only lane 0 of the constant is defined
     29 	%tmp59 = tail call <4 x float> @llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> < float 0.000000e+00, float undef, float undef, float undef > )		; scalar max against 0.0 (lower bound); no CHECK line covers this call
     30 	%tmp = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 )		; vector -> i32 conversion intrinsic; no CHECK line covers this call
     31 	%tmp69 = trunc i32 %tmp to i16		; narrow the i32 result to the i16 return type
     32 	ret i16 %tmp69
     33 ; CHECK: test2:
     34 ; CHECK: addss	LCPI1_
     35 ; CHECK: mulss	LCPI1_
     36 ; CHECK: minss	LCPI1_
     37 }
     38 
     39 declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)	; called by test1 only
     40 
     41 declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>)	; called by test1 only
     42 
     43 declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)	; called by test1 and test2
     44 
     45 declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)	; called by test1 and test2
     46 
     47 declare i32 @llvm.x86.sse.cvttss2si(<4 x float>)	; called by test1 and test2; produces the i32 that is truncated to i16
     48 
     49 
     50 declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32)	; called by test3 and test4, both passing 4 as the i32 operand
     51 declare <4 x float> @f()	; external function with no body in this file; called by test4 between its load and round.ss
     52 
     53 define <4 x float> @test3(<4 x float> %A, float *%b, i32 %C) nounwind {	; test3: a scalar load feeds lane 0 of round.ss's second operand; the CHECK expects roundss to take a memory operand directly. %C is unused.
     54   %a = load float *%b  ; scalar load from %b
     55   %B = insertelement <4 x float> undef, float %a, i32 0  ; lane 0 = loaded value; lanes 1-3 stay undef
     56   %X = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %A, <4 x float> %B, i32 4)  ; the i32 4 operand matches the $4 immediate in the CHECK line
     57   ret <4 x float> %X
     58 ; CHECK: test3:
     59 ; CHECK: roundss	$4, (%eax), %xmm0
     60 }
     61 
     62 define <4 x float> @test4(<4 x float> %A, float *%b, i32 %C) nounwind {	; test4: same load as test3, but a call to @f sits between the load and round.ss; the CHECK lines expect a separate movss load, then the call, then roundss with register operands. %A and %C are unused.
     63   %a = load float *%b  ; scalar load from %b, placed before the call
     64   %B = insertelement <4 x float> undef, float %a, i32 0  ; lane 0 = loaded value; lanes 1-3 stay undef
     65   %q = call <4 x float> @f()  ; result becomes round.ss's first operand; presumably this call is what keeps the load from being folded into roundss -- TODO confirm
     66   %X = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %q, <4 x float> %B, i32 4)  ; the i32 4 operand matches the $4 immediate in the CHECK line
     67   ret <4 x float> %X
     68 ; CHECK: test4:
     69 ; CHECK: movss	(%eax), %xmm
     70 ; CHECK: call
     71 ; CHECK: roundss $4, %xmm{{.*}}, %xmm0
     72 }
     73