Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mattr=+sse2
      2 ; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mattr=+sse2 -relocation-model=pic -disable-fp-elim
      3 ; PR3154
      4 
      ; Test body for PR3154. The RUN lines at the top of the file only invoke llc
      ; (once with default settings, once with -relocation-model=pic and
      ; -disable-fp-elim) and do not inspect the output, so the test passes when
      ; llc compiles this function successfully.
      ;
      ; Arguments:
      ;   %data  - i32 input array; its address feeds the inline asm in bb20/bb22.
      ;   %len   - element count; drives buffer sizing, parity dispatch and loop bounds.
      ;   %lag   - number of leading padding slots; also bounds the bb30 loop.
      ;   %autoc - double output array; written by the asm blocks in bb31/bb33
      ;            through indirect-memory ("=*m") output operands.
      ;
      ; Most of the work happens in GCC-style inline asm that names %xmm0-%xmm7
      ; directly and references the external symbols ff_pd_1 / ff_pd_2, which are
      ; not defined in this file. NOTE(review): the asm blocks list only
      ; dirflag/fpsr/flags as clobbers, not the xmm registers they write.
      5 define void @ff_flac_compute_autocorr_sse2(i32* %data, i32 %len, i32 %lag, double* %autoc) nounwind {
      6 entry:
      ; %c is a stack slot used to hand one double to the first asm block.
      7 	%c = alloca double, align 8		; <double*> [#uses=2]
      ; Dynamic stack buffer of (%len + 2 + %lag) doubles.
      8 	%0 = add i32 %len, 2		; <i32> [#uses=1]
      9 	%1 = add i32 %0, %lag		; <i32> [#uses=1]
     10 	%2 = alloca double, i32 %1		; <double*> [#uses=2]
      ; %3 = &buffer[%lag]. Test bit 3 of that address: if it is clear, use %3
      ; as the working pointer; otherwise go through %bb, which picks the next
      ; double (8 bytes further) instead.
     11 	%3 = getelementptr double* %2, i32 %lag		; <double*> [#uses=2]
     12 	%4 = ptrtoint double* %3 to i32		; <i32> [#uses=1]
     13 	%5 = and i32 %4, 8		; <i32> [#uses=1]
     14 	%6 = icmp eq i32 %5, 0		; <i1> [#uses=1]
     15 	br i1 %6, label %bb19, label %bb
     16 
     17 bb:		; preds = %entry
      ; Alternative working pointer: &buffer[%lag + 1].
     18 	%.sum = add i32 %lag, 1		; <i32> [#uses=1]
     19 	%7 = getelementptr double* %2, i32 %.sum		; <double*> [#uses=1]
     20 	br label %bb19
     21 
     22 bb19:		; preds = %bb, %entry
      ; %data15.0 is the working pointer into the stack buffer chosen above.
     23 	%data15.0 = phi double* [ %7, %bb ], [ %3, %entry ]		; <double*> [#uses=5]
      ; Store 2.0 / (%len - 1.0) into %c.
     24 	%8 = sitofp i32 %len to double		; <double> [#uses=1]
     25 	%9 = fsub double %8, 1.000000e+00		; <double> [#uses=1]
     26 	%10 = fdiv double 2.000000e+00, %9		; <double> [#uses=1]
     27 	store double %10, double* %c, align 8
      ; Initial values for the two counter registers of the bb20/bb22 asm loops:
      ;   %12 = (%len >> 1) * -4      (tied to asm operand $0)
      ;   %14 = (%len << 1) & -4      (tied to asm operand $1)
     28 	%11 = ashr i32 %len, 1		; <i32> [#uses=3]
     29 	%12 = mul i32 %11, -4		; <i32> [#uses=2]
     30 	%13 = shl i32 %len, 1		; <i32> [#uses=1]
     31 	%14 = and i32 %13, -4		; <i32> [#uses=2]
      ; Setup asm: $0 is %c as an indirect memory input ("*m"). Loads it into
      ; %xmm7, loads ff_pd_1 into %xmm6 and ff_pd_2 into %xmm5, then adjusts
      ; %xmm7 with movlhps/subpd/addsd. These registers are read again by the
      ; asm blocks in bb20/bb22.
     32 	call void asm sideeffect "movsd   $0,     %xmm7                \0A\09movapd  ff_pd_1, %xmm6     \0A\09movapd  ff_pd_2, %xmm5     \0A\09movlhps %xmm7, %xmm7                \0A\09subpd   %xmm5, %xmm7                \0A\09addsd   %xmm6, %xmm7                \0A\09", "*m,~{dirflag},~{fpsr},~{flags}"(double* %c) nounwind
      ; Dispatch on the low bit of %len: zero -> bb22, non-zero -> bb20.
     33 	%15 = and i32 %len, 1		; <i32> [#uses=1]
     34 	%toBool = icmp eq i32 %15, 0		; <i1> [#uses=1]
      ; Base pointers for the asm loops, both offset by %len >> 1 elements.
     35 	%16 = getelementptr double* %data15.0, i32 %11		; <double*> [#uses=2]
     36 	%17 = getelementptr i32* %data, i32 %11		; <i32*> [#uses=2]
     37 	br i1 %toBool, label %bb22, label %bb20
     38 
     39 bb20:		; preds = %bb19
      ; Asm loop for the (%len & 1) != 0 case. Operand mapping from the
      ; constraint string "=&r,=&r,r,r,0,1":
      ;   $0, $1 - early-clobber register outputs, initialised from %12 and %14
      ;            via the tied inputs "0" and "1"
      ;   $2     - %16 (double destination base)
      ;   $3     - %17 (i32 source base)
      ; Each pass converts i32 pairs with cvtpi2pd, multiplies them, and stores
      ; to the double buffer; $0 steps up by 8 and $1 steps down by 8 until
      ; "jl 1b" falls through. The second load uses offset -1*4 and the second
      ; store is an unaligned movupd at offset -1*8. The { i32, i32 } result is
      ; unused.
     40 	%asmtmp = call { i32, i32 } asm sideeffect "1:                                    \0A\09movapd   %xmm7,  %xmm1              \0A\09mulpd    %xmm1,  %xmm1              \0A\09movapd   %xmm6,  %xmm0              \0A\09subpd    %xmm1,  %xmm0              \0A\09pshufd   $$0x4e,   %xmm0, %xmm1      \0A\09cvtpi2pd ($3,$0), %xmm2              \0A\09cvtpi2pd -1*4($3,$1), %xmm3   \0A\09mulpd    %xmm0,  %xmm2              \0A\09mulpd    %xmm1,  %xmm3              \0A\09movapd   %xmm2, ($2,$0,2)            \0A\09movupd    %xmm3, -1*8($2,$1,2) \0A\09subpd    %xmm5,  %xmm7              \0A\09sub      $$8,      $1                  \0A\09add      $$8,      $0                  \0A\09jl 1b                                 \0A\09", "=&r,=&r,r,r,0,1,~{dirflag},~{fpsr},~{flags}"(double* %16, i32* %17, i32 %12, i32 %14) nounwind		; <{ i32, i32 }> [#uses=0]
     41 	br label %bb28.preheader
     42 
     43 bb22:		; preds = %bb19
      ; Asm loop for the (%len & 1) == 0 case. Same operands and constraints as
      ; the bb20 block; the template differs only in the second load offset
      ; (-2*4 instead of -1*4) and in the second store, which is an aligned
      ; movapd at offset -2*8 instead of movupd at -1*8.
     44 	%asmtmp23 = call { i32, i32 } asm sideeffect "1:                                    \0A\09movapd   %xmm7,  %xmm1              \0A\09mulpd    %xmm1,  %xmm1              \0A\09movapd   %xmm6,  %xmm0              \0A\09subpd    %xmm1,  %xmm0              \0A\09pshufd   $$0x4e,   %xmm0, %xmm1      \0A\09cvtpi2pd ($3,$0), %xmm2              \0A\09cvtpi2pd -2*4($3,$1), %xmm3   \0A\09mulpd    %xmm0,  %xmm2              \0A\09mulpd    %xmm1,  %xmm3              \0A\09movapd   %xmm2, ($2,$0,2)            \0A\09movapd    %xmm3, -2*8($2,$1,2) \0A\09subpd    %xmm5,  %xmm7              \0A\09sub      $$8,      $1                  \0A\09add      $$8,      $0                  \0A\09jl 1b                                 \0A\09", "=&r,=&r,r,r,0,1,~{dirflag},~{fpsr},~{flags}"(double* %16, i32* %17, i32 %12, i32 %14) nounwind		; <{ i32, i32 }> [#uses=0]
     45 	br label %bb28.preheader
     46 
     47 bb28.preheader:		; preds = %bb22, %bb20
      ; %18 = (%lag > 0). It guards both the zero-fill loop (bb27) and the
      ; accumulation loop (bb.nph/bb30).
     48 	%18 = icmp sgt i32 %lag, 0		; <i1> [#uses=2]
     49 	br i1 %18, label %bb27, label %bb29
     50 
     51 bb27:		; preds = %bb27, %bb28.preheader
      ; Zero the %lag doubles immediately below the working pointer:
      ; stores 0.0 to %data15.0[j - %lag] for j = 0 .. %lag-1.
     52 	%j4.042 = phi i32 [ 0, %bb28.preheader ], [ %indvar.next45, %bb27 ]		; <i32> [#uses=2]
     53 	%19 = sub i32 %j4.042, %lag		; <i32> [#uses=1]
     54 	%20 = getelementptr double* %data15.0, i32 %19		; <double*> [#uses=1]
     55 	store double 0.000000e+00, double* %20, align 8
     56 	%indvar.next45 = add i32 %j4.042, 1		; <i32> [#uses=2]
     57 	%exitcond = icmp eq i32 %indvar.next45, %lag		; <i1> [#uses=1]
     58 	br i1 %exitcond, label %bb29, label %bb27
     59 
     60 bb29:		; preds = %bb27, %bb28.preheader
      ; %21 = &%data15.0[%len]; that slot is zeroed, and %21 is later passed to
      ; the asm blocks in bb31/bb33 as a base pointer.
     61 	%21 = getelementptr double* %data15.0, i32 %len		; <double*> [#uses=3]
     62 	store double 0.000000e+00, double* %21, align 8
     63 	br i1 %18, label %bb.nph, label %bb37
     64 
     65 bb.nph:		; preds = %bb29
      ; Loop-invariant values for bb30:
      ;   %22 = %len * -8   (initial value of the asm byte-offset counter)
      ;   %23 = %lag - 2    (j value that selects the three-output variant)
     66 	%22 = mul i32 %len, -8		; <i32> [#uses=2]
     67 	%23 = add i32 %lag, -2		; <i32> [#uses=1]
     68 	br label %bb30
     69 
     70 bb30:		; preds = %bb35, %bb.nph
      ; j (%j4.141) = 2 * %indvar, so it advances by 2 per iteration.
      ; When j == %lag - 2 take bb31 (writes three results), else bb33 (two).
     71 	%indvar = phi i32 [ 0, %bb.nph ], [ %indvar.next, %bb35 ]		; <i32> [#uses=2]
     72 	%j4.141 = shl i32 %indvar, 1		; <i32> [#uses=8]
     73 	%24 = icmp eq i32 %23, %j4.141		; <i1> [#uses=1]
     74 	%25 = or i32 %j4.141, 1		; <i32> [#uses=2]
     75 	br i1 %24, label %bb31, label %bb33
     76 
     77 bb31:		; preds = %bb30
      ; Three-output variant. Operand mapping from "=&r,=*m,=*m,=*m,r,r,0":
      ;   $0         - early-clobber register output, initialised from %22
      ;   $1, $2, $3 - indirect memory outputs: %autoc[j], %autoc[j|1], %autoc[j+2]
      ;   $4         - %21 (&%data15.0[%len])
      ;   $5         - %27 (&%data15.0[%len - j])
      ; The asm seeds %xmm0-%xmm2 from ff_pd_1, accumulates packed products
      ; while stepping $0 by 16 until "jl 1b" falls through, folds the high
      ; halves into the low halves, and stores the three scalars.
     78 	%26 = add i32 %j4.141, 2		; <i32> [#uses=2]
     79 	%.sum38 = sub i32 %len, %j4.141		; <i32> [#uses=1]
     80 	%27 = getelementptr double* %data15.0, i32 %.sum38		; <double*> [#uses=1]
     81 	%28 = getelementptr double* %autoc, i32 %j4.141		; <double*> [#uses=1]
     82 	%29 = getelementptr double* %autoc, i32 %25		; <double*> [#uses=1]
     83 	%30 = getelementptr double* %autoc, i32 %26		; <double*> [#uses=1]
     84 	%asmtmp32 = call i32 asm sideeffect "movsd    ff_pd_1, %xmm0 \0A\09movsd    ff_pd_1, %xmm1 \0A\09movsd    ff_pd_1, %xmm2 \0A\091:                                 \0A\09movapd   ($4,$0), %xmm3           \0A\09movupd -8($5,$0), %xmm4           \0A\09movapd   ($5,$0), %xmm5           \0A\09mulpd     %xmm3, %xmm4           \0A\09mulpd     %xmm3, %xmm5           \0A\09mulpd -16($5,$0), %xmm3           \0A\09addpd     %xmm4, %xmm1           \0A\09addpd     %xmm5, %xmm0           \0A\09addpd     %xmm3, %xmm2           \0A\09add       $$16,    $0               \0A\09jl 1b                              \0A\09movhlps   %xmm0, %xmm3           \0A\09movhlps   %xmm1, %xmm4           \0A\09movhlps   %xmm2, %xmm5           \0A\09addsd     %xmm3, %xmm0           \0A\09addsd     %xmm4, %xmm1           \0A\09addsd     %xmm5, %xmm2           \0A\09movsd     %xmm0, $1               \0A\09movsd     %xmm1, $2               \0A\09movsd     %xmm2, $3               \0A\09", "=&r,=*m,=*m,=*m,r,r,0,~{dirflag},~{fpsr},~{flags}"(double* %28, double* %29, double* %30, double* %21, double* %27, i32 %22) nounwind		; <i32> [#uses=0]
     85 	br label %bb35
     86 
     87 bb33:		; preds = %bb30
      ; Two-output variant. Operand mapping from "=&r,=*m,=*m,r,r,0":
      ;   $0     - early-clobber register output, initialised from %22
      ;   $1, $2 - indirect memory outputs: %autoc[j], %autoc[j|1]
      ;   $3     - %21 (&%data15.0[%len])
      ;   $4     - %31 (&%data15.0[%len - j])
     88 	%.sum39 = sub i32 %len, %j4.141		; <i32> [#uses=1]
     89 	%31 = getelementptr double* %data15.0, i32 %.sum39		; <double*> [#uses=1]
     90 	%32 = getelementptr double* %autoc, i32 %j4.141		; <double*> [#uses=1]
     91 	%33 = getelementptr double* %autoc, i32 %25		; <double*> [#uses=1]
     92 	%asmtmp34 = call i32 asm sideeffect "movsd    ff_pd_1, %xmm0 \0A\09movsd    ff_pd_1, %xmm1 \0A\091:                                 \0A\09movapd   ($3,$0), %xmm3           \0A\09movupd -8($4,$0), %xmm4           \0A\09mulpd     %xmm3, %xmm4           \0A\09mulpd    ($4,$0), %xmm3           \0A\09addpd     %xmm4, %xmm1           \0A\09addpd     %xmm3, %xmm0           \0A\09add       $$16,    $0               \0A\09jl 1b                              \0A\09movhlps   %xmm0, %xmm3           \0A\09movhlps   %xmm1, %xmm4           \0A\09addsd     %xmm3, %xmm0           \0A\09addsd     %xmm4, %xmm1           \0A\09movsd     %xmm0, $1               \0A\09movsd     %xmm1, $2               \0A\09", "=&r,=*m,=*m,r,r,0,~{dirflag},~{fpsr},~{flags}"(double* %32, double* %33, double* %21, double* %31, i32 %22) nounwind		; <i32> [#uses=0]
      ; j + 2, computed here so that bb35 can merge it with %26 from bb31.
     93 	%.pre = add i32 %j4.141, 2		; <i32> [#uses=1]
     94 	br label %bb35
     95 
     96 bb35:		; preds = %bb33, %bb31
      ; Loop latch: %.pre-phi is j + 2 from either predecessor; keep iterating
      ; while it is (signed) less than %lag.
     97 	%.pre-phi = phi i32 [ %.pre, %bb33 ], [ %26, %bb31 ]		; <i32> [#uses=1]
     98 	%34 = icmp slt i32 %.pre-phi, %lag		; <i1> [#uses=1]
     99 	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=1]
    100 	br i1 %34, label %bb30, label %bb37
    101 
    102 bb37:		; preds = %bb35, %bb29
    103 	ret void
    104 }
    105