Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -regalloc=fast -optimize-regalloc=0 -march=x86 -mattr=+mmx | grep esi
      2 ; PR2082
      3 ; Local register allocator was refusing to use ESI, EDI, and EBP so it ran out of
      4 ; registers.
      5 define void @transpose4x4(i8* %dst, i8* %src, i32 %dst_stride, i32 %src_stride) {
        ; Transposes a 4x4 block of bytes with MMX inline asm.
        ;   %src / %src_stride : first source row and the byte distance between source rows.
        ;   %dst / %dst_stride : first destination row and the byte distance between destination rows.
        ; Four 32-bit rows are read from the source; destination row k receives byte k of
        ; source rows 0..3 (see the shuffle description above the asm call).
        ; Every argument is stored to its own stack slot and reloaded before each use.
        ; NOTE(review): keep the instruction sequence as is -- the RUN line runs the fast
        ; register allocator and greps the output for esi, so the test presumably depends on
        ; this exact shape; confirm before simplifying.
      6 entry:
        	; One stack slot per incoming argument.
      7 	%dst_addr = alloca i8*		; <i8**> [#uses=5]
      8 	%src_addr = alloca i8*		; <i8**> [#uses=5]
      9 	%dst_stride_addr = alloca i32		; <i32*> [#uses=4]
     10 	%src_stride_addr = alloca i32		; <i32*> [#uses=4]
        	; No-op bitcast with no uses. NOTE(review): looks like a front-end
        	; insertion-point marker -- confirm.
     11 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
        	; Copy the arguments into their slots.
     12 	store i8* %dst, i8** %dst_addr
     13 	store i8* %src, i8** %src_addr
     14 	store i32 %dst_stride, i32* %dst_stride_addr
     15 	store i32 %src_stride, i32* %src_stride_addr
        	; Destination row pointers, each cast to i32* so a row is one 32-bit word.
        	; Row 0: dst + 0
     16 	%tmp = load i8** %dst_addr, align 4		; <i8*> [#uses=1]
     17 	%tmp1 = getelementptr i8* %tmp, i32 0		; <i8*> [#uses=1]
     18 	%tmp12 = bitcast i8* %tmp1 to i32*		; <i32*> [#uses=1]
        	; Row 1: dst + dst_stride
     19 	%tmp3 = load i8** %dst_addr, align 4		; <i8*> [#uses=1]
     20 	%tmp4 = load i32* %dst_stride_addr, align 4		; <i32> [#uses=1]
     21 	%tmp5 = getelementptr i8* %tmp3, i32 %tmp4		; <i8*> [#uses=1]
     22 	%tmp56 = bitcast i8* %tmp5 to i32*		; <i32*> [#uses=1]
        	; Row 2: dst + 2 * dst_stride
     23 	%tmp7 = load i32* %dst_stride_addr, align 4		; <i32> [#uses=1]
     24 	%tmp8 = mul i32 %tmp7, 2		; <i32> [#uses=1]
     25 	%tmp9 = load i8** %dst_addr, align 4		; <i8*> [#uses=1]
     26 	%tmp10 = getelementptr i8* %tmp9, i32 %tmp8		; <i8*> [#uses=1]
     27 	%tmp1011 = bitcast i8* %tmp10 to i32*		; <i32*> [#uses=1]
        	; Row 3: dst + 3 * dst_stride
     28 	%tmp13 = load i32* %dst_stride_addr, align 4		; <i32> [#uses=1]
     29 	%tmp14 = mul i32 %tmp13, 3		; <i32> [#uses=1]
     30 	%tmp15 = load i8** %dst_addr, align 4		; <i8*> [#uses=1]
     31 	%tmp16 = getelementptr i8* %tmp15, i32 %tmp14		; <i8*> [#uses=1]
     32 	%tmp1617 = bitcast i8* %tmp16 to i32*		; <i32*> [#uses=1]
        	; Source row pointers, computed the same way from %src_addr / %src_stride_addr.
        	; Row 0: src + 0
     33 	%tmp18 = load i8** %src_addr, align 4		; <i8*> [#uses=1]
     34 	%tmp19 = getelementptr i8* %tmp18, i32 0		; <i8*> [#uses=1]
     35 	%tmp1920 = bitcast i8* %tmp19 to i32*		; <i32*> [#uses=1]
        	; Row 1: src + src_stride
     36 	%tmp21 = load i8** %src_addr, align 4		; <i8*> [#uses=1]
     37 	%tmp22 = load i32* %src_stride_addr, align 4		; <i32> [#uses=1]
     38 	%tmp23 = getelementptr i8* %tmp21, i32 %tmp22		; <i8*> [#uses=1]
     39 	%tmp2324 = bitcast i8* %tmp23 to i32*		; <i32*> [#uses=1]
        	; Row 2: src + 2 * src_stride
     40 	%tmp25 = load i32* %src_stride_addr, align 4		; <i32> [#uses=1]
     41 	%tmp26 = mul i32 %tmp25, 2		; <i32> [#uses=1]
     42 	%tmp27 = load i8** %src_addr, align 4		; <i8*> [#uses=1]
     43 	%tmp28 = getelementptr i8* %tmp27, i32 %tmp26		; <i8*> [#uses=1]
     44 	%tmp2829 = bitcast i8* %tmp28 to i32*		; <i32*> [#uses=1]
        	; Row 3: src + 3 * src_stride
     45 	%tmp30 = load i32* %src_stride_addr, align 4		; <i32> [#uses=1]
     46 	%tmp31 = mul i32 %tmp30, 3		; <i32> [#uses=1]
     47 	%tmp32 = load i8** %src_addr, align 4		; <i8*> [#uses=1]
     48 	%tmp33 = getelementptr i8* %tmp32, i32 %tmp31		; <i8*> [#uses=1]
     49 	%tmp3334 = bitcast i8* %tmp33 to i32*		; <i32*> [#uses=1]
        	; MMX shuffle. Operands: $0-$3 are the four destination rows (indirect memory
        	; outputs, "=*m"); $4-$7 are the four source rows (indirect memory inputs, "*m").
        	;   movd $4..$7 -> mm0..mm3      load one 32-bit row per register
        	;   punpcklbw                    interleave bytes of rows 0/1 (mm0) and rows 2/3 (mm2)
        	;   movq mm0 -> mm1              keep a copy before the word interleave
        	;   punpcklwd / punpckhwd        mm0 = columns 0 and 1, mm1 = columns 2 and 3
        	;   movd / punpckhdq / movd      store the low dword, move the high dword down, store it
        	; The clobber list names only dirflag, fpsr and flags; mm0-mm3 are written by the
        	; template but are not listed.
        	; All eight pointer operands are inputs to this one statement. NOTE(review):
        	; presumably this is the register pressure the PR2082 comment refers to -- confirm.
     50 	call void asm sideeffect "movd  $4, %mm0                \0A\09movd  $5, %mm1                \0A\09movd  $6, %mm2                \0A\09movd  $7, %mm3                \0A\09punpcklbw %mm1, %mm0         \0A\09punpcklbw %mm3, %mm2         \0A\09movq %mm0, %mm1              \0A\09punpcklwd %mm2, %mm0         \0A\09punpckhwd %mm2, %mm1         \0A\09movd  %mm0, $0                \0A\09punpckhdq %mm0, %mm0         \0A\09movd  %mm0, $1                \0A\09movd  %mm1, $2                \0A\09punpckhdq %mm1, %mm1         \0A\09movd  %mm1, $3                \0A\09", "=*m,=*m,=*m,=*m,*m,*m,*m,*m,~{dirflag},~{fpsr},~{flags}"( i32* %tmp12, i32* %tmp56, i32* %tmp1011, i32* %tmp1617, i32* %tmp1920, i32* %tmp2324, i32* %tmp2829, i32* %tmp3334 ) nounwind 
        	; Branch to the single exit block.
     51 	br label %return
     52 
     53 return:		; preds = %entry
     54 	ret void
     55 }
     56