; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse2 -verify-machineinstrs | FileCheck %s

; Each function below pins the exact assembly llc is expected to emit for it.
; The assertions are generated by the script named in the NOTE line, so after
; an intentional codegen change they should be regenerated with that script
; rather than edited by hand.

; After tail duplication, two copies in an early exit BB can be cancelled out.
; rdar://10640363
;
; @t1 returns %a unchanged when %b is zero. Otherwise it repeatedly replaces the
; pair (a, b) with (b, a srem b) until the remainder is zero, and returns the
; divisor used by that last srem.
; In the expected assembly the early-exit block (LBB0_1) contains nothing but
; 'retq': the return value is already in %eax from the 'movl %edi, %eax' at the
; top of the entry block.
define i32 @t1(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: t1:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: testl %esi, %esi
; CHECK-NEXT: je LBB0_1
; CHECK-NEXT: ## %bb.2: ## %while.body.preheader
; CHECK-NEXT: movl %esi, %edx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB0_3: ## %while.body
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movl %edx, %ecx
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl %ecx
; CHECK-NEXT: testl %edx, %edx
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: jne LBB0_3
; CHECK-NEXT: ## %bb.4: ## %while.end
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: retq
; CHECK-NEXT: LBB0_1:
; CHECK-NEXT: retq
entry:
  ; Early exit: with a zero divisor the loop is skipped and %a is returned.
  %cmp1 = icmp eq i32 %b, 0
  br i1 %cmp1, label %while.end, label %while.body

while.body: ; preds = %entry, %while.body
  ; Dividend and divisor of this iteration. On the back edge the previous
  ; divisor becomes the dividend and the previous remainder the divisor.
  %a.addr.03 = phi i32 [ %b.addr.02, %while.body ], [ %a, %entry ]
  %b.addr.02 = phi i32 [ %rem, %while.body ], [ %b, %entry ]
  %rem = srem i32 %a.addr.03, %b.addr.02
  ; Leave the loop as soon as the remainder is zero.
  %cmp = icmp eq i32 %rem, 0
  br i1 %cmp, label %while.end, label %while.body

while.end: ; preds = %while.body, %entry
  ; %a on the early-exit path, otherwise the divisor of the final iteration.
  %a.addr.0.lcssa = phi i32 [ %a, %entry ], [ %b.addr.02, %while.body ]
  ret i32 %a.addr.0.lcssa
}

; Two movdqa (from phi-elimination) in the entry BB cancel out.
; rdar://10428165
;
; @t2 is a single shufflevector. Its mask defines only result lanes 2, 3 and 4
; (taken from %T0[7], %T0[2] and %T1[0] respectively); every other lane is
; undef. The expected assembly is three shuffle instructions followed by
; 'retq', with no register-to-register move.
define <8 x i16> @t2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
; CHECK-LABEL: t2:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,2,4,5,6,7]
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
entry:
  ; Mask indices 0-7 select from %T0 and 8-15 select from %T1.
  %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
  ret <8 x i16> %tmp8
}

; @t3 has the same control flow as @t1 but operates on i64 values and truncates
; the result to i32 before returning it. As in @t1, the expected early-exit
; block (LBB2_1) is a bare 'retq' that relies on the 'movq %rdi, %rax' emitted
; at the top of the entry block.
define i32 @t3(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: t3:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: testq %rsi, %rsi
; CHECK-NEXT: je LBB2_1
; CHECK-NEXT: ## %bb.2: ## %while.body.preheader
; CHECK-NEXT: movq %rsi, %rdx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB2_3: ## %while.body
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movq %rdx, %rcx
; CHECK-NEXT: cqto
; CHECK-NEXT: idivq %rcx
; CHECK-NEXT: testq %rdx, %rdx
; CHECK-NEXT: movq %rcx, %rax
; CHECK-NEXT: jne LBB2_3
; CHECK-NEXT: ## %bb.4: ## %while.end
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: retq
; CHECK-NEXT: LBB2_1:
; CHECK-NEXT: retq
entry:
  ; Early exit: with a zero divisor the loop is skipped.
  %cmp1 = icmp eq i64 %b, 0
  br i1 %cmp1, label %while.end, label %while.body

while.body: ; preds = %entry, %while.body
  ; Dividend and divisor of this iteration. On the back edge the previous
  ; divisor becomes the dividend and the previous remainder the divisor.
  %a.addr.03 = phi i64 [ %b.addr.02, %while.body ], [ %a, %entry ]
  %b.addr.02 = phi i64 [ %rem, %while.body ], [ %b, %entry ]
  %rem = srem i64 %a.addr.03, %b.addr.02
  ; Leave the loop as soon as the remainder is zero.
  %cmp = icmp eq i64 %rem, 0
  br i1 %cmp, label %while.end, label %while.body

while.end: ; preds = %while.body, %entry
  ; %a on the early-exit path, otherwise the divisor of the final iteration.
  %a.addr.0.lcssa = phi i64 [ %a, %entry ], [ %b.addr.02, %while.body ]
  ; Only the low 32 bits are returned.
  %t = trunc i64 %a.addr.0.lcssa to i32
  ret i32 %t
}

; Check that copy propagation does not kill things like:
; dst = copy src <-- do not kill that.
; ... = op1 undef dst
; ... = op2 dst <-- this is used here.
;
; @foo takes and returns a <16 x float>. In the expected assembly the argument
; arrives in xmm0-xmm3 and the result is returned in xmm0-xmm3. The output
; begins with four 'movaps' copies of the incoming registers (into xmm9, xmm8,
; xmm6 and xmm7), and those copies are read again by later instructions.
; NOTE(review): these leading copies are presumably the "dst = copy src" case
; described above - confirm against the commit that introduced this test.
define <16 x float> @foo(<16 x float> %x) {
; CHECK-LABEL: foo:
; CHECK: ## %bb.0: ## %bb
; CHECK-NEXT: movaps %xmm3, %xmm9
; CHECK-NEXT: movaps %xmm2, %xmm8
; CHECK-NEXT: movaps %xmm1, %xmm6
; CHECK-NEXT: movaps %xmm0, %xmm7
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: movaps %xmm3, %xmm1
; CHECK-NEXT: cmpltps %xmm0, %xmm1
; CHECK-NEXT: movaps %xmm1, %xmm4
; CHECK-NEXT: orps {{.*}}(%rip), %xmm4
; CHECK-NEXT: movaps %xmm4, %xmm10
; CHECK-NEXT: andnps %xmm1, %xmm10
; CHECK-NEXT: movaps %xmm2, %xmm1
; CHECK-NEXT: cmpltps %xmm0, %xmm1
; CHECK-NEXT: movaps {{.*#+}} xmm11 = [9,10,11,12]
; CHECK-NEXT: movaps %xmm1, %xmm3
; CHECK-NEXT: orps %xmm11, %xmm3
; CHECK-NEXT: movaps %xmm3, %xmm14
; CHECK-NEXT: andnps %xmm1, %xmm14
; CHECK-NEXT: cvttps2dq %xmm6, %xmm12
; CHECK-NEXT: cmpltps %xmm0, %xmm6
; CHECK-NEXT: movaps {{.*#+}} xmm13 = [5,6,7,8]
; CHECK-NEXT: movaps %xmm6, %xmm2
; CHECK-NEXT: orps %xmm13, %xmm2
; CHECK-NEXT: movaps %xmm2, %xmm5
; CHECK-NEXT: andnps %xmm6, %xmm5
; CHECK-NEXT: cvttps2dq %xmm7, %xmm6
; CHECK-NEXT: cmpltps %xmm0, %xmm7
; CHECK-NEXT: movaps {{.*#+}} xmm15 = [1,2,3,4]
; CHECK-NEXT: movaps %xmm7, %xmm0
; CHECK-NEXT: orps %xmm15, %xmm0
; CHECK-NEXT: movaps %xmm0, %xmm1
; CHECK-NEXT: andnps %xmm7, %xmm1
; CHECK-NEXT: andps %xmm15, %xmm0
; CHECK-NEXT: cvtdq2ps %xmm6, %xmm6
; CHECK-NEXT: andps %xmm6, %xmm0
; CHECK-NEXT: movaps {{.*#+}} xmm6 = [1,1,1,1]
; CHECK-NEXT: andps %xmm6, %xmm1
; CHECK-NEXT: orps %xmm1, %xmm0
; CHECK-NEXT: andps %xmm13, %xmm2
; CHECK-NEXT: cvtdq2ps %xmm12, %xmm1
; CHECK-NEXT: andps %xmm1, %xmm2
; CHECK-NEXT: andps %xmm6, %xmm5
; CHECK-NEXT: orps %xmm5, %xmm2
; CHECK-NEXT: andps %xmm11, %xmm3
; CHECK-NEXT: cvttps2dq %xmm8, %xmm1
; CHECK-NEXT: cvtdq2ps %xmm1, %xmm1
; CHECK-NEXT: andps %xmm1, %xmm3
; CHECK-NEXT: andps %xmm6, %xmm14
; CHECK-NEXT: orps %xmm14, %xmm3
; CHECK-NEXT: andps %xmm6, %xmm10
; CHECK-NEXT: andps {{.*}}(%rip), %xmm4
; CHECK-NEXT: cvttps2dq %xmm9, %xmm1
; CHECK-NEXT: cvtdq2ps %xmm1, %xmm1
; CHECK-NEXT: andps %xmm1, %xmm4
; CHECK-NEXT: orps %xmm10, %xmm4
; CHECK-NEXT: movaps %xmm2, %xmm1
; CHECK-NEXT: movaps %xmm3, %xmm2
; CHECK-NEXT: movaps %xmm4, %xmm3
; CHECK-NEXT: retq
bb:
  ; Both operands are constants; none of 0..15 is less than zero, so every lane
  ; of this comparison is false.
  %v3 = icmp slt <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, zeroinitializer
  %v14 = zext <16 x i1> %v3 to <16 x i32>
  ; Per-lane "%x < 0.0" predicate, widened both as an all-ones mask (sext) and
  ; as a 0/1 value (zext).
  %v16 = fcmp olt <16 x float> %x, zeroinitializer
  %v17 = sext <16 x i1> %v16 to <16 x i32>
  %v18 = zext <16 x i1> %v16 to <16 x i32>
  %v19 = xor <16 x i32> %v14, %v18
  %v20 = or <16 x i32> %v17, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
  ; Round-trip %x through a signed 32-bit integer conversion.
  %v21 = fptosi <16 x float> %x to <16 x i32>
  %v22 = sitofp <16 x i32> %v21 to <16 x float>
  %v69 = fcmp ogt <16 x float> %v22, zeroinitializer
  %v75 = and <16 x i1> %v69, %v3
  ; From here on the converted value is treated as raw 32-bit lanes and mixed
  ; with the masks and constants computed above.
  %v77 = bitcast <16 x float> %v22 to <16 x i32>
  %v79 = sext <16 x i1> %v75 to <16 x i32>
  %v80 = and <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>, %v79
  %v81 = xor <16 x i32> %v77, %v80
  %v82 = and <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>, %v81
  %v83 = xor <16 x i32> %v19, %v82
  %v84 = and <16 x i32> %v83, %v20
  %v85 = xor <16 x i32> %v19, %v84
  ; Reinterpret the integer lanes as floats for the return value.
  %v86 = bitcast <16 x i32> %v85 to <16 x float>
  ret <16 x float> %v86
}