; RUN: llc < %s -asm-verbose=false -mtriple=x86_64-unknown-linux | FileCheck %s --check-prefix=CHECK --check-prefix=CMOV
; RUN: llc < %s -asm-verbose=false -mtriple=i686-unknown-linux | FileCheck %s --check-prefix=CHECK --check-prefix=NOCMOV

; NOTE(review): "m:o" in the datalayout below requests Mach-O style mangling,
; while both RUN lines use a Linux triple -- confirm this is intentional.
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

; Test 2xCMOV patterns exposed after legalization.
; One way to do that is with (select (fcmp une/oeq)), which gets
; legalized to setp/setne.

; i32 select on (fcmp oeq float): the x86-64 run expects a cmovne + cmovp
; pair; the i686 run expects a jne + jp branch pair choosing between the
; stack addresses of the two i32 arguments.

; CHECK-LABEL: test_select_fcmp_oeq_i32:

; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: cmovnel %esi, %edi
; CMOV-NEXT: cmovpl %esi, %edi
; CMOV-NEXT: movl %edi, %eax
; CMOV-NEXT: retq

; NOCMOV-NEXT: flds 8(%esp)
; NOCMOV-NEXT: flds 4(%esp)
; NOCMOV-NEXT: fucompp
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: sahf
; NOCMOV-NEXT: leal 16(%esp), %eax
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; NOCMOV-NEXT: jp [[TBB]]
; NOCMOV-NEXT: leal 12(%esp), %eax
; NOCMOV-NEXT: [[TBB]]:
; NOCMOV-NEXT: movl (%eax), %eax
; NOCMOV-NEXT: retl
define i32 @test_select_fcmp_oeq_i32(float %a, float %b, i32 %c, i32 %d) #0 {
entry:
  %cmp = fcmp oeq float %a, %b
  %r = select i1 %cmp, i32 %c, i32 %d
  ret i32 %r
}

; i64 select on (fcmp oeq float): same shape as the i32 case; the i686 run
; additionally expects the selected i64 to be loaded as two 32-bit halves
; into %eax and %edx.

; CHECK-LABEL: test_select_fcmp_oeq_i64:

; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: cmovneq %rsi, %rdi
; CMOV-NEXT: cmovpq %rsi, %rdi
; CMOV-NEXT: movq %rdi, %rax
; CMOV-NEXT: retq

; NOCMOV-NEXT: flds 8(%esp)
; NOCMOV-NEXT: flds 4(%esp)
; NOCMOV-NEXT: fucompp
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: sahf
; NOCMOV-NEXT: leal 20(%esp), %ecx
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; NOCMOV-NEXT: jp [[TBB]]
; NOCMOV-NEXT: leal 12(%esp), %ecx
; NOCMOV-NEXT: [[TBB]]:
; NOCMOV-NEXT: movl (%ecx), %eax
; NOCMOV-NEXT: orl $4, %ecx
; NOCMOV-NEXT: movl (%ecx), %edx
; NOCMOV-NEXT: retl
define i64 @test_select_fcmp_oeq_i64(float %a, float %b, i64 %c, i64 %d) #0 {
entry:
  %cmp = fcmp oeq float %a, %b
  %r = select i1 %cmp, i64 %c, i64 %d
  ret i64 %r
}

; i64 select on (fcmp une float): the same cmovne + cmovp / jne + jp pairs
; are expected, with the roles of the two i64 operands swapped relative to
; the oeq case.

; CHECK-LABEL: test_select_fcmp_une_i64:

; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: cmovneq %rdi, %rsi
; CMOV-NEXT: cmovpq %rdi, %rsi
; CMOV-NEXT: movq %rsi, %rax
; CMOV-NEXT: retq

; NOCMOV-NEXT: flds 8(%esp)
; NOCMOV-NEXT: flds 4(%esp)
; NOCMOV-NEXT: fucompp
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: sahf
; NOCMOV-NEXT: leal 12(%esp), %ecx
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; NOCMOV-NEXT: jp [[TBB]]
; NOCMOV-NEXT: leal 20(%esp), %ecx
; NOCMOV-NEXT: [[TBB]]:
; NOCMOV-NEXT: movl (%ecx), %eax
; NOCMOV-NEXT: orl $4, %ecx
; NOCMOV-NEXT: movl (%ecx), %edx
; NOCMOV-NEXT: retl
define i64 @test_select_fcmp_une_i64(float %a, float %b, i64 %c, i64 %d) #0 {
entry:
  %cmp = fcmp une float %a, %b
  %r = select i1 %cmp, i64 %c, i64 %d
  ret i64 %r
}

; double select on (fcmp oeq float): both runs expect a jne + jp branch pair;
; the x86-64 run guards a movaps between xmm registers, the i686 run picks a
; stack address and loads the result with fldl.

; CHECK-LABEL: test_select_fcmp_oeq_f64:

; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; CMOV-NEXT: jp [[TBB]]
; CMOV-NEXT: movaps %xmm2, %xmm3
; CMOV-NEXT: [[TBB]]:
; CMOV-NEXT: movaps %xmm3, %xmm0
; CMOV-NEXT: retq

; NOCMOV-NEXT: flds 8(%esp)
; NOCMOV-NEXT: flds 4(%esp)
; NOCMOV-NEXT: fucompp
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: sahf
; NOCMOV-NEXT: leal 20(%esp), %eax
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; NOCMOV-NEXT: jp [[TBB]]
; NOCMOV-NEXT: leal 12(%esp), %eax
; NOCMOV-NEXT: [[TBB]]:
; NOCMOV-NEXT: fldl (%eax)
; NOCMOV-NEXT: retl
define double @test_select_fcmp_oeq_f64(float %a, float %b, double %c, double %d) #0 {
entry:
  %cmp = fcmp oeq float %a, %b
  %r = select i1 %cmp, double %c, double %d
  ret double %r
}

; <4 x i32> select on (fcmp oeq float): the x86-64 run expects one jne + jp
; pair around a movaps; the i686 run expects four jne + jp pairs, one per
; 32-bit element, with the elements then stored through the pointer loaded
; from 12(%esp).

; CHECK-LABEL: test_select_fcmp_oeq_v4i32:

; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; CMOV-NEXT: jp [[TBB]]
; CMOV-NEXT: movaps %xmm2, %xmm3
; CMOV-NEXT: [[TBB]]:
; CMOV-NEXT: movaps %xmm3, %xmm0
; CMOV-NEXT: retq

; NOCMOV-NEXT: pushl %edi
; NOCMOV-NEXT: pushl %esi
; NOCMOV-NEXT: flds 20(%esp)
; NOCMOV-NEXT: flds 16(%esp)
; NOCMOV-NEXT: fucompp
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: sahf
; NOCMOV-NEXT: leal 40(%esp), %eax
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; NOCMOV-NEXT: jp [[TBB]]
; NOCMOV-NEXT: leal 24(%esp), %eax
; NOCMOV-NEXT: [[TBB]]:
; NOCMOV-NEXT: movl (%eax), %ecx
; NOCMOV-NEXT: leal 44(%esp), %edx
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; NOCMOV-NEXT: jp [[TBB]]
; NOCMOV-NEXT: leal 28(%esp), %edx
; NOCMOV-NEXT: [[TBB]]:
; NOCMOV-NEXT: movl 12(%esp), %eax
; NOCMOV-NEXT: movl (%edx), %edx
; NOCMOV-NEXT: leal 48(%esp), %esi
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; NOCMOV-NEXT: jp [[TBB]]
; NOCMOV-NEXT: leal 32(%esp), %esi
; NOCMOV-NEXT: [[TBB]]:
; NOCMOV-NEXT: movl (%esi), %esi
; NOCMOV-NEXT: leal 52(%esp), %edi
; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; NOCMOV-NEXT: jp [[TBB]]
; NOCMOV-NEXT: leal 36(%esp), %edi
; NOCMOV-NEXT: [[TBB]]:
; NOCMOV-NEXT: movl (%edi), %edi
; NOCMOV-NEXT: movl %edi, 12(%eax)
; NOCMOV-NEXT: movl %esi, 8(%eax)
; NOCMOV-NEXT: movl %edx, 4(%eax)
; NOCMOV-NEXT: movl %ecx, (%eax)
; NOCMOV-NEXT: popl %esi
; NOCMOV-NEXT: popl %edi
; NOCMOV-NEXT: retl $4
define <4 x i32> @test_select_fcmp_oeq_v4i32(float %a, float %b, <4 x i32> %c, <4 x i32> %d) #0 {
entry:
  %cmp = fcmp oeq float %a, %b
  %r = select i1 %cmp, <4 x i32> %c, <4 x i32> %d
  ret <4 x i32> %r
}

; Also make sure we catch the original code-sequence of interest:

; The constant-pool entry matched below, 1065353216, is 0x3F800000, the
; IEEE-754 single-precision bit pattern of 1.0.

; CMOV: [[ONE_F32_LCPI:.LCPI.*]]:
; CMOV-NEXT: .long 1065353216

; (float)(zext (fcmp une)): the x86-64 run expects the constant to be loaded
; first and zeroed with xorps on the fall-through path; the i686 run only
; requires a jne immediately followed by a jp.

; CHECK-LABEL: test_zext_fcmp_une:
; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: movss [[ONE_F32_LCPI]](%rip), %xmm0
; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; CMOV-NEXT: jp [[TBB]]
; CMOV-NEXT: xorps %xmm0, %xmm0
; CMOV-NEXT: [[TBB]]:
; CMOV-NEXT: retq

; NOCMOV: jne
; NOCMOV-NEXT: jp
define float @test_zext_fcmp_une(float %a, float %b) #0 {
entry:
  %cmp = fcmp une float %a, %b
  %conv1 = zext i1 %cmp to i32
  %conv2 = sitofp i32 %conv1 to float
  ret float %conv2
}

; CMOV: [[ONE_F32_LCPI:.LCPI.*]]:
; CMOV-NEXT: .long 1065353216

; (float)(zext (fcmp oeq)): mirror image of the une case -- the x86-64 run
; expects xorps first and the constant load on the fall-through path.

; CHECK-LABEL: test_zext_fcmp_oeq:
; CMOV-NEXT: ucomiss %xmm1, %xmm0
; CMOV-NEXT: xorps %xmm0, %xmm0
; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
; CMOV-NEXT: jp [[TBB]]
; CMOV-NEXT: movss [[ONE_F32_LCPI]](%rip), %xmm0
; CMOV-NEXT: [[TBB]]:
; CMOV-NEXT: retq

; NOCMOV: jne
; NOCMOV-NEXT: jp
define float @test_zext_fcmp_oeq(float %a, float %b) #0 {
entry:
  %cmp = fcmp oeq float %a, %b
  %conv1 = zext i1 %cmp to i32
  %conv2 = sitofp i32 %conv1 to float
  ret float %conv2
}

attributes #0 = { nounwind }

; Destination of the volatile store in no_cascade_opt.
@g8 = global i8 0

; The following test used to fail because llvm had a bug where a pair of
; cascaded cmovs like:
;
;   %vreg12<def> = CMOV_GR8 %vreg7, %vreg11 ... (lt)
;   %vreg13<def> = CMOV_GR8 %vreg12, %vreg11 ... (gt)
;
; was lowered, with the two cmovs expanded together, to:
;
; BB#0:
;   JL_1 BB#9
; BB#7:
;   JG_1 BB#9
; BB#8:
; BB#9:
;   vreg12 = phi(vreg7, BB#8, vreg11, BB#0, vreg12, BB#7)
;   vreg13 = COPY vreg12
;
; which was invalid, as %vreg12 is not the same value as %vreg13.
;
; %sel0 is used both by %sel1 and directly by %sel2, so the two selects must
; stay distinct values; the checks below expect a separate conditional
; branch (jl, jg) for each of them. Only the x86-64 run has checks for this
; function beyond the label.

; CHECK-LABEL: no_cascade_opt:
; CMOV-DAG: cmpl %edx, %esi
; CMOV-DAG: movb $20, %al
; CMOV-DAG: movb $20, %dl
; CMOV: jl [[BB0:.LBB[0-9_]+]]
; CMOV: movl %ecx, %edx
; CMOV: [[BB0]]:
; CMOV: jg [[BB1:.LBB[0-9_]+]]
; CMOV: movl %edx, %eax
; CMOV: [[BB1]]:
; CMOV: testl %edi, %edi
; CMOV: je [[BB2:.LBB[0-9_]+]]
; CMOV: movl %edx, %eax
; CMOV: [[BB2]]:
; CMOV: movb %al, g8(%rip)
; CMOV: retq
define void @no_cascade_opt(i32 %v0, i32 %v1, i32 %v2, i32 %v3) {
entry:
  %c0 = icmp eq i32 %v0, 0
  %c1 = icmp slt i32 %v1, %v2
  %c2 = icmp sgt i32 %v1, %v2
  %trunc = trunc i32 %v3 to i8
  %sel0 = select i1 %c1, i8 20, i8 %trunc
  %sel1 = select i1 %c2, i8 20, i8 %sel0
  %sel2 = select i1 %c0, i8 %sel1, i8 %sel0
  store volatile i8 %sel2, i8* @g8
  ret void
}