Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
      2 
      3 
      4 ; Normally icmp + select is optimized to select_cc. When this happens, the
      5 ; DAGLegalizer never sees the select and doesn't have a chance to legalize it.
      6 ;
      7 ; In order to avoid the select_cc optimization, this test case calculates the
      8 ; condition for the select in a separate basic block.
      9 
     10 ; FUNC-LABEL: {{^}}select:
     11 ; EG-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.X
     12 ; EG-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.X
     13 ; EG-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
     14 ; EG-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
     15 ; EG-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XYZW
     16 ; EG-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XYZW
     17 define amdgpu_kernel void @select(i32 addrspace(1)* %i32out, float addrspace(1)* %f32out,
     18                     <2 x i32> addrspace(1)* %v2i32out, <2 x float> addrspace(1)* %v2f32out,
     19                     <4 x i32> addrspace(1)* %v4i32out, <4 x float> addrspace(1)* %v4f32out,
     20                     i32 %cond) {
     21 entry:
     22   br label %loop                          ; first pass: %iter = 0, %pick = false
     23 flip:
     24   %iter.next = add i32 %iter, 1
     25   %pick.flip = icmp eq i1 %pick, false    ; invert the select condition for the next pass
     26   br label %loop
     27 loop:
     28   %iter = phi i32 [ %iter.next, %flip ], [ 0, %entry ]
     29   %pick = phi i1 [ %pick.flip, %flip ], [ false, %entry ]
     30   %again = icmp eq i32 %cond, %iter       ; keep looping while %cond equals the counter
     31   %sel.i32 = select i1 %pick, i32 2, i32 3 ; every select below shares the phi'd condition %pick
     32   %sel.f32 = select i1 %pick, float 2.0, float 5.0
     33   %sel.v2i32 = select i1 %pick, <2 x i32> <i32 2, i32 3>, <2 x i32> <i32 4, i32 5>
     34   %sel.v2f32 = select i1 %pick, <2 x float> <float 2.0, float 3.0>, <2 x float> <float 4.0, float 5.0>
     35   %sel.v4i32 = select i1 %pick, <4 x i32> <i32 2, i32 3, i32 4, i32 5>, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
     36   %sel.v4f32 = select i1 %pick, <4 x float> <float 2.0, float 3.0, float 4.0, float 5.0>, <4 x float> <float 6.0, float 7.0, float 8.0, float 9.0>
     37   br i1 %again, label %flip, label %exit
     38 
     39 exit:                                     ; write one result per output pointer
     40   store i32 %sel.i32, i32 addrspace(1)* %i32out
     41   store float %sel.f32, float addrspace(1)* %f32out
     42   store <2 x i32> %sel.v2i32, <2 x i32> addrspace(1)* %v2i32out
     43   store <2 x float> %sel.v2f32, <2 x float> addrspace(1)* %v2f32out
     44   store <4 x i32> %sel.v4i32, <4 x i32> addrspace(1)* %v4i32out
     45   store <4 x float> %sel.v4f32, <4 x float> addrspace(1)* %v4f32out
     46   ret void
     47 }
     48