Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      3 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
      4 
      5 declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone
      6 
      7 declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
      8 declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
      9 declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
     10 
     11 declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone ; called by the *_i64 and *_i64_trunc tests in this file
     12 declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) nounwind readnone ; NOTE(review): declared but not called by any test visible here
     13 declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) nounwind readnone ; NOTE(review): declared but not called by any test visible here
     14 
     15 declare i32 @llvm.r600.read.tidig.x() nounwind readnone
     16 
     17 ; FUNC-LABEL: {{^}}s_ctlz_zero_undef_i32:
     18 ; SI: s_load_dword [[VAL:s[0-9]+]],
     19 ; SI: s_flbit_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]]
     20 ; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
     21 ; SI: buffer_store_dword [[VRESULT]],
     22 ; SI: s_endpgm
     23 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
     24 ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
     25 define void @s_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind { ; %val is passed directly as an argument rather than loaded from memory
     26   %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone ; i1 true = is_zero_undef: the result is undefined when %val is 0
     27   store i32 %ctlz, i32 addrspace(1)* %out, align 4 ; checks above expect the s_flbit_i32_b32 result to be moved into a v register and stored
     28   ret void
     29 }
     30 
     31 ; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32:
     32 ; SI: buffer_load_dword [[VAL:v[0-9]+]],
     33 ; SI: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
     34 ; SI: buffer_store_dword [[RESULT]],
     35 ; SI: s_endpgm
     36 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
     37 ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
     38 define void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
     39   %val = load i32, i32 addrspace(1)* %valptr, align 4 ; operand comes from memory; checks above expect buffer_load_dword into a v register
     40   %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone ; is_zero_undef = true; checks expect a single v_ffbh_u32_e32 on the loaded value
     41   store i32 %ctlz, i32 addrspace(1)* %out, align 4 ; checks expect the ffbh result to be stored as-is
     42   ret void
     43 }
     44 
     45 ; FUNC-LABEL: {{^}}v_ctlz_zero_undef_v2i32:
     46 ; SI: buffer_load_dwordx2
     47 ; SI: v_ffbh_u32_e32
     48 ; SI: v_ffbh_u32_e32
     49 ; SI: buffer_store_dwordx2
     50 ; SI: s_endpgm
     51 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
     52 ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
     53 ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
     54 define void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
     55   %val = load <2 x i32>, <2 x i32> addrspace(1)* %valptr, align 8 ; checks above expect one buffer_load_dwordx2 for the whole vector
     56   %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %val, i1 true) nounwind readnone ; is_zero_undef = true; checks expect two v_ffbh_u32_e32, one per element
     57   store <2 x i32> %ctlz, <2 x i32> addrspace(1)* %out, align 8 ; checks expect one buffer_store_dwordx2
     58   ret void
     59 }
     60 
     61 ; FUNC-LABEL: {{^}}v_ctlz_zero_undef_v4i32:
     62 ; SI: buffer_load_dwordx4
     63 ; SI: v_ffbh_u32_e32
     64 ; SI: v_ffbh_u32_e32
     65 ; SI: v_ffbh_u32_e32
     66 ; SI: v_ffbh_u32_e32
     67 ; SI: buffer_store_dwordx4
     68 ; SI: s_endpgm
     69 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
     70 ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
     71 ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
     72 ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
     73 ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
     74 define void @v_ctlz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
     75   %val = load <4 x i32>, <4 x i32> addrspace(1)* %valptr, align 16 ; checks above expect one buffer_load_dwordx4 for the whole vector
     76   %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val, i1 true) nounwind readnone ; is_zero_undef = true; checks expect four v_ffbh_u32_e32, one per element
     77   store <4 x i32> %ctlz, <4 x i32> addrspace(1)* %out, align 16 ; checks expect one buffer_store_dwordx4
     78   ret void
     79 }
     80 
     81 ; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i8:
     82 ; SI: buffer_load_ubyte [[VAL:v[0-9]+]],
     83 ; SI: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
     84 ; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 0xffffffe8, [[FFBH]]
     85 ; SI: buffer_store_byte [[RESULT]],
     86 define void @v_ctlz_zero_undef_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
     87   %val = load i8, i8 addrspace(1)* %valptr ; checks above expect buffer_load_ubyte into a v register
     88   %ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 true) nounwind readnone ; checks expect a 32-bit v_ffbh_u32_e32 plus an add of 0xffffffe8 (-24 as i32, i.e. 8 - 32); presumably this corrects for the byte being widened to 32 bits
     89   store i8 %ctlz, i8 addrspace(1)* %out ; checks expect buffer_store_byte of the adjusted count
     90   ret void
     91 }
     92 
     93 ; FUNC-LABEL: {{^}}s_ctlz_zero_undef_i64:
     94 ; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
     95 ; SI-DAG: v_cmp_eq_i32_e64 vcc, 0, s[[HI]]
     96 ; SI-DAG: s_flbit_i32_b32 [[FFBH_LO:s[0-9]+]], s[[LO]]
     97 ; SI-DAG: s_add_i32 [[ADD:s[0-9]+]], [[FFBH_LO]], 32
     98 ; SI-DAG: s_flbit_i32_b32 [[FFBH_HI:s[0-9]+]], s[[HI]]
     99 ; SI-DAG: v_mov_b32_e32 [[VFFBH_LO:v[0-9]+]], [[FFBH_LO]]
    100 ; SI-DAG: v_mov_b32_e32 [[VFFBH_HI:v[0-9]+]], [[FFBH_HI]]
    101 ; SI-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[VFFBH_HI]], [[VFFBH_LO]]
    102 ; SI-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}}
    103 ; SI: {{buffer|flat}}_store_dwordx2 v{{\[}}[[CTLZ]]:[[CTLZ_HI]]{{\]}}
    104 define void @s_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out, i64 %val) nounwind { ; %val is passed directly as an argument; checks above expect it in an s register pair via s_load_dwordx2
    105   %ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 true) ; is_zero_undef = true; checks expect s_flbit_i32_b32 on each 32-bit half, an s_add_i32 of 32 on the low-half count, a compare of the high half against 0 and a v_cndmask_b32_e32 choosing the result
    106   store i64 %ctlz, i64 addrspace(1)* %out ; checks expect the high dword of the stored pair to be a literal 0
    107   ret void
    108 }
    109 
    110 ; FUNC-LABEL: {{^}}s_ctlz_zero_undef_i64_trunc:
    111 define void @s_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias %out, i64 %val) nounwind { ; NOTE(review): only a label check precedes this test; no instructions are verified
    112   %ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 true) ; is_zero_undef = true: the result is undefined when %val is 0
    113   %trunc = trunc i64 %ctlz to i32 ; keep only the low 32 bits of the 64-bit count
    114   store i32 %trunc, i32 addrspace(1)* %out
    115   ret void
    116 }
    117 
    118 ; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i64:
    119 ; SI-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
    120 ; SI-DAG: v_cmp_eq_i32_e64 [[CMPHI:s\[[0-9]+:[0-9]+\]]], 0, v[[HI]]
    121 ; SI-DAG: v_ffbh_u32_e32 [[FFBH_LO:v[0-9]+]], v[[LO]]
    122 ; SI-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]
    123 ; SI-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]]
    124 ; SI-DAG: v_cndmask_b32_e64 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[FFBH_LO]]
    125 ; SI-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}}
    126 ; SI: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CTLZ]]:[[CTLZ_HI]]{{\]}}
    127 define void @v_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
    128   %tid = call i32 @llvm.r600.read.tidig.x() ; presumably the work-item id in x (TODO confirm); used only as the element index below
    129   %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid ; address of in[tid]
    130   %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid ; address of out[tid]
    131   %val = load i64, i64 addrspace(1)* %in.gep ; checks above expect a buffer/flat dwordx2 load into a v register pair
    132   %ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 true) ; is_zero_undef = true; checks expect v_ffbh_u32_e32 on each 32-bit half, an add of 32 on the low-half count, a v_cmp_eq_i32 of the high half against 0 and a v_cndmask_b32_e64 choosing the result
    133   store i64 %ctlz, i64 addrspace(1)* %out.gep ; checks expect the high dword of the stored pair to be a literal 0
    134   ret void
    135 }
    136 
    137 ; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i64_trunc:
    138 define void @v_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { ; NOTE(review): only a label check precedes this test; no instructions are verified
    139   %tid = call i32 @llvm.r600.read.tidig.x() ; presumably the work-item id in x (TODO confirm); used only as the element index below
    140   %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid ; address of in[tid] (i64 elements)
    141   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid ; address of out[tid] (i32 elements, since the result is truncated)
    142   %val = load i64, i64 addrspace(1)* %in.gep
    143   %ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 true) ; is_zero_undef = true: the result is undefined when %val is 0
    144   %trunc = trunc i64 %ctlz to i32 ; keep only the low 32 bits of the 64-bit count
    145   store i32 %trunc, i32 addrspace(1)* %out.gep
    146   ret void
    147 }
    148 
    149 ; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_eq_neg1:
    150 ; SI: buffer_load_dword [[VAL:v[0-9]+]],
    151 ; SI: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
    152 ; SI: buffer_store_dword [[RESULT]],
    153  define void @v_ctlz_zero_undef_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
    154   %val = load i32, i32 addrspace(1)* %valptr
    155   %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone ; is_zero_undef = true, so the zero case is handled by the select below
    156   %cmp = icmp eq i32 %val, 0
    157   %sel = select i1 %cmp, i32 -1, i32 %ctlz ; -1 when %val == 0, otherwise the leading-zero count
    158   store i32 %sel, i32 addrspace(1)* %out ; checks above expect the v_ffbh_u32_e32 result to be stored directly; presumably the instruction already yields -1 for a zero input (confirm against the ISA manual)
    159   ret void
    160 }
    161 
    162 ; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_ne_neg1:
    163 ; SI: buffer_load_dword [[VAL:v[0-9]+]],
    164 ; SI: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
    165 ; SI: buffer_store_dword [[RESULT]],
    166 define void @v_ctlz_zero_undef_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
    167   %val = load i32, i32 addrspace(1)* %valptr
    168   %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone ; is_zero_undef = true, so the zero case is handled by the select below
    169   %cmp = icmp ne i32 %val, 0 ; inverted form of the eq-0 compare, with the select operands swapped to match
    170   %sel = select i1 %cmp, i32 %ctlz, i32 -1 ; the leading-zero count when %val != 0, otherwise -1
    171   store i32 %sel, i32 addrspace(1)* %out ; checks above expect the v_ffbh_u32_e32 result to be stored directly
    172   ret void
    173 }
    174 
    175 ; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i8_sel_eq_neg1:
    176 ; SI: buffer_load_ubyte [[VAL:v[0-9]+]],
    177 ; SI: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
    178 ; SI: buffer_store_byte [[FFBH]],
    179  define void @v_ctlz_zero_undef_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
    180   %val = load i8, i8 addrspace(1)* %valptr ; checks above expect buffer_load_ubyte into a v register
    181   %ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 true) nounwind readnone ; is_zero_undef = true, so the zero case is handled by the select below
    182   %cmp = icmp eq i8 %val, 0
    183   %sel = select i1 %cmp, i8 -1, i8 %ctlz ; -1 when %val == 0, otherwise the leading-zero count
    184   store i8 %sel, i8 addrspace(1)* %out ; NOTE(review): checks store [[FFBH]] directly, without the 0xffffffe8 adjustment checked in v_ctlz_zero_undef_i8 - confirm this is intended
    185   ret void
    186 }
    187 
    188 ; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_eq_neg1_two_use:
    189 ; SI: buffer_load_dword [[VAL:v[0-9]+]],
    190 ; SI-DAG: v_ffbh_u32_e32 [[RESULT0:v[0-9]+]], [[VAL]]
    191 ; SI-DAG: v_cmp_eq_i32_e32 vcc, 0, [[VAL]]
    192 ; SI-DAG: v_cndmask_b32_e64 [[RESULT1:v[0-9]+]], 0, 1, vcc
    193 ; SI-DAG: buffer_store_dword [[RESULT0]]
    194 ; SI-DAG: buffer_store_byte [[RESULT1]]
    195 ; SI: s_endpgm
    196  define void @v_ctlz_zero_undef_i32_sel_eq_neg1_two_use(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
    197   %val = load i32, i32 addrspace(1)* %valptr
    198   %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone ; is_zero_undef = true, so the zero case is handled by the select below
    199   %cmp = icmp eq i32 %val, 0 ; has two uses: the select and the i1 store below
    200   %sel = select i1 %cmp, i32 -1, i32 %ctlz ; -1 when %val == 0, otherwise the leading-zero count
    201   store volatile i32 %sel, i32 addrspace(1)* %out ; checks above expect the dword store to use the v_ffbh_u32_e32 result directly
    202   store volatile i1 %cmp, i1 addrspace(1)* undef ; second use of %cmp; checks expect v_cmp_eq_i32 plus a v_cndmask_b32_e64 producing 0/1, stored with buffer_store_byte (the address is undef)
    203   ret void
    204 }
    205 
    206 ; Selected on wrong constant
    207 ; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_eq_0:
    208 ; SI: buffer_load_dword
    209 ; SI: v_ffbh_u32_e32
    210 ; SI: v_cmp
    211 ; SI: v_cndmask
    212 ; SI: buffer_store_dword
    213  define void @v_ctlz_zero_undef_i32_sel_eq_0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
    214   %val = load i32, i32 addrspace(1)* %valptr
    215   %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone ; is_zero_undef = true: the result is undefined when %val is 0
    216   %cmp = icmp eq i32 %val, 0
    217   %sel = select i1 %cmp, i32 0, i32 %ctlz ; yields 0 (not -1) for a zero input, so checks above expect v_cmp and v_cndmask to remain
    218   store i32 %sel, i32 addrspace(1)* %out
    219   ret void
    220 }
    221 
    222 ; Selected on wrong constant
    223 ; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_ne_0:
    224 ; SI: buffer_load_dword
    225 ; SI: v_ffbh_u32_e32
    226 ; SI: v_cmp
    227 ; SI: v_cndmask
    228 ; SI: buffer_store_dword
    229 define void @v_ctlz_zero_undef_i32_sel_ne_0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
    230   %val = load i32, i32 addrspace(1)* %valptr
    231   %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone ; is_zero_undef = true: the result is undefined when %val is 0
    232   %cmp = icmp ne i32 %val, 0 ; inverted form of the eq-0 compare, with the select operands swapped to match
    233   %sel = select i1 %cmp, i32 %ctlz, i32 0 ; yields 0 (not -1) for a zero input, so checks above expect v_cmp and v_cndmask to remain
    234   store i32 %sel, i32 addrspace(1)* %out
    235   ret void
    236 }
    237 
    238 ; Compare on wrong constant
    239 ; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_eq_cmp_non0:
    240 ; SI: buffer_load_dword
    241 ; SI: v_ffbh_u32_e32
    242 ; SI: v_cmp
    243 ; SI: v_cndmask
    244 ; SI: buffer_store_dword
    245  define void @v_ctlz_zero_undef_i32_sel_eq_cmp_non0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
    246   %val = load i32, i32 addrspace(1)* %valptr
    247   %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone ; is_zero_undef = true: the result is undefined when %val is 0
    248   %cmp = icmp eq i32 %val, 1 ; compares against 1 rather than 0, so this is not a zero-input guard
    249   %sel = select i1 %cmp, i32 0, i32 %ctlz ; checks above expect v_cmp and v_cndmask to remain
    250   store i32 %sel, i32 addrspace(1)* %out
    251   ret void
    252 }
    253 
    254 ; Compare on wrong constant
    255 ; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_ne_cmp_non0:
    256 ; SI: buffer_load_dword
    257 ; SI: v_ffbh_u32_e32
    258 ; SI: v_cmp
    259 ; SI: v_cndmask
    260 ; SI: buffer_store_dword
    261 define void @v_ctlz_zero_undef_i32_sel_ne_cmp_non0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
    262   %val = load i32, i32 addrspace(1)* %valptr
    263   %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone ; is_zero_undef = true: the result is undefined when %val is 0
    264   %cmp = icmp ne i32 %val, 1 ; compares against 1 rather than 0, so this is not a zero-input guard
    265   %sel = select i1 %cmp, i32 %ctlz, i32 0 ; checks above expect v_cmp and v_cndmask to remain
    266   store i32 %sel, i32 addrspace(1)* %out
    267   ret void
    268 }
    269