Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      3 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
      4 
      5 declare i7 @llvm.ctlz.i7(i7, i1) nounwind readnone
      6 declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone
      7 declare i16 @llvm.ctlz.i16(i16, i1) nounwind readnone
      8 
      9 declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
     10 declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
     11 declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
     12 
     13 declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
     14 declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) nounwind readnone
     15 declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) nounwind readnone
     16 
     17 declare i32 @llvm.r600.read.tidig.x() nounwind readnone
     18 
     19 ; FUNC-LABEL: {{^}}s_ctlz_i32:
     20 ; SI: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
     21 ; SI-DAG: s_flbit_i32_b32 [[CTLZ:s[0-9]+]], [[VAL]]
     22 ; SI-DAG: v_cmp_eq_i32_e64 [[CMPZ:s\[[0-9]+:[0-9]+\]]], 0, [[VAL]]
     23 ; SI-DAG: v_mov_b32_e32 [[VCTLZ:v[0-9]+]], [[CTLZ]]
     24 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], [[VCTLZ]], 32, [[CMPZ]]
     25 ; SI: buffer_store_dword [[RESULT]]
     26 ; SI: s_endpgm
     27 
     28 ; EG: FFBH_UINT
     29 ; EG: CNDE_INT
     30 define void @s_ctlz_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
     31   %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
     32   store i32 %ctlz, i32 addrspace(1)* %out, align 4
     33   ret void
     34 }
     35 
     36 ; FUNC-LABEL: {{^}}v_ctlz_i32:
     37 ; SI: buffer_load_dword [[VAL:v[0-9]+]],
     38 ; SI-DAG: v_ffbh_u32_e32 [[CTLZ:v[0-9]+]], [[VAL]]
     39 ; SI-DAG: v_cmp_eq_i32_e32 vcc, 0, [[CTLZ]]
     40 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], [[CTLZ]], 32, vcc
     41 ; SI: buffer_store_dword [[RESULT]],
     42 ; SI: s_endpgm
     43 
     44 ; EG: FFBH_UINT
     45 ; EG: CNDE_INT
     46 define void @v_ctlz_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
     47   %val = load i32, i32 addrspace(1)* %valptr, align 4
     48   %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
     49   store i32 %ctlz, i32 addrspace(1)* %out, align 4
     50   ret void
     51 }
     52 
     53 ; FUNC-LABEL: {{^}}v_ctlz_v2i32:
     54 ; SI: buffer_load_dwordx2
     55 ; SI: v_ffbh_u32_e32
     56 ; SI: v_ffbh_u32_e32
     57 ; SI: buffer_store_dwordx2
     58 ; SI: s_endpgm
     59 
     60 ; EG: FFBH_UINT
     61 ; EG: CNDE_INT
     62 ; EG: FFBH_UINT
     63 ; EG: CNDE_INT
     64 define void @v_ctlz_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
     65   %val = load <2 x i32>, <2 x i32> addrspace(1)* %valptr, align 8
     66   %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %val, i1 false) nounwind readnone
     67   store <2 x i32> %ctlz, <2 x i32> addrspace(1)* %out, align 8
     68   ret void
     69 }
     70 
     71 ; FUNC-LABEL: {{^}}v_ctlz_v4i32:
     72 ; SI: buffer_load_dwordx4
     73 ; SI: v_ffbh_u32_e32
     74 ; SI: v_ffbh_u32_e32
     75 ; SI: v_ffbh_u32_e32
     76 ; SI: v_ffbh_u32_e32
     77 ; SI: buffer_store_dwordx4
     78 ; SI: s_endpgm
     79 
     80 
     81 ; EG-DAG: FFBH_UINT
     82 ; EG-DAG: CNDE_INT
     83 
     84 ; EG-DAG: FFBH_UINT
     85 ; EG-DAG: CNDE_INT
     86 
     87 ; EG-DAG: FFBH_UINT
     88 ; EG-DAG: CNDE_INT
     89 
     90 ; EG-DAG: FFBH_UINT
     91 ; EG-DAG: CNDE_INT
     92 define void @v_ctlz_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
     93   %val = load <4 x i32>, <4 x i32> addrspace(1)* %valptr, align 16
     94   %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val, i1 false) nounwind readnone
     95   store <4 x i32> %ctlz, <4 x i32> addrspace(1)* %out, align 16
     96   ret void
     97 }
     98 
     99 ; FUNC-LABEL: {{^}}v_ctlz_i8:
    100 ; SI: buffer_load_ubyte [[VAL:v[0-9]+]],
    101 ; SI-DAG: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
    102 ; SI-DAG: v_cmp_eq_i32_e32 vcc, 0, [[CTLZ]]
    103 ; SI-DAG: v_cndmask_b32_e64 [[CORRECTED_FFBH:v[0-9]+]], [[FFBH]], 32, vcc
    104 ; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 0xffffffe8, [[CORRECTED_FFBH]]
    105 ; SI: buffer_store_byte [[RESULT]],
    106 define void @v_ctlz_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
    107   %val = load i8, i8 addrspace(1)* %valptr
    108   %ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 false) nounwind readnone
    109   store i8 %ctlz, i8 addrspace(1)* %out
    110   ret void
    111 }
    112 
    113 ; FUNC-LABEL: {{^}}s_ctlz_i64:
    114 ; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
    115 ; SI-DAG: v_cmp_eq_i32_e64 vcc, 0, s[[HI]]
    116 ; SI-DAG: s_flbit_i32_b32 [[FFBH_LO:s[0-9]+]], s[[LO]]
    117 ; SI-DAG: s_add_i32 [[ADD:s[0-9]+]], [[FFBH_LO]], 32
    118 ; SI-DAG: s_flbit_i32_b32 [[FFBH_HI:s[0-9]+]], s[[HI]]
    119 ; SI-DAG: v_mov_b32_e32 [[VFFBH_LO:v[0-9]+]], [[ADD]]
    120 ; SI-DAG: v_mov_b32_e32 [[VFFBH_HI:v[0-9]+]], [[FFBH_HI]]
    121 ; SI-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[VFFBH_HI]], [[VFFBH_LO]]
    122 ; SI-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}}
    123 ; SI: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CTLZ]]:[[CTLZ_HI]]{{\]}}
    124 define void @s_ctlz_i64(i64 addrspace(1)* noalias %out, i64 %val) nounwind {
    125   %ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false)
    126   store i64 %ctlz, i64 addrspace(1)* %out
    127   ret void
    128 }
    129 
    130 ; FUNC-LABEL: {{^}}s_ctlz_i64_trunc:
    131 define void @s_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
    132   %ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false)
    133   %trunc = trunc i64 %ctlz to i32
    134   store i32 %trunc, i32 addrspace(1)* %out
    135   ret void
    136 }
    137 
    138 ; FUNC-LABEL: {{^}}v_ctlz_i64:
    139 ; SI-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}}
    140 ; SI-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
    141 ; SI-DAG: v_cmp_eq_i32_e64 [[CMPHI:s\[[0-9]+:[0-9]+\]]], 0, v[[HI]]
    142 ; SI-DAG: v_ffbh_u32_e32 [[FFBH_LO:v[0-9]+]], v[[LO]]
    143 ; SI-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]
    144 ; SI-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]]
    145 ; SI-DAG: v_cndmask_b32_e64 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[ADD]], [[CMPHI]]
    146 ; SI-DAG: v_or_b32_e32 [[OR:v[0-9]+]], v[[LO]], v[[HI]]
    147 ; SI-DAG: v_cmp_eq_i32_e32 vcc, 0, [[OR]]
    148 ; SI-DAG: v_cndmask_b32_e64 v[[CLTZ_LO:[0-9]+]], v[[CTLZ:[0-9]+]], 64, vcc
    149 ; SI: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CLTZ_LO]]:[[CTLZ_HI]]{{\]}}
    150 define void @v_ctlz_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
    151   %tid = call i32 @llvm.r600.read.tidig.x()
    152   %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
    153   %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
    154   %val = load i64, i64 addrspace(1)* %in.gep
    155   %ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false)
    156   store i64 %ctlz, i64 addrspace(1)* %out.gep
    157   ret void
    158 }
    159 
    160 ; FUNC-LABEL: {{^}}v_ctlz_i64_trunc:
    161 define void @v_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
    162   %tid = call i32 @llvm.r600.read.tidig.x()
    163   %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
    164   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
    165   %val = load i64, i64 addrspace(1)* %in.gep
    166   %ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false)
    167   %trunc = trunc i64 %ctlz to i32
    168   store i32 %trunc, i32 addrspace(1)* %out.gep
    169   ret void
    170 }
    171 
    172 ; FUNC-LABEL: {{^}}v_ctlz_i32_sel_eq_neg1:
    173 ; SI: buffer_load_dword [[VAL:v[0-9]+]],
    174 ; SI: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
    175 ; SI: buffer_store_dword [[RESULT]],
    176 ; SI: s_endpgm
    177  define void @v_ctlz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
    178   %val = load i32, i32 addrspace(1)* %valptr
    179   %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
    180   %cmp = icmp eq i32 %val, 0
    181   %sel = select i1 %cmp, i32 -1, i32 %ctlz
    182   store i32 %sel, i32 addrspace(1)* %out
    183   ret void
    184 }
    185 
    186 ; FUNC-LABEL: {{^}}v_ctlz_i32_sel_ne_neg1:
    187 ; SI: buffer_load_dword [[VAL:v[0-9]+]],
    188 ; SI: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
    189 ; SI: buffer_store_dword [[RESULT]],
    190 ; SI: s_endpgm
    191 define void @v_ctlz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
    192   %val = load i32, i32 addrspace(1)* %valptr
    193   %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
    194   %cmp = icmp ne i32 %val, 0
    195   %sel = select i1 %cmp, i32 %ctlz, i32 -1
    196   store i32 %sel, i32 addrspace(1)* %out
    197   ret void
    198 }
    199 
    200 ; TODO: Should be able to eliminate select here as well.
    201 ; FUNC-LABEL: {{^}}v_ctlz_i32_sel_eq_bitwidth:
    202 ; SI: buffer_load_dword
    203 ; SI: v_ffbh_u32_e32
    204 ; SI: v_cmp
    205 ; SI: v_cndmask
    206 ; SI: s_endpgm
    207 define void @v_ctlz_i32_sel_eq_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
    208   %val = load i32, i32 addrspace(1)* %valptr
    209   %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
    210   %cmp = icmp eq i32 %ctlz, 32
    211   %sel = select i1 %cmp, i32 -1, i32 %ctlz
    212   store i32 %sel, i32 addrspace(1)* %out
    213   ret void
    214 }
    215 
    216 ; FUNC-LABEL: {{^}}v_ctlz_i32_sel_ne_bitwidth:
    217 ; SI: buffer_load_dword
    218 ; SI: v_ffbh_u32_e32
    219 ; SI: v_cmp
    220 ; SI: v_cndmask
    221 ; SI: s_endpgm
    222 define void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
    223   %val = load i32, i32 addrspace(1)* %valptr
    224   %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
    225   %cmp = icmp ne i32 %ctlz, 32
    226   %sel = select i1 %cmp, i32 %ctlz, i32 -1
    227   store i32 %sel, i32 addrspace(1)* %out
    228   ret void
    229 }
    230 
    231 ; FUNC-LABEL: {{^}}v_ctlz_i8_sel_eq_neg1:
    232 ; SI: buffer_load_ubyte [[VAL:v[0-9]+]],
    233 ; SI: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
    234 ; SI: buffer_store_byte [[FFBH]],
    235  define void @v_ctlz_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
    236   %val = load i8, i8 addrspace(1)* %valptr
    237   %ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 false) nounwind readnone
    238   %cmp = icmp eq i8 %val, 0
    239   %sel = select i1 %cmp, i8 -1, i8 %ctlz
    240   store i8 %sel, i8 addrspace(1)* %out
    241   ret void
    242 }
    243 
    244 ; FUNC-LABEL: {{^}}v_ctlz_i16_sel_eq_neg1:
    245 ; SI: buffer_load_ushort [[VAL:v[0-9]+]],
    246 ; SI: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
    247 ; SI: buffer_store_short [[FFBH]],
    248  define void @v_ctlz_i16_sel_eq_neg1(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %valptr) nounwind {
    249   %val = load i16, i16 addrspace(1)* %valptr
    250   %ctlz = call i16 @llvm.ctlz.i16(i16 %val, i1 false) nounwind readnone
    251   %cmp = icmp eq i16 %val, 0
    252   %sel = select i1 %cmp, i16 -1, i16 %ctlz
    253   store i16 %sel, i16 addrspace(1)* %out
    254   ret void
    255 }
    256 
    257 ; FUNC-LABEL: {{^}}v_ctlz_i7_sel_eq_neg1:
    258 ; SI: buffer_load_ubyte [[VAL:v[0-9]+]],
    259 ; SI: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
    260 ; SI: v_and_b32_e32 [[TRUNC:v[0-9]+]], 0x7f, [[FFBH]]
    261 ; SI: buffer_store_byte [[TRUNC]],
    262  define void @v_ctlz_i7_sel_eq_neg1(i7 addrspace(1)* noalias %out, i7 addrspace(1)* noalias %valptr) nounwind {
    263   %val = load i7, i7 addrspace(1)* %valptr
    264   %ctlz = call i7 @llvm.ctlz.i7(i7 %val, i1 false) nounwind readnone
    265   %cmp = icmp eq i7 %val, 0
    266   %sel = select i1 %cmp, i7 -1, i7 %ctlz
    267   store i7 %sel, i7 addrspace(1)* %out
    268   ret void
    269 }
    270