Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
      2 ; RUN: llc -march=amdgcn -mcpu=gfx902  -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
      3 
      4 ; GCN-LABEL: {{^}}add1:
      5 ; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
      6 ; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, 0, v{{[0-9]+}}, [[CC]]
      7 ; GCN-NOT: v_cndmask
      8 
      9 ; GFX9-LABEL: {{^}}add1:
     10 ; GFX9: v_addc_co_u32_e{{32|64}} v{{[0-9]+}}, vcc
        ; Kernel under test: loads the i32 at %arg[workitem.id.x], adds 1 to it when
        ; workitem.id.x >u workitem.id.y (zext of the i1 compare), and stores the sum
        ; back to the same address.
        ; The default-target checks above require the compare result register to be
        ; the last operand of a v_addc_u32 whose other source is the constant 0, and
        ; forbid a v_cndmask after it. The gfx902 checks only require a
        ; v_addc_co_u32 that writes vcc.
     11 define amdgpu_kernel void @add1(i32 addrspace(1)* nocapture %arg) {
     12 bb:
     13   %x = tail call i32 @llvm.amdgcn.workitem.id.x()
     14   %y = tail call i32 @llvm.amdgcn.workitem.id.y()
     15   %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
     16   %v = load i32, i32 addrspace(1)* %gep, align 4
     17   %cmp = icmp ugt i32 %x, %y
          ; zext of an i1 yields 0 or 1, so the add below is a conditional +1.
     18   %ext = zext i1 %cmp to i32
     19   %add = add i32 %v, %ext
     20   store i32 %add, i32 addrspace(1)* %gep, align 4
     21   ret void
     22 }
     23 
     24 ; GCN-LABEL: {{^}}add1_i16:
     25 ; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
     26 ; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, 0, v{{[0-9]+}}, [[CC]]
     27 ; GCN-NOT: v_cndmask
     28 
     29 ; GFX9-LABEL: {{^}}add1_i16:
     30 ; GFX9: v_addc_co_u32_e{{32|64}} v{{[0-9]+}}, vcc
        ; Non-kernel function (no amdgpu_kernel calling convention) that performs the
        ; conditional +1 in i32 -- load %arg[workitem.id.x], add zext(id.x >u id.y) --
        ; and returns the low 16 bits of the sum instead of storing it.
        ; The checks above expect the same compare + v_addc_u32-with-0 sequence and
        ; no v_cndmask, i.e. the truncated use must not prevent that selection.
        ; %dst is declared but never referenced in the body.
     31 define i16 @add1_i16(i32 addrspace(1)* nocapture %arg, i16 addrspace(1)* nocapture %dst) {
     32 bb:
     33   %x = tail call i32 @llvm.amdgcn.workitem.id.x()
     34   %y = tail call i32 @llvm.amdgcn.workitem.id.y()
     35   %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
     36   %v = load i32, i32 addrspace(1)* %gep, align 4
     37   %cmp = icmp ugt i32 %x, %y
     38   %ext = zext i1 %cmp to i32
     39   %add = add i32 %v, %ext
          ; Only the truncated value is used; the full i32 sum has no other user.
     40   %trunc = trunc i32 %add to i16
     41   ret i16 %trunc
     42 }
     43 
     44 ; GCN-LABEL: {{^}}sub1:
     45 ; GCN: v_cmp_gt_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
     46 ; GCN: v_subbrev_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
     47 ; GCN-NOT: v_cndmask
     48 
     49 ; GFX9-LABEL: {{^}}sub1:
     50 ; GFX9: v_subbrev_co_u32_e{{32|64}} v{{[0-9]+}}, vcc
        ; Kernel under test: loads the i32 at %arg[workitem.id.x], adds
        ; sext(id.x >u id.y) to it, and stores the result back. Because the i1 is
        ; sign-extended the addend is 0 or -1, so this is a conditional decrement.
        ; The default-target checks above require a v_cmp_gt_u32 writing vcc followed
        ; by a v_subbrev_u32 with the constant 0 and vcc, and forbid a v_cndmask
        ; after it.
        ; NOTE(review): these checks pin the _e32 encoding and vcc, whereas the add
        ; cases in this file accept e32/e64 and capture the condition register -
        ; confirm the stricter match is intentional.
     51 define amdgpu_kernel void @sub1(i32 addrspace(1)* nocapture %arg) {
     52 bb:
     53   %x = tail call i32 @llvm.amdgcn.workitem.id.x()
     54   %y = tail call i32 @llvm.amdgcn.workitem.id.y()
     55   %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
     56   %v = load i32, i32 addrspace(1)* %gep, align 4
     57   %cmp = icmp ugt i32 %x, %y
          ; sext of an i1 yields 0 or -1.
     58   %ext = sext i1 %cmp to i32
          ; Named %add, but adding 0/-1 subtracts the condition from %v.
     59   %add = add i32 %v, %ext
     60   store i32 %add, i32 addrspace(1)* %gep, align 4
     61   ret void
     62 }
     63 
     64 ; GCN-LABEL: {{^}}add_adde:
     65 ; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
     66 ; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
     67 ; GCN-NOT: v_cndmask
     68 ; GCN-NOT: v_add
     69 
     70 ; GFX9-LABEL: {{^}}add_adde:
     71 ; GFX9: v_addc_co_u32_e{{32|64}} v{{[0-9]+}}, vcc
        ; Kernel under test: computes (v + zext(id.x >u id.y)) + %a, where v is the
        ; i32 loaded from %arg[workitem.id.x], and stores the result back.
        ; The conditional increment comes first and the plain add of %a second.
        ; The default-target checks above require one v_addc_u32 taking two VGPR
        ; sources plus the compare result, and forbid both a v_cndmask and any
        ; further v_add after it, i.e. both IR adds must end up in that single
        ; instruction.
     72 define amdgpu_kernel void @add_adde(i32 addrspace(1)* nocapture %arg, i32 %a) {
     73 bb:
     74   %x = tail call i32 @llvm.amdgcn.workitem.id.x()
     75   %y = tail call i32 @llvm.amdgcn.workitem.id.y()
     76   %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
     77   %v = load i32, i32 addrspace(1)* %gep, align 4
     78   %cmp = icmp ugt i32 %x, %y
     79   %ext = zext i1 %cmp to i32
          ; Conditional +1 on the loaded value ...
     80   %adde = add i32 %v, %ext
          ; ... followed by the unconditional add of the kernel argument.
     81   %add2 = add i32 %adde, %a
     82   store i32 %add2, i32 addrspace(1)* %gep, align 4
     83   ret void
     84 }
     85 
     86 ; GCN-LABEL: {{^}}adde_add:
     87 ; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
     88 ; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
     89 ; GCN-NOT: v_cndmask
     90 ; GCN-NOT: v_add
     91 
     92 ; GFX9-LABEL: {{^}}adde_add:
     93 ; GFX9: v_addc_co_u32_e{{32|64}} v{{[0-9]+}}, vcc
        ; Kernel under test: computes (v + %a) + zext(id.x >u id.y), where v is the
        ; i32 loaded from %arg[workitem.id.x], and stores the result back.
        ; Here the plain add of %a comes first and the conditional increment second,
        ; the opposite operation order from add_adde.
        ; The default-target checks above require one v_addc_u32 taking two VGPR
        ; sources plus the compare result, and forbid both a v_cndmask and any
        ; further v_add after it.
     94 define amdgpu_kernel void @adde_add(i32 addrspace(1)* nocapture %arg, i32 %a) {
     95 bb:
     96   %x = tail call i32 @llvm.amdgcn.workitem.id.x()
     97   %y = tail call i32 @llvm.amdgcn.workitem.id.y()
     98   %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
     99   %v = load i32, i32 addrspace(1)* %gep, align 4
    100   %cmp = icmp ugt i32 %x, %y
    101   %ext = zext i1 %cmp to i32
          ; Unconditional add of the kernel argument ...
    102   %add = add i32 %v, %a
          ; ... followed by the conditional +1.
    103   %adde = add i32 %add, %ext
    104   store i32 %adde, i32 addrspace(1)* %gep, align 4
    105   ret void
    106 }
    107 
    108 ; GCN-LABEL: {{^}}sub_sube:
    109 ; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
    110 ; GCN: v_subb_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
    111 ; GCN-NOT: v_cndmask
    112 ; GCN-NOT: v_sub
    113 
    114 ; GFX9-LABEL: {{^}}sub_sube:
    115 ; GFX9: v_subb_co_u32_e{{32|64}} v{{[0-9]+}}, vcc
        ; Kernel under test: computes (v + sext(id.x >u id.y)) - %a, where v is the
        ; i32 loaded from %arg[workitem.id.x], and stores the result back.
        ; The sign-extended i1 is 0 or -1, so the first step is a conditional
        ; decrement; the subtraction of %a follows it.
        ; The default-target checks above require one v_subb_u32 taking two VGPR
        ; sources plus the compare result, and forbid both a v_cndmask and any
        ; further v_sub after it.
    116 define amdgpu_kernel void @sub_sube(i32 addrspace(1)* nocapture %arg, i32 %a) {
    117 bb:
    118   %x = tail call i32 @llvm.amdgcn.workitem.id.x()
    119   %y = tail call i32 @llvm.amdgcn.workitem.id.y()
    120   %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
    121   %v = load i32, i32 addrspace(1)* %gep, align 4
    122   %cmp = icmp ugt i32 %x, %y
    123   %ext = sext i1 %cmp to i32
          ; Adding 0/-1: conditional decrement of the loaded value ...
    124   %adde = add i32 %v, %ext
          ; ... followed by the unconditional subtraction of the kernel argument.
    125   %sub = sub i32 %adde, %a
    126   store i32 %sub, i32 addrspace(1)* %gep, align 4
    127   ret void
    128 }
    129 
    130 ; GCN-LABEL: {{^}}sube_sub:
    131 ; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
    132 ; GCN: v_subb_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
    133 ; GCN-NOT: v_cndmask
    134 ; GCN-NOT: v_sub
    135 
    136 ; GFX9-LABEL: {{^}}sube_sub:
    137 ; GFX9: v_subb_co_u32_e{{32|64}} v{{[0-9]+}}, vcc
        ; Kernel under test: computes (v - %a) + sext(id.x >u id.y), where v is the
        ; i32 loaded from %arg[workitem.id.x], and stores the result back.
        ; Here the subtraction of %a comes first and the conditional decrement
        ; (adding 0 or -1) second, the opposite operation order from sub_sube.
        ; The default-target checks above require one v_subb_u32 taking two VGPR
        ; sources plus the compare result, and forbid both a v_cndmask and any
        ; further v_sub after it.
    138 define amdgpu_kernel void @sube_sub(i32 addrspace(1)* nocapture %arg, i32 %a) {
    139 bb:
    140   %x = tail call i32 @llvm.amdgcn.workitem.id.x()
    141   %y = tail call i32 @llvm.amdgcn.workitem.id.y()
    142   %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
    143   %v = load i32, i32 addrspace(1)* %gep, align 4
    144   %cmp = icmp ugt i32 %x, %y
    145   %ext = sext i1 %cmp to i32
          ; Unconditional subtraction of the kernel argument ...
    146   %sub = sub i32 %v, %a
          ; ... followed by the conditional decrement (adding 0 or -1).
    147   %adde = add i32 %sub, %ext
    148   store i32 %adde, i32 addrspace(1)* %gep, align 4
    149   ret void
    150 }
    151 
    152 ; GCN-LABEL: {{^}}zext_flclass:
    153 ; GCN: v_cmp_class_f32_e{{32|64}} [[CC:[^,]+]],
    154 ; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, 0, v{{[0-9]+}}, [[CC]]
    155 ; GCN-NOT: v_cndmask
    156 
    157 ; GFX9-LABEL: {{^}}zext_flclass:
    158 ; GFX9: v_addc_co_u32_e{{32|64}} v{{[0-9]+}}, vcc
        ; Kernel under test: conditional +1 on the i32 at %arg[workitem.id.x], where
        ; the i1 condition comes from the llvm.amdgcn.class.f32 intrinsic applied to
        ; the float argument %x (class mask 608) rather than from an integer compare.
        ; The default-target checks above require the v_cmp_class_f32 result register
        ; to be the last operand of a v_addc_u32 whose other source is the constant
        ; 0, and forbid a v_cndmask after it.
    159 define amdgpu_kernel void @zext_flclass(i32 addrspace(1)* nocapture %arg, float %x) {
    160 bb:
    161   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
    162   %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %id
    163   %v = load i32, i32 addrspace(1)* %gep, align 4
    164   %cmp = tail call zeroext i1 @llvm.amdgcn.class.f32(float %x, i32 608)
          ; zext of an i1 yields 0 or 1, so the add below is a conditional +1.
    165   %ext = zext i1 %cmp to i32
    166   %add = add i32 %v, %ext
    167   store i32 %add, i32 addrspace(1)* %gep, align 4
    168   ret void
    169 }
    170 
    171 ; GCN-LABEL: {{^}}sext_flclass:
    172 ; GCN: v_cmp_class_f32_e32 vcc,
    173 ; GCN: v_subbrev_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
    174 ; GCN-NOT: v_cndmask
    175 
    176 ; GFX9-LABEL: {{^}}sext_flclass:
    177 ; GFX9: v_subbrev_co_u32_e32 v{{[0-9]+}}, vcc
        ; Kernel under test: conditional decrement of the i32 at
        ; %arg[workitem.id.x], where the i1 condition comes from the
        ; llvm.amdgcn.class.f32 intrinsic applied to the float argument %x (class
        ; mask 608) and is sign-extended, so the addend is 0 or -1.
        ; The default-target checks above require a v_cmp_class_f32 writing vcc
        ; followed by a v_subbrev_u32 with the constant 0 and vcc, and forbid a
        ; v_cndmask after it.
        ; NOTE(review): both check prefixes pin the _e32 encoding and vcc here,
        ; whereas zext_flclass accepts e32/e64 and captures the condition register -
        ; confirm the stricter match is intentional.
    178 define amdgpu_kernel void @sext_flclass(i32 addrspace(1)* nocapture %arg, float %x) {
    179 bb:
    180   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
    181   %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %id
    182   %v = load i32, i32 addrspace(1)* %gep, align 4
    183   %cmp = tail call zeroext i1 @llvm.amdgcn.class.f32(float %x, i32 608)
          ; sext of an i1 yields 0 or -1.
    184   %ext = sext i1 %cmp to i32
          ; Named %add, but adding 0/-1 subtracts the condition from %v.
    185   %add = add i32 %v, %ext
    186   store i32 %add, i32 addrspace(1)* %gep, align 4
    187   ret void
    188 }
    189 
    190 ; GCN-LABEL: {{^}}add_and:
    191 ; GCN: s_and_b64 [[CC:[^,]+]],
    192 ; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, 0, v{{[0-9]+}}, [[CC]]
    193 ; GCN-NOT: v_cndmask
    194 
    195 ; GFX9-LABEL: {{^}}add_and:
    196 ; GFX9: v_addc_co_u32_e{{32|64}} v{{[0-9]+}}, vcc
        ; Kernel under test: conditional +1 on the i32 at %arg[workitem.id.x], where
        ; the condition is the logical AND of two compares (id.x >u id.y and
        ; id.x >u 1) instead of a single compare.
        ; The default-target checks above require the s_and_b64 result register to be
        ; the last operand of a v_addc_u32 whose other source is the constant 0, and
        ; forbid a v_cndmask after it.
    197 define amdgpu_kernel void @add_and(i32 addrspace(1)* nocapture %arg) {
    198 bb:
    199   %x = tail call i32 @llvm.amdgcn.workitem.id.x()
    200   %y = tail call i32 @llvm.amdgcn.workitem.id.y()
    201   %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
    202   %v = load i32, i32 addrspace(1)* %gep, align 4
    203   %cmp1 = icmp ugt i32 %x, %y
    204   %cmp2 = icmp ugt i32 %x, 1
          ; The condition is produced by an i1 'and', not directly by a compare.
    205   %cmp = and i1 %cmp1, %cmp2
    206   %ext = zext i1 %cmp to i32
    207   %add = add i32 %v, %ext
    208   store i32 %add, i32 addrspace(1)* %gep, align 4
    209   ret void
    210 }
    211 
    212 declare i1 @llvm.amdgcn.class.f32(float, i32) #0 ; condition source in zext_flclass and sext_flclass
    213 
    214 declare i32 @llvm.amdgcn.workitem.id.x() #0 ; used as the element index into %arg in every test above
    215 
    216 declare i32 @llvm.amdgcn.workitem.id.y() #0 ; second operand of the id.x >u id.y compares
    217 
    218 attributes #0 = { nounwind readnone speculatable } ; attribute group shared by the three intrinsic declarations above
    219