Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
      3 
      4 ; Test that an add/sub with a constant is swapped to a sub/add with the
      5 ; negated constant when doing so minimizes code size.
      6 
      7 ; GCN-LABEL: {{^}}v_test_i32_x_sub_64:
      8 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
      9 ; GCN: v_subrev_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
        ; Computes x - 64 on an i32 loaded from %in at the work-item id and stores
        ; the result to %out at the same index. The checks above expect the
        ; subtraction to be emitted as a reversed subtract (v_subrev) with the
        ; constant 64 as its first source operand and the loaded value as the
        ; second, rather than as an add of the negated constant.
     10 define amdgpu_kernel void @v_test_i32_x_sub_64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
     11   %tid = call i32 @llvm.amdgcn.workitem.id.x()
     12   %tid.ext = sext i32 %tid to i64
     13   %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
     14   %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
     15   %x = load i32, i32 addrspace(1)* %gep
     16   %result = sub i32 %x, 64
     17   store i32 %result, i32 addrspace(1)* %gep.out
     18   ret void
     19 }
     20 
     21 ; GCN-LABEL: {{^}}v_test_i32_x_sub_64_multi_use:
     22 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
     23 ; GCN: {{buffer|flat}}_load_dword [[Y:v[0-9]+]]
     24 ; GCN-DAG: v_subrev_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
     25 ; GCN-DAG: v_subrev_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 64, [[Y]]
        ; Same x - 64 pattern, but the constant 64 is used by two subtractions on
        ; two separately loaded values. The checks above expect two loads followed
        ; by two v_subrev instructions, each taking 64 as its first source operand;
        ; the two subtracts may appear in either order.
     26 define amdgpu_kernel void @v_test_i32_x_sub_64_multi_use(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
     27   %tid = call i32 @llvm.amdgcn.workitem.id.x()
     28   %tid.ext = sext i32 %tid to i64
     29   %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
     30   %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
        ; Both loads read the same address and both stores write the same address;
        ; they are marked volatile, presumably so that neither pair is merged or
        ; dropped (the checks above require two distinct loads).
     31   %x = load volatile i32, i32 addrspace(1)* %gep
     32   %y = load volatile i32, i32 addrspace(1)* %gep
     33   %result0 = sub i32 %x, 64
     34   %result1 = sub i32 %y, 64
     35   store volatile i32 %result0, i32 addrspace(1)* %gep.out
     36   store volatile i32 %result1, i32 addrspace(1)* %gep.out
     37   ret void
     38 }
     39 
     40 ; GCN-LABEL: {{^}}v_test_i32_64_sub_x:
     41 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
     42 ; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
        ; Computes 64 - x, i.e. the constant is the left-hand operand of the
        ; subtraction. The checks above expect a plain (non-reversed) v_sub with
        ; 64 as the first source operand and the loaded value as the second.
     43 define amdgpu_kernel void @v_test_i32_64_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
     44   %tid = call i32 @llvm.amdgcn.workitem.id.x()
     45   %tid.ext = sext i32 %tid to i64
     46   %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
     47   %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
     48   %x = load i32, i32 addrspace(1)* %gep
     49   %result = sub i32 64, %x
     50   store i32 %result, i32 addrspace(1)* %gep.out
     51   ret void
     52 }
     53 
     54 ; GCN-LABEL: {{^}}v_test_i32_x_sub_65:
     55 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
     56 ; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 0xffffffbf, [[X]]
        ; Computes x - 65. Unlike the x - 64 case, the checks above expect an add:
        ; v_add with the literal 0xffffffbf, which is -65 as a 32-bit
        ; two's-complement value.
        ; NOTE(review): in this file's checks 64 and -16 are printed in decimal
        ; while 65 and -17 are printed as 32-bit hex literals, which suggests the
        ; tests bracket the target's inline-immediate range - confirm against the
        ; ISA documentation.
     57 define amdgpu_kernel void @v_test_i32_x_sub_65(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
     58   %tid = call i32 @llvm.amdgcn.workitem.id.x()
     59   %tid.ext = sext i32 %tid to i64
     60   %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
     61   %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
     62   %x = load i32, i32 addrspace(1)* %gep
     63   %result = sub i32 %x, 65
     64   store i32 %result, i32 addrspace(1)* %gep.out
     65   ret void
     66 }
     67 
     68 ; GCN-LABEL: {{^}}v_test_i32_65_sub_x:
     69 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
     70 ; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 0x41, [[X]]
        ; Computes 65 - x with the constant on the left-hand side. The checks
        ; above expect a plain v_sub whose first source operand is the literal
        ; 0x41 (65) and whose second is the loaded value.
     71 define amdgpu_kernel void @v_test_i32_65_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
     72   %tid = call i32 @llvm.amdgcn.workitem.id.x()
     73   %tid.ext = sext i32 %tid to i64
     74   %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
     75   %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
     76   %x = load i32, i32 addrspace(1)* %gep
     77   %result = sub i32 65, %x
     78   store i32 %result, i32 addrspace(1)* %gep.out
     79   ret void
     80 }
     81 
     82 ; GCN-LABEL: {{^}}v_test_i32_x_sub_neg16:
     83 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
     84 ; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 16, [[X]]
        ; Computes x - (-16). The checks above expect this to be emitted as an
        ; add of the positive constant: v_add with 16 as the first source operand
        ; and the loaded value as the second.
     85 define amdgpu_kernel void @v_test_i32_x_sub_neg16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
     86   %tid = call i32 @llvm.amdgcn.workitem.id.x()
     87   %tid.ext = sext i32 %tid to i64
     88   %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
     89   %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
     90   %x = load i32, i32 addrspace(1)* %gep
     91   %result = sub i32 %x, -16
     92   store i32 %result, i32 addrspace(1)* %gep.out
     93   ret void
     94 }
     95 
     96 ; GCN-LABEL: {{^}}v_test_i32_neg16_sub_x:
     97 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
     98 ; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, -16, [[X]]
        ; Computes -16 - x with the constant on the left-hand side. The checks
        ; above expect a plain v_sub with -16 (printed in decimal) as the first
        ; source operand and the loaded value as the second.
     99 define amdgpu_kernel void @v_test_i32_neg16_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
    100   %tid = call i32 @llvm.amdgcn.workitem.id.x()
    101   %tid.ext = sext i32 %tid to i64
    102   %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
    103   %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
    104   %x = load i32, i32 addrspace(1)* %gep
    105   %result = sub i32 -16, %x
    106   store i32 %result, i32 addrspace(1)* %gep.out
    107   ret void
    108 }
    109 
    110 ; GCN-LABEL: {{^}}v_test_i32_x_sub_neg17:
    111 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
    112 ; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 17, [[X]]
        ; Computes x - (-17). The checks above expect an add of the positive
        ; constant: v_add with 17 as the first source operand and the loaded
        ; value as the second.
    113 define amdgpu_kernel void @v_test_i32_x_sub_neg17(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
    114   %tid = call i32 @llvm.amdgcn.workitem.id.x()
    115   %tid.ext = sext i32 %tid to i64
    116   %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
    117   %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
    118   %x = load i32, i32 addrspace(1)* %gep
    119   %result = sub i32 %x, -17
    120   store i32 %result, i32 addrspace(1)* %gep.out
    121   ret void
    122 }
    123 
    124 ; GCN-LABEL: {{^}}v_test_i32_neg17_sub_x:
    125 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
    126 ; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 0xffffffef, [[X]]
        ; Computes -17 - x with the constant on the left-hand side. The checks
        ; above expect a plain v_sub whose first source operand is the literal
        ; 0xffffffef, which is -17 as a 32-bit two's-complement value.
    127 define amdgpu_kernel void @v_test_i32_neg17_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
    128   %tid = call i32 @llvm.amdgcn.workitem.id.x()
    129   %tid.ext = sext i32 %tid to i64
    130   %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
    131   %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
    132   %x = load i32, i32 addrspace(1)* %gep
    133   %result = sub i32 -17, %x
    134   store i32 %result, i32 addrspace(1)* %gep.out
    135   ret void
    136 }
    137 
    138 ; GCN-LABEL: {{^}}s_test_i32_x_sub_64:
    139 ; GCN: s_load_dword [[X:s[0-9]+]]
    140 ; GCN: s_sub_i32 s{{[0-9]+}}, [[X]], 64
        ; Scalar counterpart of the x - 64 test: x is passed directly as a kernel
        ; argument instead of being loaded per work-item. The checks above expect
        ; the argument to be loaded into an s register with s_load_dword and then
        ; subtracted with s_sub_i32, keeping 64 as the second source operand.
    141 define amdgpu_kernel void @s_test_i32_x_sub_64(i32 %x) #0 {
    142   %result = sub i32 %x, 64
        ; The result is consumed by side-effecting inline asm with an "s"
        ; constraint - presumably to keep the value live and in a scalar
        ; register; confirm against the backend's constraint documentation.
    143   call void asm sideeffect "; use $0", "s"(i32 %result)
    144   ret void
    145 }
    146 
    147 ; GCN-LABEL: {{^}}v_test_i16_x_sub_64:
    148 ; VI: {{buffer|flat}}_load_ushort [[X:v[0-9]+]]
    149 ; VI: v_subrev_u16_e32 v{{[0-9]+}}, 64, [[X]]
        ; 16-bit version of the x - 64 test: loads an i16 at the work-item id,
        ; subtracts 64 and stores the i16 result. Instruction checks exist only
        ; for the -mcpu=tonga configuration, where a ushort load followed by
        ; v_subrev_u16 with 64 as the first source operand is expected; the
        ; default configuration only checks the function label.
    150 define amdgpu_kernel void @v_test_i16_x_sub_64(i16 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
    151   %tid = call i32 @llvm.amdgcn.workitem.id.x()
    152   %tid.ext = sext i32 %tid to i64
    153   %gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 %tid.ext
    154   %gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 %tid.ext
    155   %x = load i16, i16 addrspace(1)* %gep
    156   %result = sub i16 %x, 64
    157   store i16 %result, i16 addrspace(1)* %gep.out
    158   ret void
    159 }
    160 
    161 ; GCN-LABEL: {{^}}v_test_i16_x_sub_64_multi_use:
    162 ; GCN: {{buffer|flat}}_load_ushort [[X:v[0-9]+]]
    163 ; GCN: {{buffer|flat}}_load_ushort [[Y:v[0-9]+]]
    164 ; VI-DAG: v_subrev_u16_e32 v{{[0-9]+}}, 64, [[X]]
    165 ; VI-DAG: v_subrev_u16_e32 v{{[0-9]+}}, 64, [[Y]]
    166 
    167 ; SI-DAG: v_subrev_i32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
    168 ; SI-DAG: v_subrev_i32_e32 v{{[0-9]+}}, vcc, 64, [[Y]]
        ; 16-bit version of the multi-use test: two separately loaded i16 values
        ; each have 64 subtracted. Both configurations are expected to emit two
        ; ushort loads. The -mcpu=tonga configuration then expects two
        ; v_subrev_u16 instructions, while the default configuration expects two
        ; 32-bit v_subrev_i32 instructions (with a vcc operand); in both cases 64
        ; is the first source operand and the two subtracts may come in either
        ; order.
    169 define amdgpu_kernel void @v_test_i16_x_sub_64_multi_use(i16 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
    170   %tid = call i32 @llvm.amdgcn.workitem.id.x()
    171   %tid.ext = sext i32 %tid to i64
    172   %gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 %tid.ext
    173   %gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 %tid.ext
        ; Both loads read the same address and both stores write the same address;
        ; they are marked volatile, presumably so that neither pair is merged or
        ; dropped (the checks above require two distinct loads).
    174   %x = load volatile i16, i16 addrspace(1)* %gep
    175   %y = load volatile i16, i16 addrspace(1)* %gep
    176   %result0 = sub i16 %x, 64
    177   %result1 = sub i16 %y, 64
    178   store volatile i16 %result0, i16 addrspace(1)* %gep.out
    179   store volatile i16 %result1, i16 addrspace(1)* %gep.out
    180   ret void
    181 }
    182 
    183 declare i32 @llvm.amdgcn.workitem.id.x() #1
    184 
    185 attributes #0 = { nounwind }
    186 attributes #1 = { nounwind readnone }
    187