; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Checks how an add/sub with a constant operand is selected. Where negating
; the constant and flipping the opcode gives a smaller encoding, the flipped
; form is expected; otherwise the operation is expected as written.
;
; Each vector kernel below loads its operand from %in at the workitem's
; index and stores the result to the same index of %out.

; x - 64: expected to remain a (reversed) subtract with 64 as the constant.
; GCN-LABEL: {{^}}v_test_i32_x_sub_64:
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
; GCN: v_subrev_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
define amdgpu_kernel void @v_test_i32_x_sub_64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.i64 = sext i32 %lane to i64
  %src = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %lane.i64
  %dst = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %lane.i64
  %x = load i32, i32 addrspace(1)* %src
  %diff = sub i32 %x, 64
  store i32 %diff, i32 addrspace(1)* %dst
  ret void
}

; Two volatile loads, each reduced by 64: both subtracts are expected to keep
; 64 as their constant operand.
; GCN-LABEL: {{^}}v_test_i32_x_sub_64_multi_use:
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[Y:v[0-9]+]]
; GCN-DAG: v_subrev_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
; GCN-DAG: v_subrev_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 64, [[Y]]
define amdgpu_kernel void @v_test_i32_x_sub_64_multi_use(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.i64 = sext i32 %lane to i64
  %src = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %lane.i64
  %dst = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %lane.i64
  %x = load volatile i32, i32 addrspace(1)* %src
  %y = load volatile i32, i32 addrspace(1)* %src
  %diff0 = sub i32 %x, 64
  %diff1 = sub i32 %y, 64
  store volatile i32 %diff0, i32 addrspace(1)* %dst
  store volatile i32 %diff1, i32 addrspace(1)* %dst
  ret void
}

; 64 - x: expected as a plain subtract with 64 as the first source operand.
; GCN-LABEL: {{^}}v_test_i32_64_sub_x:
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
define amdgpu_kernel void @v_test_i32_64_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.i64 = sext i32 %lane to i64
  %src = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %lane.i64
  %dst = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %lane.i64
  %x = load i32, i32 addrspace(1)* %src
  %diff = sub i32 64, %x
  store i32 %diff, i32 addrspace(1)* %dst
  ret void
}

; x - 65: expected as an add of 0xffffffbf, the 32-bit pattern of -65.
; GCN-LABEL: {{^}}v_test_i32_x_sub_65:
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 0xffffffbf, [[X]]
define amdgpu_kernel void @v_test_i32_x_sub_65(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.i64 = sext i32 %lane to i64
  %src = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %lane.i64
  %dst = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %lane.i64
  %x = load i32, i32 addrspace(1)* %src
  %diff = sub i32 %x, 65
  store i32 %diff, i32 addrspace(1)* %dst
  ret void
}

; 65 - x: expected as a subtract whose constant operand is 0x41 (65).
; GCN-LABEL: {{^}}v_test_i32_65_sub_x:
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 0x41, [[X]]
define amdgpu_kernel void @v_test_i32_65_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.i64 = sext i32 %lane to i64
  %src = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %lane.i64
  %dst = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %lane.i64
  %x = load i32, i32 addrspace(1)* %src
  %diff = sub i32 65, %x
  store i32 %diff, i32 addrspace(1)* %dst
  ret void
}

; x - (-16): expected as an add of 16.
; GCN-LABEL: {{^}}v_test_i32_x_sub_neg16:
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 16, [[X]]
define amdgpu_kernel void @v_test_i32_x_sub_neg16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.i64 = sext i32 %lane to i64
  %src = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %lane.i64
  %dst = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %lane.i64
  %x = load i32, i32 addrspace(1)* %src
  %diff = sub i32 %x, -16
  store i32 %diff, i32 addrspace(1)* %dst
  ret void
}

; -16 - x: expected as a subtract with -16 as the constant operand.
; GCN-LABEL: {{^}}v_test_i32_neg16_sub_x:
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, -16, [[X]]
define amdgpu_kernel void @v_test_i32_neg16_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.i64 = sext i32 %lane to i64
  %src = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %lane.i64
  %dst = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %lane.i64
  %x = load i32, i32 addrspace(1)* %src
  %diff = sub i32 -16, %x
  store i32 %diff, i32 addrspace(1)* %dst
  ret void
}

; x - (-17): expected as an add of 17.
; GCN-LABEL: {{^}}v_test_i32_x_sub_neg17:
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 17, [[X]]
define amdgpu_kernel void @v_test_i32_x_sub_neg17(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.i64 = sext i32 %lane to i64
  %src = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %lane.i64
  %dst = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %lane.i64
  %x = load i32, i32 addrspace(1)* %src
  %diff = sub i32 %x, -17
  store i32 %diff, i32 addrspace(1)* %dst
  ret void
}

; -17 - x: expected as a subtract whose constant operand is 0xffffffef, the
; 32-bit pattern of -17.
; GCN-LABEL: {{^}}v_test_i32_neg17_sub_x:
; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 0xffffffef, [[X]]
define amdgpu_kernel void @v_test_i32_neg17_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.i64 = sext i32 %lane to i64
  %src = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %lane.i64
  %dst = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %lane.i64
  %x = load i32, i32 addrspace(1)* %src
  %diff = sub i32 -17, %x
  store i32 %diff, i32 addrspace(1)* %dst
  ret void
}

; Scalar variant: %x is a kernel argument and the difference is consumed by
; inline asm through an "s" constraint. Expected as s_sub_i32 with 64.
; GCN-LABEL: {{^}}s_test_i32_x_sub_64:
; GCN: s_load_dword [[X:s[0-9]+]]
; GCN: s_sub_i32 s{{[0-9]+}}, [[X]], 64
define amdgpu_kernel void @s_test_i32_x_sub_64(i32 %x) #0 {
  %diff = sub i32 %x, 64
  call void asm sideeffect "; use $0", "s"(i32 %diff)
  ret void
}

; 16-bit x - 64. Only the tonga (VI) output is checked beyond the label; a
; 16-bit reversed subtract with 64 is expected there.
; GCN-LABEL: {{^}}v_test_i16_x_sub_64:
; VI: {{buffer|flat}}_load_ushort [[X:v[0-9]+]]
; VI: v_subrev_u16_e32 v{{[0-9]+}}, 64, [[X]]
define amdgpu_kernel void @v_test_i16_x_sub_64(i16 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.i64 = sext i32 %lane to i64
  %src = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 %lane.i64
  %dst = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 %lane.i64
  %x = load i16, i16 addrspace(1)* %src
  %diff = sub i16 %x, 64
  store i16 %diff, i16 addrspace(1)* %dst
  ret void
}

; Two volatile 16-bit loads, each reduced by 64. With -mcpu=tonga a 16-bit
; reversed subtract is expected; with the default CPU a 32-bit reversed
; subtract that also writes vcc is expected.
; GCN-LABEL: {{^}}v_test_i16_x_sub_64_multi_use:
; GCN: {{buffer|flat}}_load_ushort [[X:v[0-9]+]]
; GCN: {{buffer|flat}}_load_ushort [[Y:v[0-9]+]]
; VI-DAG: v_subrev_u16_e32 v{{[0-9]+}}, 64, [[X]]
; VI-DAG: v_subrev_u16_e32 v{{[0-9]+}}, 64, [[Y]]

; SI-DAG: v_subrev_i32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
; SI-DAG: v_subrev_i32_e32 v{{[0-9]+}}, vcc, 64, [[Y]]
define amdgpu_kernel void @v_test_i16_x_sub_64_multi_use(i16 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.i64 = sext i32 %lane to i64
  %src = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 %lane.i64
  %dst = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 %lane.i64
  %x = load volatile i16, i16 addrspace(1)* %src
  %y = load volatile i16, i16 addrspace(1)* %src
  %diff0 = sub i16 %x, 64
  %diff1 = sub i16 %y, 64
  store volatile i16 %diff0, i16 addrspace(1)* %dst
  store volatile i16 %diff1, i16 addrspace(1)* %dst
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }