; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s

declare i32 @llvm.r600.read.tidig.x() #0
declare i32 @llvm.r600.read.tidig.y() #0

; In this test both the pointer and the offset operands to the
; BUFFER_LOAD instructions end up being stored in VGPRs. This
; requires us to add the pointer and offset together, store the
; result in the offset operand (vaddr), and then store 0 in an
; SGPR register pair and use that for the pointer operand
; (the low 64 bits of srsrc).

; GCN-LABEL: {{^}}mubuf:

; Make sure we aren't using VGPRs for the source operand of s_mov_b64
; GCN-NOT: s_mov_b64 s[{{[0-9]+:[0-9]+}}], v

; Make sure we aren't using VGPRs for the srsrc operand of BUFFER_LOAD_*
; instructions
; GCN: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64
; GCN: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64

define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #1 {
entry:
  %tmp = call i32 @llvm.r600.read.tidig.x()
  %tmp1 = call i32 @llvm.r600.read.tidig.y()
  %tmp2 = sext i32 %tmp to i64
  %tmp3 = sext i32 %tmp1 to i64
  br label %loop

loop:                                             ; preds = %loop, %entry
  %tmp4 = phi i64 [ 0, %entry ], [ %tmp5, %loop ]
  %tmp5 = add i64 %tmp2, %tmp4
  %tmp6 = getelementptr i8, i8 addrspace(1)* %in, i64 %tmp5
  %tmp7 = load i8, i8 addrspace(1)* %tmp6, align 1
  %tmp8 = or i64 %tmp5, 1
  %tmp9 = getelementptr i8, i8 addrspace(1)* %in, i64 %tmp8
  %tmp10 = load i8, i8 addrspace(1)* %tmp9, align 1
  %tmp11 = add i8 %tmp7, %tmp10
  %tmp12 = sext i8 %tmp11 to i32
  store i32 %tmp12, i32 addrspace(1)* %out
  %tmp13 = icmp slt i64 %tmp5, 10
  br i1 %tmp13, label %loop, label %done

done:                                             ; preds = %loop
  ret void
}

; Test moving an SMRD instruction to the VALU

; GCN-LABEL: {{^}}smrd_valu:
; GCN: buffer_load_dword [[OUT:v[0-9]+]]
; GCN: buffer_store_dword [[OUT]]
define void @smrd_valu(i32 addrspace(2)* addrspace(1)* %in, i32 %a, i32 %b, i32 addrspace(1)* %out) #1 {
entry:
  %tmp = icmp ne i32 %a, 0
  br i1 %tmp, label %if, label %else

if:                                               ; preds = %entry
  %tmp1 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %in
  br label %endif

else:                                             ; preds = %entry
  %tmp2 = getelementptr i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %in
  %tmp3 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %tmp2
  br label %endif

endif:                                            ; preds = %else, %if
  %tmp4 = phi i32 addrspace(2)* [ %tmp1, %if ], [ %tmp3, %else ]
  %tmp5 = getelementptr i32, i32 addrspace(2)* %tmp4, i32 3000
  %tmp6 = load i32, i32 addrspace(2)* %tmp5
  store i32 %tmp6, i32 addrspace(1)* %out
  ret void
}

; Test moving an SMRD with an immediate offset to the VALU

; GCN-LABEL: {{^}}smrd_valu2:
; GCN-NOT: v_add
; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16{{$}}
define void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in) #1 {
entry:
  %tmp = call i32 @llvm.r600.read.tidig.x() #0
  %tmp1 = add i32 %tmp, 4
  %tmp2 = getelementptr [8 x i32], [8 x i32] addrspace(2)* %in, i32 %tmp, i32 4
  %tmp3 = load i32, i32 addrspace(2)* %tmp2
  store i32 %tmp3, i32 addrspace(1)* %out
  ret void
}

; Use a big offset that will use the SMRD literal offset on CI
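; (5000 * 4 = 0x4e20 bytes). On SI this does not fit in the 8-bit dword
; SMRD immediate, so once the load is moved to the VALU the offset is
; materialized in an SGPR and used by a MUBUF access. A rough sketch of
; the expected code (register numbers are illustrative, not part of the
; checks below):
;   s_movk_i32 s8, 0x4e20
;   buffer_load_dword v2, v[0:1], s[0:3], s8 addr64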
; GCN-LABEL: {{^}}smrd_valu_ci_offset:
; GCN-NOT: v_add
; GCN: s_movk_i32 [[OFFSET:s[0-9]+]], 0x4e20{{$}}
; GCN-NOT: v_add
; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}}
; GCN: v_add_i32_e32
; GCN: buffer_store_dword
define void @smrd_valu_ci_offset(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %c) #1 {
entry:
  %tmp = call i32 @llvm.r600.read.tidig.x() #0
  %tmp2 = getelementptr i32, i32 addrspace(2)* %in, i32 %tmp
  %tmp3 = getelementptr i32, i32 addrspace(2)* %tmp2, i32 5000
  %tmp4 = load i32, i32 addrspace(2)* %tmp3
  %tmp5 = add i32 %tmp4, %c
  store i32 %tmp5, i32 addrspace(1)* %out
  ret void
}

; Same test with 64-bit and 128-bit loads. 0x9c40 (5000 * 8) no longer
; fits in the signed 16-bit s_movk_i32 immediate, so s_mov_b32 is used,
; and the scalar or with %c becomes per-dword v_or_b32s once the value
; is in VGPRs.

; GCN-LABEL: {{^}}smrd_valu_ci_offset_x2:
; GCN-NOT: v_add
; GCN: s_mov_b32 [[OFFSET:s[0-9]+]], 0x9c40{{$}}
; GCN-NOT: v_add
; GCN: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: buffer_store_dwordx2
define void @smrd_valu_ci_offset_x2(i64 addrspace(1)* %out, i64 addrspace(2)* %in, i64 %c) #1 {
entry:
  %tmp = call i32 @llvm.r600.read.tidig.x() #0
  %tmp2 = getelementptr i64, i64 addrspace(2)* %in, i32 %tmp
  %tmp3 = getelementptr i64, i64 addrspace(2)* %tmp2, i32 5000
  %tmp4 = load i64, i64 addrspace(2)* %tmp3
  %tmp5 = or i64 %tmp4, %c
  store i64 %tmp5, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}smrd_valu_ci_offset_x4:
; GCN-NOT: v_add
; GCN: s_movk_i32 [[OFFSET:s[0-9]+]], 0x4d20{{$}}
; GCN-NOT: v_add
; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: buffer_store_dwordx4
define void @smrd_valu_ci_offset_x4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(2)* %in, <4 x i32> %c) #1 {
entry:
  %tmp = call i32 @llvm.r600.read.tidig.x() #0
  %tmp2 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %in, i32 %tmp
  %tmp3 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %tmp2, i32 1234
  %tmp4 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp3
  %tmp5 = or <4 x i32> %tmp4, %c
  store <4 x i32> %tmp5, <4 x i32> addrspace(1)* %out
  ret void
}

; Original scalar load uses an SGPR offset on SI and a 32-bit literal on CI.
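; The <8 x i32> load is too wide for a single MUBUF access, so it is
; split into two dwordx4 loads whose offsets are 16 bytes apart
; (0x9a40 = 1234 * 32 for the low half, 0x9a50 for the high half).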

; GCN-LABEL: {{^}}smrd_valu_ci_offset_x8:
; GCN-NOT: v_add
; GCN: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x9a40{{$}}
; GCN-NOT: v_add
; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}}
; GCN-NOT: v_add
; GCN: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x9a50{{$}}
; GCN-NOT: v_add
; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}}

; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
define void @smrd_valu_ci_offset_x8(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(2)* %in, <8 x i32> %c) #1 {
entry:
  %tmp = call i32 @llvm.r600.read.tidig.x() #0
  %tmp2 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %in, i32 %tmp
  %tmp3 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %tmp2, i32 1234
  %tmp4 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp3
  %tmp5 = or <8 x i32> %tmp4, %c
  store <8 x i32> %tmp5, <8 x i32> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}smrd_valu_ci_offset_x16:

; GCN-NOT: v_add
; GCN: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x13480{{$}}
; GCN-NOT: v_add
; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}}
; GCN-NOT: v_add
; GCN: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x13490{{$}}
; GCN-NOT: v_add
; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}}
; GCN-NOT: v_add
; GCN: s_mov_b32 [[OFFSET2:s[0-9]+]], 0x134a0{{$}}
; GCN-NOT: v_add
; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET2]] addr64{{$}}
; GCN-NOT: v_add
; GCN: s_mov_b32 [[OFFSET3:s[0-9]+]], 0x134b0{{$}}
; GCN-NOT: v_add
; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET3]] addr64{{$}}

; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4

; GCN: s_endpgm
define void @smrd_valu_ci_offset_x16(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(2)* %in, <16 x i32> %c) #1 {
entry:
  %tmp = call i32 @llvm.r600.read.tidig.x() #0
  %tmp2 = getelementptr <16 x i32>, <16 x i32> addrspace(2)* %in, i32 %tmp
  %tmp3 = getelementptr <16 x i32>, <16 x i32> addrspace(2)* %tmp2, i32 1234
  %tmp4 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp3
  %tmp5 = or <16 x i32> %tmp4, %c
  store <16 x i32> %tmp5, <16 x i32> addrspace(1)* %out
  ret void
}
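
; When the result of a moved SMRD feeds a SALU instruction, that user
; has to be moved to the VALU as well: the scalar add of %a below is
; expected to become a v_add_i32 rather than an s_add_i32.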
; GCN-LABEL: {{^}}smrd_valu2_salu_user:
; GCN: buffer_load_dword [[MOVED:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, s{{[0-9]+}}, [[MOVED]]
; GCN: buffer_store_dword [[ADD]]
define void @smrd_valu2_salu_user(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in, i32 %a) #1 {
entry:
  %tmp = call i32 @llvm.r600.read.tidig.x() #0
  %tmp1 = add i32 %tmp, 4
  %tmp2 = getelementptr [8 x i32], [8 x i32] addrspace(2)* %in, i32 %tmp, i32 4
  %tmp3 = load i32, i32 addrspace(2)* %tmp2
  %tmp4 = add i32 %tmp3, %a
  store i32 %tmp4, i32 addrspace(1)* %out
  ret void
}

; 255 * 4 = 1020 bytes is the largest offset that still fits in SI's
; 8-bit dword SMRD immediate; it also fits in the 12-bit MUBUF offset
; field after the move.

; GCN-LABEL: {{^}}smrd_valu2_max_smrd_offset:
; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1020{{$}}
define void @smrd_valu2_max_smrd_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 {
entry:
  %tmp = call i32 @llvm.r600.read.tidig.x() #0
  %tmp1 = add i32 %tmp, 4
  %tmp2 = getelementptr [1024 x i32], [1024 x i32] addrspace(2)* %in, i32 %tmp, i32 255
  %tmp3 = load i32, i32 addrspace(2)* %tmp2
  store i32 %tmp3, i32 addrspace(1)* %out
  ret void
}

; 256 * 4 = 1024 bytes no longer fits in SI's SMRD immediate, but it
; still fits in the MUBUF offset field, so no v_add is needed.

; GCN-LABEL: {{^}}smrd_valu2_mubuf_offset:
; GCN-NOT: v_add
; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1024{{$}}
define void @smrd_valu2_mubuf_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 {
entry:
  %tmp = call i32 @llvm.r600.read.tidig.x() #0
  %tmp1 = add i32 %tmp, 4
  %tmp2 = getelementptr [1024 x i32], [1024 x i32] addrspace(2)* %in, i32 %tmp, i32 256
  %tmp3 = load i32, i32 addrspace(2)* %tmp2
  store i32 %tmp3, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}s_load_imm_v8i32:
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
define void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
entry:
  %tmp0 = tail call i32 @llvm.r600.read.tidig.x()
  %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
  %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <8 x i32> addrspace(2)*
  %tmp3 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp2, align 4
  store <8 x i32> %tmp3, <8 x i32> addrspace(1)* %out, align 32
  ret void
}

; GCN-LABEL: {{^}}s_load_imm_v8i32_salu_user:
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: buffer_store_dword
define void @s_load_imm_v8i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
entry:
  %tmp0 = tail call i32 @llvm.r600.read.tidig.x()
  %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
  %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <8 x i32> addrspace(2)*
  %tmp3 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp2, align 4

  %elt0 = extractelement <8 x i32> %tmp3, i32 0
  %elt1 = extractelement <8 x i32> %tmp3, i32 1
  %elt2 = extractelement <8 x i32> %tmp3, i32 2
  %elt3 = extractelement <8 x i32> %tmp3, i32 3
  %elt4 = extractelement <8 x i32> %tmp3, i32 4
  %elt5 = extractelement <8 x i32> %tmp3, i32 5
  %elt6 = extractelement <8 x i32> %tmp3, i32 6
  %elt7 = extractelement <8 x i32> %tmp3, i32 7

  %add0 = add i32 %elt0, %elt1
  %add1 = add i32 %add0, %elt2
  %add2 = add i32 %add1, %elt3
  %add3 = add i32 %add2, %elt4
  %add4 = add i32 %add3, %elt5
  %add5 = add i32 %add4, %elt6
  %add6 = add i32 %add5, %elt7

  store i32 %add6, i32 addrspace(1)* %out
  ret void
}
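
; A <16 x i32> load moved to the VALU splits into four dwordx4 MUBUF
; loads (the four buffer_load_dwordx4 checks below).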
; GCN-LABEL: {{^}}s_load_imm_v16i32:
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
define void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
entry:
  %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1
  %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
  %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <16 x i32> addrspace(2)*
  %tmp3 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp2, align 4
  store <16 x i32> %tmp3, <16 x i32> addrspace(1)* %out, align 32
  ret void
}

; GCN-LABEL: {{^}}s_load_imm_v16i32_salu_user:
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: v_add_i32_e32
; GCN: buffer_store_dword
define void @s_load_imm_v16i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
entry:
  %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1
  %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
  %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <16 x i32> addrspace(2)*
  %tmp3 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp2, align 4

  %elt0 = extractelement <16 x i32> %tmp3, i32 0
  %elt1 = extractelement <16 x i32> %tmp3, i32 1
  %elt2 = extractelement <16 x i32> %tmp3, i32 2
  %elt3 = extractelement <16 x i32> %tmp3, i32 3
  %elt4 = extractelement <16 x i32> %tmp3, i32 4
  %elt5 = extractelement <16 x i32> %tmp3, i32 5
  %elt6 = extractelement <16 x i32> %tmp3, i32 6
  %elt7 = extractelement <16 x i32> %tmp3, i32 7
  %elt8 = extractelement <16 x i32> %tmp3, i32 8
  %elt9 = extractelement <16 x i32> %tmp3, i32 9
  %elt10 = extractelement <16 x i32> %tmp3, i32 10
  %elt11 = extractelement <16 x i32> %tmp3, i32 11
  %elt12 = extractelement <16 x i32> %tmp3, i32 12
  %elt13 = extractelement <16 x i32> %tmp3, i32 13
  %elt14 = extractelement <16 x i32> %tmp3, i32 14
  %elt15 = extractelement <16 x i32> %tmp3, i32 15

  %add0 = add i32 %elt0, %elt1
  %add1 = add i32 %add0, %elt2
  %add2 = add i32 %add1, %elt3
  %add3 = add i32 %add2, %elt4
  %add4 = add i32 %add3, %elt5
  %add5 = add i32 %add4, %elt6
  %add6 = add i32 %add5, %elt7
  %add7 = add i32 %add6, %elt8
  %add8 = add i32 %add7, %elt9
  %add9 = add i32 %add8, %elt10
  %add10 = add i32 %add9, %elt11
  %add11 = add i32 %add10, %elt12
  %add12 = add i32 %add11, %elt13
  %add13 = add i32 %add12, %elt14
  %add14 = add i32 %add13, %elt15

  store i32 %add14, i32 addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }