; Regression test for sinking address computations (getelementptr) from an
; entry block into the block that performs the memory access, on AMDGPU.
;
; Two groups of run lines follow:
;   * opt -codegenprepare for tahiti / bonaire / tonga, checked with the OPT
;     prefix plus the per-target OPT-SI / OPT-CI / OPT-VI prefixes.
;   * llc with -mattr=-promote-alloca for the same three CPUs, checked with
;     the GCN prefix plus the per-target SI / CI / VI prefixes.
;
; Every test has the same shape: the address is computed in %entry, the
; function branches on (%cond == 0), and the access through that address
; happens only in %if. An OPT check that expects the getelementptr ahead of
; "br i1" means the address is expected to stay in %entry; an OPT "-NOT"
; check ahead of "br i1" means it is expected to be gone from %entry.
;
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tahiti < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-SI %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s
; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; NOTE(review): this intrinsic is declared but not called by any function
; visible in this file.
declare i32 @llvm.r600.read.tidig.x() #0

; Global (addrspace 1) i32 load at a constant index of 7 elements.
; bonaire: no GEP on %in is expected before the branch, and a ptrtoint is
; expected after it. tonga: the GEP on %in is expected to remain before the
; branch.
; OPT-LABEL: @test_sink_global_small_offset_i32(
; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in
; OPT-VI: getelementptr i32, i32 addrspace(1)* %in
; OPT: br i1
; OPT-CI: ptrtoint

; GCN-LABEL: {{^}}test_sink_global_small_offset_i32:
; GCN: {{^}}BB0_2:
define void @test_sink_global_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 7
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(1)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Global i8 load at a constant byte offset of 65535.
; The OPT checks expect %in.gep to remain ahead of the branch on all targets.
; The GCN checks expect a buffer_load_sbyte whose last operand is a scalar
; register (no immediate offset field), inside the exec-masked region.
; OPT-LABEL: @test_sink_global_small_max_i32_ds_offset(
; OPT: %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_global_small_max_i32_ds_offset:
; GCN: s_and_saveexec_b64
; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
; GCN: {{^}}BB1_2:
; GCN: s_or_b64 exec
define void @test_sink_global_small_max_i32_ds_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in, i32 %cond) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
  %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i8, i8 addrspace(1)* %in.gep
  %tmp2 = sext i8 %tmp1 to i32
  br label %endif

endif:
  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Global i8 load at a constant byte offset of 4095.
; The GCN checks expect the offset to appear as the immediate "offset:4095"
; on the buffer_load_sbyte. There are no OPT checks for this function.
; GCN-LABEL: {{^}}test_sink_global_small_max_mubuf_offset:
; GCN: s_and_saveexec_b64
; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}}
; GCN: {{^}}BB2_2:
; GCN: s_or_b64 exec
define void @test_sink_global_small_max_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in, i32 %cond) {
entry:
  ; Unlike the sibling tests, the index on %out here is an i32 constant.
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024
  %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4095
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i8, i8 addrspace(1)* %in.gep
  %tmp2 = sext i8 %tmp1 to i32
  br label %endif

endif:
  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Global i8 load at a constant byte offset of 4096, one more than the
; previous test. The GCN checks expect the buffer_load_sbyte to end in a
; scalar register operand rather than an immediate offset field.
; GCN-LABEL: {{^}}test_sink_global_small_max_plus_1_mubuf_offset:
; GCN: s_and_saveexec_b64
; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
; GCN: {{^}}BB3_2:
; GCN: s_or_b64 exec
define void @test_sink_global_small_max_plus_1_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in, i32 %cond) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
  %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4096
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i8, i8 addrspace(1)* %in.gep
  %tmp2 = sext i8 %tmp1 to i32
  br label %endif

endif:
  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Stack (alloca) access at element index 1023 of a [512 x i32] array, i.e. a
; byte offset of 1023 * 4 = 4092.
; The OPT checks expect no GEP on the array type before the branch and a
; ptrtoint after it. The GCN checks expect both the store and the load to
; use "offen offset:4092".
; OPT-LABEL: @test_sink_scratch_small_offset_i32(
; OPT-NOT: getelementptr [512 x i32]
; OPT: br i1
; OPT: ptrtoint

; GCN-LABEL: {{^}}test_sink_scratch_small_offset_i32:
; GCN: s_and_saveexec_b64
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
; GCN: {{^}}BB4_2:
define void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond, i32 %arg) {
entry:
  %alloca = alloca [512 x i32], align 4
  %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  ; NOTE(review): %add.arg is not used anywhere in this function.
  %add.arg = add i32 %arg, 8
  %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1023
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %endif, label %if

if:
  store volatile i32 123, i32* %alloca.gep
  %tmp1 = load volatile i32, i32* %alloca.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep.0
  ; Second use of the same address, outside the conditional block.
  %load = load volatile i32, i32* %alloca.gep
  store i32 %load, i32 addrspace(1)* %out.gep.1
  br label %done

done:
  ret void
}

; Same as the previous test but at element index 1024, i.e. a byte offset of
; 1024 * 4 = 4096.
; The OPT checks expect %alloca.gep to remain ahead of the branch with no
; ptrtoint after it. The GCN checks expect "offen" with no offset field on
; the store and the load.
; OPT-LABEL: @test_no_sink_scratch_large_offset_i32(
; OPT: %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
; OPT: br i1
; OPT-NOT: ptrtoint

; GCN-LABEL: {{^}}test_no_sink_scratch_large_offset_i32:
; GCN: s_and_saveexec_b64
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
; GCN: {{^}}BB5_2:
define void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond, i32 %arg) {
entry:
  %alloca = alloca [512 x i32], align 4
  %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  ; NOTE(review): %add.arg is not used anywhere in this function.
  %add.arg = add i32 %arg, 8
  %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %endif, label %if

if:
  store volatile i32 123, i32* %alloca.gep
  %tmp1 = load volatile i32, i32* %alloca.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep.0
  ; Second use of the same address, outside the conditional block.
  %load = load volatile i32, i32* %alloca.gep
  store i32 %load, i32 addrspace(1)* %out.gep.1
  br label %done

done:
  ret void
}

; Global i32 load indexed by a run-time value (%offset zero-extended to i64)
; instead of a constant.
; The bonaire checks expect an addr64 buffer_load_dword. The tonga checks
; expect a flat_load_dword plus flat_scratch_lo / flat_scratch_hi being set
; to 0. There are no tahiti-specific or OPT checks for this function.
; GCN-LABEL: {{^}}test_sink_global_vreg_sreg_i32:
; VI-DAG: s_movk_i32 flat_scratch_lo, 0x0
; VI-DAG: s_movk_i32 flat_scratch_hi, 0x0
; GCN: s_and_saveexec_b64
; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
; GCN: {{^}}BB6_2:
define void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset, i32 %cond) {
entry:
  %offset.ext = zext i32 %offset to i64
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 %offset.ext
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(1)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Attribute groups. #0 is used by the intrinsic declaration at the top of the
; file. NOTE(review): #1 is not referenced by anything visible in this file.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }



; The remaining tests load from addrspace(2) at increasing constant offsets.

; addrspace(2) i32 load at a constant index of 7 elements.
; The OPT checks expect no addrspace(2) GEP before the branch on any target.
; The tahiti checks expect the s_load_dword immediate to be 0x7.
; OPT-LABEL: @test_sink_constant_small_offset_i32
; OPT-NOT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_small_offset_i32:
; GCN: s_and_saveexec_b64
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x7{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 7
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; addrspace(2) i32 load at a constant index of 255 elements (0xff).
; The OPT checks expect no addrspace(2) GEP before the branch on any target.
; The tahiti checks expect the s_load_dword immediate to be 0xff.
; OPT-LABEL: @test_sink_constant_max_8_bit_offset_i32
; OPT-NOT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_i32:
; GCN: s_and_saveexec_b64
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xff{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_8_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 255
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; addrspace(2) i32 load at a constant index of 256 elements, i.e. a byte
; offset of 256 * 4 = 0x400.
; tahiti: the GEP is expected to remain ahead of the branch, and 0x400 is
; expected to be moved into a scalar register that the s_load_dword then uses
; as its offset. bonaire and tonga: no addrspace(2) GEP is expected before
; the branch.
; OPT-LABEL: @test_sink_constant_max_8_bit_offset_p1_i32
; OPT-SI: getelementptr i32, i32 addrspace(2)*
; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT-VI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_p1_i32:
; GCN: s_and_saveexec_b64
; SI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x400

; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_8_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 256
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; addrspace(2) i32 load at a constant index of 4294967295 elements, i.e. a
; byte offset of 0x3FFFFFFFC. The GCN checks expect that offset to be added
; to the pointer as a low half of -4 (s_add_u32) and a high half of 3
; (s_addc_u32).
; tahiti: the GEP is expected to remain ahead of the branch, and the
; s_load_dword is expected to use a 0x0 offset. bonaire: no addrspace(2) GEP
; is expected before the branch. There is no tonga-specific OPT check.
; OPT-LABEL: @test_sink_constant_max_32_bit_offset_i32
; OPT-SI: getelementptr i32, i32 addrspace(2)*
; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_i32:
; GCN: s_and_saveexec_b64
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}}
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}}
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 4294967295
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; addrspace(2) i32 load at a constant index of 17179869181 elements.
; The OPT checks expect the GEP to remain ahead of the branch on every
; target. The GCN checks expect an s_add_u32 / s_addc_u32 pair; the tahiti
; checks then expect an s_load_dword with a 0x0 offset.
; OPT-LABEL: @test_sink_constant_max_32_bit_offset_p1_i32
; OPT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_p1_i32:
; GCN: s_and_saveexec_b64
; GCN: s_add_u32
; GCN: s_addc_u32
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_32_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 17179869181
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; addrspace(2) i32 load at a constant index of 262143 elements (0x3ffff),
; i.e. a byte offset of 262143 * 4 = 0xffffc.
; tahiti: 0xffffc is expected in a scalar register used as the load offset.
; bonaire: an immediate of 0x3ffff is expected. tonga: an immediate of
; 0xffffc is expected. There are no OPT checks for this function.
; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_i32:
; GCN: s_and_saveexec_b64
; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc{{$}}
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}

; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x3ffff{{$}}
; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xffffc{{$}}

; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_20_bit_byte_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262143
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; addrspace(2) i32 load at a constant index of 262144 elements (0x40000),
; i.e. a byte offset of 262144 * 4 = 0x100000.
; tahiti and tonga: the GEP is expected to remain ahead of the branch, and
; 0x100000 is expected in a scalar register used as the load offset.
; bonaire: no addrspace(2) GEP is expected before the branch, and an
; immediate of 0x40000 is expected on the load.
; OPT-LABEL: @test_sink_constant_max_20_bit_byte_offset_p1_i32
; OPT-SI: getelementptr i32, i32 addrspace(2)*
; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT-VI: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_p1_i32:
; GCN: s_and_saveexec_b64
; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}}
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}

; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x40000{{$}}

; VI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}}
; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}

; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_20_bit_byte_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262144
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}