/*
 * Copyright 2010 Jerome Glisse <glisse (at) freedesktop.org>
 * Copyright 2014 Marek Olšák <marek.olsak (at) amd.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "r600_query.h"
#include "r600_cs.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"

#include "tgsi/tgsi_text.h"

struct r600_hw_query_params {
	unsigned start_offset;
	unsigned end_offset;
	unsigned fence_offset;
	unsigned pair_stride;
	unsigned pair_count;
};

/* Queries without buffer handling or suspend/resume. */
struct r600_query_sw {
	struct r600_query b;

	uint64_t begin_result;
	uint64_t end_result;
	/* Fence for GPU_FINISHED. */
	struct pipe_fence_handle *fence;
};
static void r600_query_sw_destroy(struct r600_common_context *rctx,
				  struct r600_query *rquery)
{
	struct pipe_screen *screen = rctx->b.screen;
	struct r600_query_sw *query = (struct r600_query_sw *)rquery;

	screen->fence_reference(screen, &query->fence, NULL);
	FREE(query);
}

static enum radeon_value_id winsys_id_from_type(unsigned type)
{
	switch (type) {
	case R600_QUERY_REQUESTED_VRAM: return RADEON_REQUESTED_VRAM_MEMORY;
	case R600_QUERY_REQUESTED_GTT: return RADEON_REQUESTED_GTT_MEMORY;
	case R600_QUERY_MAPPED_VRAM: return RADEON_MAPPED_VRAM;
	case R600_QUERY_MAPPED_GTT: return RADEON_MAPPED_GTT;
	case R600_QUERY_BUFFER_WAIT_TIME: return RADEON_BUFFER_WAIT_TIME_NS;
	case R600_QUERY_NUM_GFX_IBS: return RADEON_NUM_GFX_IBS;
	case R600_QUERY_NUM_SDMA_IBS: return RADEON_NUM_SDMA_IBS;
	case R600_QUERY_NUM_BYTES_MOVED: return RADEON_NUM_BYTES_MOVED;
	case R600_QUERY_NUM_EVICTIONS: return RADEON_NUM_EVICTIONS;
	case R600_QUERY_VRAM_USAGE: return RADEON_VRAM_USAGE;
	case R600_QUERY_GTT_USAGE: return RADEON_GTT_USAGE;
	case R600_QUERY_GPU_TEMPERATURE: return RADEON_GPU_TEMPERATURE;
	case R600_QUERY_CURRENT_GPU_SCLK: return RADEON_CURRENT_SCLK;
	case R600_QUERY_CURRENT_GPU_MCLK: return RADEON_CURRENT_MCLK;
	default: unreachable("query type does not correspond to winsys id");
	}
}

static bool r600_query_sw_begin(struct r600_common_context *rctx,
				struct r600_query *rquery)
{
	struct r600_query_sw *query = (struct r600_query_sw *)rquery;

	switch(query->b.type) {
	case PIPE_QUERY_TIMESTAMP_DISJOINT:
	case PIPE_QUERY_GPU_FINISHED:
		break;
	case R600_QUERY_DRAW_CALLS:
		query->begin_result = rctx->num_draw_calls;
		break;
	case R600_QUERY_SPILL_DRAW_CALLS:
		query->begin_result = rctx->num_spill_draw_calls;
		break;
	case R600_QUERY_COMPUTE_CALLS:
		query->begin_result = rctx->num_compute_calls;
		break;
	case R600_QUERY_SPILL_COMPUTE_CALLS:
		query->begin_result = rctx->num_spill_compute_calls;
		break;
	case R600_QUERY_DMA_CALLS:
		query->begin_result = rctx->num_dma_calls;
		break;
	case R600_QUERY_CP_DMA_CALLS:
		query->begin_result = rctx->num_cp_dma_calls;
		break;
	case R600_QUERY_NUM_VS_FLUSHES:
		query->begin_result = rctx->num_vs_flushes;
		break;
	case R600_QUERY_NUM_PS_FLUSHES:
		query->begin_result = rctx->num_ps_flushes;
		break;
	case R600_QUERY_NUM_CS_FLUSHES:
		query->begin_result = rctx->num_cs_flushes;
		break;
	case R600_QUERY_NUM_FB_CACHE_FLUSHES:
		query->begin_result = rctx->num_fb_cache_flushes;
		break;
	case R600_QUERY_NUM_L2_INVALIDATES:
		query->begin_result = rctx->num_L2_invalidates;
		break;
	case R600_QUERY_NUM_L2_WRITEBACKS:
		query->begin_result = rctx->num_L2_writebacks;
		break;
	case R600_QUERY_REQUESTED_VRAM:
	case R600_QUERY_REQUESTED_GTT:
	case R600_QUERY_MAPPED_VRAM:
	case R600_QUERY_MAPPED_GTT:
	case R600_QUERY_VRAM_USAGE:
	case R600_QUERY_GTT_USAGE:
	case R600_QUERY_GPU_TEMPERATURE:
	case R600_QUERY_CURRENT_GPU_SCLK:
	case R600_QUERY_CURRENT_GPU_MCLK:
	case R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO:
		query->begin_result = 0;
		break;
	case R600_QUERY_BUFFER_WAIT_TIME:
	case R600_QUERY_NUM_GFX_IBS:
	case R600_QUERY_NUM_SDMA_IBS:
	case R600_QUERY_NUM_BYTES_MOVED:
	case R600_QUERY_NUM_EVICTIONS: {
		enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
		query->begin_result = rctx->ws->query_value(rctx->ws, ws_id);
		break;
	}
	case R600_QUERY_GPU_LOAD:
		query->begin_result = r600_begin_counter_gui(rctx->screen);
		break;
	case R600_QUERY_GPU_SHADERS_BUSY:
		query->begin_result = r600_begin_counter_spi(rctx->screen);
		break;
	case R600_QUERY_NUM_COMPILATIONS:
		query->begin_result = p_atomic_read(&rctx->screen->num_compilations);
		break;
	case R600_QUERY_NUM_SHADERS_CREATED:
		query->begin_result = p_atomic_read(&rctx->screen->num_shaders_created);
		break;
	case R600_QUERY_NUM_SHADER_CACHE_HITS:
		query->begin_result =
			p_atomic_read(&rctx->screen->num_shader_cache_hits);
		break;
	case R600_QUERY_GPIN_ASIC_ID:
	case R600_QUERY_GPIN_NUM_SIMD:
	case R600_QUERY_GPIN_NUM_RB:
	case R600_QUERY_GPIN_NUM_SPI:
	case R600_QUERY_GPIN_NUM_SE:
		break;
	default:
		unreachable("r600_query_sw_begin: bad query type");
	}

	return true;
}

static bool r600_query_sw_end(struct r600_common_context *rctx,
			      struct r600_query *rquery)
{
	struct r600_query_sw *query = (struct r600_query_sw *)rquery;

	switch(query->b.type) {
	case PIPE_QUERY_TIMESTAMP_DISJOINT:
		break;
	case PIPE_QUERY_GPU_FINISHED:
		rctx->b.flush(&rctx->b, &query->fence, PIPE_FLUSH_DEFERRED);
		break;
	case R600_QUERY_DRAW_CALLS:
		query->end_result = rctx->num_draw_calls;
		break;
	case R600_QUERY_SPILL_DRAW_CALLS:
		query->end_result = rctx->num_spill_draw_calls;
		break;
	case R600_QUERY_COMPUTE_CALLS:
		query->end_result = rctx->num_compute_calls;
		break;
	case R600_QUERY_SPILL_COMPUTE_CALLS:
		query->end_result = rctx->num_spill_compute_calls;
		break;
	case R600_QUERY_DMA_CALLS:
		query->end_result = rctx->num_dma_calls;
		break;
	case R600_QUERY_CP_DMA_CALLS:
		query->end_result = rctx->num_cp_dma_calls;
		break;
	case R600_QUERY_NUM_VS_FLUSHES:
		query->end_result = rctx->num_vs_flushes;
		break;
	case R600_QUERY_NUM_PS_FLUSHES:
		query->end_result = rctx->num_ps_flushes;
		break;
	case R600_QUERY_NUM_CS_FLUSHES:
		query->end_result = rctx->num_cs_flushes;
		break;
	case R600_QUERY_NUM_FB_CACHE_FLUSHES:
		query->end_result = rctx->num_fb_cache_flushes;
		break;
	case R600_QUERY_NUM_L2_INVALIDATES:
		query->end_result = rctx->num_L2_invalidates;
		break;
	case R600_QUERY_NUM_L2_WRITEBACKS:
		query->end_result = rctx->num_L2_writebacks;
		break;
	case R600_QUERY_REQUESTED_VRAM:
	case R600_QUERY_REQUESTED_GTT:
	case R600_QUERY_MAPPED_VRAM:
	case R600_QUERY_MAPPED_GTT:
	case R600_QUERY_VRAM_USAGE:
	case R600_QUERY_GTT_USAGE:
	case R600_QUERY_GPU_TEMPERATURE:
	case R600_QUERY_CURRENT_GPU_SCLK:
	case R600_QUERY_CURRENT_GPU_MCLK:
	case R600_QUERY_BUFFER_WAIT_TIME:
	case R600_QUERY_NUM_GFX_IBS:
	case R600_QUERY_NUM_SDMA_IBS:
	case R600_QUERY_NUM_BYTES_MOVED:
	case R600_QUERY_NUM_EVICTIONS: {
		enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
		query->end_result = rctx->ws->query_value(rctx->ws, ws_id);
		break;
	}
	case R600_QUERY_GPU_LOAD:
		query->end_result = r600_end_counter_gui(rctx->screen,
							 query->begin_result);
		query->begin_result = 0;
		break;
	case R600_QUERY_GPU_SHADERS_BUSY:
		query->end_result = r600_end_counter_spi(rctx->screen,
							 query->begin_result);
		query->begin_result = 0;
		break;
	case R600_QUERY_NUM_COMPILATIONS:
		query->end_result = p_atomic_read(&rctx->screen->num_compilations);
		break;
	case R600_QUERY_NUM_SHADERS_CREATED:
		query->end_result = p_atomic_read(&rctx->screen->num_shaders_created);
		break;
	case R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO:
		query->end_result = rctx->last_tex_ps_draw_ratio;
		break;
	case R600_QUERY_NUM_SHADER_CACHE_HITS:
		query->end_result =
			p_atomic_read(&rctx->screen->num_shader_cache_hits);
		break;
	case R600_QUERY_GPIN_ASIC_ID:
	case R600_QUERY_GPIN_NUM_SIMD:
	case R600_QUERY_GPIN_NUM_RB:
	case R600_QUERY_GPIN_NUM_SPI:
	case R600_QUERY_GPIN_NUM_SE:
		break;
	default:
		unreachable("r600_query_sw_end: bad query type");
	}

	return true;
}

static bool r600_query_sw_get_result(struct r600_common_context *rctx,
				     struct r600_query *rquery,
				     bool wait,
				     union pipe_query_result *result)
{
	struct r600_query_sw *query = (struct r600_query_sw *)rquery;

	switch (query->b.type) {
	case PIPE_QUERY_TIMESTAMP_DISJOINT:
		/* Convert from cycles per millisecond to cycles per second (Hz). */
		result->timestamp_disjoint.frequency =
			(uint64_t)rctx->screen->info.clock_crystal_freq * 1000;
		result->timestamp_disjoint.disjoint = false;
		return true;
	case PIPE_QUERY_GPU_FINISHED: {
		struct pipe_screen *screen = rctx->b.screen;
		result->b = screen->fence_finish(screen, &rctx->b, query->fence,
						 wait ? PIPE_TIMEOUT_INFINITE : 0);
		return result->b;
	}

	case R600_QUERY_GPIN_ASIC_ID:
		result->u32 = 0;
		return true;
	case R600_QUERY_GPIN_NUM_SIMD:
		result->u32 = rctx->screen->info.num_good_compute_units;
		return true;
	case R600_QUERY_GPIN_NUM_RB:
		result->u32 = rctx->screen->info.num_render_backends;
		return true;
	case R600_QUERY_GPIN_NUM_SPI:
		result->u32 = 1; /* all supported chips have one SPI per SE */
		return true;
	case R600_QUERY_GPIN_NUM_SE:
		result->u32 = rctx->screen->info.max_se;
		return true;
	}

	result->u64 = query->end_result - query->begin_result;

	switch (query->b.type) {
	case R600_QUERY_BUFFER_WAIT_TIME:
	case R600_QUERY_GPU_TEMPERATURE:
		/* Wait time is reported in ns, temperature in millidegrees. */
		result->u64 /= 1000;
		break;
	case R600_QUERY_CURRENT_GPU_SCLK:
	case R600_QUERY_CURRENT_GPU_MCLK:
		/* Clocks are reported in MHz; convert to Hz. */
		result->u64 *= 1000000;
		break;
	}

	return true;
}
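
/* Illustrative sketch, not part of the driver: the begin/end convention the
 * software queries above rely on. A monotonically increasing counter is
 * sampled at begin_query and again at end_query, and get_result reports the
 * difference. The workload in the middle is hypothetical.
 */
#if 0
static uint64_t example_sw_counter_delta(struct r600_common_context *rctx)
{
	uint64_t begin_result = rctx->num_draw_calls; /* begin_query */
	/* ... draw calls issued by the application ... */
	uint64_t end_result = rctx->num_draw_calls;   /* end_query */
	return end_result - begin_result;             /* get_result */
}
#endif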

static struct r600_query_ops sw_query_ops = {
	.destroy = r600_query_sw_destroy,
	.begin = r600_query_sw_begin,
	.end = r600_query_sw_end,
	.get_result = r600_query_sw_get_result,
	.get_result_resource = NULL
};

static struct pipe_query *r600_query_sw_create(struct pipe_context *ctx,
					       unsigned query_type)
{
	struct r600_query_sw *query;

	query = CALLOC_STRUCT(r600_query_sw);
	if (!query)
		return NULL;

	query->b.type = query_type;
	query->b.ops = &sw_query_ops;

	return (struct pipe_query *)query;
}

void r600_query_hw_destroy(struct r600_common_context *rctx,
			   struct r600_query *rquery)
{
	struct r600_query_hw *query = (struct r600_query_hw *)rquery;
	struct r600_query_buffer *prev = query->buffer.previous;

	/* Release all query buffers. */
	while (prev) {
		struct r600_query_buffer *qbuf = prev;
		prev = prev->previous;
		r600_resource_reference(&qbuf->buf, NULL);
		FREE(qbuf);
	}

	r600_resource_reference(&query->buffer.buf, NULL);
	FREE(rquery);
}

static struct r600_resource *r600_new_query_buffer(struct r600_common_context *ctx,
						   struct r600_query_hw *query)
{
	unsigned buf_size = MAX2(query->result_size,
				 ctx->screen->info.min_alloc_size);

	/* Queries are normally read by the CPU after
	 * being written by the GPU, hence staging is probably a good
	 * usage pattern.
	 */
	struct r600_resource *buf = (struct r600_resource*)
		pipe_buffer_create(ctx->b.screen, 0,
				   PIPE_USAGE_STAGING, buf_size);
	if (!buf)
		return NULL;

	if (!query->ops->prepare_buffer(ctx, query, buf)) {
		r600_resource_reference(&buf, NULL);
		return NULL;
	}

	return buf;
}

static bool r600_query_hw_prepare_buffer(struct r600_common_context *ctx,
					 struct r600_query_hw *query,
					 struct r600_resource *buffer)
{
	/* Callers ensure that the buffer is currently unused by the GPU. */
	uint32_t *results = ctx->ws->buffer_map(buffer->buf, NULL,
						PIPE_TRANSFER_WRITE |
						PIPE_TRANSFER_UNSYNCHRONIZED);
	if (!results)
		return false;

	memset(results, 0, buffer->b.b.width0);

	if (query->b.type == PIPE_QUERY_OCCLUSION_COUNTER ||
	    query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE) {
		unsigned num_results;
		unsigned i, j;

		/* Set top bits for unused backends. */
		num_results = buffer->b.b.width0 / query->result_size;
		for (j = 0; j < num_results; j++) {
			for (i = 0; i < ctx->max_db; i++) {
				if (!(ctx->backend_mask & (1<<i))) {
					results[(i * 4)+1] = 0x80000000;
					results[(i * 4)+3] = 0x80000000;
				}
			}
			results += 4 * ctx->max_db;
		}
	}

	return true;
}

static void r600_query_hw_get_result_resource(struct r600_common_context *rctx,
					      struct r600_query *rquery,
					      bool wait,
					      enum pipe_query_value_type result_type,
					      int index,
					      struct pipe_resource *resource,
					      unsigned offset);

static struct r600_query_ops query_hw_ops = {
	.destroy = r600_query_hw_destroy,
	.begin = r600_query_hw_begin,
	.end = r600_query_hw_end,
	.get_result = r600_query_hw_get_result,
	.get_result_resource = r600_query_hw_get_result_resource,
};

static void r600_query_hw_do_emit_start(struct r600_common_context *ctx,
					struct r600_query_hw *query,
					struct r600_resource *buffer,
					uint64_t va);
static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
				       struct r600_query_hw *query,
				       struct r600_resource *buffer,
				       uint64_t va);
static void r600_query_hw_add_result(struct r600_common_context *ctx,
				     struct r600_query_hw *, void *buffer,
				     union pipe_query_result *result);
static void r600_query_hw_clear_result(struct r600_query_hw *,
				       union pipe_query_result *);

static struct r600_query_hw_ops query_hw_default_hw_ops = {
	.prepare_buffer = r600_query_hw_prepare_buffer,
	.emit_start = r600_query_hw_do_emit_start,
	.emit_stop = r600_query_hw_do_emit_stop,
	.clear_result = r600_query_hw_clear_result,
	.add_result = r600_query_hw_add_result,
};

bool r600_query_hw_init(struct r600_common_context *rctx,
			struct r600_query_hw *query)
{
	query->buffer.buf = r600_new_query_buffer(rctx, query);
	if (!query->buffer.buf)
		return false;

	return true;
}

static struct pipe_query *r600_query_hw_create(struct r600_common_context *rctx,
					       unsigned query_type,
					       unsigned index)
{
	struct r600_query_hw *query = CALLOC_STRUCT(r600_query_hw);
	if (!query)
		return NULL;

	query->b.type = query_type;
	query->b.ops = &query_hw_ops;
	query->ops = &query_hw_default_hw_ops;

	switch (query_type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		query->result_size = 16 * rctx->max_db;
		query->result_size += 16; /* for the fence + alignment */
		query->num_cs_dw_begin = 6;
		query->num_cs_dw_end = 6 + r600_gfx_write_fence_dwords(rctx->screen);
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		query->result_size = 24;
		query->num_cs_dw_begin = 8;
		query->num_cs_dw_end = 8 + r600_gfx_write_fence_dwords(rctx->screen);
		break;
	case PIPE_QUERY_TIMESTAMP:
		query->result_size = 16;
		query->num_cs_dw_end = 8 + r600_gfx_write_fence_dwords(rctx->screen);
		query->flags = R600_QUERY_HW_FLAG_NO_START;
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
		query->result_size = 32;
		query->num_cs_dw_begin = 6;
		query->num_cs_dw_end = 6;
		query->stream = index;
		break;
	case PIPE_QUERY_PIPELINE_STATISTICS:
		/* 11 values on EG, 8 on R600. */
		query->result_size = (rctx->chip_class >= EVERGREEN ? 11 : 8) * 16;
		query->result_size += 8; /* for the fence + alignment */
		query->num_cs_dw_begin = 6;
		query->num_cs_dw_end = 6 + r600_gfx_write_fence_dwords(rctx->screen);
		break;
	default:
		assert(0);
		FREE(query);
		return NULL;
	}

	if (!r600_query_hw_init(rctx, query)) {
		FREE(query);
		return NULL;
	}

	return (struct pipe_query *)query;
}

static void r600_update_occlusion_query_state(struct r600_common_context *rctx,
					      unsigned type, int diff)
{
	if (type == PIPE_QUERY_OCCLUSION_COUNTER ||
	    type == PIPE_QUERY_OCCLUSION_PREDICATE) {
		bool old_enable = rctx->num_occlusion_queries != 0;
		bool old_perfect_enable =
			rctx->num_perfect_occlusion_queries != 0;
		bool enable, perfect_enable;

		rctx->num_occlusion_queries += diff;
		assert(rctx->num_occlusion_queries >= 0);

		if (type == PIPE_QUERY_OCCLUSION_COUNTER) {
			rctx->num_perfect_occlusion_queries += diff;
			assert(rctx->num_perfect_occlusion_queries >= 0);
		}

		enable = rctx->num_occlusion_queries != 0;
		perfect_enable = rctx->num_perfect_occlusion_queries != 0;

		if (enable != old_enable || perfect_enable != old_perfect_enable) {
			rctx->set_occlusion_query_state(&rctx->b, enable);
		}
	}
}

static unsigned event_type_for_stream(struct r600_query_hw *query)
{
	switch (query->stream) {
	default:
	case 0: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS;
	case 1: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS1;
	case 2: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS2;
	case 3: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS3;
	}
}

static void r600_query_hw_do_emit_start(struct r600_common_context *ctx,
					struct r600_query_hw *query,
					struct r600_resource *buffer,
					uint64_t va)
{
	struct radeon_winsys_cs *cs = ctx->gfx.cs;

	switch (query->b.type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
		radeon_emit(cs, va);
		radeon_emit(cs, (va >> 32) & 0xFFFF);
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
		radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3));
		radeon_emit(cs, va);
		radeon_emit(cs, (va >> 32) & 0xFFFF);
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS,
					 0, 3, NULL, va, 0, 0);
		break;
	case PIPE_QUERY_PIPELINE_STATISTICS:
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
		radeon_emit(cs, va);
		radeon_emit(cs, (va >> 32) & 0xFFFF);
		break;
	default:
		assert(0);
	}
	r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
			RADEON_PRIO_QUERY);
}

static void r600_query_hw_emit_start(struct r600_common_context *ctx,
				     struct r600_query_hw *query)
{
	uint64_t va;

	if (!query->buffer.buf)
		return; // previous buffer allocation failure

	r600_update_occlusion_query_state(ctx, query->b.type, 1);
	r600_update_prims_generated_query_state(ctx, query->b.type, 1);

	ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_begin + query->num_cs_dw_end,
			       true);

	/* Get a new query buffer if needed. */
	if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) {
		struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer);
		*qbuf = query->buffer;
		query->buffer.results_end = 0;
		query->buffer.previous = qbuf;
		query->buffer.buf = r600_new_query_buffer(ctx, query);
		if (!query->buffer.buf)
			return;
	}

	/* emit begin query */
	va = query->buffer.buf->gpu_address + query->buffer.results_end;

	query->ops->emit_start(ctx, query, query->buffer.buf, va);

	ctx->num_cs_dw_queries_suspend += query->num_cs_dw_end;
}

static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
				       struct r600_query_hw *query,
				       struct r600_resource *buffer,
				       uint64_t va)
{
	struct radeon_winsys_cs *cs = ctx->gfx.cs;
	uint64_t fence_va = 0;

	switch (query->b.type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		va += 8;
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
		radeon_emit(cs, va);
		radeon_emit(cs, (va >> 32) & 0xFFFF);

		fence_va = va + ctx->max_db * 16 - 8;
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		va += query->result_size/2;
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
		radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3));
		radeon_emit(cs, va);
		radeon_emit(cs, (va >> 32) & 0xFFFF);
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		va += 8;
		/* fall through */
	case PIPE_QUERY_TIMESTAMP:
		r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS,
					 0, 3, NULL, va, 0, 0);
		fence_va = va + 8;
		break;
	case PIPE_QUERY_PIPELINE_STATISTICS: {
		unsigned sample_size = (query->result_size - 8) / 2;

		va += sample_size;
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
		radeon_emit(cs, va);
		radeon_emit(cs, (va >> 32) & 0xFFFF);

		fence_va = va + sample_size;
		break;
	}
	default:
		assert(0);
	}
	r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
			RADEON_PRIO_QUERY);

	if (fence_va)
		r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0, 1,
					 query->buffer.buf, fence_va, 0, 0x80000000);
}

static void r600_query_hw_emit_stop(struct r600_common_context *ctx,
				    struct r600_query_hw *query)
{
	uint64_t va;

	if (!query->buffer.buf)
		return; // previous buffer allocation failure

	/* The queries which need begin already called this in begin_query. */
	if (query->flags & R600_QUERY_HW_FLAG_NO_START) {
		ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_end, false);
	}

	/* emit end query */
	va = query->buffer.buf->gpu_address + query->buffer.results_end;

	query->ops->emit_stop(ctx, query, query->buffer.buf, va);

	query->buffer.results_end += query->result_size;

	if (!(query->flags & R600_QUERY_HW_FLAG_NO_START))
		ctx->num_cs_dw_queries_suspend -= query->num_cs_dw_end;

	r600_update_occlusion_query_state(ctx, query->b.type, -1);
	r600_update_prims_generated_query_state(ctx, query->b.type, -1);
}

static void r600_emit_query_predication(struct r600_common_context *ctx,
					struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = ctx->gfx.cs;
	struct r600_query_hw *query = (struct r600_query_hw *)ctx->render_cond;
	struct r600_query_buffer *qbuf;
	uint32_t op;
	bool flag_wait;

	if (!query)
		return;

	flag_wait = ctx->render_cond_mode == PIPE_RENDER_COND_WAIT ||
		    ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT;

	switch (query->b.type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		op = PRED_OP(PREDICATION_OP_ZPASS);
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		op = PRED_OP(PREDICATION_OP_PRIMCOUNT);
		break;
	default:
		assert(0);
		return;
	}

	/* if true then invert, see GL_ARB_conditional_render_inverted */
	if (ctx->render_cond_invert)
		op |= PREDICATION_DRAW_NOT_VISIBLE; /* Draw if not visible/overflow */
	else
		op |= PREDICATION_DRAW_VISIBLE; /* Draw if visible/overflow */

	op |= flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW;

	/* emit predicate packets for all data blocks */
	for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
		unsigned results_base = 0;
		uint64_t va = qbuf->buf->gpu_address;

		while (results_base < qbuf->results_end) {
			radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
			radeon_emit(cs, va + results_base);
			radeon_emit(cs, op | (((va + results_base) >> 32) & 0xFF));
			r600_emit_reloc(ctx, &ctx->gfx, qbuf->buf, RADEON_USAGE_READ,
					RADEON_PRIO_QUERY);
			results_base += query->result_size;

			/* set CONTINUE bit for all packets except the first */
			op |= PREDICATION_CONTINUE;
		}
	}
}

static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
{
	struct r600_common_context *rctx = (struct r600_common_context *)ctx;

	if (query_type == PIPE_QUERY_TIMESTAMP_DISJOINT ||
	    query_type == PIPE_QUERY_GPU_FINISHED ||
	    query_type >= PIPE_QUERY_DRIVER_SPECIFIC)
		return r600_query_sw_create(ctx, query_type);

	return r600_query_hw_create(rctx, query_type, index);
}

static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
{
	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
	struct r600_query *rquery = (struct r600_query *)query;

	rquery->ops->destroy(rctx, rquery);
}

static boolean r600_begin_query(struct pipe_context *ctx,
				struct pipe_query *query)
{
	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
	struct r600_query *rquery = (struct r600_query *)query;

	return rquery->ops->begin(rctx, rquery);
}

void r600_query_hw_reset_buffers(struct r600_common_context *rctx,
				 struct r600_query_hw *query)
{
	struct r600_query_buffer *prev = query->buffer.previous;

	/* Discard the old query buffers. */
	while (prev) {
		struct r600_query_buffer *qbuf = prev;
		prev = prev->previous;
		r600_resource_reference(&qbuf->buf, NULL);
		FREE(qbuf);
	}

	query->buffer.results_end = 0;
	query->buffer.previous = NULL;

	/* Obtain a new buffer if the current one can't be mapped without a stall. */
	if (r600_rings_is_buffer_referenced(rctx, query->buffer.buf->buf, RADEON_USAGE_READWRITE) ||
	    !rctx->ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
		r600_resource_reference(&query->buffer.buf, NULL);
		query->buffer.buf = r600_new_query_buffer(rctx, query);
	} else {
		if (!query->ops->prepare_buffer(rctx, query, query->buffer.buf))
			r600_resource_reference(&query->buffer.buf, NULL);
	}
}

bool r600_query_hw_begin(struct r600_common_context *rctx,
			 struct r600_query *rquery)
{
	struct r600_query_hw *query = (struct r600_query_hw *)rquery;

	if (query->flags & R600_QUERY_HW_FLAG_NO_START) {
		assert(0);
		return false;
	}

	if (!(query->flags & R600_QUERY_HW_FLAG_BEGIN_RESUMES))
		r600_query_hw_reset_buffers(rctx, query);

	r600_query_hw_emit_start(rctx, query);
	if (!query->buffer.buf)
		return false;

	LIST_ADDTAIL(&query->list, &rctx->active_queries);
	return true;
}

static bool r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
{
	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
	struct r600_query *rquery = (struct r600_query *)query;

	return rquery->ops->end(rctx, rquery);
}

bool r600_query_hw_end(struct r600_common_context *rctx,
		       struct r600_query *rquery)
{
	struct r600_query_hw *query = (struct r600_query_hw *)rquery;

	if (query->flags & R600_QUERY_HW_FLAG_NO_START)
		r600_query_hw_reset_buffers(rctx, query);

	r600_query_hw_emit_stop(rctx, query);

	if (!(query->flags & R600_QUERY_HW_FLAG_NO_START))
		LIST_DELINIT(&query->list);

	if (!query->buffer.buf)
		return false;

	return true;
}

static void r600_get_hw_query_params(struct r600_common_context *rctx,
				     struct r600_query_hw *rquery, int index,
				     struct r600_hw_query_params *params)
{
	params->pair_stride = 0;
	params->pair_count = 1;

	switch (rquery->b.type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		params->start_offset = 0;
		params->end_offset = 8;
		params->fence_offset = rctx->max_db * 16;
		params->pair_stride = 16;
		params->pair_count = rctx->max_db;
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		params->start_offset = 0;
		params->end_offset = 8;
		params->fence_offset = 16;
		break;
	case PIPE_QUERY_TIMESTAMP:
		params->start_offset = 0;
		params->end_offset = 0;
		params->fence_offset = 8;
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
		params->start_offset = 8;
		params->end_offset = 24;
		params->fence_offset = params->end_offset + 4;
		break;
	case PIPE_QUERY_PRIMITIVES_GENERATED:
		params->start_offset = 0;
		params->end_offset = 16;
		params->fence_offset = params->end_offset + 4;
		break;
	case PIPE_QUERY_SO_STATISTICS:
		params->start_offset = 8 - index * 8;
		params->end_offset = 24 - index * 8;
		params->fence_offset = params->end_offset + 4;
		break;
	case PIPE_QUERY_PIPELINE_STATISTICS:
	{
		/* Offsets apply to EG+ */
		static const unsigned offsets[] = {56, 48, 24, 32, 40, 16, 8, 0, 64, 72, 80};
		params->start_offset = offsets[index];
		params->end_offset = 88 + offsets[index];
		params->fence_offset = 2 * 88;
		break;
	}
	default:
		unreachable("r600_get_hw_query_params unsupported");
	}
}

static unsigned r600_query_read_result(void *map, unsigned start_index, unsigned end_index,
				       bool test_status_bit)
{
	uint32_t *current_result = (uint32_t*)map;
	uint64_t start, end;

	start = (uint64_t)current_result[start_index] |
		(uint64_t)current_result[start_index+1] << 32;
	end = (uint64_t)current_result[end_index] |
	      (uint64_t)current_result[end_index+1] << 32;

	if (!test_status_bit ||
	    ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL))) {
		return end - start;
	}
	return 0;
}
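
/* Illustrative example, not part of the driver: how a start/end pair with the
 * "valid" top bit is interpreted. Both 64-bit samples must have bit 63 set
 * (written by the GPU, or pre-set for unused backends in prepare_buffer)
 * before the difference is trusted; otherwise 0 is returned.
 */
#if 0
static unsigned example_read_pair(void)
{
	uint32_t buf[4] = {
		100, 0x80000000, /* start = 100, valid bit set */
		250, 0x80000000, /* end = 250, valid bit set */
	};
	return r600_query_read_result(buf, 0, 2, true); /* returns 150 */
}
#endif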

static void r600_query_hw_add_result(struct r600_common_context *ctx,
				     struct r600_query_hw *query,
				     void *buffer,
				     union pipe_query_result *result)
{
	switch (query->b.type) {
	case PIPE_QUERY_OCCLUSION_COUNTER: {
		for (unsigned i = 0; i < ctx->max_db; ++i) {
			unsigned results_base = i * 16;
			result->u64 +=
				r600_query_read_result(buffer + results_base, 0, 2, true);
		}
		break;
	}
	case PIPE_QUERY_OCCLUSION_PREDICATE: {
		for (unsigned i = 0; i < ctx->max_db; ++i) {
			unsigned results_base = i * 16;
			result->b = result->b ||
				r600_query_read_result(buffer + results_base, 0, 2, true) != 0;
		}
		break;
	}
	case PIPE_QUERY_TIME_ELAPSED:
		result->u64 += r600_query_read_result(buffer, 0, 2, false);
		break;
	case PIPE_QUERY_TIMESTAMP:
		result->u64 = *(uint64_t*)buffer;
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
		/* SAMPLE_STREAMOUTSTATS stores this structure:
		 * {
		 *    u64 NumPrimitivesWritten;
		 *    u64 PrimitiveStorageNeeded;
		 * }
		 * We only need NumPrimitivesWritten here. */
		result->u64 += r600_query_read_result(buffer, 2, 6, true);
		break;
	case PIPE_QUERY_PRIMITIVES_GENERATED:
		/* Here we read PrimitiveStorageNeeded. */
		result->u64 += r600_query_read_result(buffer, 0, 4, true);
		break;
	case PIPE_QUERY_SO_STATISTICS:
		result->so_statistics.num_primitives_written +=
			r600_query_read_result(buffer, 2, 6, true);
		result->so_statistics.primitives_storage_needed +=
			r600_query_read_result(buffer, 0, 4, true);
		break;
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		result->b = result->b ||
			r600_query_read_result(buffer, 2, 6, true) !=
			r600_query_read_result(buffer, 0, 4, true);
		break;
	case PIPE_QUERY_PIPELINE_STATISTICS:
		if (ctx->chip_class >= EVERGREEN) {
			result->pipeline_statistics.ps_invocations +=
				r600_query_read_result(buffer, 0, 22, false);
			result->pipeline_statistics.c_primitives +=
				r600_query_read_result(buffer, 2, 24, false);
			result->pipeline_statistics.c_invocations +=
				r600_query_read_result(buffer, 4, 26, false);
			result->pipeline_statistics.vs_invocations +=
				r600_query_read_result(buffer, 6, 28, false);
			result->pipeline_statistics.gs_invocations +=
				r600_query_read_result(buffer, 8, 30, false);
			result->pipeline_statistics.gs_primitives +=
				r600_query_read_result(buffer, 10, 32, false);
			result->pipeline_statistics.ia_primitives +=
				r600_query_read_result(buffer, 12, 34, false);
			result->pipeline_statistics.ia_vertices +=
				r600_query_read_result(buffer, 14, 36, false);
			result->pipeline_statistics.hs_invocations +=
				r600_query_read_result(buffer, 16, 38, false);
			result->pipeline_statistics.ds_invocations +=
				r600_query_read_result(buffer, 18, 40, false);
			result->pipeline_statistics.cs_invocations +=
				r600_query_read_result(buffer, 20, 42, false);
		} else {
			result->pipeline_statistics.ps_invocations +=
				r600_query_read_result(buffer, 0, 16, false);
			result->pipeline_statistics.c_primitives +=
				r600_query_read_result(buffer, 2, 18, false);
			result->pipeline_statistics.c_invocations +=
				r600_query_read_result(buffer, 4, 20, false);
			result->pipeline_statistics.vs_invocations +=
				r600_query_read_result(buffer, 6, 22, false);
			result->pipeline_statistics.gs_invocations +=
				r600_query_read_result(buffer, 8, 24, false);
			result->pipeline_statistics.gs_primitives +=
				r600_query_read_result(buffer, 10, 26, false);
			result->pipeline_statistics.ia_primitives +=
				r600_query_read_result(buffer, 12, 28, false);
			result->pipeline_statistics.ia_vertices +=
				r600_query_read_result(buffer, 14, 30, false);
		}
#if 0 /* for testing */
		printf("Pipeline stats: IA verts=%llu, IA prims=%llu, VS=%llu, HS=%llu, "
		       "DS=%llu, GS=%llu, GS prims=%llu, Clipper=%llu, "
		       "Clipper prims=%llu, PS=%llu, CS=%llu\n",
		       result->pipeline_statistics.ia_vertices,
		       result->pipeline_statistics.ia_primitives,
		       result->pipeline_statistics.vs_invocations,
		       result->pipeline_statistics.hs_invocations,
		       result->pipeline_statistics.ds_invocations,
		       result->pipeline_statistics.gs_invocations,
		       result->pipeline_statistics.gs_primitives,
		       result->pipeline_statistics.c_invocations,
		       result->pipeline_statistics.c_primitives,
		       result->pipeline_statistics.ps_invocations,
		       result->pipeline_statistics.cs_invocations);
#endif
		break;
	default:
		assert(0);
	}
}

static boolean r600_get_query_result(struct pipe_context *ctx,
				     struct pipe_query *query, boolean wait,
				     union pipe_query_result *result)
{
	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
	struct r600_query *rquery = (struct r600_query *)query;

	return rquery->ops->get_result(rctx, rquery, wait, result);
}

static void r600_get_query_result_resource(struct pipe_context *ctx,
					   struct pipe_query *query,
					   boolean wait,
					   enum pipe_query_value_type result_type,
					   int index,
					   struct pipe_resource *resource,
					   unsigned offset)
{
	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
	struct r600_query *rquery = (struct r600_query *)query;

	rquery->ops->get_result_resource(rctx, rquery, wait, result_type, index,
					 resource, offset);
}

static void r600_query_hw_clear_result(struct r600_query_hw *query,
				       union pipe_query_result *result)
{
	util_query_clear_result(result, query->b.type);
}

bool r600_query_hw_get_result(struct r600_common_context *rctx,
			      struct r600_query *rquery,
			      bool wait, union pipe_query_result *result)
{
	struct r600_query_hw *query = (struct r600_query_hw *)rquery;
	struct r600_query_buffer *qbuf;

	query->ops->clear_result(query, result);

	for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
		unsigned results_base = 0;
		void *map;

		map = r600_buffer_map_sync_with_rings(rctx, qbuf->buf,
						      PIPE_TRANSFER_READ |
						      (wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
		if (!map)
			return false;

		while (results_base != qbuf->results_end) {
			query->ops->add_result(rctx, query, map + results_base,
					       result);
			results_base += query->result_size;
		}
	}

	/* Convert the time to expected units (nanoseconds):
	 * ticks * 1000000 / kHz = ns. */
	if (rquery->type == PIPE_QUERY_TIME_ELAPSED ||
	    rquery->type == PIPE_QUERY_TIMESTAMP) {
		result->u64 = (1000000 * result->u64) / rctx->screen->info.clock_crystal_freq;
	}
	return true;
}

/* Create the compute shader that is used to collect the results.
 *
 * One compute grid with a single thread is launched for every query result
 * buffer. The thread (optionally) reads a previous summary buffer, then
 * accumulates data from the query result buffer, and writes the result either
 * to a summary buffer to be consumed by the next grid invocation or to the
 * user-supplied buffer.
 *
 * Data layout:
 *
 * CONST
 *  0.x = end_offset
 *  0.y = result_stride
 *  0.z = result_count
 *  0.w = bit field:
 *          1: read previously accumulated values
 *          2: write accumulated values for chaining
 *          4: write result available
 *          8: convert result to boolean (0/1)
 *         16: only read one dword and use that as result
 *         32: apply timestamp conversion
 *         64: store full 64 bits result
 *        128: store signed 32 bits result
 *  1.x = fence_offset
 *  1.y = pair_stride
 *  1.z = pair_count
 *
 * BUFFER[0] = query result buffer
 * BUFFER[1] = previous summary buffer
 * BUFFER[2] = next summary buffer or user-supplied buffer
 */
static void r600_create_query_result_shader(struct r600_common_context *rctx)
{
	/* TEMP[0].xy = accumulated result so far
	 * TEMP[0].z = result not available
	 *
	 * TEMP[1].x = current result index
	 * TEMP[1].y = current pair index
	 */
	static const char text_tmpl[] =
		"COMP\n"
		"PROPERTY CS_FIXED_BLOCK_WIDTH 1\n"
		"PROPERTY CS_FIXED_BLOCK_HEIGHT 1\n"
		"PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
		"DCL BUFFER[0]\n"
		"DCL BUFFER[1]\n"
		"DCL BUFFER[2]\n"
		"DCL CONST[0..1]\n"
		"DCL TEMP[0..5]\n"
		"IMM[0] UINT32 {0, 31, 2147483647, 4294967295}\n"
		"IMM[1] UINT32 {1, 2, 4, 8}\n"
		"IMM[2] UINT32 {16, 32, 64, 128}\n"
		"IMM[3] UINT32 {1000000, 0, %u, 0}\n" /* for timestamp conversion */

		"AND TEMP[5], CONST[0].wwww, IMM[2].xxxx\n"
		"UIF TEMP[5]\n"
			/* Check result availability. */
			"LOAD TEMP[1].x, BUFFER[0], CONST[1].xxxx\n"
			"ISHR TEMP[0].z, TEMP[1].xxxx, IMM[0].yyyy\n"
			"MOV TEMP[1], TEMP[0].zzzz\n"
			"NOT TEMP[0].z, TEMP[0].zzzz\n"

			/* Load result if available. */
			"UIF TEMP[1]\n"
				"LOAD TEMP[0].xy, BUFFER[0], IMM[0].xxxx\n"
			"ENDIF\n"
		"ELSE\n"
			/* Load previously accumulated result if requested. */
			"MOV TEMP[0], IMM[0].xxxx\n"
			"AND TEMP[4], CONST[0].wwww, IMM[1].xxxx\n"
			"UIF TEMP[4]\n"
				"LOAD TEMP[0].xyz, BUFFER[1], IMM[0].xxxx\n"
			"ENDIF\n"

			"MOV TEMP[1].x, IMM[0].xxxx\n"
			"BGNLOOP\n"
				/* Break if accumulated result so far is not available. */
				"UIF TEMP[0].zzzz\n"
					"BRK\n"
				"ENDIF\n"

				/* Break if result_index >= result_count. */
				"USGE TEMP[5], TEMP[1].xxxx, CONST[0].zzzz\n"
				"UIF TEMP[5]\n"
					"BRK\n"
				"ENDIF\n"

				/* Load fence and check result availability */
				"UMAD TEMP[5].x, TEMP[1].xxxx, CONST[0].yyyy, CONST[1].xxxx\n"
				"LOAD TEMP[5].x, BUFFER[0], TEMP[5].xxxx\n"
				"ISHR TEMP[0].z, TEMP[5].xxxx, IMM[0].yyyy\n"
				"NOT TEMP[0].z, TEMP[0].zzzz\n"
				"UIF TEMP[0].zzzz\n"
					"BRK\n"
				"ENDIF\n"

				"MOV TEMP[1].y, IMM[0].xxxx\n"
				"BGNLOOP\n"
					/* Load start and end. */
					"UMUL TEMP[5].x, TEMP[1].xxxx, CONST[0].yyyy\n"
					"UMAD TEMP[5].x, TEMP[1].yyyy, CONST[1].yyyy, TEMP[5].xxxx\n"
					"LOAD TEMP[2].xy, BUFFER[0], TEMP[5].xxxx\n"

					"UADD TEMP[5].x, TEMP[5].xxxx, CONST[0].xxxx\n"
					"LOAD TEMP[3].xy, BUFFER[0], TEMP[5].xxxx\n"

					"U64ADD TEMP[3].xy, TEMP[3], -TEMP[2]\n"
					"U64ADD TEMP[0].xy, TEMP[0], TEMP[3]\n"

					/* Increment pair index */
					"UADD TEMP[1].y, TEMP[1].yyyy, IMM[1].xxxx\n"
					"USGE TEMP[5], TEMP[1].yyyy, CONST[1].zzzz\n"
					"UIF TEMP[5]\n"
						"BRK\n"
					"ENDIF\n"
				"ENDLOOP\n"

				/* Increment result index */
				"UADD TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx\n"
			"ENDLOOP\n"
		"ENDIF\n"

		"AND TEMP[4], CONST[0].wwww, IMM[1].yyyy\n"
		"UIF TEMP[4]\n"
			/* Store accumulated data for chaining. */
			"STORE BUFFER[2].xyz, IMM[0].xxxx, TEMP[0]\n"
		"ELSE\n"
			"AND TEMP[4], CONST[0].wwww, IMM[1].zzzz\n"
			"UIF TEMP[4]\n"
				/* Store result availability. */
				"NOT TEMP[0].z, TEMP[0]\n"
				"AND TEMP[0].z, TEMP[0].zzzz, IMM[1].xxxx\n"
				"STORE BUFFER[2].x, IMM[0].xxxx, TEMP[0].zzzz\n"

				"AND TEMP[4], CONST[0].wwww, IMM[2].zzzz\n"
				"UIF TEMP[4]\n"
					"STORE BUFFER[2].y, IMM[0].xxxx, IMM[0].xxxx\n"
				"ENDIF\n"
			"ELSE\n"
				/* Store result if it is available. */
				"NOT TEMP[4], TEMP[0].zzzz\n"
				"UIF TEMP[4]\n"
					/* Apply timestamp conversion */
					"AND TEMP[4], CONST[0].wwww, IMM[2].yyyy\n"
					"UIF TEMP[4]\n"
						"U64MUL TEMP[0].xy, TEMP[0], IMM[3].xyxy\n"
						"U64DIV TEMP[0].xy, TEMP[0], IMM[3].zwzw\n"
					"ENDIF\n"

					/* Convert to boolean */
					"AND TEMP[4], CONST[0].wwww, IMM[1].wwww\n"
					"UIF TEMP[4]\n"
						"U64SNE TEMP[0].x, TEMP[0].xyxy, IMM[0].xxxx\n"
						"AND TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx\n"
						"MOV TEMP[0].y, IMM[0].xxxx\n"
					"ENDIF\n"

					"AND TEMP[4], CONST[0].wwww, IMM[2].zzzz\n"
					"UIF TEMP[4]\n"
						"STORE BUFFER[2].xy, IMM[0].xxxx, TEMP[0].xyxy\n"
					"ELSE\n"
						/* Clamping */
						"UIF TEMP[0].yyyy\n"
							"MOV TEMP[0].x, IMM[0].wwww\n"
						"ENDIF\n"

						"AND TEMP[4], CONST[0].wwww, IMM[2].wwww\n"
						"UIF TEMP[4]\n"
							"UMIN TEMP[0].x, TEMP[0].xxxx, IMM[0].zzzz\n"
						"ENDIF\n"

						"STORE BUFFER[2].x, IMM[0].xxxx, TEMP[0].xxxx\n"
					"ENDIF\n"
				"ENDIF\n"
			"ENDIF\n"
		"ENDIF\n"

		"END\n";

	char text[sizeof(text_tmpl) + 32];
	struct tgsi_token tokens[1024];
	struct pipe_compute_state state = {};

	/* Hard code the frequency into the shader so that the backend can
	 * use the full range of optimizations for divide-by-constant.
	 */
	snprintf(text, sizeof(text), text_tmpl,
		 rctx->screen->info.clock_crystal_freq);

	if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) {
		assert(false);
		return;
	}

	state.ir_type = PIPE_SHADER_IR_TGSI;
	state.prog = tokens;

	rctx->query_result_shader = rctx->b.create_compute_state(&rctx->b, &state);
}

static void r600_restore_qbo_state(struct r600_common_context *rctx,
				   struct r600_qbo_state *st)
{
	rctx->b.bind_compute_state(&rctx->b, st->saved_compute);

	rctx->b.set_constant_buffer(&rctx->b, PIPE_SHADER_COMPUTE, 0, &st->saved_const0);
	pipe_resource_reference(&st->saved_const0.buffer, NULL);

	rctx->b.set_shader_buffers(&rctx->b, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo);
	for (unsigned i = 0; i < 3; ++i)
		pipe_resource_reference(&st->saved_ssbo[i].buffer, NULL);
}

static void r600_query_hw_get_result_resource(struct r600_common_context *rctx,
					      struct r600_query *rquery,
					      bool wait,
					      enum pipe_query_value_type result_type,
					      int index,
					      struct pipe_resource *resource,
					      unsigned offset)
{
	struct r600_query_hw *query = (struct r600_query_hw *)rquery;
	struct r600_query_buffer *qbuf;
	struct r600_query_buffer *qbuf_prev;
	struct pipe_resource *tmp_buffer = NULL;
	unsigned tmp_buffer_offset = 0;
	struct r600_qbo_state saved_state = {};
	struct pipe_grid_info grid = {};
	struct pipe_constant_buffer constant_buffer = {};
	struct pipe_shader_buffer ssbo[3];
	struct r600_hw_query_params params;
	struct {
		uint32_t end_offset;
		uint32_t result_stride;
		uint32_t result_count;
		uint32_t config;
		uint32_t fence_offset;
		uint32_t pair_stride;
		uint32_t pair_count;
	} consts;

	if (!rctx->query_result_shader) {
		r600_create_query_result_shader(rctx);
		if (!rctx->query_result_shader)
			return;
	}

	if (query->buffer.previous) {
		u_suballocator_alloc(rctx->allocator_zeroed_memory, 16, 16,
				     &tmp_buffer_offset, &tmp_buffer);
		if (!tmp_buffer)
			return;
	}

	rctx->save_qbo_state(&rctx->b, &saved_state);

	r600_get_hw_query_params(rctx, query, index >= 0 ? index : 0, &params);
	consts.end_offset = params.end_offset - params.start_offset;
	consts.fence_offset = params.fence_offset - params.start_offset;
	consts.result_stride = query->result_size;
	consts.pair_stride = params.pair_stride;
	consts.pair_count = params.pair_count;

	constant_buffer.buffer_size = sizeof(consts);
	constant_buffer.user_buffer = &consts;

	ssbo[1].buffer = tmp_buffer;
	ssbo[1].buffer_offset = tmp_buffer_offset;
	ssbo[1].buffer_size = 16;

	ssbo[2] = ssbo[1];

	rctx->b.bind_compute_state(&rctx->b, rctx->query_result_shader);

	grid.block[0] = 1;
	grid.block[1] = 1;
	grid.block[2] = 1;
	grid.grid[0] = 1;
	grid.grid[1] = 1;
	grid.grid[2] = 1;

	consts.config = 0;
	if (index < 0)
		consts.config |= 4;
	if (query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE ||
	    query->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE)
		consts.config |= 8;
	else if (query->b.type == PIPE_QUERY_TIMESTAMP ||
		 query->b.type == PIPE_QUERY_TIME_ELAPSED)
		consts.config |= 32;

	switch (result_type) {
	case PIPE_QUERY_TYPE_U64:
	case PIPE_QUERY_TYPE_I64:
		consts.config |= 64;
		break;
	case PIPE_QUERY_TYPE_I32:
		consts.config |= 128;
		break;
	case PIPE_QUERY_TYPE_U32:
		break;
	}

	rctx->flags |= rctx->screen->barrier_flags.cp_to_L2;

	for (qbuf = &query->buffer; qbuf; qbuf = qbuf_prev) {
		if (query->b.type != PIPE_QUERY_TIMESTAMP) {
			qbuf_prev = qbuf->previous;
			consts.result_count = qbuf->results_end / query->result_size;
			consts.config &= ~3;
			if (qbuf != &query->buffer)
				consts.config |= 1;
			if (qbuf->previous)
				consts.config |= 2;
		} else {
			/* Only read the last timestamp. */
			qbuf_prev = NULL;
			consts.result_count = 0;
			consts.config |= 16;
			params.start_offset += qbuf->results_end - query->result_size;
		}

		rctx->b.set_constant_buffer(&rctx->b, PIPE_SHADER_COMPUTE, 0, &constant_buffer);

		ssbo[0].buffer = &qbuf->buf->b.b;
		ssbo[0].buffer_offset = params.start_offset;
		ssbo[0].buffer_size = qbuf->results_end - params.start_offset;

		if (!qbuf->previous) {
			ssbo[2].buffer = resource;
			ssbo[2].buffer_offset = offset;
			ssbo[2].buffer_size = 8;

			((struct r600_resource *)resource)->TC_L2_dirty = true;
		}

		rctx->b.set_shader_buffers(&rctx->b, PIPE_SHADER_COMPUTE, 0, 3, ssbo);

		if (wait && qbuf == &query->buffer) {
			uint64_t va;

			/* Wait for result availability. Wait only for readiness
			 * of the last entry, since the fence writes should be
			 * serialized in the CP.
			 */
			va = qbuf->buf->gpu_address + qbuf->results_end - query->result_size;
			va += params.fence_offset;

			r600_gfx_wait_fence(rctx, va, 0x80000000, 0x80000000);
		}

		rctx->b.launch_grid(&rctx->b, &grid);
		rctx->flags |= rctx->screen->barrier_flags.compute_to_L2;
	}

	r600_restore_qbo_state(rctx, &saved_state);
	pipe_resource_reference(&tmp_buffer, NULL);
}
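
/* Illustrative sketch, not part of the driver: how a state tracker might call
 * the hook above through the pipe_context interface, e.g. for
 * ARB_query_buffer_object. The query and destination buffer are assumed to
 * exist; index 0 selects the first result value, while a negative index
 * requests the "result available" word instead.
 */
#if 0
static void example_write_result_to_buffer(struct pipe_context *ctx,
					   struct pipe_query *query,
					   struct pipe_resource *buf)
{
	ctx->get_query_result_resource(ctx, query, FALSE /* don't wait */,
				       PIPE_QUERY_TYPE_U64, 0, buf, 0);
}
#endif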

static void r600_render_condition(struct pipe_context *ctx,
				  struct pipe_query *query,
				  boolean condition,
				  uint mode)
{
	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
	struct r600_query_hw *rquery = (struct r600_query_hw *)query;
	struct r600_query_buffer *qbuf;
	struct r600_atom *atom = &rctx->render_cond_atom;

	rctx->render_cond = query;
	rctx->render_cond_invert = condition;
	rctx->render_cond_mode = mode;

	/* Compute the size of SET_PREDICATION packets. */
	atom->num_dw = 0;
	if (query) {
		for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous)
			atom->num_dw += (qbuf->results_end / rquery->result_size) * 5;
	}

	rctx->set_atom_dirty(rctx, atom, query != NULL);
}

void r600_suspend_queries(struct r600_common_context *ctx)
{
	struct r600_query_hw *query;

	LIST_FOR_EACH_ENTRY(query, &ctx->active_queries, list) {
		r600_query_hw_emit_stop(ctx, query);
	}
	assert(ctx->num_cs_dw_queries_suspend == 0);
}

static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *ctx,
						    struct list_head *query_list)
{
	struct r600_query_hw *query;
	unsigned num_dw = 0;

	LIST_FOR_EACH_ENTRY(query, query_list, list) {
		/* begin + end */
		num_dw += query->num_cs_dw_begin + query->num_cs_dw_end;

		/* Workaround for the fact that
		 * num_cs_dw_nontimer_queries_suspend is incremented for every
		 * resumed query, which raises the bar in need_cs_space for
		 * queries about to be resumed.
		 */
		num_dw += query->num_cs_dw_end;
	}
	/* primitives generated query */
	num_dw += ctx->streamout.enable_atom.num_dw;
	/* guess for ZPASS enable or PERFECT_ZPASS_COUNT enable updates */
	num_dw += 13;

	return num_dw;
}

void r600_resume_queries(struct r600_common_context *ctx)
{
	struct r600_query_hw *query;
	unsigned num_cs_dw = r600_queries_num_cs_dw_for_resuming(ctx, &ctx->active_queries);

	assert(ctx->num_cs_dw_queries_suspend == 0);

	/* Check CS space here. Resuming must not be interrupted by flushes. */
	ctx->need_gfx_cs_space(&ctx->b, num_cs_dw, true);

	LIST_FOR_EACH_ENTRY(query, &ctx->active_queries, list) {
		r600_query_hw_emit_start(ctx, query);
	}
}

/* Get backends mask */
void r600_query_init_backend_mask(struct r600_common_context *ctx)
{
	struct radeon_winsys_cs *cs = ctx->gfx.cs;
	struct r600_resource *buffer;
	uint32_t *results;
	unsigned num_backends = ctx->screen->info.num_render_backends;
	unsigned i, mask = 0;

	/* if backend_map query is supported by the kernel */
	if (ctx->screen->info.r600_gb_backend_map_valid) {
		unsigned num_tile_pipes = ctx->screen->info.num_tile_pipes;
		unsigned backend_map = ctx->screen->info.r600_gb_backend_map;
		unsigned item_width, item_mask;

		if (ctx->chip_class >= EVERGREEN) {
			item_width = 4;
			item_mask = 0x7;
		} else {
			item_width = 2;
			item_mask = 0x3;
		}

		while (num_tile_pipes--) {
			i = backend_map & item_mask;
			mask |= (1<<i);
			backend_map >>= item_width;
		}
		if (mask != 0) {
			ctx->backend_mask = mask;
			return;
		}
	}

	/* otherwise backup path for older kernels */

	/* create buffer for event data */
	buffer = (struct r600_resource*)
		pipe_buffer_create(ctx->b.screen, 0,
				   PIPE_USAGE_STAGING, ctx->max_db*16);
	if (!buffer)
		goto err;

	/* initialize buffer with zeroes */
	results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_WRITE);
	if (results) {
		memset(results, 0, ctx->max_db * 4 * 4);

		/* emit EVENT_WRITE for ZPASS_DONE */
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
		radeon_emit(cs, buffer->gpu_address);
		radeon_emit(cs, buffer->gpu_address >> 32);

		r600_emit_reloc(ctx, &ctx->gfx, buffer,
				RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);

		/* analyze results */
		results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_READ);
		if (results) {
			for(i = 0; i < ctx->max_db; i++) {
				/* at least highest bit will be set if backend is used */
				if (results[i*4 + 1])
					mask |= (1<<i);
			}
		}
	}

	r600_resource_reference(&buffer, NULL);

	if (mask != 0) {
		ctx->backend_mask = mask;
		return;
	}

err:
	/* fallback to old method - set num_backends lower bits to 1 */
	ctx->backend_mask = (~((uint32_t)0))>>(32-num_backends);
	return;
}
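
/* Illustrative example, not part of the driver: decoding a hypothetical
 * Evergreen backend_map with item_width = 4 and item_mask = 0x7, as done
 * above. A map of 0x3210 names backends 0, 1, 2 and 3, giving mask 0xf.
 */
#if 0
static uint32_t example_decode_backend_map(void)
{
	unsigned backend_map = 0x3210;
	unsigned num_tile_pipes = 4;
	uint32_t mask = 0;

	while (num_tile_pipes--) {
		mask |= 1u << (backend_map & 0x7);
		backend_map >>= 4;
	}
	return mask; /* 0xf */
}
#endif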

#define XFULL(name_, query_type_, type_, result_type_, group_id_) \
	{ \
		.name = name_, \
		.query_type = R600_QUERY_##query_type_, \
		.type = PIPE_DRIVER_QUERY_TYPE_##type_, \
		.result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_##result_type_, \
		.group_id = group_id_ \
	}

#define X(name_, query_type_, type_, result_type_) \
	XFULL(name_, query_type_, type_, result_type_, ~(unsigned)0)

#define XG(group_, name_, query_type_, type_, result_type_) \
	XFULL(name_, query_type_, type_, result_type_, R600_QUERY_GROUP_##group_)

static struct pipe_driver_query_info r600_driver_query_list[] = {
	X("num-compilations", NUM_COMPILATIONS, UINT64, CUMULATIVE),
	X("num-shaders-created", NUM_SHADERS_CREATED, UINT64, CUMULATIVE),
	X("num-shader-cache-hits", NUM_SHADER_CACHE_HITS, UINT64, CUMULATIVE),
	X("draw-calls", DRAW_CALLS, UINT64, AVERAGE),
	X("spill-draw-calls", SPILL_DRAW_CALLS, UINT64, AVERAGE),
	X("compute-calls", COMPUTE_CALLS, UINT64, AVERAGE),
	X("spill-compute-calls", SPILL_COMPUTE_CALLS, UINT64, AVERAGE),
	X("dma-calls", DMA_CALLS, UINT64, AVERAGE),
	X("cp-dma-calls", CP_DMA_CALLS, UINT64, AVERAGE),
	X("num-vs-flushes", NUM_VS_FLUSHES, UINT64, AVERAGE),
	X("num-ps-flushes", NUM_PS_FLUSHES, UINT64, AVERAGE),
	X("num-cs-flushes", NUM_CS_FLUSHES, UINT64, AVERAGE),
	X("num-fb-cache-flushes", NUM_FB_CACHE_FLUSHES, UINT64, AVERAGE),
	X("num-L2-invalidates", NUM_L2_INVALIDATES, UINT64, AVERAGE),
	X("num-L2-writebacks", NUM_L2_WRITEBACKS, UINT64, AVERAGE),
	X("requested-VRAM", REQUESTED_VRAM, BYTES, AVERAGE),
	X("requested-GTT", REQUESTED_GTT, BYTES, AVERAGE),
	X("mapped-VRAM", MAPPED_VRAM, BYTES, AVERAGE),
	X("mapped-GTT", MAPPED_GTT, BYTES, AVERAGE),
	X("buffer-wait-time", BUFFER_WAIT_TIME, MICROSECONDS, CUMULATIVE),
	X("num-GFX-IBs", NUM_GFX_IBS, UINT64, AVERAGE),
	X("num-SDMA-IBs", NUM_SDMA_IBS, UINT64, AVERAGE),
	X("num-bytes-moved", NUM_BYTES_MOVED, BYTES, CUMULATIVE),
	X("num-evictions", NUM_EVICTIONS, UINT64, CUMULATIVE),
	X("VRAM-usage", VRAM_USAGE, BYTES, AVERAGE),
	X("GTT-usage", GTT_USAGE, BYTES, AVERAGE),
	X("back-buffer-ps-draw-ratio", BACK_BUFFER_PS_DRAW_RATIO, UINT64, AVERAGE),

	/* GPIN queries are for the benefit of old versions of GPUPerfStudio,
	 * which use it as a fallback path to detect the GPU type.
	 *
	 * Note: The names of these queries are significant for GPUPerfStudio
	 * (and possibly their order as well). */
	XG(GPIN, "GPIN_000", GPIN_ASIC_ID, UINT, AVERAGE),
	XG(GPIN, "GPIN_001", GPIN_NUM_SIMD, UINT, AVERAGE),
	XG(GPIN, "GPIN_002", GPIN_NUM_RB, UINT, AVERAGE),
	XG(GPIN, "GPIN_003", GPIN_NUM_SPI, UINT, AVERAGE),
	XG(GPIN, "GPIN_004", GPIN_NUM_SE, UINT, AVERAGE),

	/* The following queries must be at the end of the list because their
	 * availability is adjusted dynamically based on the DRM version. */
	X("GPU-load", GPU_LOAD, UINT64, AVERAGE),
	X("GPU-shaders-busy", GPU_SHADERS_BUSY, UINT64, AVERAGE),
	X("temperature", GPU_TEMPERATURE, UINT64, AVERAGE),
	X("shader-clock", CURRENT_GPU_SCLK, HZ, AVERAGE),
	X("memory-clock", CURRENT_GPU_MCLK, HZ, AVERAGE),
};

#undef X
#undef XG
#undef XFULL

static unsigned r600_get_num_queries(struct r600_common_screen *rscreen)
{
	if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42)
		return ARRAY_SIZE(r600_driver_query_list);
	else if (rscreen->info.drm_major == 3)
		return ARRAY_SIZE(r600_driver_query_list) - 3;
	else
		return ARRAY_SIZE(r600_driver_query_list) - 5;
}

static int r600_get_driver_query_info(struct pipe_screen *screen,
				      unsigned index,
				      struct pipe_driver_query_info *info)
{
	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
	unsigned num_queries = r600_get_num_queries(rscreen);

	if (!info) {
		unsigned num_perfcounters =
			r600_get_perfcounter_info(rscreen, 0, NULL);

		return num_queries + num_perfcounters;
	}

	if (index >= num_queries)
		return r600_get_perfcounter_info(rscreen, index - num_queries, info);

	*info = r600_driver_query_list[index];

	switch (info->query_type) {
	case R600_QUERY_REQUESTED_VRAM:
	case R600_QUERY_VRAM_USAGE:
	case R600_QUERY_MAPPED_VRAM:
		info->max_value.u64 = rscreen->info.vram_size;
		break;
	case R600_QUERY_REQUESTED_GTT:
	case R600_QUERY_GTT_USAGE:
	case R600_QUERY_MAPPED_GTT:
		info->max_value.u64 = rscreen->info.gart_size;
		break;
	case R600_QUERY_GPU_TEMPERATURE:
		info->max_value.u64 = 125;
		break;
	}

	if (info->group_id != ~(unsigned)0 && rscreen->perfcounters)
		info->group_id += rscreen->perfcounters->num_groups;

	return 1;
}

/* Note: Unfortunately, GPUPerfStudio hardcodes the order of hardware
 * performance counter groups, so be careful when changing this and related
 * functions.
 */
static int r600_get_driver_query_group_info(struct pipe_screen *screen,
					    unsigned index,
					    struct pipe_driver_query_group_info *info)
{
	struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
	unsigned num_pc_groups = 0;

	if (rscreen->perfcounters)
		num_pc_groups = rscreen->perfcounters->num_groups;

	if (!info)
		return num_pc_groups + R600_NUM_SW_QUERY_GROUPS;

	if (index < num_pc_groups)
		return r600_get_perfcounter_group_info(rscreen, index, info);

	index -= num_pc_groups;
	if (index >= R600_NUM_SW_QUERY_GROUPS)
		return 0;

	info->name = "GPIN";
	info->max_active_queries = 5;
	info->num_queries = 5;
	return 1;
}

void r600_query_init(struct r600_common_context *rctx)
{
	rctx->b.create_query = r600_create_query;
	rctx->b.create_batch_query = r600_create_batch_query;
	rctx->b.destroy_query = r600_destroy_query;
	rctx->b.begin_query = r600_begin_query;
	rctx->b.end_query = r600_end_query;
	rctx->b.get_query_result = r600_get_query_result;
	rctx->b.get_query_result_resource = r600_get_query_result_resource;
	rctx->render_cond_atom.emit = r600_emit_query_predication;

	if (((struct r600_common_screen*)rctx->b.screen)->info.num_render_backends > 0)
		rctx->b.render_condition = r600_render_condition;

	LIST_INITHEAD(&rctx->active_queries);
}

void r600_init_screen_query_functions(struct r600_common_screen *rscreen)
{
	rscreen->b.get_driver_query_info = r600_get_driver_query_info;
	rscreen->b.get_driver_query_group_info = r600_get_driver_query_group_info;
}