1 /* 2 * Copyright 2015 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors: 24 * Nicolai Hhnle <nicolai.haehnle (at) amd.com> 25 * 26 */ 27 28 #include "util/u_memory.h" 29 #include "r600_query.h" 30 #include "r600_pipe_common.h" 31 #include "r600d_common.h" 32 33 /* Max counters per HW block */ 34 #define R600_QUERY_MAX_COUNTERS 16 35 36 static struct r600_perfcounter_block * 37 lookup_counter(struct r600_perfcounters *pc, unsigned index, 38 unsigned *base_gid, unsigned *sub_index) 39 { 40 struct r600_perfcounter_block *block = pc->blocks; 41 unsigned bid; 42 43 *base_gid = 0; 44 for (bid = 0; bid < pc->num_blocks; ++bid, ++block) { 45 unsigned total = block->num_groups * block->num_selectors; 46 47 if (index < total) { 48 *sub_index = index; 49 return block; 50 } 51 52 index -= total; 53 *base_gid += block->num_groups; 54 } 55 56 return NULL; 57 } 58 59 static struct r600_perfcounter_block * 60 lookup_group(struct r600_perfcounters *pc, unsigned *index) 61 { 62 unsigned bid; 63 struct r600_perfcounter_block *block = pc->blocks; 64 65 for (bid = 0; bid < pc->num_blocks; ++bid, ++block) { 66 if (*index < block->num_groups) 67 return block; 68 *index -= block->num_groups; 69 } 70 71 return NULL; 72 } 73 74 struct r600_pc_group { 75 struct r600_pc_group *next; 76 struct r600_perfcounter_block *block; 77 unsigned sub_gid; /* only used during init */ 78 unsigned result_base; /* only used during init */ 79 int se; 80 int instance; 81 unsigned num_counters; 82 unsigned selectors[R600_QUERY_MAX_COUNTERS]; 83 }; 84 85 struct r600_pc_counter { 86 unsigned base; 87 unsigned qwords; 88 unsigned stride; /* in uint64s */ 89 }; 90 91 #define R600_PC_SHADERS_WINDOWING (1 << 31) 92 93 struct r600_query_pc { 94 struct r600_query_hw b; 95 96 unsigned shaders; 97 unsigned num_counters; 98 struct r600_pc_counter *counters; 99 struct r600_pc_group *groups; 100 }; 101 102 static void r600_pc_query_destroy(struct r600_common_screen *rscreen, 103 struct r600_query *rquery) 104 { 105 struct r600_query_pc *query = (struct r600_query_pc *)rquery; 106 107 while (query->groups) { 108 struct r600_pc_group *group = query->groups; 109 query->groups = group->next; 110 FREE(group); 111 } 112 113 FREE(query->counters); 114 115 r600_query_hw_destroy(rscreen, rquery); 116 } 117 118 static bool r600_pc_query_prepare_buffer(struct r600_common_screen *screen, 119 struct r600_query_hw *hwquery, 120 struct r600_resource *buffer) 121 { 122 /* no-op */ 123 return true; 124 } 125 126 static void r600_pc_query_emit_start(struct r600_common_context *ctx, 127 struct r600_query_hw *hwquery, 128 struct r600_resource *buffer, uint64_t va) 129 { 130 struct r600_perfcounters *pc = ctx->screen->perfcounters; 131 struct r600_query_pc *query = (struct r600_query_pc *)hwquery; 132 struct r600_pc_group *group; 133 int current_se = -1; 134 int current_instance = -1; 135 136 if (query->shaders) 137 pc->emit_shaders(ctx, query->shaders); 138 139 for (group = query->groups; group; group = group->next) { 140 struct r600_perfcounter_block *block = group->block; 141 142 if (group->se != current_se || group->instance != current_instance) { 143 current_se = group->se; 144 current_instance = group->instance; 145 pc->emit_instance(ctx, group->se, group->instance); 146 } 147 148 pc->emit_select(ctx, block, group->num_counters, group->selectors); 149 } 150 151 if (current_se != -1 || current_instance != -1) 152 pc->emit_instance(ctx, -1, -1); 153 154 pc->emit_start(ctx, buffer, va); 155 } 156 157 static void r600_pc_query_emit_stop(struct r600_common_context *ctx, 158 struct r600_query_hw *hwquery, 159 struct r600_resource *buffer, uint64_t va) 160 { 161 struct r600_perfcounters *pc = ctx->screen->perfcounters; 162 struct r600_query_pc *query = (struct r600_query_pc *)hwquery; 163 struct r600_pc_group *group; 164 165 pc->emit_stop(ctx, buffer, va); 166 167 for (group = query->groups; group; group = group->next) { 168 struct r600_perfcounter_block *block = group->block; 169 unsigned se = group->se >= 0 ? group->se : 0; 170 unsigned se_end = se + 1; 171 172 if ((block->flags & R600_PC_BLOCK_SE) && (group->se < 0)) 173 se_end = ctx->screen->info.max_se; 174 175 do { 176 unsigned instance = group->instance >= 0 ? group->instance : 0; 177 178 do { 179 pc->emit_instance(ctx, se, instance); 180 pc->emit_read(ctx, block, 181 group->num_counters, group->selectors, 182 buffer, va); 183 va += sizeof(uint64_t) * group->num_counters; 184 } while (group->instance < 0 && ++instance < block->num_instances); 185 } while (++se < se_end); 186 } 187 188 pc->emit_instance(ctx, -1, -1); 189 } 190 191 static void r600_pc_query_clear_result(struct r600_query_hw *hwquery, 192 union pipe_query_result *result) 193 { 194 struct r600_query_pc *query = (struct r600_query_pc *)hwquery; 195 196 memset(result, 0, sizeof(result->batch[0]) * query->num_counters); 197 } 198 199 static void r600_pc_query_add_result(struct r600_common_screen *rscreen, 200 struct r600_query_hw *hwquery, 201 void *buffer, 202 union pipe_query_result *result) 203 { 204 struct r600_query_pc *query = (struct r600_query_pc *)hwquery; 205 uint64_t *results = buffer; 206 unsigned i, j; 207 208 for (i = 0; i < query->num_counters; ++i) { 209 struct r600_pc_counter *counter = &query->counters[i]; 210 211 for (j = 0; j < counter->qwords; ++j) { 212 uint32_t value = results[counter->base + j * counter->stride]; 213 result->batch[i].u64 += value; 214 } 215 } 216 } 217 218 static struct r600_query_ops batch_query_ops = { 219 .destroy = r600_pc_query_destroy, 220 .begin = r600_query_hw_begin, 221 .end = r600_query_hw_end, 222 .get_result = r600_query_hw_get_result 223 }; 224 225 static struct r600_query_hw_ops batch_query_hw_ops = { 226 .prepare_buffer = r600_pc_query_prepare_buffer, 227 .emit_start = r600_pc_query_emit_start, 228 .emit_stop = r600_pc_query_emit_stop, 229 .clear_result = r600_pc_query_clear_result, 230 .add_result = r600_pc_query_add_result, 231 }; 232 233 static struct r600_pc_group *get_group_state(struct r600_common_screen *screen, 234 struct r600_query_pc *query, 235 struct r600_perfcounter_block *block, 236 unsigned sub_gid) 237 { 238 struct r600_pc_group *group = query->groups; 239 240 while (group) { 241 if (group->block == block && group->sub_gid == sub_gid) 242 return group; 243 group = group->next; 244 } 245 246 group = CALLOC_STRUCT(r600_pc_group); 247 if (!group) 248 return NULL; 249 250 group->block = block; 251 group->sub_gid = sub_gid; 252 253 if (block->flags & R600_PC_BLOCK_SHADER) { 254 unsigned sub_gids = block->num_instances; 255 unsigned shader_id; 256 unsigned shaders; 257 unsigned query_shaders; 258 259 if (block->flags & R600_PC_BLOCK_SE_GROUPS) 260 sub_gids = sub_gids * screen->info.max_se; 261 shader_id = sub_gid / sub_gids; 262 sub_gid = sub_gid % sub_gids; 263 264 shaders = screen->perfcounters->shader_type_bits[shader_id]; 265 266 query_shaders = query->shaders & ~R600_PC_SHADERS_WINDOWING; 267 if (query_shaders && query_shaders != shaders) { 268 fprintf(stderr, "r600_perfcounter: incompatible shader groups\n"); 269 FREE(group); 270 return NULL; 271 } 272 query->shaders = shaders; 273 } 274 275 if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED && !query->shaders) { 276 // A non-zero value in query->shaders ensures that the shader 277 // masking is reset unless the user explicitly requests one. 278 query->shaders = R600_PC_SHADERS_WINDOWING; 279 } 280 281 if (block->flags & R600_PC_BLOCK_SE_GROUPS) { 282 group->se = sub_gid / block->num_instances; 283 sub_gid = sub_gid % block->num_instances; 284 } else { 285 group->se = -1; 286 } 287 288 if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) { 289 group->instance = sub_gid; 290 } else { 291 group->instance = -1; 292 } 293 294 group->next = query->groups; 295 query->groups = group; 296 297 return group; 298 } 299 300 struct pipe_query *r600_create_batch_query(struct pipe_context *ctx, 301 unsigned num_queries, 302 unsigned *query_types) 303 { 304 struct r600_common_screen *screen = 305 (struct r600_common_screen *)ctx->screen; 306 struct r600_perfcounters *pc = screen->perfcounters; 307 struct r600_perfcounter_block *block; 308 struct r600_pc_group *group; 309 struct r600_query_pc *query; 310 unsigned base_gid, sub_gid, sub_index; 311 unsigned i, j; 312 313 if (!pc) 314 return NULL; 315 316 query = CALLOC_STRUCT(r600_query_pc); 317 if (!query) 318 return NULL; 319 320 query->b.b.ops = &batch_query_ops; 321 query->b.ops = &batch_query_hw_ops; 322 323 query->num_counters = num_queries; 324 325 /* Collect selectors per group */ 326 for (i = 0; i < num_queries; ++i) { 327 unsigned sub_gid; 328 329 if (query_types[i] < R600_QUERY_FIRST_PERFCOUNTER) 330 goto error; 331 332 block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER, 333 &base_gid, &sub_index); 334 if (!block) 335 goto error; 336 337 sub_gid = sub_index / block->num_selectors; 338 sub_index = sub_index % block->num_selectors; 339 340 group = get_group_state(screen, query, block, sub_gid); 341 if (!group) 342 goto error; 343 344 if (group->num_counters >= block->num_counters) { 345 fprintf(stderr, 346 "perfcounter group %s: too many selected\n", 347 block->basename); 348 goto error; 349 } 350 group->selectors[group->num_counters] = sub_index; 351 ++group->num_counters; 352 } 353 354 /* Compute result bases and CS size per group */ 355 query->b.num_cs_dw_begin = pc->num_start_cs_dwords; 356 query->b.num_cs_dw_end = pc->num_stop_cs_dwords; 357 358 query->b.num_cs_dw_begin += pc->num_instance_cs_dwords; /* conservative */ 359 query->b.num_cs_dw_end += pc->num_instance_cs_dwords; 360 361 i = 0; 362 for (group = query->groups; group; group = group->next) { 363 struct r600_perfcounter_block *block = group->block; 364 unsigned select_dw, read_dw; 365 unsigned instances = 1; 366 367 if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0) 368 instances = screen->info.max_se; 369 if (group->instance < 0) 370 instances *= block->num_instances; 371 372 group->result_base = i; 373 query->b.result_size += sizeof(uint64_t) * instances * group->num_counters; 374 i += instances * group->num_counters; 375 376 pc->get_size(block, group->num_counters, group->selectors, 377 &select_dw, &read_dw); 378 query->b.num_cs_dw_begin += select_dw; 379 query->b.num_cs_dw_end += instances * read_dw; 380 query->b.num_cs_dw_begin += pc->num_instance_cs_dwords; /* conservative */ 381 query->b.num_cs_dw_end += instances * pc->num_instance_cs_dwords; 382 } 383 384 if (query->shaders) { 385 if (query->shaders == R600_PC_SHADERS_WINDOWING) 386 query->shaders = 0xffffffff; 387 query->b.num_cs_dw_begin += pc->num_shaders_cs_dwords; 388 } 389 390 /* Map user-supplied query array to result indices */ 391 query->counters = CALLOC(num_queries, sizeof(*query->counters)); 392 for (i = 0; i < num_queries; ++i) { 393 struct r600_pc_counter *counter = &query->counters[i]; 394 struct r600_perfcounter_block *block; 395 396 block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER, 397 &base_gid, &sub_index); 398 399 sub_gid = sub_index / block->num_selectors; 400 sub_index = sub_index % block->num_selectors; 401 402 group = get_group_state(screen, query, block, sub_gid); 403 assert(group != NULL); 404 405 for (j = 0; j < group->num_counters; ++j) { 406 if (group->selectors[j] == sub_index) 407 break; 408 } 409 410 counter->base = group->result_base + j; 411 counter->stride = group->num_counters; 412 413 counter->qwords = 1; 414 if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0) 415 counter->qwords = screen->info.max_se; 416 if (group->instance < 0) 417 counter->qwords *= block->num_instances; 418 } 419 420 if (!r600_query_hw_init(screen, &query->b)) 421 goto error; 422 423 return (struct pipe_query *)query; 424 425 error: 426 r600_pc_query_destroy(screen, &query->b.b); 427 return NULL; 428 } 429 430 static bool r600_init_block_names(struct r600_common_screen *screen, 431 struct r600_perfcounter_block *block) 432 { 433 unsigned i, j, k; 434 unsigned groups_shader = 1, groups_se = 1, groups_instance = 1; 435 unsigned namelen; 436 char *groupname; 437 char *p; 438 439 if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) 440 groups_instance = block->num_instances; 441 if (block->flags & R600_PC_BLOCK_SE_GROUPS) 442 groups_se = screen->info.max_se; 443 if (block->flags & R600_PC_BLOCK_SHADER) 444 groups_shader = screen->perfcounters->num_shader_types; 445 446 namelen = strlen(block->basename); 447 block->group_name_stride = namelen + 1; 448 if (block->flags & R600_PC_BLOCK_SHADER) 449 block->group_name_stride += 3; 450 if (block->flags & R600_PC_BLOCK_SE_GROUPS) { 451 assert(groups_se <= 10); 452 block->group_name_stride += 1; 453 454 if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) 455 block->group_name_stride += 1; 456 } 457 if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) { 458 assert(groups_instance <= 100); 459 block->group_name_stride += 2; 460 } 461 462 block->group_names = MALLOC(block->num_groups * block->group_name_stride); 463 if (!block->group_names) 464 return false; 465 466 groupname = block->group_names; 467 for (i = 0; i < groups_shader; ++i) { 468 const char *shader_suffix = screen->perfcounters->shader_type_suffixes[i]; 469 unsigned shaderlen = strlen(shader_suffix); 470 for (j = 0; j < groups_se; ++j) { 471 for (k = 0; k < groups_instance; ++k) { 472 strcpy(groupname, block->basename); 473 p = groupname + namelen; 474 475 if (block->flags & R600_PC_BLOCK_SHADER) { 476 strcpy(p, shader_suffix); 477 p += shaderlen; 478 } 479 480 if (block->flags & R600_PC_BLOCK_SE_GROUPS) { 481 p += sprintf(p, "%d", j); 482 if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) 483 *p++ = '_'; 484 } 485 486 if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) 487 p += sprintf(p, "%d", k); 488 489 groupname += block->group_name_stride; 490 } 491 } 492 } 493 494 assert(block->num_selectors <= 1000); 495 block->selector_name_stride = block->group_name_stride + 4; 496 block->selector_names = MALLOC(block->num_groups * block->num_selectors * 497 block->selector_name_stride); 498 if (!block->selector_names) 499 return false; 500 501 groupname = block->group_names; 502 p = block->selector_names; 503 for (i = 0; i < block->num_groups; ++i) { 504 for (j = 0; j < block->num_selectors; ++j) { 505 sprintf(p, "%s_%03d", groupname, j); 506 p += block->selector_name_stride; 507 } 508 groupname += block->group_name_stride; 509 } 510 511 return true; 512 } 513 514 int r600_get_perfcounter_info(struct r600_common_screen *screen, 515 unsigned index, 516 struct pipe_driver_query_info *info) 517 { 518 struct r600_perfcounters *pc = screen->perfcounters; 519 struct r600_perfcounter_block *block; 520 unsigned base_gid, sub; 521 522 if (!pc) 523 return 0; 524 525 if (!info) { 526 unsigned bid, num_queries = 0; 527 528 for (bid = 0; bid < pc->num_blocks; ++bid) { 529 num_queries += pc->blocks[bid].num_selectors * 530 pc->blocks[bid].num_groups; 531 } 532 533 return num_queries; 534 } 535 536 block = lookup_counter(pc, index, &base_gid, &sub); 537 if (!block) 538 return 0; 539 540 if (!block->selector_names) { 541 if (!r600_init_block_names(screen, block)) 542 return 0; 543 } 544 info->name = block->selector_names + sub * block->selector_name_stride; 545 info->query_type = R600_QUERY_FIRST_PERFCOUNTER + index; 546 info->max_value.u64 = 0; 547 info->type = PIPE_DRIVER_QUERY_TYPE_UINT64; 548 info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE; 549 info->group_id = base_gid + sub / block->num_selectors; 550 info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH; 551 if (sub > 0 && sub + 1 < block->num_selectors * block->num_groups) 552 info->flags |= PIPE_DRIVER_QUERY_FLAG_DONT_LIST; 553 return 1; 554 } 555 556 int r600_get_perfcounter_group_info(struct r600_common_screen *screen, 557 unsigned index, 558 struct pipe_driver_query_group_info *info) 559 { 560 struct r600_perfcounters *pc = screen->perfcounters; 561 struct r600_perfcounter_block *block; 562 563 if (!pc) 564 return 0; 565 566 if (!info) 567 return pc->num_groups; 568 569 block = lookup_group(pc, &index); 570 if (!block) 571 return 0; 572 573 if (!block->group_names) { 574 if (!r600_init_block_names(screen, block)) 575 return 0; 576 } 577 info->name = block->group_names + index * block->group_name_stride; 578 info->num_queries = block->num_selectors; 579 info->max_active_queries = block->num_counters; 580 return 1; 581 } 582 583 void r600_perfcounters_destroy(struct r600_common_screen *rscreen) 584 { 585 if (rscreen->perfcounters) 586 rscreen->perfcounters->cleanup(rscreen); 587 } 588 589 bool r600_perfcounters_init(struct r600_perfcounters *pc, 590 unsigned num_blocks) 591 { 592 pc->blocks = CALLOC(num_blocks, sizeof(struct r600_perfcounter_block)); 593 if (!pc->blocks) 594 return false; 595 596 pc->separate_se = debug_get_bool_option("RADEON_PC_SEPARATE_SE", false); 597 pc->separate_instance = debug_get_bool_option("RADEON_PC_SEPARATE_INSTANCE", false); 598 599 return true; 600 } 601 602 void r600_perfcounters_add_block(struct r600_common_screen *rscreen, 603 struct r600_perfcounters *pc, 604 const char *name, unsigned flags, 605 unsigned counters, unsigned selectors, 606 unsigned instances, void *data) 607 { 608 struct r600_perfcounter_block *block = &pc->blocks[pc->num_blocks]; 609 610 assert(counters <= R600_QUERY_MAX_COUNTERS); 611 612 block->basename = name; 613 block->flags = flags; 614 block->num_counters = counters; 615 block->num_selectors = selectors; 616 block->num_instances = MAX2(instances, 1); 617 block->data = data; 618 619 if (pc->separate_se && (block->flags & R600_PC_BLOCK_SE)) 620 block->flags |= R600_PC_BLOCK_SE_GROUPS; 621 if (pc->separate_instance && block->num_instances > 1) 622 block->flags |= R600_PC_BLOCK_INSTANCE_GROUPS; 623 624 if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) { 625 block->num_groups = block->num_instances; 626 } else { 627 block->num_groups = 1; 628 } 629 630 if (block->flags & R600_PC_BLOCK_SE_GROUPS) 631 block->num_groups *= rscreen->info.max_se; 632 if (block->flags & R600_PC_BLOCK_SHADER) 633 block->num_groups *= pc->num_shader_types; 634 635 ++pc->num_blocks; 636 pc->num_groups += block->num_groups; 637 } 638 639 void r600_perfcounters_do_destroy(struct r600_perfcounters *pc) 640 { 641 unsigned i; 642 643 for (i = 0; i < pc->num_blocks; ++i) { 644 FREE(pc->blocks[i].group_names); 645 FREE(pc->blocks[i].selector_names); 646 } 647 FREE(pc->blocks); 648 FREE(pc); 649 } 650