1 /* 2 * Copyright 2015 Samuel Pitoiset 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23 #include "nvc0/nvc0_context.h" 24 #include "nvc0/nvc0_query_hw_metric.h" 25 #include "nvc0/nvc0_query_hw_sm.h" 26 27 #define _Q(i,n,t,d) { NVC0_HW_METRIC_QUERY_##i, n, PIPE_DRIVER_QUERY_TYPE_##t, d } 28 static const struct nvc0_hw_metric_cfg { 29 unsigned id; 30 const char *name; 31 enum pipe_driver_query_type type; 32 const char *desc; 33 } nvc0_hw_metric_queries[] = { 34 _Q(ACHIEVED_OCCUPANCY, 35 "metric-achieved_occupancy", 36 PERCENTAGE, 37 "Ratio of the average active warps per active cycle to the maximum " 38 "number of warps supported on a multiprocessor"), 39 40 _Q(BRANCH_EFFICIENCY, 41 "metric-branch_efficiency", 42 PERCENTAGE, 43 "Ratio of non-divergent branches to total branches"), 44 45 _Q(INST_ISSUED, 46 "metric-inst_issued", 47 UINT64, 48 "The number of instructions issued"), 49 50 _Q(INST_PER_WRAP, 51 "metric-inst_per_wrap", 52 UINT64, 53 "Average number of instructions executed by each warp"), 54 55 _Q(INST_REPLAY_OVERHEAD, 56 "metric-inst_replay_overhead", 57 UINT64, 58 "Average number of replays for each instruction executed"), 59 60 _Q(ISSUED_IPC, 61 "metric-issued_ipc", 62 UINT64, 63 "Instructions issued per cycle"), 64 65 _Q(ISSUE_SLOTS, 66 "metric-issue_slots", 67 UINT64, 68 "The number of issue slots used"), 69 70 _Q(ISSUE_SLOT_UTILIZATION, 71 "metric-issue_slot_utilization", 72 PERCENTAGE, 73 "Percentage of issue slots that issued at least one instruction, " 74 "averaged across all cycles"), 75 76 _Q(IPC, 77 "metric-ipc", 78 UINT64, 79 "Instructions executed per cycle"), 80 81 _Q(SHARED_REPLAY_OVERHEAD, 82 "metric-shared_replay_overhead", 83 UINT64, 84 "Average number of replays due to shared memory conflicts for each " 85 "instruction executed"), 86 87 _Q(WARP_EXECUTION_EFFICIENCY, 88 "metric-warp_execution_efficiency", 89 PERCENTAGE, 90 "Ratio of the average active threads per warp to the maximum number of " 91 "threads per warp supported on a multiprocessor"), 92 93 _Q(WARP_NONPRED_EXECUTION_EFFICIENCY, 94 "metric-warp_nonpred_execution_efficiency", 95 PERCENTAGE, 96 "Ratio of the average active threads per warp executing non-predicated " 97 "instructions to the maximum number of threads per warp supported on a " 98 "multiprocessor"), 99 }; 100 101 #undef _Q 102 103 static inline const struct nvc0_hw_metric_cfg * 104 nvc0_hw_metric_get_cfg(unsigned metric_id) 105 { 106 unsigned i; 107 108 for (i = 0; i < ARRAY_SIZE(nvc0_hw_metric_queries); i++) { 109 if (nvc0_hw_metric_queries[i].id == metric_id) 110 return &nvc0_hw_metric_queries[i]; 111 } 112 assert(0); 113 return NULL; 114 } 115 116 struct nvc0_hw_metric_query_cfg { 117 unsigned type; 118 uint32_t queries[8]; 119 uint32_t num_queries; 120 }; 121 122 #define _SM(n) NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_ ##n) 123 124 /* ==== Compute capability 2.0 (GF100/GF110) ==== */ 125 static const struct nvc0_hw_metric_query_cfg 126 sm20_achieved_occupancy = 127 { 128 .type = NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY, 129 .queries[0] = _SM(ACTIVE_WARPS), 130 .queries[1] = _SM(ACTIVE_CYCLES), 131 .num_queries = 2, 132 }; 133 134 static const struct nvc0_hw_metric_query_cfg 135 sm20_branch_efficiency = 136 { 137 .type = NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY, 138 .queries[0] = _SM(BRANCH), 139 .queries[1] = _SM(DIVERGENT_BRANCH), 140 .num_queries = 2, 141 }; 142 143 static const struct nvc0_hw_metric_query_cfg 144 sm20_inst_per_wrap = 145 { 146 .type = NVC0_HW_METRIC_QUERY_INST_PER_WRAP, 147 .queries[0] = _SM(INST_EXECUTED), 148 .queries[1] = _SM(WARPS_LAUNCHED), 149 .num_queries = 2, 150 }; 151 152 static const struct nvc0_hw_metric_query_cfg 153 sm20_inst_replay_overhead = 154 { 155 .type = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD, 156 .queries[0] = _SM(INST_ISSUED), 157 .queries[1] = _SM(INST_EXECUTED), 158 .num_queries = 2, 159 }; 160 161 static const struct nvc0_hw_metric_query_cfg 162 sm20_issued_ipc = 163 { 164 .type = NVC0_HW_METRIC_QUERY_ISSUED_IPC, 165 .queries[0] = _SM(INST_ISSUED), 166 .queries[1] = _SM(ACTIVE_CYCLES), 167 .num_queries = 2, 168 }; 169 170 static const struct nvc0_hw_metric_query_cfg 171 sm20_issue_slot_utilization = 172 { 173 .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION, 174 .queries[0] = _SM(INST_ISSUED), 175 .queries[1] = _SM(ACTIVE_CYCLES), 176 .num_queries = 2, 177 }; 178 179 static const struct nvc0_hw_metric_query_cfg 180 sm20_ipc = 181 { 182 .type = NVC0_HW_METRIC_QUERY_IPC, 183 .queries[0] = _SM(INST_EXECUTED), 184 .queries[1] = _SM(ACTIVE_CYCLES), 185 .num_queries = 2, 186 }; 187 188 static const struct nvc0_hw_metric_query_cfg *sm20_hw_metric_queries[] = 189 { 190 &sm20_achieved_occupancy, 191 &sm20_branch_efficiency, 192 &sm20_inst_per_wrap, 193 &sm20_inst_replay_overhead, 194 &sm20_ipc, 195 &sm20_issued_ipc, 196 &sm20_issue_slot_utilization, 197 }; 198 199 /* ==== Compute capability 2.1 (GF108+ except GF110) ==== */ 200 static const struct nvc0_hw_metric_query_cfg 201 sm21_inst_issued = 202 { 203 .type = NVC0_HW_METRIC_QUERY_INST_ISSUED, 204 .queries[0] = _SM(INST_ISSUED1_0), 205 .queries[1] = _SM(INST_ISSUED1_1), 206 .queries[2] = _SM(INST_ISSUED2_0), 207 .queries[3] = _SM(INST_ISSUED2_1), 208 .num_queries = 4, 209 }; 210 211 static const struct nvc0_hw_metric_query_cfg 212 sm21_inst_replay_overhead = 213 { 214 .type = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD, 215 .queries[0] = _SM(INST_ISSUED1_0), 216 .queries[1] = _SM(INST_ISSUED1_1), 217 .queries[2] = _SM(INST_ISSUED2_0), 218 .queries[3] = _SM(INST_ISSUED2_1), 219 .queries[4] = _SM(INST_EXECUTED), 220 .num_queries = 5, 221 }; 222 223 static const struct nvc0_hw_metric_query_cfg 224 sm21_issued_ipc = 225 { 226 .type = NVC0_HW_METRIC_QUERY_ISSUED_IPC, 227 .queries[0] = _SM(INST_ISSUED1_0), 228 .queries[1] = _SM(INST_ISSUED1_1), 229 .queries[2] = _SM(INST_ISSUED2_0), 230 .queries[3] = _SM(INST_ISSUED2_1), 231 .queries[4] = _SM(ACTIVE_CYCLES), 232 .num_queries = 5, 233 }; 234 235 static const struct nvc0_hw_metric_query_cfg 236 sm21_issue_slots = 237 { 238 .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOTS, 239 .queries[0] = _SM(INST_ISSUED1_0), 240 .queries[1] = _SM(INST_ISSUED1_1), 241 .queries[2] = _SM(INST_ISSUED2_0), 242 .queries[3] = _SM(INST_ISSUED2_1), 243 .num_queries = 4, 244 }; 245 246 static const struct nvc0_hw_metric_query_cfg 247 sm21_issue_slot_utilization = 248 { 249 .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION, 250 .queries[0] = _SM(INST_ISSUED1_0), 251 .queries[1] = _SM(INST_ISSUED1_1), 252 .queries[2] = _SM(INST_ISSUED2_0), 253 .queries[3] = _SM(INST_ISSUED2_1), 254 .queries[4] = _SM(ACTIVE_CYCLES), 255 .num_queries = 5, 256 }; 257 258 static const struct nvc0_hw_metric_query_cfg *sm21_hw_metric_queries[] = 259 { 260 &sm20_achieved_occupancy, 261 &sm20_branch_efficiency, 262 &sm21_inst_issued, 263 &sm20_inst_per_wrap, 264 &sm21_inst_replay_overhead, 265 &sm20_ipc, 266 &sm21_issued_ipc, 267 &sm21_issue_slots, 268 &sm21_issue_slot_utilization, 269 }; 270 271 /* ==== Compute capability 3.0 (GK104/GK106/GK107) ==== */ 272 static const struct nvc0_hw_metric_query_cfg 273 sm30_inst_issued = 274 { 275 .type = NVC0_HW_METRIC_QUERY_INST_ISSUED, 276 .queries[0] = _SM(INST_ISSUED1), 277 .queries[1] = _SM(INST_ISSUED2), 278 .num_queries = 2, 279 }; 280 281 static const struct nvc0_hw_metric_query_cfg 282 sm30_inst_replay_overhead = 283 { 284 .type = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD, 285 .queries[0] = _SM(INST_ISSUED1), 286 .queries[1] = _SM(INST_ISSUED2), 287 .queries[2] = _SM(INST_EXECUTED), 288 .num_queries = 3, 289 }; 290 291 static const struct nvc0_hw_metric_query_cfg 292 sm30_issued_ipc = 293 { 294 .type = NVC0_HW_METRIC_QUERY_ISSUED_IPC, 295 .queries[0] = _SM(INST_ISSUED1), 296 .queries[1] = _SM(INST_ISSUED2), 297 .queries[2] = _SM(ACTIVE_CYCLES), 298 .num_queries = 3, 299 }; 300 301 static const struct nvc0_hw_metric_query_cfg 302 sm30_issue_slots = 303 { 304 .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOTS, 305 .queries[0] = _SM(INST_ISSUED1), 306 .queries[1] = _SM(INST_ISSUED2), 307 .num_queries = 2, 308 }; 309 310 static const struct nvc0_hw_metric_query_cfg 311 sm30_issue_slot_utilization = 312 { 313 .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION, 314 .queries[0] = _SM(INST_ISSUED1), 315 .queries[1] = _SM(INST_ISSUED2), 316 .queries[2] = _SM(ACTIVE_CYCLES), 317 .num_queries = 3, 318 }; 319 320 static const struct nvc0_hw_metric_query_cfg 321 sm30_shared_replay_overhead = 322 { 323 .type = NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD, 324 .queries[0] = _SM(SHARED_LD_REPLAY), 325 .queries[1] = _SM(SHARED_ST_REPLAY), 326 .queries[2] = _SM(INST_EXECUTED), 327 .num_queries = 3, 328 }; 329 330 static const struct nvc0_hw_metric_query_cfg 331 sm30_warp_execution_efficiency = 332 { 333 .type = NVC0_HW_METRIC_QUERY_WARP_EXECUTION_EFFICIENCY, 334 .queries[0] = _SM(INST_EXECUTED), 335 .queries[1] = _SM(TH_INST_EXECUTED), 336 .num_queries = 2, 337 }; 338 339 static const struct nvc0_hw_metric_query_cfg *sm30_hw_metric_queries[] = 340 { 341 &sm20_achieved_occupancy, 342 &sm20_branch_efficiency, 343 &sm30_inst_issued, 344 &sm20_inst_per_wrap, 345 &sm30_inst_replay_overhead, 346 &sm20_ipc, 347 &sm30_issued_ipc, 348 &sm30_issue_slots, 349 &sm30_issue_slot_utilization, 350 &sm30_shared_replay_overhead, 351 &sm30_warp_execution_efficiency, 352 }; 353 354 /* ==== Compute capability 3.5 (GK110/GK208) ==== */ 355 static const struct nvc0_hw_metric_query_cfg 356 sm35_warp_nonpred_execution_efficiency = 357 { 358 .type = NVC0_HW_METRIC_QUERY_WARP_NONPRED_EXECUTION_EFFICIENCY, 359 .queries[0] = _SM(INST_EXECUTED), 360 .queries[1] = _SM(NOT_PRED_OFF_INST_EXECUTED), 361 .num_queries = 2, 362 }; 363 364 static const struct nvc0_hw_metric_query_cfg *sm35_hw_metric_queries[] = 365 { 366 &sm20_achieved_occupancy, 367 &sm30_inst_issued, 368 &sm20_inst_per_wrap, 369 &sm30_inst_replay_overhead, 370 &sm20_ipc, 371 &sm30_issued_ipc, 372 &sm30_issue_slots, 373 &sm30_issue_slot_utilization, 374 &sm30_shared_replay_overhead, 375 &sm30_warp_execution_efficiency, 376 &sm35_warp_nonpred_execution_efficiency, 377 }; 378 379 /* ==== Compute capability 5.0 (GM107/GM108) ==== */ 380 static const struct nvc0_hw_metric_query_cfg *sm50_hw_metric_queries[] = 381 { 382 &sm20_achieved_occupancy, 383 &sm20_branch_efficiency, 384 &sm30_inst_issued, 385 &sm20_inst_per_wrap, 386 &sm30_inst_replay_overhead, 387 &sm20_ipc, 388 &sm30_issued_ipc, 389 &sm30_issue_slots, 390 &sm30_issue_slot_utilization, 391 &sm30_warp_execution_efficiency, 392 &sm35_warp_nonpred_execution_efficiency, 393 }; 394 395 #undef _SM 396 397 static inline const struct nvc0_hw_metric_query_cfg ** 398 nvc0_hw_metric_get_queries(struct nvc0_screen *screen) 399 { 400 struct nouveau_device *dev = screen->base.device; 401 402 switch (screen->base.class_3d) { 403 case GM200_3D_CLASS: 404 case GM107_3D_CLASS: 405 return sm50_hw_metric_queries; 406 case NVF0_3D_CLASS: 407 return sm35_hw_metric_queries; 408 case NVE4_3D_CLASS: 409 return sm30_hw_metric_queries; 410 default: 411 if (dev->chipset == 0xc0 || dev->chipset == 0xc8) 412 return sm20_hw_metric_queries; 413 return sm21_hw_metric_queries; 414 } 415 assert(0); 416 return NULL; 417 } 418 419 unsigned 420 nvc0_hw_metric_get_num_queries(struct nvc0_screen *screen) 421 { 422 struct nouveau_device *dev = screen->base.device; 423 424 switch (screen->base.class_3d) { 425 case GM200_3D_CLASS: 426 case GM107_3D_CLASS: 427 return ARRAY_SIZE(sm50_hw_metric_queries); 428 case NVF0_3D_CLASS: 429 return ARRAY_SIZE(sm35_hw_metric_queries); 430 case NVE4_3D_CLASS: 431 return ARRAY_SIZE(sm30_hw_metric_queries); 432 default: 433 if (dev->chipset == 0xc0 || dev->chipset == 0xc8) 434 return ARRAY_SIZE(sm20_hw_metric_queries); 435 return ARRAY_SIZE(sm21_hw_metric_queries); 436 } 437 return 0; 438 } 439 440 static const struct nvc0_hw_metric_query_cfg * 441 nvc0_hw_metric_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) 442 { 443 const struct nvc0_hw_metric_query_cfg **queries; 444 struct nvc0_screen *screen = nvc0->screen; 445 struct nvc0_query *q = &hq->base; 446 unsigned num_queries; 447 unsigned i; 448 449 num_queries = nvc0_hw_metric_get_num_queries(screen); 450 queries = nvc0_hw_metric_get_queries(screen); 451 452 for (i = 0; i < num_queries; i++) { 453 if (NVC0_HW_METRIC_QUERY(queries[i]->type) == q->type) 454 return queries[i]; 455 } 456 assert(0); 457 return NULL; 458 } 459 460 static void 461 nvc0_hw_metric_destroy_query(struct nvc0_context *nvc0, 462 struct nvc0_hw_query *hq) 463 { 464 struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq); 465 unsigned i; 466 467 for (i = 0; i < hmq->num_queries; i++) 468 if (hmq->queries[i]->funcs->destroy_query) 469 hmq->queries[i]->funcs->destroy_query(nvc0, hmq->queries[i]); 470 FREE(hmq); 471 } 472 473 static boolean 474 nvc0_hw_metric_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) 475 { 476 struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq); 477 boolean ret = false; 478 unsigned i; 479 480 for (i = 0; i < hmq->num_queries; i++) { 481 ret = hmq->queries[i]->funcs->begin_query(nvc0, hmq->queries[i]); 482 if (!ret) 483 return ret; 484 } 485 return ret; 486 } 487 488 static void 489 nvc0_hw_metric_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) 490 { 491 struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq); 492 unsigned i; 493 494 for (i = 0; i < hmq->num_queries; i++) 495 hmq->queries[i]->funcs->end_query(nvc0, hmq->queries[i]); 496 } 497 498 static uint64_t 499 sm20_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8]) 500 { 501 switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) { 502 case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY: 503 /* ((active_warps / active_cycles) / max. number of warps on a MP) * 100 */ 504 if (res64[1]) 505 return ((res64[0] / (double)res64[1]) / 48) * 100; 506 break; 507 case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY: 508 /* (branch / (branch + divergent_branch)) * 100 */ 509 if (res64[0] + res64[1]) 510 return (res64[0] / (double)(res64[0] + res64[1])) * 100; 511 break; 512 case NVC0_HW_METRIC_QUERY_INST_PER_WRAP: 513 /* inst_executed / warps_launched */ 514 if (res64[1]) 515 return res64[0] / (double)res64[1]; 516 break; 517 case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD: 518 /* (inst_issued - inst_executed) / inst_executed */ 519 if (res64[1]) 520 return (res64[0] - res64[1]) / (double)res64[1]; 521 break; 522 case NVC0_HW_METRIC_QUERY_ISSUED_IPC: 523 /* inst_issued / active_cycles */ 524 if (res64[1]) 525 return res64[0] / (double)res64[1]; 526 break; 527 case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION: 528 /* ((inst_issued / 2) / active_cycles) * 100 */ 529 if (res64[1]) 530 return ((res64[0] / 2) / (double)res64[1]) * 100; 531 break; 532 case NVC0_HW_METRIC_QUERY_IPC: 533 /* inst_executed / active_cycles */ 534 if (res64[1]) 535 return res64[0] / (double)res64[1]; 536 break; 537 default: 538 debug_printf("invalid metric type: %d\n", 539 hq->base.type - NVC0_HW_METRIC_QUERY(0)); 540 break; 541 } 542 return 0; 543 } 544 545 static uint64_t 546 sm21_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8]) 547 { 548 switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) { 549 case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY: 550 return sm20_hw_metric_calc_result(hq, res64); 551 case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY: 552 return sm20_hw_metric_calc_result(hq, res64); 553 case NVC0_HW_METRIC_QUERY_INST_ISSUED: 554 /* issued1_0 + issued1_1 + (issued2_0 + issued2_1) * 2 */ 555 return res64[0] + res64[1] + (res64[2] + res64[3]) * 2; 556 break; 557 case NVC0_HW_METRIC_QUERY_INST_PER_WRAP: 558 return sm20_hw_metric_calc_result(hq, res64); 559 case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD: 560 /* (metric-inst_issued - inst_executed) / inst_executed */ 561 if (res64[4]) 562 return (((res64[0] + res64[1] + (res64[2] + res64[3]) * 2) - 563 res64[4]) / (double)res64[4]); 564 break; 565 case NVC0_HW_METRIC_QUERY_ISSUED_IPC: 566 /* metric-inst_issued / active_cycles */ 567 if (res64[4]) 568 return (res64[0] + res64[1] + (res64[2] + res64[3]) * 2) / 569 (double)res64[4]; 570 break; 571 case NVC0_HW_METRIC_QUERY_ISSUE_SLOTS: 572 /* issued1_0 + issued1_1 + issued2_0 + issued2_1 */ 573 return res64[0] + res64[1] + res64[2] + res64[3]; 574 break; 575 case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION: 576 /* ((metric-issue_slots / 2) / active_cycles) * 100 */ 577 if (res64[4]) 578 return (((res64[0] + res64[1] + res64[2] + res64[3]) / 2) / 579 (double)res64[4]) * 100; 580 break; 581 case NVC0_HW_METRIC_QUERY_IPC: 582 return sm20_hw_metric_calc_result(hq, res64); 583 default: 584 debug_printf("invalid metric type: %d\n", 585 hq->base.type - NVC0_HW_METRIC_QUERY(0)); 586 break; 587 } 588 return 0; 589 } 590 591 static uint64_t 592 sm30_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8]) 593 { 594 switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) { 595 case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY: 596 /* ((active_warps / active_cycles) / max. number of warps on a MP) * 100 */ 597 if (res64[1]) 598 return ((res64[0] / (double)res64[1]) / 64) * 100; 599 break; 600 case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY: 601 return sm20_hw_metric_calc_result(hq, res64); 602 case NVC0_HW_METRIC_QUERY_INST_ISSUED: 603 /* inst_issued1 + inst_issued2 * 2 */ 604 return res64[0] + res64[1] * 2; 605 case NVC0_HW_METRIC_QUERY_INST_PER_WRAP: 606 return sm20_hw_metric_calc_result(hq, res64); 607 case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD: 608 /* (metric-inst_issued - inst_executed) / inst_executed */ 609 if (res64[2]) 610 return (((res64[0] + res64[1] * 2) - res64[2]) / (double)res64[2]); 611 break; 612 case NVC0_HW_METRIC_QUERY_ISSUED_IPC: 613 /* metric-inst_issued / active_cycles */ 614 if (res64[2]) 615 return (res64[0] + res64[1] * 2) / (double)res64[2]; 616 break; 617 case NVC0_HW_METRIC_QUERY_ISSUE_SLOTS: 618 /* inst_issued1 + inst_issued2 */ 619 return res64[0] + res64[1]; 620 case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION: 621 /* ((metric-issue_slots / 2) / active_cycles) * 100 */ 622 if (res64[2]) 623 return (((res64[0] + res64[1]) / 2) / (double)res64[2]) * 100; 624 break; 625 case NVC0_HW_METRIC_QUERY_IPC: 626 return sm20_hw_metric_calc_result(hq, res64); 627 case NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD: 628 /* (shared_load_replay + shared_store_replay) / inst_executed */ 629 if (res64[2]) 630 return (res64[0] + res64[1]) / (double)res64[2]; 631 break; 632 case NVC0_HW_METRIC_QUERY_WARP_EXECUTION_EFFICIENCY: 633 /* thread_inst_executed / (inst_executed * max. number of threads per 634 * wrap) * 100 */ 635 if (res64[0]) 636 return (res64[1] / ((double)res64[0] * 32)) * 100; 637 break; 638 default: 639 debug_printf("invalid metric type: %d\n", 640 hq->base.type - NVC0_HW_METRIC_QUERY(0)); 641 break; 642 } 643 return 0; 644 } 645 646 static uint64_t 647 sm35_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8]) 648 { 649 switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) { 650 case NVC0_HW_METRIC_QUERY_WARP_NONPRED_EXECUTION_EFFICIENCY: 651 /* not_predicated_off_thread_inst_executed / (inst_executed * max. number 652 * of threads per wrap) * 100 */ 653 if (res64[0]) 654 return (res64[1] / ((double)res64[0] * 32)) * 100; 655 break; 656 default: 657 return sm30_hw_metric_calc_result(hq, res64); 658 } 659 return 0; 660 } 661 662 static boolean 663 nvc0_hw_metric_get_query_result(struct nvc0_context *nvc0, 664 struct nvc0_hw_query *hq, boolean wait, 665 union pipe_query_result *result) 666 { 667 struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq); 668 struct nvc0_screen *screen = nvc0->screen; 669 struct nouveau_device *dev = screen->base.device; 670 union pipe_query_result results[8] = {}; 671 uint64_t res64[8] = {}; 672 uint64_t value = 0; 673 boolean ret = false; 674 unsigned i; 675 676 for (i = 0; i < hmq->num_queries; i++) { 677 ret = hmq->queries[i]->funcs->get_query_result(nvc0, hmq->queries[i], 678 wait, &results[i]); 679 if (!ret) 680 return ret; 681 res64[i] = *(uint64_t *)&results[i]; 682 } 683 684 switch (screen->base.class_3d) { 685 case GM200_3D_CLASS: 686 case GM107_3D_CLASS: 687 case NVF0_3D_CLASS: 688 value = sm35_hw_metric_calc_result(hq, res64); 689 break; 690 case NVE4_3D_CLASS: 691 value = sm30_hw_metric_calc_result(hq, res64); 692 break; 693 default: 694 if (dev->chipset == 0xc0 || dev->chipset == 0xc8) 695 value = sm20_hw_metric_calc_result(hq, res64); 696 else 697 value = sm21_hw_metric_calc_result(hq, res64); 698 break; 699 } 700 701 *(uint64_t *)result = value; 702 return ret; 703 } 704 705 static const struct nvc0_hw_query_funcs hw_metric_query_funcs = { 706 .destroy_query = nvc0_hw_metric_destroy_query, 707 .begin_query = nvc0_hw_metric_begin_query, 708 .end_query = nvc0_hw_metric_end_query, 709 .get_query_result = nvc0_hw_metric_get_query_result, 710 }; 711 712 struct nvc0_hw_query * 713 nvc0_hw_metric_create_query(struct nvc0_context *nvc0, unsigned type) 714 { 715 const struct nvc0_hw_metric_query_cfg *cfg; 716 struct nvc0_hw_metric_query *hmq; 717 struct nvc0_hw_query *hq; 718 unsigned i; 719 720 if (type < NVC0_HW_METRIC_QUERY(0) || type > NVC0_HW_METRIC_QUERY_LAST) 721 return NULL; 722 723 hmq = CALLOC_STRUCT(nvc0_hw_metric_query); 724 if (!hmq) 725 return NULL; 726 727 hq = &hmq->base; 728 hq->funcs = &hw_metric_query_funcs; 729 hq->base.type = type; 730 731 cfg = nvc0_hw_metric_query_get_cfg(nvc0, hq); 732 733 for (i = 0; i < cfg->num_queries; i++) { 734 hmq->queries[i] = nvc0_hw_sm_create_query(nvc0, cfg->queries[i]); 735 if (!hmq->queries[i]) { 736 nvc0_hw_metric_destroy_query(nvc0, hq); 737 return NULL; 738 } 739 hmq->num_queries++; 740 } 741 742 return hq; 743 } 744 745 int 746 nvc0_hw_metric_get_driver_query_info(struct nvc0_screen *screen, unsigned id, 747 struct pipe_driver_query_info *info) 748 { 749 int count = 0; 750 751 if (screen->base.drm->version >= 0x01000101) { 752 if (screen->compute) 753 count = nvc0_hw_metric_get_num_queries(screen); 754 } 755 756 if (!info) 757 return count; 758 759 if (id < count) { 760 if (screen->compute) { 761 if (screen->base.class_3d <= GM200_3D_CLASS) { 762 const struct nvc0_hw_metric_query_cfg **queries = 763 nvc0_hw_metric_get_queries(screen); 764 const struct nvc0_hw_metric_cfg *cfg = 765 nvc0_hw_metric_get_cfg(queries[id]->type); 766 767 info->name = cfg->name; 768 info->query_type = NVC0_HW_METRIC_QUERY(queries[id]->type); 769 info->type = cfg->type; 770 info->group_id = NVC0_HW_METRIC_QUERY_GROUP; 771 return 1; 772 } 773 } 774 } 775 return 0; 776 } 777