/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 2012-2013 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Chia-I Wu <olv@lunarg.com>
 */

#include <stdlib.h> /* for qsort() */
#include <string.h> /* for memmove() */
#include "toy_compiler.h"
#include "toy_legalize.h"

/**
 * Live interval of a VRF register.
 */
struct linear_scan_live_interval {
   int vrf;
   int startpoint;
   int endpoint;

   /*
    * should this interval be assigned a register consecutive to the
    * previous interval's register?
    */
   bool consecutive;

   int reg;

   struct list_head list;
};

/**
 * Linear scan state.
 */
struct linear_scan {
   struct linear_scan_live_interval *intervals;
   int max_vrf, num_vrfs;

   int num_regs;

   struct list_head active_list;
   int *free_regs;
   int num_free_regs;

   int *vrf_mapping;
};

/**
 * Return a chunk of registers to the free register pool.
 */
static void
linear_scan_free_regs(struct linear_scan *ls, int reg, int count)
{
   int i;

   for (i = 0; i < count; i++)
      ls->free_regs[ls->num_free_regs++] = reg + count - 1 - i;
}

static int
linear_scan_compare_regs(const void *elem1, const void *elem2)
{
   const int *reg1 = elem1;
   const int *reg2 = elem2;

   /* in descending order */
   return (*reg2 - *reg1);
}
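/*
 * Illustrative note (not part of the original code): the pool is kept in
 * descending order so that a run of consecutive register numbers shows up
 * as a strictly increasing sequence when walking from the tail toward the
 * head.  For example, with the sorted pool
 *
 *    free_regs[] = { 4, 3, 2, 1, 0 }     (num_free_regs = 5)
 *
 * a request for count = 2 scans from the tail, sees that
 * free_regs[3] == free_regs[4] + 1, and returns reg = 0, shrinking the
 * pool to { 4, 3, 2 }.  A single-register request simply pops the tail,
 * which is also why low-numbered registers are handed out first.
 */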
/**
 * Allocate a chunk of consecutive registers from the free register pool.
 */
static int
linear_scan_allocate_regs(struct linear_scan *ls, int count)
{
   bool sorted = false;
   int reg;

   /* simple cases */
   if (count > ls->num_free_regs)
      return -1;
   else if (count == 1)
      return ls->free_regs[--ls->num_free_regs];

   /* TODO a free register pool */
   /* TODO reserve some regs for spilling */
   while (true) {
      bool found = false;
      int start;

      /*
       * find a chunk of registers that have consecutive register
       * numbers
       */
      for (start = ls->num_free_regs - 1; start >= count - 1; start--) {
         int i;

         for (i = 1; i < count; i++) {
            if (ls->free_regs[start - i] != ls->free_regs[start] + i)
               break;
         }

         if (i >= count) {
            found = true;
            break;
         }
      }

      if (found) {
         reg = ls->free_regs[start];

         if (start != ls->num_free_regs - 1) {
            start++;
            memmove(&ls->free_regs[start - count],
                    &ls->free_regs[start],
                    sizeof(*ls->free_regs) * (ls->num_free_regs - start));
         }
         ls->num_free_regs -= count;
         break;
      }
      else if (!sorted) {
         /* sort and retry */
         qsort(ls->free_regs, ls->num_free_regs, sizeof(*ls->free_regs),
               linear_scan_compare_regs);
         sorted = true;
      }
      else {
         /* failed */
         reg = -1;
         break;
      }
   }

   return reg;
}

/**
 * Add an interval to the active list.
 */
static void
linear_scan_add_active(struct linear_scan *ls,
                       struct linear_scan_live_interval *interval)
{
   struct linear_scan_live_interval *pos;

   /* keep the active list sorted by endpoint */
   LIST_FOR_EACH_ENTRY(pos, &ls->active_list, list) {
      if (pos->endpoint >= interval->endpoint)
         break;
   }

   list_addtail(&interval->list, &pos->list);
}

/**
 * Remove an interval from the active list.
 */
static void
linear_scan_remove_active(struct linear_scan *ls,
                          struct linear_scan_live_interval *interval)
{
   list_del(&interval->list);
}

/**
 * Remove intervals that are no longer active from the active list.
 */
static void
linear_scan_expire_active(struct linear_scan *ls, int pc)
{
   struct linear_scan_live_interval *interval, *next;

   LIST_FOR_EACH_ENTRY_SAFE(interval, next, &ls->active_list, list) {
      /*
       * since the active list is sorted by endpoint, this and the rest
       * of the intervals are still active
       */
      if (interval->endpoint >= pc)
         break;

      linear_scan_remove_active(ls, interval);

      /* recycle the reg */
      linear_scan_free_regs(ls, interval->reg, 1);
   }
}

/**
 * Spill an interval.
 */
static void
linear_scan_spill(struct linear_scan *ls,
                  struct linear_scan_live_interval *interval,
                  bool is_active)
{
   assert(!"no spilling support");
}

/**
 * Spill a range of intervals.
 */
static void
linear_scan_spill_range(struct linear_scan *ls, int first, int count)
{
   int i;

   for (i = 0; i < count; i++) {
      struct linear_scan_live_interval *interval = &ls->intervals[first + i];

      linear_scan_spill(ls, interval, false);
   }
}
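/*
 * A sketch of the control flow below, which roughly follows the classic
 * linear scan algorithm (Poletto & Sarkar): intervals are visited in
 * order of increasing startpoint, and for each one we
 *
 *    1. try to allocate a free register (a consecutive chunk when a
 *       SEND-style instruction writes multiple registers),
 *    2. on failure, expire every active interval that ends before this
 *       one starts and retry, and
 *    3. on a second failure, spill either the new interval or the active
 *       interval that ends last.
 *
 * Since linear_scan_spill() only asserts, step 3 currently aborts rather
 * than spilling.
 */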
/**
 * Perform linear scan to allocate registers for the intervals.
 */
static bool
linear_scan_run(struct linear_scan *ls)
{
   int i;

   i = 0;
   while (i < ls->num_vrfs) {
      struct linear_scan_live_interval *first = &ls->intervals[i];
      int reg, count;

      /*
       * GEN6_OPCODE_SEND may write to multiple consecutive registers and
       * we need to support that
       */
      for (count = 1; i + count < ls->num_vrfs; count++) {
         const struct linear_scan_live_interval *interval =
            &ls->intervals[i + count];

         if (interval->startpoint != first->startpoint ||
             !interval->consecutive)
            break;
      }

      reg = linear_scan_allocate_regs(ls, count);

      /* expire intervals that are no longer active and try again */
      if (reg < 0) {
         linear_scan_expire_active(ls, first->startpoint);
         reg = linear_scan_allocate_regs(ls, count);
      }

      /* have to spill some intervals */
      if (reg < 0) {
         struct linear_scan_live_interval *last_active =
            container_of(ls->active_list.prev,
                  (struct linear_scan_live_interval *) NULL, list);

         /* heuristically spill the interval that ends last */
         if (count > 1 || last_active->endpoint < first->endpoint) {
            linear_scan_spill_range(ls, i, count);
            i += count;
            continue;
         }

         /* make some room for the new interval */
         linear_scan_spill(ls, last_active, true);
         reg = linear_scan_allocate_regs(ls, count);
         if (reg < 0) {
            assert(!"failed to spill any register");
            return false;
         }
      }

      while (count--) {
         struct linear_scan_live_interval *interval = &ls->intervals[i++];

         interval->reg = reg++;
         linear_scan_add_active(ls, interval);

         ls->vrf_mapping[interval->vrf] = interval->reg;

         /*
          * this should and must be the case because of how we
          * initialized the intervals
          */
         assert(interval->vrf - first->vrf == interval->reg - first->reg);
      }
   }

   return true;
}

/**
 * Add a new interval.
 */
static void
linear_scan_add_live_interval(struct linear_scan *ls, int vrf, int pc)
{
   if (ls->intervals[vrf].vrf)
      return;

   ls->intervals[vrf].vrf = vrf;
   ls->intervals[vrf].startpoint = pc;

   ls->num_vrfs++;
   if (vrf > ls->max_vrf)
      ls->max_vrf = vrf;
}
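/*
 * A hypothetical example of the loop handling below: if the first
 * instruction of an outermost DO/WHILE body is at pc 10 and the matching
 * GEN6_OPCODE_WHILE ends up at pc 20, every VRF register accessed at
 * 10 <= pc <= 20 receives startpoint 10 and endpoint 20, i.e. it is
 * considered live across the entire loop no matter where inside the loop
 * it is actually read or written.
 */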
/**
 * Perform (oversimplified?) live variable analysis.
 */
static void
linear_scan_init_live_intervals(struct linear_scan *ls,
                                struct toy_compiler *tc)
{
   const struct toy_inst *inst;
   int pc, do_pc, while_pc;

   pc = 0;
   do_pc = -1;
   while_pc = -1;

   tc_head(tc);
   while ((inst = tc_next_no_skip(tc)) != NULL) {
      const int startpoint = (pc <= while_pc) ? do_pc : pc;
      const int endpoint = (pc <= while_pc) ? while_pc : pc;
      int vrf, i;

      /*
       * assume all registers used in this outermost loop are live
       * throughout the whole loop
       */
      if (inst->marker) {
         if (pc > while_pc) {
            struct toy_inst *inst2;
            int loop_level = 1;

            assert(inst->opcode == TOY_OPCODE_DO);
            do_pc = pc;
            while_pc = pc + 1;

            /* find the matching GEN6_OPCODE_WHILE */
            LIST_FOR_EACH_ENTRY_FROM(inst2, tc->iter_next,
                  &tc->instructions, list) {
               if (inst2->marker) {
                  assert(inst2->opcode == TOY_OPCODE_DO);
                  loop_level++;
                  continue;
               }

               if (inst2->opcode == GEN6_OPCODE_WHILE) {
                  loop_level--;
                  if (!loop_level)
                     break;
               }
               while_pc++;
            }
         }

         continue;
      }

      if (inst->dst.file == TOY_FILE_VRF) {
         int num_dst;

         /* TODO this is a hack */
         if (inst->opcode == GEN6_OPCODE_SEND ||
             inst->opcode == GEN6_OPCODE_SENDC) {
            const uint32_t mdesc = inst->src[1].val32;
            int response_length = (mdesc >> 20) & 0x1f;

            num_dst = response_length;
            if (num_dst > 1 && inst->exec_size == GEN6_EXECSIZE_16)
               num_dst /= 2;
         }
         else {
            num_dst = 1;
         }

         vrf = inst->dst.val32 / TOY_REG_WIDTH;

         for (i = 0; i < num_dst; i++) {
            /* first use */
            if (!ls->intervals[vrf].vrf)
               linear_scan_add_live_interval(ls, vrf, startpoint);

            ls->intervals[vrf].endpoint = endpoint;
            ls->intervals[vrf].consecutive = (i > 0);

            vrf++;
         }
      }

      for (i = 0; i < ARRAY_SIZE(inst->src); i++) {
         if (inst->src[i].file != TOY_FILE_VRF)
            continue;

         vrf = inst->src[i].val32 / TOY_REG_WIDTH;

         /* first use */
         if (!ls->intervals[vrf].vrf)
            linear_scan_add_live_interval(ls, vrf, startpoint);

         ls->intervals[vrf].endpoint = endpoint;
      }

      pc++;
   }
}

/**
 * Clean up after performing linear scan.
 */
static void
linear_scan_cleanup(struct linear_scan *ls)
{
   FREE(ls->vrf_mapping);
   FREE(ls->intervals);
   FREE(ls->free_regs);
}

static int
linear_scan_compare_live_intervals(const void *elem1, const void *elem2)
{
   const struct linear_scan_live_interval *interval1 = elem1;
   const struct linear_scan_live_interval *interval2 = elem2;

   /* make unused elements appear at the end */
   if (!interval1->vrf)
      return 1;
   else if (!interval2->vrf)
      return -1;

   /* sort by startpoint first, then by vrf */
   if (interval1->startpoint != interval2->startpoint)
      return (interval1->startpoint - interval2->startpoint);
   else
      return (interval1->vrf - interval2->vrf);
}
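/*
 * A small hypothetical example of the resulting order: intervals with
 * (vrf, startpoint) = (3, 5), (1, 2), and (2, 2) sort to (1, 2), (2, 2),
 * (3, 5); the tie on startpoint falls back to vrf order, which keeps
 * intervals marked consecutive adjacent, and zeroed (unused) slots sink
 * to the end of the array.
 */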
/**
 * Prepare for linear scan.
 */
static bool
linear_scan_init(struct linear_scan *ls, int num_regs,
                 struct toy_compiler *tc)
{
   int num_intervals, i;

   memset(ls, 0, sizeof(*ls));

   /* this may be much larger than ls->num_vrfs... */
   num_intervals = tc->next_vrf;
   ls->intervals = CALLOC(num_intervals, sizeof(ls->intervals[0]));
   if (!ls->intervals)
      return false;

   linear_scan_init_live_intervals(ls, tc);
   /* sort intervals by startpoint */
   qsort(ls->intervals, num_intervals, sizeof(*ls->intervals),
         linear_scan_compare_live_intervals);

   ls->num_regs = num_regs;
   ls->num_free_regs = num_regs;

   ls->free_regs = MALLOC(ls->num_regs * sizeof(*ls->free_regs));
   if (!ls->free_regs) {
      FREE(ls->intervals);
      return false;
   }

   /* add in reverse order as we will allocate from the tail */
   for (i = 0; i < ls->num_regs; i++)
      ls->free_regs[i] = num_regs - i - 1;

   list_inithead(&ls->active_list);

   ls->vrf_mapping = CALLOC(ls->max_vrf + 1, sizeof(*ls->vrf_mapping));
   if (!ls->vrf_mapping) {
      FREE(ls->intervals);
      FREE(ls->free_regs);
      return false;
   }

   return true;
}

/**
 * Allocate registers with linear scan.
 */
static void
linear_scan_allocation(struct toy_compiler *tc,
                       int start_grf, int end_grf,
                       int num_grf_per_vrf)
{
   const int num_grfs = end_grf - start_grf + 1;
   struct linear_scan ls;
   struct toy_inst *inst;

   if (!linear_scan_init(&ls, num_grfs / num_grf_per_vrf, tc))
      return;

   if (!linear_scan_run(&ls)) {
      tc_fail(tc, "failed to allocate registers");
      linear_scan_cleanup(&ls);
      return;
   }

   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      int i;

      if (inst->dst.file == TOY_FILE_VRF) {
         const uint32_t val32 = inst->dst.val32;
         int reg = val32 / TOY_REG_WIDTH;
         int subreg = val32 % TOY_REG_WIDTH;

         /* map to GRF */
         reg = ls.vrf_mapping[reg] * num_grf_per_vrf + start_grf;

         inst->dst.file = TOY_FILE_GRF;
         inst->dst.val32 = reg * TOY_REG_WIDTH + subreg;
      }

      for (i = 0; i < ARRAY_SIZE(inst->src); i++) {
         const uint32_t val32 = inst->src[i].val32;
         int reg, subreg;

         if (inst->src[i].file != TOY_FILE_VRF)
            continue;

         reg = val32 / TOY_REG_WIDTH;
         subreg = val32 % TOY_REG_WIDTH;

         /* map to GRF */
         reg = ls.vrf_mapping[reg] * num_grf_per_vrf + start_grf;

         inst->src[i].file = TOY_FILE_GRF;
         inst->src[i].val32 = reg * TOY_REG_WIDTH + subreg;
      }
   }

   linear_scan_cleanup(&ls);
}
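/*
 * Address rewriting example with hypothetical values: given start_grf = 2,
 * num_grf_per_vrf = 1, and ls.vrf_mapping[7] = 3, a VRF operand with
 * val32 = 7 * TOY_REG_WIDTH + 4 (register 7, byte offset 4) is rewritten
 * to a GRF operand with val32 = 5 * TOY_REG_WIDTH + 4, since
 * 3 * 1 + 2 = 5; the subregister byte offset is carried over unchanged.
 */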
/**
 * Trivially allocate registers.
 */
static void
trivial_allocation(struct toy_compiler *tc,
                   int start_grf, int end_grf,
                   int num_grf_per_vrf)
{
   struct toy_inst *inst;
   int max_grf = -1;

   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      int i;

      if (inst->dst.file == TOY_FILE_VRF) {
         const uint32_t val32 = inst->dst.val32;
         int reg = val32 / TOY_REG_WIDTH;
         int subreg = val32 % TOY_REG_WIDTH;

         /* map to GRF; VRF numbers start from 1, hence the -1 */
         reg = reg * num_grf_per_vrf + start_grf - 1;

         inst->dst.file = TOY_FILE_GRF;
         inst->dst.val32 = reg * TOY_REG_WIDTH + subreg;

         if (reg > max_grf)
            max_grf = reg;
      }

      for (i = 0; i < ARRAY_SIZE(inst->src); i++) {
         const uint32_t val32 = inst->src[i].val32;
         int reg, subreg;

         if (inst->src[i].file != TOY_FILE_VRF)
            continue;

         reg = val32 / TOY_REG_WIDTH;
         subreg = val32 % TOY_REG_WIDTH;

         /* map to GRF; VRF numbers start from 1, hence the -1 */
         reg = reg * num_grf_per_vrf + start_grf - 1;

         inst->src[i].file = TOY_FILE_GRF;
         inst->src[i].val32 = reg * TOY_REG_WIDTH + subreg;

         if (reg > max_grf)
            max_grf = reg;
      }
   }

   if (max_grf + num_grf_per_vrf - 1 > end_grf)
      tc_fail(tc, "failed to allocate registers");
}

/**
 * Allocate GRF registers to VRF registers.
 */
void
toy_compiler_allocate_registers(struct toy_compiler *tc,
                                int start_grf, int end_grf,
                                int num_grf_per_vrf)
{
   if (true)
      linear_scan_allocation(tc, start_grf, end_grf, num_grf_per_vrf);
   else
      trivial_allocation(tc, start_grf, end_grf, num_grf_per_vrf);
}