1 /* 2 * Copyright 2013 Vadim Girlin <vadimgirlin (at) gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Vadim Girlin 25 */ 26 27 #define RA_DEBUG 0 28 29 #if RA_DEBUG 30 #define RA_DUMP(q) do { q } while (0) 31 #else 32 #define RA_DUMP(q) 33 #endif 34 35 #include "sb_shader.h" 36 #include "sb_pass.h" 37 38 namespace r600_sb { 39 40 int ra_coalesce::run() { 41 return sh.coal.run(); 42 } 43 44 void coalescer::add_edge(value* a, value* b, unsigned cost) { 45 assert(a->is_sgpr() && b->is_sgpr()); 46 edges.insert(new ra_edge(a,b, cost)); 47 } 48 49 void coalescer::create_chunk(value *v) { 50 51 assert(v->is_sgpr()); 52 53 ra_chunk *c = new ra_chunk(); 54 55 c->values.push_back(v); 56 57 if (v->is_chan_pinned()) 58 c->flags |= RCF_PIN_CHAN; 59 if (v->is_reg_pinned()) { 60 c->flags |= RCF_PIN_REG; 61 } 62 63 c->pin = v->pin_gpr; 64 65 RA_DUMP( 66 sblog << "create_chunk: "; 67 dump_chunk(c); 68 ); 69 70 all_chunks.push_back(c); 71 v->chunk = c; 72 73 } 74 75 void coalescer::unify_chunks(ra_edge *e) { 76 ra_chunk *c1 = e->a->chunk, *c2 = e->b->chunk; 77 78 RA_DUMP( 79 sblog << "unify_chunks: "; 80 dump_chunk(c1); 81 dump_chunk(c2); 82 ); 83 84 if (c2->is_chan_pinned() && !c1->is_chan_pinned()) { 85 c1->flags |= RCF_PIN_CHAN; 86 c1->pin = sel_chan(c1->pin.sel(), c2->pin.chan()); 87 } 88 89 if (c2->is_reg_pinned() && !c1->is_reg_pinned()) { 90 c1->flags |= RCF_PIN_REG; 91 c1->pin = sel_chan(c2->pin.sel(), c1->pin.chan()); 92 } 93 94 c1->values.reserve(c1->values.size() + c2->values.size()); 95 96 for (vvec::iterator I = c2->values.begin(), E = c2->values.end(); I != E; 97 ++I) { 98 (*I)->chunk = c1; 99 c1->values.push_back(*I); 100 } 101 102 chunk_vec::iterator F = std::find(all_chunks.begin(), all_chunks.end(), c2); 103 assert(F != all_chunks.end()); 104 105 all_chunks.erase(F); 106 107 c1->cost += c2->cost + e->cost; 108 delete c2; 109 } 110 111 bool coalescer::chunks_interference(ra_chunk *c1, ra_chunk *c2) { 112 unsigned pin_flags = (c1->flags & c2->flags) & 113 (RCF_PIN_CHAN | RCF_PIN_REG); 114 115 if ((pin_flags & RCF_PIN_CHAN) && 116 c1->pin.chan() != c2->pin.chan()) 117 return true; 118 119 if ((pin_flags & RCF_PIN_REG) && 120 c1->pin.sel() != c2->pin.sel()) 121 return true; 122 123 for (vvec::iterator I = c1->values.begin(), E = c1->values.end(); I != E; 124 ++I) { 125 value *v1 = *I; 126 127 for (vvec::iterator I = c2->values.begin(), E = c2->values.end(); I != E; 128 ++I) { 129 value *v2 = *I; 130 131 if (!v1->v_equal(v2) && v1->interferences.contains(v2)) 132 return true; 133 } 134 } 135 return false; 136 } 137 138 void coalescer::build_chunks() { 139 140 for (edge_queue::iterator I = edges.begin(), E = edges.end(); 141 I != E; ++I) { 142 143 ra_edge *e = *I; 144 145 if (!e->a->chunk) 146 create_chunk(e->a); 147 148 if (!e->b->chunk) 149 create_chunk(e->b); 150 151 ra_chunk *c1 = e->a->chunk, *c2 = e->b->chunk; 152 153 if (c1 == c2) { 154 c1->cost += e->cost; 155 } else if (!chunks_interference(c1, c2)) 156 unify_chunks(e); 157 } 158 } 159 160 ra_constraint* coalescer::create_constraint(constraint_kind kind) { 161 ra_constraint *c = new ra_constraint(kind); 162 all_constraints.push_back(c); 163 return c; 164 } 165 166 void coalescer::dump_edges() { 167 sblog << "######## affinity edges\n"; 168 169 for (edge_queue::iterator I = edges.begin(), E = edges.end(); 170 I != E; ++I) { 171 ra_edge* e = *I; 172 sblog << " ra_edge "; 173 dump::dump_val(e->a); 174 sblog << " <-> "; 175 dump::dump_val(e->b); 176 sblog << " cost = " << e->cost << "\n"; 177 } 178 } 179 180 void coalescer::dump_chunks() { 181 sblog << "######## chunks\n"; 182 183 for (chunk_vec::iterator I = all_chunks.begin(), E = all_chunks.end(); 184 I != E; ++I) { 185 ra_chunk* c = *I; 186 dump_chunk(c); 187 } 188 } 189 190 191 void coalescer::dump_constraint_queue() { 192 sblog << "######## constraints\n"; 193 194 for (constraint_queue::iterator I = constraints.begin(), 195 E = constraints.end(); I != E; ++I) { 196 ra_constraint* c = *I; 197 dump_constraint(c); 198 } 199 } 200 201 void coalescer::dump_chunk(ra_chunk* c) { 202 sblog << " ra_chunk cost = " << c->cost << " : "; 203 dump::dump_vec(c->values); 204 205 if (c->flags & RCF_PIN_REG) 206 sblog << " REG = " << c->pin.sel(); 207 208 if (c->flags & RCF_PIN_CHAN) 209 sblog << " CHAN = " << c->pin.chan(); 210 211 sblog << (c->flags & RCF_GLOBAL ? " GLOBAL" : ""); 212 213 sblog << "\n"; 214 } 215 216 void coalescer::dump_constraint(ra_constraint* c) { 217 sblog << " ra_constraint: "; 218 switch (c->kind) { 219 case CK_PACKED_BS: sblog << "PACKED_BS"; break; 220 case CK_PHI: sblog << "PHI"; break; 221 case CK_SAME_REG: sblog << "SAME_REG"; break; 222 default: sblog << "UNKNOWN_KIND"; assert(0); break; 223 } 224 225 sblog << " cost = " << c->cost << " : "; 226 dump::dump_vec(c->values); 227 228 sblog << "\n"; 229 } 230 231 void coalescer::get_chunk_interferences(ra_chunk *c, val_set &s) { 232 233 for (vvec::iterator I = c->values.begin(), E = c->values.end(); I != E; 234 ++I) { 235 value *v = *I; 236 s.add_set(v->interferences); 237 } 238 s.remove_vec(c->values); 239 } 240 241 void coalescer::build_chunk_queue() { 242 for (chunk_vec::iterator I = all_chunks.begin(), 243 E = all_chunks.end(); I != E; ++I) { 244 ra_chunk *c = *I; 245 246 if (!c->is_fixed()) 247 chunks.insert(c); 248 } 249 } 250 251 void coalescer::build_constraint_queue() { 252 for (constraint_vec::iterator I = all_constraints.begin(), 253 E = all_constraints.end(); I != E; ++I) { 254 ra_constraint *c = *I; 255 unsigned cost = 0; 256 257 if (c->values.empty() || !c->values.front()->is_sgpr()) 258 continue; 259 260 if (c->kind != CK_SAME_REG) 261 continue; 262 263 for (vvec::iterator I = c->values.begin(), E = c->values.end(); 264 I != E; ++I) { 265 value *v = *I; 266 if (!v->chunk) 267 create_chunk(v); 268 else 269 cost += v->chunk->cost; 270 } 271 c->cost = cost; 272 constraints.insert(c); 273 } 274 } 275 276 void coalescer::color_chunks() { 277 278 for (chunk_queue::iterator I = chunks.begin(), E = chunks.end(); 279 I != E; ++I) { 280 ra_chunk *c = *I; 281 if (c->is_fixed() || c->values.size() == 1) 282 continue; 283 284 sb_bitset rb; 285 val_set interf; 286 287 get_chunk_interferences(c, interf); 288 289 RA_DUMP( 290 sblog << "color_chunks: "; 291 dump_chunk(c); 292 sblog << "\n interferences: "; 293 dump::dump_set(sh,interf); 294 sblog << "\n"; 295 ); 296 297 init_reg_bitset(rb, interf); 298 299 unsigned pass = c->is_reg_pinned() ? 0 : 1; 300 301 unsigned cs = c->is_chan_pinned() ? c->pin.chan() : 0; 302 unsigned ce = c->is_chan_pinned() ? cs + 1 : 4; 303 304 unsigned color = 0; 305 306 while (pass < 2) { 307 308 unsigned rs, re; 309 310 if (pass == 0) { 311 rs = c->pin.sel(); 312 re = rs + 1; 313 } else { 314 rs = 0; 315 re = sh.num_nontemp_gpr(); 316 } 317 318 for (unsigned reg = rs; reg < re; ++reg) { 319 for (unsigned chan = cs; chan < ce; ++chan) { 320 unsigned bit = sel_chan(reg, chan); 321 if (bit >= rb.size() || !rb.get(bit)) { 322 color = bit; 323 break; 324 } 325 } 326 if (color) 327 break; 328 } 329 330 if (color) 331 break; 332 333 ++pass; 334 } 335 336 assert(color); 337 color_chunk(c, color); 338 } 339 } 340 341 void coalescer::init_reg_bitset(sb_bitset &bs, val_set &vs) { 342 343 for (val_set::iterator I = vs.begin(sh), E = vs.end(sh); I != E; ++I) { 344 value *v = *I; 345 346 if (!v->is_any_gpr()) 347 continue; 348 349 unsigned gpr = v->get_final_gpr(); 350 if (!gpr) 351 continue; 352 353 if (gpr) { 354 if (gpr >= bs.size()) 355 bs.resize(gpr + 64); 356 bs.set(gpr, 1); 357 } 358 } 359 } 360 361 void coalescer::color_chunk(ra_chunk *c, sel_chan color) { 362 363 vvec vv = c->values; 364 365 for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; 366 ++I) { 367 value *v = *I; 368 369 if (v->is_reg_pinned() && v->pin_gpr.sel() != color.sel()) { 370 detach_value(v); 371 continue; 372 } 373 374 if (v->is_chan_pinned() && v->pin_gpr.chan() != color.chan()) { 375 detach_value(v); 376 continue; 377 } 378 379 v->gpr = color; 380 381 if (v->constraint && v->constraint->kind == CK_PHI) 382 v->fix(); 383 384 385 RA_DUMP( 386 sblog << " assigned " << color << " to "; 387 dump::dump_val(v); 388 sblog << "\n"; 389 ); 390 } 391 392 c->pin = color; 393 394 if (c->is_reg_pinned()) { 395 c->fix(); 396 } 397 } 398 399 coalescer::~coalescer() { 400 401 // FIXME use pool allocator ?? 402 403 for (constraint_vec::iterator I = all_constraints.begin(), 404 E = all_constraints.end(); I != E; ++I) { 405 delete (*I); 406 } 407 408 for (chunk_vec::iterator I = all_chunks.begin(), 409 E = all_chunks.end(); I != E; ++I) { 410 delete (*I); 411 } 412 413 for (edge_queue::iterator I = edges.begin(), E = edges.end(); 414 I != E; ++I) { 415 delete (*I); 416 } 417 } 418 419 int coalescer::run() { 420 int r; 421 422 RA_DUMP( dump_edges(); ); 423 424 build_chunks(); 425 RA_DUMP( dump_chunks(); ); 426 427 build_constraint_queue(); 428 RA_DUMP( dump_constraint_queue(); ); 429 430 if ((r = color_constraints())) 431 return r; 432 433 build_chunk_queue(); 434 color_chunks(); 435 436 return 0; 437 } 438 439 void coalescer::color_phi_constraint(ra_constraint* c) { 440 } 441 442 ra_chunk* coalescer::detach_value(value *v) { 443 444 vvec::iterator F = std::find(v->chunk->values.begin(), 445 v->chunk->values.end(), v); 446 447 assert(F != v->chunk->values.end()); 448 v->chunk->values.erase(F); 449 create_chunk(v); 450 451 if (v->is_reg_pinned()) { 452 v->chunk->fix(); 453 } 454 455 RA_DUMP( 456 sblog << " detached : "; 457 dump_chunk(v->chunk); 458 ); 459 460 return v->chunk; 461 462 } 463 464 int coalescer::color_reg_constraint(ra_constraint *c) { 465 unsigned k, cnt = c->values.size(); 466 vvec & cv = c->values; 467 468 ra_chunk *ch[4]; 469 unsigned swz[4] = {0, 1, 2, 3}; 470 val_set interf[4]; 471 sb_bitset rb[4]; 472 473 bool reg_pinned = false; 474 unsigned pin_reg = ~0; 475 476 unsigned chan_mask = 0; 477 478 k = 0; 479 for (vvec::iterator I = cv.begin(), E = cv.end(); I != E; ++I, ++k) { 480 value *v = *I; 481 482 if (!v->chunk) 483 create_chunk(v); 484 485 ch[k] = v->chunk; 486 487 if (v->chunk->is_chan_pinned()) { 488 unsigned chan = 1 << v->chunk->pin.chan(); 489 490 if (chan & chan_mask) { // channel already in use 491 ch[k] = detach_value(v); 492 assert(!ch[k]->is_chan_pinned()); 493 } else { 494 chan_mask |= chan; 495 } 496 } 497 498 if (v->chunk->is_reg_pinned()) { 499 if (!reg_pinned) { 500 reg_pinned = true; 501 pin_reg = v->chunk->pin.sel(); 502 } 503 } 504 505 get_chunk_interferences(ch[k], interf[k]); 506 init_reg_bitset(rb[k], interf[k]); 507 } 508 509 unsigned start_reg, end_reg; 510 511 start_reg = 0; 512 end_reg = sh.num_nontemp_gpr(); 513 514 unsigned min_reg = end_reg; 515 unsigned min_swz[4]; 516 unsigned i, pass = reg_pinned ? 0 : 1; 517 518 bool done = false; 519 520 while (pass < 2) { 521 522 unsigned rs, re; 523 524 if (pass == 0) { 525 re = pin_reg + 1; 526 rs = pin_reg; 527 } else { 528 re = end_reg; 529 rs = start_reg; 530 } 531 532 min_reg = re; 533 534 // cycle on swizzle combinations 535 do { 536 for (i = 0; i < cnt; ++i) { 537 if (ch[i]->flags & RCF_PIN_CHAN) 538 if (ch[i]->pin.chan() != swz[i]) 539 break; 540 } 541 if (i != cnt) 542 continue; 543 544 // looking for minimal reg number such that the constrained chunks 545 // may be colored with the current swizzle combination 546 for (unsigned reg = rs; reg < min_reg; ++reg) { 547 for (i = 0; i < cnt; ++i) { 548 unsigned bit = sel_chan(reg, swz[i]); 549 if (bit < rb[i].size() && rb[i].get(bit)) 550 break; 551 } 552 if (i == cnt) { 553 done = true; 554 min_reg = reg; 555 std::copy(swz, swz + 4, min_swz); 556 break; 557 } 558 } 559 560 if (pass == 0 && done) 561 break; 562 563 } while (std::next_permutation(swz, swz + 4)); 564 565 if (!done && pass) { 566 sblog << "sb: ra_coalesce - out of registers\n"; 567 return -1; 568 } 569 570 if (pass == 0 && done) 571 break; 572 573 ++pass; 574 }; 575 576 assert(done); 577 578 RA_DUMP( 579 sblog << "min reg = " << min_reg << " min_swz = " 580 << min_swz[0] << min_swz[1] << min_swz[2] << min_swz[3] << "\n"; 581 ); 582 583 for (i = 0; i < cnt; ++i) { 584 sel_chan color(min_reg, min_swz[i]); 585 ra_chunk *cc = ch[i]; 586 587 if (cc->is_fixed()) { 588 if (cc->pin != color) 589 cc = detach_value(cv[i]); 590 else 591 continue; 592 } 593 594 color_chunk(cc, color); 595 cc->fix(); 596 cc->set_prealloc(); 597 } 598 599 return 0; 600 } 601 602 int coalescer::color_constraints() { 603 int r; 604 605 for (constraint_queue::iterator I = constraints.begin(), 606 E = constraints.end(); I != E; ++I) { 607 608 ra_constraint *c = *I; 609 610 RA_DUMP( 611 sblog << "color_constraints: "; 612 dump_constraint(c); 613 ); 614 615 if (c->kind == CK_SAME_REG) { 616 if ((r = color_reg_constraint(c))) 617 return r; 618 } else if (c->kind == CK_PHI) 619 color_phi_constraint(c); 620 } 621 return 0; 622 } 623 624 } // namespace r600_sb 625