/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */

/*
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */


#include "util/u_math.h"

#include "ir3.h"

/*
 * Instruction Scheduling:
 *
 * A recursive depth based scheduling algo.  Recursively find an eligible
 * instruction to schedule from the deepest instruction (recursing through
 * its unscheduled src instructions).  Normally this would result in a
 * lot of re-traversal of the same instructions, so we cache results in
 * instr->data (and clear cached results that would be no longer valid
 * after scheduling an instruction).
 *
 * There are a few special cases that need to be handled, since sched
 * is currently independent of register allocation.  Usages of address
 * register (a0.x) or predicate register (p0.x) must be serialized.  Ie.
 * if you have two pairs of instructions that write the same special
 * register and then read it, then those pairs cannot be interleaved.
 * To solve this, when we are in such a scheduling "critical section",
 * and we encounter a conflicting write to a special register, we try
 * to schedule any remaining instructions that use that value first.
 */
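/* To illustrate the serialization constraint, a hypothetical hand-written
 * pseudo-asm sequence (not compiler output) with two a0.x write/use pairs:
 *
 *    mov a0.x, c0.x       ; pair one: write a0.x ...
 *    mov r1.y, r<a0.x>    ; ... and consume it
 *    mov a0.x, c1.x       ; pair two must come entirely after pair one
 *    mov r2.y, r<a0.x>
 *
 * Scheduling the second a0.x write in between the first pair would clobber
 * the value the first read still needs, so once a write is in flight we
 * prioritize scheduling its remaining readers.
 */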
struct ir3_sched_ctx {
	struct ir3_block *block;           /* the current block */
	struct list_head depth_list;       /* depth sorted unscheduled instrs */
	struct ir3_instruction *scheduled; /* last scheduled instr XXX remove*/
	struct ir3_instruction *addr;      /* current a0.x user, if any */
	struct ir3_instruction *pred;      /* current p0.x user, if any */
	bool error;
};

static bool is_sfu_or_mem(struct ir3_instruction *instr)
{
	return is_sfu(instr) || is_mem(instr);
}

#define NULL_INSTR ((void *)~0)

static void
clear_cache(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
{
	list_for_each_entry (struct ir3_instruction, instr2, &ctx->depth_list, node) {
		if ((instr2->data == instr) || (instr2->data == NULL_INSTR) || !instr)
			instr2->data = NULL;
	}
}

static void
schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
{
	debug_assert(ctx->block == instr->block);

	/* maybe there is a better way to handle this than just stuffing
	 * a nop.. ideally we'd know about this constraint in the
	 * scheduling and depth calculation..
	 */
	if (ctx->scheduled && is_sfu_or_mem(ctx->scheduled) && is_sfu_or_mem(instr))
		ir3_NOP(ctx->block);

	/* remove from depth list: */
	list_delinit(&instr->node);

	if (writes_addr(instr)) {
		debug_assert(ctx->addr == NULL);
		ctx->addr = instr;
	}

	if (writes_pred(instr)) {
		debug_assert(ctx->pred == NULL);
		ctx->pred = instr;
	}

	instr->flags |= IR3_INSTR_MARK;

	list_addtail(&instr->node, &instr->block->instr_list);
	ctx->scheduled = instr;

	if (writes_addr(instr) || writes_pred(instr) || is_input(instr)) {
		clear_cache(ctx, NULL);
	} else {
		/* invalidate only the necessary entries.. */
		clear_cache(ctx, instr);
	}
}
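/* A note on the instr->data cache (spelling out what the code above
 * implies, rather than anything stated in the original comments): each
 * unscheduled instruction is in one of three states:
 *
 *    NULL       - not visited since the last invalidation
 *    NULL_INSTR - visited, known to have no schedulable candidate
 *    <instr>    - visited, cached candidate from the last traversal
 *
 * clear_cache(ctx, NULL) wipes everything; clear_cache(ctx, instr) only
 * drops entries that cached 'instr' itself, plus the negative
 * (NULL_INSTR) results, since scheduling an instruction can turn a
 * "nothing ready" answer into a real candidate.
 */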
static struct ir3_instruction *
deepest(struct ir3_instruction **srcs, unsigned nsrcs)
{
	struct ir3_instruction *d = NULL;
	unsigned i = 0, id = 0;

	while ((i < nsrcs) && !(d = srcs[id = i]))
		i++;

	if (!d)
		return NULL;

	for (; i < nsrcs; i++)
		if (srcs[i] && (srcs[i]->depth > d->depth))
			d = srcs[id = i];

	srcs[id] = NULL;

	return d;
}

static unsigned
distance(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr,
		unsigned maxd)
{
	struct list_head *instr_list = &ctx->block->instr_list;
	unsigned d = 0;

	list_for_each_entry_rev (struct ir3_instruction, n, instr_list, node) {
		if ((n == instr) || (d >= maxd))
			break;
		if (is_alu(n) || is_flow(n))
			d++;
	}

	return d;
}

/* calculate delay for specified src: */
static unsigned
delay_calc_srcn(struct ir3_sched_ctx *ctx,
		struct ir3_instruction *assigner,
		struct ir3_instruction *consumer,
		unsigned srcn, bool soft)
{
	unsigned delay = 0;

	if (is_meta(assigner)) {
		struct ir3_instruction *src;
		foreach_ssa_src(src, assigner) {
			unsigned d;
			if (src->block != assigner->block)
				break;
			d = delay_calc_srcn(ctx, src, consumer, srcn, soft);
			delay = MAX2(delay, d);
		}
	} else {
		if (soft) {
			if (is_sfu(assigner)) {
				delay = 4;
			} else {
				delay = ir3_delayslots(assigner, consumer, srcn);
			}
		} else {
			delay = ir3_delayslots(assigner, consumer, srcn);
		}
		delay -= distance(ctx, assigner, delay);
	}

	return delay;
}

/* calculate delay for instruction (maximum of delay for all srcs): */
static unsigned
delay_calc(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr, bool soft)
{
	unsigned delay = 0;
	struct ir3_instruction *src;

	foreach_ssa_src_n(src, i, instr) {
		unsigned d;
		/* for array writes, no need to delay on previous write: */
		if (i == 0)
			continue;
		if (src->block != instr->block)
			continue;
		d = delay_calc_srcn(ctx, src, instr, i, soft);
		delay = MAX2(delay, d);
	}

	return delay;
}
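/* Worked example (hypothetical numbers): suppose a src value comes from
 * an alu instruction and ir3_delayslots() says the consumer must wait 6
 * cycles, but two alu/flow instructions have already been scheduled
 * since the assigner, so distance() returns 2.  delay_calc_srcn() then
 * reports a remaining delay of 4, i.e. four nop's if the consumer were
 * scheduled right now.  The "soft" mode only differs for sfu assigners:
 * their consumers are really synchronized with a sync flag rather than
 * delay slots, but charging a nominal 4 cycles biases candidate
 * selection toward covering sfu latency with independent work.
 */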
struct ir3_sched_notes {
	/* there is at least one kill which could be scheduled, except
	 * for unscheduled bary.f's:
	 */
	bool blocked_kill;
	/* there is at least one instruction that could be scheduled,
	 * except for conflicting address/predicate register usage:
	 */
	bool addr_conflict, pred_conflict;
};

static bool is_scheduled(struct ir3_instruction *instr)
{
	return !!(instr->flags & IR3_INSTR_MARK);
}

/* could an instruction be scheduled if specified ssa src was scheduled? */
static bool
could_sched(struct ir3_instruction *instr, struct ir3_instruction *src)
{
	struct ir3_instruction *other_src;
	foreach_ssa_src(other_src, instr) {
		/* if dependency not scheduled, we aren't ready yet: */
		if ((src != other_src) && !is_scheduled(other_src)) {
			return false;
		}
	}
	return true;
}

/* Check if instruction is ok to schedule.  Make sure it is not blocked
 * by use of addr/predicate register, etc.
 */
static bool
check_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
		struct ir3_instruction *instr)
{
	/* For instructions that write address register we need to
	 * make sure there is at least one instruction that uses the
	 * addr value which is otherwise ready.
	 *
	 * TODO if any instructions use pred register and have other
	 * src args, we would need to do the same for writes_pred()..
	 */
	if (writes_addr(instr)) {
		struct ir3 *ir = instr->block->shader;
		bool ready = false;
		for (unsigned i = 0; (i < ir->indirects_count) && !ready; i++) {
			struct ir3_instruction *indirect = ir->indirects[i];
			if (!indirect)
				continue;
			if (indirect->address != instr)
				continue;
			ready = could_sched(indirect, instr);
		}

		/* nothing could be scheduled, so keep looking: */
		if (!ready)
			return false;
	}

	/* if this is a write to address/predicate register, and that
	 * register is currently in use, we need to defer until it is
	 * free:
	 */
	if (writes_addr(instr) && ctx->addr) {
		debug_assert(ctx->addr != instr);
		notes->addr_conflict = true;
		return false;
	}

	if (writes_pred(instr) && ctx->pred) {
		debug_assert(ctx->pred != instr);
		notes->pred_conflict = true;
		return false;
	}

	/* if the instruction is a kill, we need to ensure *every*
	 * bary.f is scheduled.  The hw seems unhappy if the thread
	 * gets killed before the end-input (ei) flag is hit.
	 *
	 * We could do this by adding each bary.f instruction as
	 * virtual ssa src for the kill instruction.  But we have
	 * fixed length instr->regs[].
	 *
	 * TODO this wouldn't be quite right if we had multiple
	 * basic blocks, if any block was conditional.  We'd need
	 * to schedule the bary.f's outside of any block which
	 * was conditional that contained a kill.. I think..
	 */
	if (is_kill(instr)) {
		struct ir3 *ir = instr->block->shader;

		for (unsigned i = 0; i < ir->baryfs_count; i++) {
			struct ir3_instruction *baryf = ir->baryfs[i];
			if (baryf->flags & IR3_INSTR_UNUSED)
				continue;
			if (!is_scheduled(baryf)) {
				notes->blocked_kill = true;
				return false;
			}
		}
	}

	return true;
}

/* Find the best instruction to schedule from specified instruction or
 * recursively its ssa sources.
 */
static struct ir3_instruction *
find_instr_recursive(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
		struct ir3_instruction *instr)
{
	struct ir3_instruction *srcs[__ssa_src_cnt(instr)];
	struct ir3_instruction *src;
	unsigned nsrcs = 0;

	if (is_scheduled(instr))
		return NULL;

	/* use instr->data to cache the results of recursing up the
	 * instr src's.  Otherwise the recursive algo can scale quite
	 * badly w/ shader size.  But this takes some care to clear
	 * the cache appropriately when instructions are scheduled.
	 */
	if (instr->data) {
		if (instr->data == NULL_INSTR)
			return NULL;
		return instr->data;
	}

	/* find unscheduled srcs: */
	foreach_ssa_src(src, instr) {
		if (!is_scheduled(src)) {
			debug_assert(nsrcs < ARRAY_SIZE(srcs));
			srcs[nsrcs++] = src;
		}
	}

	/* if all our src's are already scheduled: */
	if (nsrcs == 0) {
		if (check_instr(ctx, notes, instr)) {
			instr->data = instr;
			return instr;
		}
		return NULL;
	}

	while ((src = deepest(srcs, nsrcs))) {
		struct ir3_instruction *candidate;

		candidate = find_instr_recursive(ctx, notes, src);
		if (!candidate)
			continue;

		if (check_instr(ctx, notes, candidate)) {
			instr->data = candidate;
			return candidate;
		}
	}

	instr->data = NULL_INSTR;
	return NULL;
}
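/* A small trace of the recursion (hypothetical DAG): say instr C has two
 * unscheduled srcs, A at depth 2 and B at depth 5.  deepest() hands back
 * B first; if all of B's own srcs are scheduled and check_instr() passes,
 * B becomes the candidate and is cached in C->data.  If B is blocked
 * (e.g. an addr conflict), we fall through to A, and only if neither
 * subtree yields a schedulable instruction does C->data get the
 * NULL_INSTR sentinel, so the whole subtree is skipped until the cache
 * is invalidated.
 */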
/* find instruction to schedule: */
static struct ir3_instruction *
find_eligible_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
		bool soft)
{
	struct ir3_instruction *best_instr = NULL;
	unsigned min_delay = ~0;

	/* TODO we'd really rather use the list/array of block outputs.  But we
	 * don't have such a thing.  Recursing *every* instruction in the list
	 * will result in a lot of repeated traversal, since instructions will
	 * get traversed both when they appear as ssa src to a later instruction
	 * as well as where they appear in the depth_list.
	 */
	list_for_each_entry_rev (struct ir3_instruction, instr, &ctx->depth_list, node) {
		struct ir3_instruction *candidate;
		unsigned delay;

		candidate = find_instr_recursive(ctx, notes, instr);
		if (!candidate)
			continue;

		delay = delay_calc(ctx, candidate, soft);
		if (delay < min_delay) {
			best_instr = candidate;
			min_delay = delay;
		}

		if (min_delay == 0)
			break;
	}

	return best_instr;
}

/* "spill" the address register by remapping any unscheduled
 * instructions which depend on the current address register
 * to a clone of the instruction which wrote the address reg.
 */
static struct ir3_instruction *
split_addr(struct ir3_sched_ctx *ctx)
{
	struct ir3 *ir;
	struct ir3_instruction *new_addr = NULL;
	unsigned i;

	debug_assert(ctx->addr);

	ir = ctx->addr->block->shader;

	for (i = 0; i < ir->indirects_count; i++) {
		struct ir3_instruction *indirect = ir->indirects[i];

		if (!indirect)
			continue;

		/* skip instructions already scheduled: */
		if (is_scheduled(indirect))
			continue;

		/* remap remaining instructions using current addr
		 * to new addr:
		 */
		if (indirect->address == ctx->addr) {
			if (!new_addr) {
				new_addr = ir3_instr_clone(ctx->addr);
				/* original addr is scheduled, but new one isn't: */
				new_addr->flags &= ~IR3_INSTR_MARK;
			}
			ir3_instr_set_address(indirect, new_addr);
		}
	}

	/* all remaining indirects remapped to new addr: */
	ctx->addr = NULL;

	return new_addr;
}

/* "spill" the predicate register by remapping any unscheduled
 * instructions which depend on the current predicate register
 * to a clone of the instruction which wrote the predicate reg.
 */
static struct ir3_instruction *
split_pred(struct ir3_sched_ctx *ctx)
{
	struct ir3 *ir;
	struct ir3_instruction *new_pred = NULL;
	unsigned i;

	debug_assert(ctx->pred);

	ir = ctx->pred->block->shader;

	for (i = 0; i < ir->predicates_count; i++) {
		struct ir3_instruction *predicated = ir->predicates[i];

		/* skip instructions already scheduled: */
		if (is_scheduled(predicated))
			continue;

		/* remap remaining instructions using current pred
		 * to new pred:
		 *
		 * TODO is there ever a case when pred isn't first
		 * (and only) src?
		 */
		if (ssa(predicated->regs[1]) == ctx->pred) {
			if (!new_pred) {
				new_pred = ir3_instr_clone(ctx->pred);
				/* original pred is scheduled, but new one isn't: */
				new_pred->flags &= ~IR3_INSTR_MARK;
			}
			predicated->regs[1]->instr = new_pred;
		}
	}

	/* all remaining predicated remapped to new pred: */
	ctx->pred = NULL;

	return new_pred;
}
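/* Deadlock example for the split_addr()/split_pred() path (hypothetical):
 * the a0.x value currently in flight still has one unscheduled user U,
 * but U also consumes a value that can only be produced after a *second*
 * a0.x write, which check_instr() refuses while ctx->addr is live.
 * Neither instruction can go first, so find_eligible_instr() returns
 * NULL with notes->addr_conflict set.  Cloning the original a0.x write
 * and remapping U (and any other stragglers) to the clone lets the
 * second write proceed now; the clone is re-inserted into the depth_list
 * to be scheduled again later.
 */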
static void
sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
{
	struct list_head unscheduled_list;

	ctx->block = block;

	/* addr/pred writes are per-block: */
	ctx->addr = NULL;
	ctx->pred = NULL;

	/* move all instructions to the unscheduled list, and
	 * empty the block's instruction list (to which we will
	 * be inserting).
	 */
	list_replace(&block->instr_list, &unscheduled_list);
	list_inithead(&block->instr_list);
	list_inithead(&ctx->depth_list);

	/* first a pre-pass to schedule all meta:input/phi instructions
	 * (which need to appear first so that RA knows the register is
	 * occupied), and move remaining to depth sorted list:
	 */
	list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) {
		if ((instr->opc == OPC_META_INPUT) || (instr->opc == OPC_META_PHI)) {
			schedule(ctx, instr);
		} else {
			ir3_insert_by_depth(instr, &ctx->depth_list);
		}
	}

	while (!list_empty(&ctx->depth_list)) {
		struct ir3_sched_notes notes = {0};
		struct ir3_instruction *instr;

		instr = find_eligible_instr(ctx, &notes, true);
		if (!instr)
			instr = find_eligible_instr(ctx, &notes, false);

		if (instr) {
			unsigned delay = delay_calc(ctx, instr, false);

			/* and if we run out of instructions that can be scheduled,
			 * then it is time for nop's:
			 */
			debug_assert(delay <= 6);
			while (delay > 0) {
				ir3_NOP(block);
				delay--;
			}

			schedule(ctx, instr);
		} else {
			struct ir3_instruction *new_instr = NULL;

			/* nothing available to schedule.. if we are blocked on
			 * address/predicate register conflict, then break the
			 * deadlock by cloning the instruction that wrote that
			 * reg:
			 */
			if (notes.addr_conflict) {
				new_instr = split_addr(ctx);
			} else if (notes.pred_conflict) {
				new_instr = split_pred(ctx);
			} else {
				debug_assert(0);
				ctx->error = true;
				return;
			}

			if (new_instr) {
				/* clearing current addr/pred can change what is
				 * available to schedule, so clear cache..
				 */
				clear_cache(ctx, NULL);

				ir3_insert_by_depth(new_instr, &ctx->depth_list);
				/* the original instr that wrote addr/pred may have
				 * originated from a different block:
				 */
				new_instr->block = block;
			}
		}
	}

	/* And lastly, insert branch/jump instructions to take us to
	 * the next block.  Later we'll strip back out the branches
	 * that simply jump to next instruction.
	 */
	if (block->successors[1]) {
		/* if/else, conditional branches to "then" or "else": */
		struct ir3_instruction *br;
		unsigned delay = 6;

		debug_assert(ctx->pred);
		debug_assert(block->condition);

		delay -= distance(ctx, ctx->pred, delay);

		while (delay > 0) {
			ir3_NOP(block);
			delay--;
		}

		/* create "else" branch first (since "then" block should
		 * frequently/always end up being a fall-thru):
		 */
		br = ir3_BR(block);
		br->cat0.inv = true;
		br->cat0.target = block->successors[1];

		/* NOTE: we have to hard code delay of 6 above, since
		 * we want to insert the nop's before constructing the
		 * branch.  Throw in an assert so we notice if this
		 * ever breaks on future generations:
		 */
		debug_assert(ir3_delayslots(ctx->pred, br, 0) == 6);

		br = ir3_BR(block);
		br->cat0.target = block->successors[0];

	} else if (block->successors[0]) {
		/* otherwise unconditional jump to next block: */
		struct ir3_instruction *jmp;

		jmp = ir3_JUMP(block);
		jmp->cat0.target = block->successors[0];
	}

	/* NOTE: if we kept track of the predecessors, we could do a better
	 * job w/ (jp) flags.. every node w/ more than one predecessor is
	 * a join point.  Note that as we eliminate blocks which contain
	 * only an unconditional jump we probably need to propagate the
	 * (jp) flag..
	 */
}
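/* For reference, the block epilogue constructed above looks roughly like
 * this for an if/else (pseudo-asm, register names hypothetical):
 *
 *    cmps.f.ne p0.x, ...    ; whatever wrote the block condition
 *    nop                    ; pad remaining p0.x delay slots (up to 6)
 *    ...
 *    br !p0.x, #else_block  ; inverted branch emitted first
 *    br #then_block         ; often becomes a fall-thru, stripped later
 */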
/* this is needed to ensure later RA stage succeeds: */
static void
sched_insert_parallel_copies(struct ir3_block *block)
{
	list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
		if (instr->opc == OPC_META_PHI) {
			struct ir3_register *reg, *reg2;
			foreach_src(reg, instr) {
				struct ir3_instruction *src = reg->instr;
				struct ir3_instruction *mov = NULL;

				/* after CP we could end up w/ duplicate phi srcs: */
				foreach_src(reg2, instr) {
					if (reg == reg2)
						break;
					/* reg2 is before reg, so already an inserted mov: */
					else if (reg2->instr->regs[1]->instr == src) {
						mov = reg2->instr;
						break;
					}
				}

				if (!mov) {
					mov = ir3_MOV(src->block, src, TYPE_U32);
					mov->regs[0]->flags |= IR3_REG_PHI_SRC;
					mov->regs[0]->instr = instr;
				}

				reg->instr = mov;
			}
		}
	}
}

int ir3_sched(struct ir3 *ir)
{
	struct ir3_sched_ctx ctx = {0};
	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
		sched_insert_parallel_copies(block);
	}
	ir3_clear_mark(ir);
	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
		sched_block(&ctx, block);
	}
	if (ctx.error)
		return -1;
	return 0;
}

/* does instruction 'prior' need to be scheduled before 'instr'? */
static bool
depends_on(struct ir3_instruction *instr, struct ir3_instruction *prior)
{
	/* TODO for dependencies that are related to a specific object, ie
	 * a specific SSBO/image/array, we could relax this constraint to
	 * make accesses to unrelated objects not depend on each other (at
	 * least as long as not declared coherent)
	 */
	if (((instr->barrier_class & IR3_BARRIER_EVERYTHING) && prior->barrier_class) ||
			((prior->barrier_class & IR3_BARRIER_EVERYTHING) && instr->barrier_class))
		return true;
	return !!(instr->barrier_class & prior->barrier_conflict);
}

static void
add_barrier_deps(struct ir3_block *block, struct ir3_instruction *instr)
{
	struct list_head *prev = instr->node.prev;
	struct list_head *next = instr->node.next;

	/* add dependencies on previous instructions that must be scheduled
	 * prior to the current instruction:
	 */
	while (prev != &block->instr_list) {
		struct ir3_instruction *pi =
			LIST_ENTRY(struct ir3_instruction, prev, node);

		prev = prev->prev;

		if (is_meta(pi))
			continue;

		if (instr->barrier_class == pi->barrier_class) {
			ir3_instr_add_dep(instr, pi);
			break;
		}

		if (depends_on(instr, pi))
			ir3_instr_add_dep(instr, pi);
	}

	/* add dependencies on this instruction to following instructions
	 * that must be scheduled after the current instruction:
	 */
	while (next != &block->instr_list) {
		struct ir3_instruction *ni =
			LIST_ENTRY(struct ir3_instruction, next, node);

		next = next->next;

		if (is_meta(ni))
			continue;

		if (instr->barrier_class == ni->barrier_class) {
			ir3_instr_add_dep(ni, instr);
			break;
		}

		if (depends_on(ni, instr))
			ir3_instr_add_dep(ni, instr);
	}
}

/* before scheduling a block, we need to add any necessary false-dependencies
 * to ensure that:
 *
 *  (1) barriers are scheduled in the right order wrt instructions related
 *      to the barrier
 *
 *  (2) reads that come before a write actually get scheduled before the
 *      write
 */
static void
calculate_deps(struct ir3_block *block)
{
	list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
		if (instr->barrier_class) {
			add_barrier_deps(block, instr);
		}
	}
}

void
ir3_sched_add_deps(struct ir3 *ir)
{
	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
		calculate_deps(block);
	}
}
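/* Illustration (barrier class names from ir3.h; the scenario itself is
 * made up): an SSBO store would typically have barrier_class =
 * IR3_BARRIER_BUFFER_W and barrier_conflict = IR3_BARRIER_BUFFER_R |
 * IR3_BARRIER_BUFFER_W, so a later SSBO load (class IR3_BARRIER_BUFFER_R)
 * picks up a false dependency on it via depends_on(), while a plain alu
 * instruction (no barrier_class) does not.  Note that
 * ir3_sched_add_deps() is expected to run before ir3_sched(), since the
 * scheduler can only honor dependencies that exist by the time it runs.
 */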