/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */


#include "util/u_math.h"

#include "ir3.h"

/*
 * Instruction Scheduling:
 *
 * A recursive depth-based scheduling algorithm.  Recursively find an
 * eligible instruction to schedule from the deepest instruction
 * (recursing through its unscheduled src instructions).  Normally this
 * would result in a lot of re-traversal of the same instructions, so we
 * cache results in instr->data (and clear cached results that would no
 * longer be valid after scheduling an instruction).
 *
 * There are a few special cases that need to be handled, since sched
 * is currently independent of register allocation.  Usages of address
 * register (a0.x) or predicate register (p0.x) must be serialized.  Ie.
 * if you have two pairs of instructions that write the same special
 * register and then read it, then those pairs cannot be interleaved.
 * To solve this, when we are in such a scheduling "critical section",
 * and we encounter a conflicting write to a special register, we try
 * to schedule any remaining instructions that use that value first.
 */
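
/* To illustrate the a0.x critical section (pseudo-asm, not exact ir3
 * syntax):
 *
 *    write a0.x <- x      ; pair A
 *    read  c[a0.x]        ; pair A
 *    write a0.x <- y      ; pair B
 *    read  c[a0.x]        ; pair B
 *
 * Pair B's write must not be scheduled between pair A's write and read
 * (and vice versa), since there is only one a0.x.  The ctx->addr /
 * ctx->pred tracking below enforces that serialization.
 */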

struct ir3_sched_ctx {
	struct ir3_block *block;           /* the current block */
	struct list_head depth_list;       /* depth sorted unscheduled instrs */
	struct ir3_instruction *scheduled; /* last scheduled instr XXX remove */
	struct ir3_instruction *addr;      /* current a0.x user, if any */
	struct ir3_instruction *pred;      /* current p0.x user, if any */
	bool error;
};

static bool is_sfu_or_mem(struct ir3_instruction *instr)
{
	return is_sfu(instr) || is_mem(instr);
}

#define NULL_INSTR ((void *)~0)

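/* Clear the instr->data scheduling cache.  With instr==NULL the entire
 * cache is invalidated; otherwise only entries that cached 'instr' (or the
 * NULL_INSTR "no candidate" sentinel) are cleared, since those are the
 * results that can change once 'instr' has been scheduled.
 */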
static void
clear_cache(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
{
	list_for_each_entry (struct ir3_instruction, instr2, &ctx->depth_list, node) {
		if ((instr2->data == instr) || (instr2->data == NULL_INSTR) || !instr)
			instr2->data = NULL;
	}
}

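/* Emit 'instr': remove it from the depth list, append it to the block's
 * instruction list, and mark it scheduled.  Any a0.x/p0.x write is recorded
 * so that later conflicting writes can be detected, and the parts of the
 * instr->data cache invalidated by scheduling this instruction are cleared.
 */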
static void
schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
{
	debug_assert(ctx->block == instr->block);

	/* maybe there is a better way to handle this than just stuffing
	 * a nop.. ideally we'd know about this constraint in the
	 * scheduling and depth calculation..
	 */
	if (ctx->scheduled && is_sfu_or_mem(ctx->scheduled) && is_sfu_or_mem(instr))
		ir3_NOP(ctx->block);

	/* remove from depth list: */
	list_delinit(&instr->node);

	if (writes_addr(instr)) {
		debug_assert(ctx->addr == NULL);
		ctx->addr = instr;
	}

	if (writes_pred(instr)) {
		debug_assert(ctx->pred == NULL);
		ctx->pred = instr;
	}

	instr->flags |= IR3_INSTR_MARK;

	list_addtail(&instr->node, &instr->block->instr_list);
	ctx->scheduled = instr;

	if (writes_addr(instr) || writes_pred(instr) || is_input(instr)) {
		clear_cache(ctx, NULL);
	} else {
		/* invalidate only the necessary entries.. */
		clear_cache(ctx, instr);
	}
}

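/* Return the unscheduled src with the greatest ->depth from 'srcs',
 * NULL'ing out its slot so that successive calls walk the candidates in
 * deepest-first order.  Returns NULL once no candidates remain.
 */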
static struct ir3_instruction *
deepest(struct ir3_instruction **srcs, unsigned nsrcs)
{
	struct ir3_instruction *d = NULL;
	unsigned i = 0, id = 0;

	while ((i < nsrcs) && !(d = srcs[id = i]))
		i++;

	if (!d)
		return NULL;

	for (; i < nsrcs; i++)
		if (srcs[i] && (srcs[i]->depth > d->depth))
			d = srcs[id = i];

	srcs[id] = NULL;

	return d;
}

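/* Count how many alu/flow instructions have been emitted into the block
 * since 'instr' was scheduled, capped at 'maxd'.  Used to credit already
 * elapsed slots against an instruction's required delay.
 */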
static unsigned
distance(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr,
		unsigned maxd)
{
	struct list_head *instr_list = &ctx->block->instr_list;
	unsigned d = 0;

	list_for_each_entry_rev (struct ir3_instruction, n, instr_list, node) {
		if ((n == instr) || (d >= maxd))
			break;
		if (is_alu(n) || is_flow(n))
			d++;
	}

	return d;
}

/* Calculate the delay for 'consumer' reading src number 'srcn' from
 * 'assigner'.  With soft==true, sfu results are given a fixed estimated
 * latency instead of what ir3_delayslots() reports; the soft value is only
 * used when ranking candidates (see find_eligible_instr()), whereas the
 * hard (soft==false) value is also what decides how many nop's must be
 * inserted before the consumer.
 */
static unsigned
delay_calc_srcn(struct ir3_sched_ctx *ctx,
		struct ir3_instruction *assigner,
		struct ir3_instruction *consumer,
		unsigned srcn, bool soft)
{
	unsigned delay = 0;

	if (is_meta(assigner)) {
		struct ir3_instruction *src;
		foreach_ssa_src(src, assigner) {
			unsigned d;
			if (src->block != assigner->block)
				break;
			d = delay_calc_srcn(ctx, src, consumer, srcn, soft);
			delay = MAX2(delay, d);
		}
	} else {
		if (soft) {
			if (is_sfu(assigner)) {
				delay = 4;
			} else {
				delay = ir3_delayslots(assigner, consumer, srcn);
			}
		} else {
			delay = ir3_delayslots(assigner, consumer, srcn);
		}
		delay -= distance(ctx, assigner, delay);
	}

	return delay;
}

/* calculate delay for instruction (maximum of delay for all srcs): */
static unsigned
delay_calc(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr, bool soft)
{
	unsigned delay = 0;
	struct ir3_instruction *src;

	foreach_ssa_src_n(src, i, instr) {
		unsigned d;
		/* for array writes, no need to delay on previous write: */
		if (i == 0)
			continue;
		if (src->block != instr->block)
			continue;
		d = delay_calc_srcn(ctx, src, instr, i, soft);
		delay = MAX2(delay, d);
	}

	return delay;
}

struct ir3_sched_notes {
	/* there is at least one kill which could be scheduled, except
	 * for unscheduled bary.f's:
	 */
	bool blocked_kill;
	/* there is at least one instruction that could be scheduled,
	 * except for conflicting address/predicate register usage:
	 */
	bool addr_conflict, pred_conflict;
};

static bool is_scheduled(struct ir3_instruction *instr)
{
	return !!(instr->flags & IR3_INSTR_MARK);
}

/* could an instruction be scheduled if specified ssa src was scheduled? */
static bool
could_sched(struct ir3_instruction *instr, struct ir3_instruction *src)
{
	struct ir3_instruction *other_src;
	foreach_ssa_src(other_src, instr) {
		/* if dependency not scheduled, we aren't ready yet: */
		if ((src != other_src) && !is_scheduled(other_src)) {
			return false;
		}
	}
	return true;
}

/* Check if instruction is ok to schedule.  Make sure it is not blocked
 * by use of addr/predicate register, etc.
 */
static bool
check_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
		struct ir3_instruction *instr)
{
	/* For instructions that write address register we need to
	 * make sure there is at least one instruction that uses the
	 * addr value which is otherwise ready.
	 *
	 * TODO if any instructions use pred register and have other
	 * src args, we would need to do the same for writes_pred()..
	 */
	if (writes_addr(instr)) {
		struct ir3 *ir = instr->block->shader;
		bool ready = false;
		for (unsigned i = 0; (i < ir->indirects_count) && !ready; i++) {
			struct ir3_instruction *indirect = ir->indirects[i];
			if (!indirect)
				continue;
			if (indirect->address != instr)
				continue;
			ready = could_sched(indirect, instr);
		}

		/* nothing could be scheduled, so keep looking: */
		if (!ready)
			return false;
	}

	/* if this is a write to address/predicate register, and that
	 * register is currently in use, we need to defer until it is
	 * free:
	 */
	if (writes_addr(instr) && ctx->addr) {
		debug_assert(ctx->addr != instr);
		notes->addr_conflict = true;
		return false;
	}

	if (writes_pred(instr) && ctx->pred) {
		debug_assert(ctx->pred != instr);
		notes->pred_conflict = true;
		return false;
	}

	/* if the instruction is a kill, we need to ensure *every*
	 * bary.f is scheduled.  The hw seems unhappy if the thread
	 * gets killed before the end-input (ei) flag is hit.
	 *
	 * We could do this by adding each bary.f instruction as
	 * virtual ssa src for the kill instruction.  But we have
	 * fixed length instr->regs[].
	 *
	 * TODO this wouldn't be quite right if we had multiple
	 * basic blocks, if any block was conditional.  We'd need
	 * to schedule the bary.f's outside of any block which
	 * was conditional that contained a kill.. I think..
	 */
	if (is_kill(instr)) {
		struct ir3 *ir = instr->block->shader;

		for (unsigned i = 0; i < ir->baryfs_count; i++) {
			struct ir3_instruction *baryf = ir->baryfs[i];
			if (baryf->flags & IR3_INSTR_UNUSED)
				continue;
			if (!is_scheduled(baryf)) {
				notes->blocked_kill = true;
				return false;
			}
		}
	}

	return true;
}

/* Find the best instruction to schedule from specified instruction or
 * recursively its ssa sources.
 */
static struct ir3_instruction *
find_instr_recursive(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
		struct ir3_instruction *instr)
{
	struct ir3_instruction *srcs[__ssa_src_cnt(instr)];
	struct ir3_instruction *src;
	unsigned nsrcs = 0;

	if (is_scheduled(instr))
		return NULL;

	/* use instr->data to cache the results of recursing up the
	 * instr src's.  Otherwise the recursive algo can scale quite
	 * badly w/ shader size.  But this takes some care to clear
	 * the cache appropriately when instructions are scheduled.
	 */
	if (instr->data) {
		if (instr->data == NULL_INSTR)
			return NULL;
		return instr->data;
	}

	/* find unscheduled srcs: */
	foreach_ssa_src(src, instr) {
		if (!is_scheduled(src)) {
			debug_assert(nsrcs < ARRAY_SIZE(srcs));
			srcs[nsrcs++] = src;
		}
	}

	/* if all our src's are already scheduled: */
	if (nsrcs == 0) {
		if (check_instr(ctx, notes, instr)) {
			instr->data = instr;
			return instr;
		}
		return NULL;
	}

	while ((src = deepest(srcs, nsrcs))) {
		struct ir3_instruction *candidate;

		candidate = find_instr_recursive(ctx, notes, src);
		if (!candidate)
			continue;

		if (check_instr(ctx, notes, candidate)) {
			instr->data = candidate;
			return candidate;
		}
	}

	instr->data = NULL_INSTR;
	return NULL;
}

/* find the eligible instruction with the smallest delay to schedule next: */
static struct ir3_instruction *
find_eligible_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
		bool soft)
{
	struct ir3_instruction *best_instr = NULL;
	unsigned min_delay = ~0;

	/* TODO we'd really rather use the list/array of block outputs.  But we
	 * don't have such a thing.  Recursing *every* instruction in the list
	 * will result in a lot of repeated traversal, since instructions will
	 * get traversed both when they appear as ssa src to a later instruction
	 * as well as where they appear in the depth_list.
	 */
	list_for_each_entry_rev (struct ir3_instruction, instr, &ctx->depth_list, node) {
		struct ir3_instruction *candidate;
		unsigned delay;

		candidate = find_instr_recursive(ctx, notes, instr);
		if (!candidate)
			continue;

		delay = delay_calc(ctx, candidate, soft);
		if (delay < min_delay) {
			best_instr = candidate;
			min_delay = delay;
		}

		if (min_delay == 0)
			break;
	}

	return best_instr;
}

/* "spill" the address register by remapping any unscheduled
 * instructions which depend on the current address register
 * to a clone of the instruction which wrote the address reg.
 */
static struct ir3_instruction *
split_addr(struct ir3_sched_ctx *ctx)
{
	struct ir3 *ir;
	struct ir3_instruction *new_addr = NULL;
	unsigned i;

	debug_assert(ctx->addr);

	ir = ctx->addr->block->shader;

	for (i = 0; i < ir->indirects_count; i++) {
		struct ir3_instruction *indirect = ir->indirects[i];

		if (!indirect)
			continue;

		/* skip instructions already scheduled: */
		if (is_scheduled(indirect))
			continue;

		/* remap remaining instructions using current addr
		 * to new addr:
		 */
		if (indirect->address == ctx->addr) {
			if (!new_addr) {
				new_addr = ir3_instr_clone(ctx->addr);
				/* original addr is scheduled, but new one isn't: */
				new_addr->flags &= ~IR3_INSTR_MARK;
			}
			ir3_instr_set_address(indirect, new_addr);
		}
	}

	/* all remaining indirects remapped to new addr: */
	ctx->addr = NULL;

	return new_addr;
}

/* "spill" the predicate register by remapping any unscheduled
 * instructions which depend on the current predicate register
 * to a clone of the instruction which wrote the predicate reg.
 */
static struct ir3_instruction *
split_pred(struct ir3_sched_ctx *ctx)
{
	struct ir3 *ir;
	struct ir3_instruction *new_pred = NULL;
	unsigned i;

	debug_assert(ctx->pred);

	ir = ctx->pred->block->shader;

	for (i = 0; i < ir->predicates_count; i++) {
		struct ir3_instruction *predicated = ir->predicates[i];

		/* skip instructions already scheduled: */
		if (is_scheduled(predicated))
			continue;

		/* remap remaining instructions using current pred
		 * to new pred:
		 *
		 * TODO is there ever a case when pred isn't first
		 * (and only) src?
		 */
		if (ssa(predicated->regs[1]) == ctx->pred) {
			if (!new_pred) {
				new_pred = ir3_instr_clone(ctx->pred);
				/* original pred is scheduled, but new one isn't: */
				new_pred->flags &= ~IR3_INSTR_MARK;
			}
			predicated->regs[1]->instr = new_pred;
		}
	}

	/* all remaining predicated remapped to new pred: */
	ctx->pred = NULL;

	return new_pred;
}

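/* Schedule a single block: meta input/phi instructions are emitted first,
 * then the remaining instructions are pulled from the depth-sorted list one
 * at a time, padding with nop's as required.  If nothing is schedulable due
 * to an addr/pred conflict, the conflict is broken via split_addr()/
 * split_pred() and we retry.  Finally the branch/jump to the block's
 * successor(s) is emitted.
 */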
static void
sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
{
	struct list_head unscheduled_list;

	ctx->block = block;

	/* addr/pred writes are per-block: */
	ctx->addr = NULL;
	ctx->pred = NULL;

	/* move all instructions to the unscheduled list, and
	 * empty the block's instruction list (to which we will
	 * be inserting).
	 */
	list_replace(&block->instr_list, &unscheduled_list);
	list_inithead(&block->instr_list);
	list_inithead(&ctx->depth_list);

	/* first a pre-pass to schedule all meta:input/phi instructions
	 * (which need to appear first so that RA knows the register is
	 * occupied), and move remaining to depth sorted list:
	 */
	list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) {
		if ((instr->opc == OPC_META_INPUT) || (instr->opc == OPC_META_PHI)) {
			schedule(ctx, instr);
		} else {
			ir3_insert_by_depth(instr, &ctx->depth_list);
		}
	}

	while (!list_empty(&ctx->depth_list)) {
		struct ir3_sched_notes notes = {0};
		struct ir3_instruction *instr;

		instr = find_eligible_instr(ctx, &notes, true);
		if (!instr)
			instr = find_eligible_instr(ctx, &notes, false);

		if (instr) {
			unsigned delay = delay_calc(ctx, instr, false);

			/* no instruction with a smaller delay was available, so
			 * pad out the remaining delay with nop's:
			 */
			debug_assert(delay <= 6);
			while (delay > 0) {
				ir3_NOP(block);
				delay--;
			}

			schedule(ctx, instr);
		} else {
			struct ir3_instruction *new_instr = NULL;

			/* nothing available to schedule.. if we are blocked on
			 * address/predicate register conflict, then break the
			 * deadlock by cloning the instruction that wrote that
			 * reg:
			 */
			if (notes.addr_conflict) {
				new_instr = split_addr(ctx);
			} else if (notes.pred_conflict) {
				new_instr = split_pred(ctx);
			} else {
				debug_assert(0);
				ctx->error = true;
				return;
			}

			if (new_instr) {
				/* clearing current addr/pred can change what is
				 * available to schedule, so clear cache..
				 */
				clear_cache(ctx, NULL);

				ir3_insert_by_depth(new_instr, &ctx->depth_list);
				/* the original instr that wrote addr/pred may have
				 * originated from a different block:
				 */
				new_instr->block = block;
			}
		}
	}

	/* And lastly, insert branch/jump instructions to take us to
	 * the next block.  Later we'll strip back out the branches
	 * that simply jump to next instruction.
	 */
	if (block->successors[1]) {
		/* if/else, conditional branches to "then" or "else": */
		struct ir3_instruction *br;
		unsigned delay = 6;

		debug_assert(ctx->pred);
		debug_assert(block->condition);

		delay -= distance(ctx, ctx->pred, delay);

		while (delay > 0) {
			ir3_NOP(block);
			delay--;
		}

		/* create "else" branch first (since "then" block should
		 * frequently/always end up being a fall-thru):
		 */
		br = ir3_BR(block);
		br->cat0.inv = true;
		br->cat0.target = block->successors[1];

		/* NOTE: we have to hard code delay of 6 above, since
		 * we want to insert the nop's before constructing the
		 * branch.  Throw in an assert so we notice if this
		 * ever breaks on future generations:
		 */
		debug_assert(ir3_delayslots(ctx->pred, br, 0) == 6);

		br = ir3_BR(block);
		br->cat0.target = block->successors[0];

	} else if (block->successors[0]) {
		/* otherwise unconditional jump to next block: */
		struct ir3_instruction *jmp;

		jmp = ir3_JUMP(block);
		jmp->cat0.target = block->successors[0];
	}

	/* NOTE: if we kept track of the predecessors, we could do a better
	 * job w/ (jp) flags.. every node w/ more than one predecessor is a
	 * join point.  Note that as we eliminate blocks which contain only
	 * an unconditional jump, we probably need to propagate the (jp)
	 * flag..
	 */
}

/* this is needed to ensure later RA stage succeeds: */
static void
sched_insert_parallel_copies(struct ir3_block *block)
{
	list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
		if (instr->opc == OPC_META_PHI) {
			struct ir3_register *reg, *reg2;
			foreach_src(reg, instr) {
				struct ir3_instruction *src = reg->instr;
				struct ir3_instruction *mov = NULL;

				/* after CP we could end up w/ duplicate phi srcs: */
				foreach_src(reg2, instr) {
					if (reg == reg2)
						break;
					/* reg2 comes before reg, so its mov was already inserted: */
					else if (reg2->instr->regs[1]->instr == src) {
						mov = reg2->instr;
						break;
					}
				}

				if (!mov) {
					mov = ir3_MOV(src->block, src, TYPE_U32);
					mov->regs[0]->flags |= IR3_REG_PHI_SRC;
					mov->regs[0]->instr = instr;
				}

				reg->instr = mov;
			}
		}
	}
}

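/* Main scheduling entry point: insert the parallel copies needed for phis,
 * then schedule each block in turn.  Returns 0 on success, -1 if scheduling
 * got stuck (ctx.error).
 */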
int ir3_sched(struct ir3 *ir)
{
	struct ir3_sched_ctx ctx = {0};
	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
		sched_insert_parallel_copies(block);
	}
	ir3_clear_mark(ir);
	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
		sched_block(&ctx, block);
	}
	if (ctx.error)
		return -1;
	return 0;
}

/* does instruction 'prior' need to be scheduled before 'instr'? */
static bool
depends_on(struct ir3_instruction *instr, struct ir3_instruction *prior)
{
	/* TODO for dependencies that are related to a specific object, ie
	 * a specific SSBO/image/array, we could relax this constraint to
	 * make accesses to unrelated objects not depend on each other (at
	 * least as long as not declared coherent)
	 */
	if (((instr->barrier_class & IR3_BARRIER_EVERYTHING) && prior->barrier_class) ||
			((prior->barrier_class & IR3_BARRIER_EVERYTHING) && instr->barrier_class))
		return true;
	return !!(instr->barrier_class & prior->barrier_conflict);
}

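/* Add false dependencies between 'instr' and any earlier/later instruction
 * in the block whose barrier class conflicts with it, so the scheduler
 * cannot reorder them across each other.  Each scan stops at the first
 * instruction with an identical barrier class, since that one transitively
 * carries the remaining ordering.
 */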
static void
add_barrier_deps(struct ir3_block *block, struct ir3_instruction *instr)
{
	struct list_head *prev = instr->node.prev;
	struct list_head *next = instr->node.next;

	/* add dependencies on previous instructions that must be scheduled
	 * prior to the current instruction
	 */
	while (prev != &block->instr_list) {
		struct ir3_instruction *pi =
			LIST_ENTRY(struct ir3_instruction, prev, node);

		prev = prev->prev;

		if (is_meta(pi))
			continue;

		if (instr->barrier_class == pi->barrier_class) {
			ir3_instr_add_dep(instr, pi);
			break;
		}

		if (depends_on(instr, pi))
			ir3_instr_add_dep(instr, pi);
	}

	/* add dependencies on this instruction to following instructions
	 * that must be scheduled after the current instruction:
	 */
	while (next != &block->instr_list) {
		struct ir3_instruction *ni =
			LIST_ENTRY(struct ir3_instruction, next, node);

		next = next->next;

		if (is_meta(ni))
			continue;

		if (instr->barrier_class == ni->barrier_class) {
			ir3_instr_add_dep(ni, instr);
			break;
		}

		if (depends_on(ni, instr))
			ir3_instr_add_dep(ni, instr);
	}
}

/* before scheduling a block, we need to add any necessary false-dependencies
 * to ensure that:
 *
 *  (1) barriers are scheduled in the right order wrt instructions related
 *      to the barrier
 *
 *  (2) reads that come before a write actually get scheduled before the
 *      write
 */
static void
calculate_deps(struct ir3_block *block)
{
	list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
		if (instr->barrier_class) {
			add_barrier_deps(block, instr);
		}
	}
}

void
ir3_sched_add_deps(struct ir3 *ir)
{
	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
		calculate_deps(block);
	}
}
    775