/*
 * Copyright © 2014 Intel Corporation
 * Copyright © 2015 Red Hat
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 *    Rob Clark (robclark@freedesktop.org)
 *
 */

#include "ir3_nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_control_flow.h"

/* Based on nir_opt_peephole_select, and hacked up to more aggressively
 * flatten anything that can be flattened.
 *
 * This *might* be something that other drivers could use.  On the
 * other hand, I think most other hw has predicated instructions or
 * similar to select which side of the if/else writes back the result
 * (and therefore does not have to assign unique registers to both
 * sides of the if/else).  (And hopefully those drivers don't also have
 * crazy scheduling requirements and can more easily do this in their
 * backend.)
 *
 * TODO eventually, when we have proper flow control in the backend:
 *
 *  + Probably weight normal ALUs differently from SFUs (cos/rcp/exp),
 *    since executing extra SFUs for the branch-not-taken path will
 *    generally be much more expensive.
 *
 *    Possibly what constitutes an ALU vs an SFU differs between hw
 *    backends.. but that seems doubtful.
 *
 *  + Account for texture fetches and memory accesses (incl UBOs),
 *    since these will be more expensive..
 *
 *  + When the if-condition is const (or uniform), or we have some
 *    other way to know that all threads in the warp take the same
 *    branch, then we should prefer not to flatten the if/else..
 */

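/* Roughly, what this pass does (illustrative pseudo-NIR, not actual NIR
 * syntax; the names are made up).  Given a single-block if/else whose
 * results feed phis in the successor block:
 *
 *     if (cond) {
 *         x1 = fadd(a, b);
 *     } else {
 *         x2 = fmul(a, b);
 *     }
 *     x = phi(x1, x2);
 *
 * both sides get hoisted into the preceding block and the phi is
 * replaced with a select on the original condition:
 *
 *     x1 = fadd(a, b);
 *     x2 = fmul(a, b);
 *     x  = bcsel(cond, x1, x2);
 *
 * discard/discard_if inside either side are rewritten as discard_if
 * predicated on cond (inverted for the else side), so they still only
 * take effect for threads that would have taken that branch.
 */
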
static bool
valid_dest(nir_block *block, nir_dest *dest)
{
	/* It must be SSA */
	if (!dest->is_ssa)
		return false;

	/* We only lower blocks that do not contain other blocks
	 * (so this is run iteratively in a loop).  Therefore if
	 * we get this far, it should not have any if_uses:
	 */
	assert(list_empty(&dest->ssa.if_uses));

	/* The only uses of this definition must be in the current
	 * block, or phis in the successor block:
	 */
	nir_foreach_use(use, &dest->ssa) {
		nir_instr *dest_instr = use->parent_instr;
		if (dest_instr->block == block)
			continue;
		if ((dest_instr->type == nir_instr_type_phi) &&
				(dest_instr->block == block->successors[0]))
			continue;
		return false;
	}

	return true;
}

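/* Check whether a block contains only instructions that are safe to
 * execute unconditionally once the if/else is flattened: discard and
 * discard_if (which flatten_block() handles specially), other intrinsics
 * only if they are flagged as reorderable, and tex/phi/alu instructions
 * whose SSA destinations pass valid_dest().  load_const and undef always
 * have SSA dests and are always allowed.
 */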
static bool
block_check_for_allowed_instrs(nir_block *block)
{
	nir_foreach_instr(instr, block) {
		switch (instr->type) {
		case nir_instr_type_intrinsic: {
			nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
			const nir_intrinsic_info *info =
					&nir_intrinsic_infos[intr->intrinsic];

			switch (intr->intrinsic) {
			case nir_intrinsic_discard_if:
				/* to simplify things, we want discard_if src in ssa: */
				if (!intr->src[0].is_ssa)
					return false;
				/* fallthrough */
			case nir_intrinsic_discard:
				/* discard/discard_if can be reordered, but only
				 * with some special care
				 */
				break;
			case nir_intrinsic_store_output:
				/* TODO technically, if both if and else store
				 * the same output, we can hoist that out to
				 * the end of the block w/ a phi..
				 * In practice, the tgsi shaders we already get
				 * do this for us, so I think we don't need to
				 */
			default:
				if (!(info->flags & NIR_INTRINSIC_CAN_REORDER))
					return false;
			}

			break;
		}

		case nir_instr_type_tex: {
			nir_tex_instr *tex = nir_instr_as_tex(instr);
			if (!valid_dest(block, &tex->dest))
				return false;
			break;
		}
		case nir_instr_type_phi: {
			nir_phi_instr *phi = nir_instr_as_phi(instr);
			if (!valid_dest(block, &phi->dest))
				return false;
			break;
		}
		case nir_instr_type_alu: {
			nir_alu_instr *alu = nir_instr_as_alu(instr);
			if (!valid_dest(block, &alu->dest.dest))
				return false;
			break;
		}

		case nir_instr_type_load_const:
		case nir_instr_type_ssa_undef:
			break; /* always ssa dest */

		default:
			return false;
		}
	}

	return true;
}

/* flatten a then or else block into prev_block; discard/discard_if get
 * rewritten as a discard_if predicated on the branch condition:
 */
static void
flatten_block(nir_builder *bld, nir_block *if_block, nir_block *prev_block,
		nir_ssa_def *condition, bool invert)
{
	nir_foreach_instr_safe(instr, if_block) {
		if (instr->type == nir_instr_type_intrinsic) {
			nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
			if ((intr->intrinsic == nir_intrinsic_discard) ||
					(intr->intrinsic == nir_intrinsic_discard_if)) {
				nir_ssa_def *discard_cond;

				bld->cursor = nir_after_instr(
						nir_block_last_instr(prev_block));

				if (invert) {
					condition = nir_inot(bld, condition);
					invert = false;
				}

				if (intr->intrinsic == nir_intrinsic_discard) {
					discard_cond = condition;
				} else {
					assert(intr->src[0].is_ssa);
					/* discard_if gets rewritten with its src and'd
					 * with the branch condition:
					 */
					discard_cond = nir_iand(bld, condition, intr->src[0].ssa);
				}

				nir_intrinsic_instr *discard_if =
						nir_intrinsic_instr_create(bld->shader,
								nir_intrinsic_discard_if);
				discard_if->src[0] = nir_src_for_ssa(discard_cond);

				nir_instr_insert_after(nir_block_last_instr(prev_block),
						&discard_if->instr);
				nir_instr_remove(instr);
				instr = NULL;
			}
		}
		/* if not handled specially above, just move to prev block: */
		if (instr) {
			/* NOTE: exec_node_remove() is safe here (vs nir_instr_remove())
			 * since we are re-adding the instruction back into the prev
			 * block (so there are no dangling SSA uses):
			 */
			exec_node_remove(&instr->node);
			instr->block = prev_block;
			exec_list_push_tail(&prev_block->instr_list, &instr->node);
		}
	}
}

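/* Try to flatten the if/else that immediately precedes `block`: it must
 * have exactly one block on each side, both containing only instructions
 * allowed by block_check_for_allowed_instrs(), whose results are only
 * used locally or by phis in `block`.  Returns true if the if/else was
 * removed.
 */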
static bool
lower_if_else_block(nir_block *block, nir_builder *b, void *mem_ctx)
{
	/* If the block is empty, then it certainly doesn't have any phi nodes,
	 * so we can skip it.  This also ensures that we do an early skip on the
	 * end block of the function which isn't actually attached to the CFG.
	 */
	if (exec_list_is_empty(&block->instr_list))
		return false;

	if (nir_cf_node_is_first(&block->cf_node))
		return false;

	nir_cf_node *prev_node = nir_cf_node_prev(&block->cf_node);
	if (prev_node->type != nir_cf_node_if)
		return false;

	nir_if *if_stmt = nir_cf_node_as_if(prev_node);
	nir_block *then_block = nir_if_first_then_block(if_stmt);
	nir_block *else_block = nir_if_first_else_block(if_stmt);

	/* We can only have one block in each side ... */
	if (nir_if_last_then_block(if_stmt) != then_block ||
			nir_if_last_else_block(if_stmt) != else_block)
		return false;

	/* ... and those blocks must only contain "allowed" instructions. */
	if (!block_check_for_allowed_instrs(then_block) ||
			!block_check_for_allowed_instrs(else_block))
		return false;

	/* condition should be ssa too, which simplifies flatten_block: */
	if (!if_stmt->condition.is_ssa)
		return false;

	/* At this point, we know that the previous CFG node is an if/else
	 * statement whose then and else blocks contain only allowed
	 * instructions feeding phi nodes in this block.  We can flatten
	 * both sides into the block before the if and replace the phi
	 * nodes with selects.
	 */

	nir_block *prev_block = nir_cf_node_as_block(nir_cf_node_prev(prev_node));
	assert(prev_block->cf_node.type == nir_cf_node_block);

	/* First, we move the remaining instructions from the blocks to the
	 * block before.  There are a few things that need handling specially
	 * like discard/discard_if.
	 */
	flatten_block(b, then_block, prev_block,
			if_stmt->condition.ssa, false);
	flatten_block(b, else_block, prev_block,
			if_stmt->condition.ssa, true);

	nir_foreach_instr_safe(instr, block) {
		if (instr->type != nir_instr_type_phi)
			break;

		nir_phi_instr *phi = nir_instr_as_phi(instr);
		nir_alu_instr *sel = nir_alu_instr_create(mem_ctx, nir_op_bcsel);
		nir_src_copy(&sel->src[0].src, &if_stmt->condition, mem_ctx);
		/* Splat the condition to all channels */
		memset(sel->src[0].swizzle, 0, sizeof sel->src[0].swizzle);

		assert(exec_list_length(&phi->srcs) == 2);
		nir_foreach_phi_src(src, phi) {
			assert(src->pred == then_block || src->pred == else_block);
			assert(src->src.is_ssa);

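			/* bcsel takes the then-block value in src[1] and the
			 * else-block value in src[2]:
			 */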
			unsigned idx = src->pred == then_block ? 1 : 2;
			nir_src_copy(&sel->src[idx].src, &src->src, mem_ctx);
		}

		nir_ssa_dest_init(&sel->instr, &sel->dest.dest,
				phi->dest.ssa.num_components, 32, phi->dest.ssa.name);
		sel->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1;

		nir_ssa_def_rewrite_uses(&phi->dest.ssa,
				nir_src_for_ssa(&sel->dest.dest.ssa));

		nir_instr_insert_before(&phi->instr, &sel->instr);
		nir_instr_remove(&phi->instr);
	}

	nir_cf_node_remove(&if_stmt->cf_node);
	return true;
}

static bool
lower_if_else_impl(nir_function_impl *impl)
{
	void *mem_ctx = ralloc_parent(impl);
	nir_builder b;
	nir_builder_init(&b, impl);

	bool progress = false;
	nir_foreach_block_safe(block, impl) {
		progress |= lower_if_else_block(block, &b, mem_ctx);
	}

	if (progress)
		nir_metadata_preserve(impl, nir_metadata_none);

	return progress;
}

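/* Entry point.  This pass only flattens innermost if/else constructs
 * (see the note in valid_dest() about being run iteratively), so a
 * caller would typically drive it in a progress loop, e.g. (illustrative
 * sketch only, not the actual driver code):
 *
 *     bool progress;
 *     do {
 *         progress = ir3_nir_lower_if_else(shader);
 *     } while (progress);
 *
 * so that nested control flow gets flattened from the inside out.
 */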
bool
ir3_nir_lower_if_else(nir_shader *shader)
{
	bool progress = false;

	nir_foreach_function(function, shader) {
		if (function->impl)
			progress |= lower_if_else_impl(function->impl);
	}

	return progress;
}