1 /* 2 * Copyright 2015 Red Hat 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24 #include "nir.h" 25 #include "nir_control_flow.h" 26 27 /* Secret Decoder Ring: 28 * clone_foo(): 29 * Allocate and clone a foo. 30 * __clone_foo(): 31 * Clone body of foo (ie. parent class, embedded struct, etc) 32 */ 33 34 typedef struct { 35 /* True if we are cloning an entire shader. */ 36 bool global_clone; 37 38 /* If true allows the clone operation to fall back to the original pointer 39 * if no clone pointer is found in the remap table. This allows us to 40 * clone a loop body without having to add srcs from outside the loop to 41 * the remap table. This is useful for loop unrolling. 42 */ 43 bool allow_remap_fallback; 44 45 /* maps orig ptr -> cloned ptr: */ 46 struct hash_table *remap_table; 47 48 /* List of phi sources. */ 49 struct list_head phi_srcs; 50 51 /* new shader object, used as memctx for just about everything else: */ 52 nir_shader *ns; 53 } clone_state; 54 55 static void 56 init_clone_state(clone_state *state, struct hash_table *remap_table, 57 bool global, bool allow_remap_fallback) 58 { 59 state->global_clone = global; 60 state->allow_remap_fallback = allow_remap_fallback; 61 62 if (remap_table) { 63 state->remap_table = remap_table; 64 } else { 65 state->remap_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, 66 _mesa_key_pointer_equal); 67 } 68 69 list_inithead(&state->phi_srcs); 70 } 71 72 static void 73 free_clone_state(clone_state *state) 74 { 75 _mesa_hash_table_destroy(state->remap_table, NULL); 76 } 77 78 static inline void * 79 _lookup_ptr(clone_state *state, const void *ptr, bool global) 80 { 81 struct hash_entry *entry; 82 83 if (!ptr) 84 return NULL; 85 86 if (!state->global_clone && global) 87 return (void *)ptr; 88 89 entry = _mesa_hash_table_search(state->remap_table, ptr); 90 if (!entry) { 91 assert(state->allow_remap_fallback); 92 return (void *)ptr; 93 } 94 95 return entry->data; 96 } 97 98 static void 99 add_remap(clone_state *state, void *nptr, const void *ptr) 100 { 101 _mesa_hash_table_insert(state->remap_table, ptr, nptr); 102 } 103 104 static void * 105 remap_local(clone_state *state, const void *ptr) 106 { 107 return _lookup_ptr(state, ptr, false); 108 } 109 110 static void * 111 remap_global(clone_state *state, const void *ptr) 112 { 113 return _lookup_ptr(state, ptr, true); 114 } 115 116 static nir_register * 117 remap_reg(clone_state *state, const nir_register *reg) 118 { 119 return _lookup_ptr(state, reg, reg->is_global); 120 } 121 122 static nir_variable * 123 remap_var(clone_state *state, const nir_variable *var) 124 { 125 return _lookup_ptr(state, var, nir_variable_is_global(var)); 126 } 127 128 nir_constant * 129 nir_constant_clone(const nir_constant *c, nir_variable *nvar) 130 { 131 nir_constant *nc = ralloc(nvar, nir_constant); 132 133 memcpy(nc->values, c->values, sizeof(nc->values)); 134 nc->num_elements = c->num_elements; 135 nc->elements = ralloc_array(nvar, nir_constant *, c->num_elements); 136 for (unsigned i = 0; i < c->num_elements; i++) { 137 nc->elements[i] = nir_constant_clone(c->elements[i], nvar); 138 } 139 140 return nc; 141 } 142 143 /* NOTE: for cloning nir_variable's, bypass nir_variable_create to avoid 144 * having to deal with locals and globals separately: 145 */ 146 nir_variable * 147 nir_variable_clone(const nir_variable *var, nir_shader *shader) 148 { 149 nir_variable *nvar = rzalloc(shader, nir_variable); 150 151 nvar->type = var->type; 152 nvar->name = ralloc_strdup(nvar, var->name); 153 nvar->data = var->data; 154 nvar->num_state_slots = var->num_state_slots; 155 nvar->state_slots = ralloc_array(nvar, nir_state_slot, var->num_state_slots); 156 memcpy(nvar->state_slots, var->state_slots, 157 var->num_state_slots * sizeof(nir_state_slot)); 158 if (var->constant_initializer) { 159 nvar->constant_initializer = 160 nir_constant_clone(var->constant_initializer, nvar); 161 } 162 nvar->interface_type = var->interface_type; 163 164 return nvar; 165 } 166 167 static nir_variable * 168 clone_variable(clone_state *state, const nir_variable *var) 169 { 170 nir_variable *nvar = nir_variable_clone(var, state->ns); 171 add_remap(state, nvar, var); 172 173 return nvar; 174 } 175 176 /* clone list of nir_variable: */ 177 static void 178 clone_var_list(clone_state *state, struct exec_list *dst, 179 const struct exec_list *list) 180 { 181 exec_list_make_empty(dst); 182 foreach_list_typed(nir_variable, var, node, list) { 183 nir_variable *nvar = clone_variable(state, var); 184 exec_list_push_tail(dst, &nvar->node); 185 } 186 } 187 188 /* NOTE: for cloning nir_register's, bypass nir_global/local_reg_create() 189 * to avoid having to deal with locals and globals separately: 190 */ 191 static nir_register * 192 clone_register(clone_state *state, const nir_register *reg) 193 { 194 nir_register *nreg = rzalloc(state->ns, nir_register); 195 add_remap(state, nreg, reg); 196 197 nreg->num_components = reg->num_components; 198 nreg->bit_size = reg->bit_size; 199 nreg->num_array_elems = reg->num_array_elems; 200 nreg->index = reg->index; 201 nreg->name = ralloc_strdup(nreg, reg->name); 202 nreg->is_global = reg->is_global; 203 nreg->is_packed = reg->is_packed; 204 205 /* reconstructing uses/defs/if_uses handled by nir_instr_insert() */ 206 list_inithead(&nreg->uses); 207 list_inithead(&nreg->defs); 208 list_inithead(&nreg->if_uses); 209 210 return nreg; 211 } 212 213 /* clone list of nir_register: */ 214 static void 215 clone_reg_list(clone_state *state, struct exec_list *dst, 216 const struct exec_list *list) 217 { 218 exec_list_make_empty(dst); 219 foreach_list_typed(nir_register, reg, node, list) { 220 nir_register *nreg = clone_register(state, reg); 221 exec_list_push_tail(dst, &nreg->node); 222 } 223 } 224 225 static void 226 __clone_src(clone_state *state, void *ninstr_or_if, 227 nir_src *nsrc, const nir_src *src) 228 { 229 nsrc->is_ssa = src->is_ssa; 230 if (src->is_ssa) { 231 nsrc->ssa = remap_local(state, src->ssa); 232 } else { 233 nsrc->reg.reg = remap_reg(state, src->reg.reg); 234 if (src->reg.indirect) { 235 nsrc->reg.indirect = ralloc(ninstr_or_if, nir_src); 236 __clone_src(state, ninstr_or_if, nsrc->reg.indirect, src->reg.indirect); 237 } 238 nsrc->reg.base_offset = src->reg.base_offset; 239 } 240 } 241 242 static void 243 __clone_dst(clone_state *state, nir_instr *ninstr, 244 nir_dest *ndst, const nir_dest *dst) 245 { 246 ndst->is_ssa = dst->is_ssa; 247 if (dst->is_ssa) { 248 nir_ssa_dest_init(ninstr, ndst, dst->ssa.num_components, 249 dst->ssa.bit_size, dst->ssa.name); 250 add_remap(state, &ndst->ssa, &dst->ssa); 251 } else { 252 ndst->reg.reg = remap_reg(state, dst->reg.reg); 253 if (dst->reg.indirect) { 254 ndst->reg.indirect = ralloc(ninstr, nir_src); 255 __clone_src(state, ninstr, ndst->reg.indirect, dst->reg.indirect); 256 } 257 ndst->reg.base_offset = dst->reg.base_offset; 258 } 259 } 260 261 static nir_deref *clone_deref(clone_state *state, const nir_deref *deref, 262 nir_instr *ninstr, nir_deref *parent); 263 264 static nir_deref_var * 265 clone_deref_var(clone_state *state, const nir_deref_var *dvar, 266 nir_instr *ninstr) 267 { 268 nir_variable *nvar = remap_var(state, dvar->var); 269 nir_deref_var *ndvar = nir_deref_var_create(ninstr, nvar); 270 271 if (dvar->deref.child) 272 ndvar->deref.child = clone_deref(state, dvar->deref.child, 273 ninstr, &ndvar->deref); 274 275 return ndvar; 276 } 277 278 static nir_deref_array * 279 clone_deref_array(clone_state *state, const nir_deref_array *darr, 280 nir_instr *ninstr, nir_deref *parent) 281 { 282 nir_deref_array *ndarr = nir_deref_array_create(parent); 283 284 ndarr->deref.type = darr->deref.type; 285 if (darr->deref.child) 286 ndarr->deref.child = clone_deref(state, darr->deref.child, 287 ninstr, &ndarr->deref); 288 289 ndarr->deref_array_type = darr->deref_array_type; 290 ndarr->base_offset = darr->base_offset; 291 if (ndarr->deref_array_type == nir_deref_array_type_indirect) 292 __clone_src(state, ninstr, &ndarr->indirect, &darr->indirect); 293 294 return ndarr; 295 } 296 297 static nir_deref_struct * 298 clone_deref_struct(clone_state *state, const nir_deref_struct *dstr, 299 nir_instr *ninstr, nir_deref *parent) 300 { 301 nir_deref_struct *ndstr = nir_deref_struct_create(parent, dstr->index); 302 303 ndstr->deref.type = dstr->deref.type; 304 if (dstr->deref.child) 305 ndstr->deref.child = clone_deref(state, dstr->deref.child, 306 ninstr, &ndstr->deref); 307 308 return ndstr; 309 } 310 311 static nir_deref * 312 clone_deref(clone_state *state, const nir_deref *dref, 313 nir_instr *ninstr, nir_deref *parent) 314 { 315 switch (dref->deref_type) { 316 case nir_deref_type_array: 317 return &clone_deref_array(state, nir_deref_as_array(dref), 318 ninstr, parent)->deref; 319 case nir_deref_type_struct: 320 return &clone_deref_struct(state, nir_deref_as_struct(dref), 321 ninstr, parent)->deref; 322 default: 323 unreachable("bad deref type"); 324 return NULL; 325 } 326 } 327 328 static nir_alu_instr * 329 clone_alu(clone_state *state, const nir_alu_instr *alu) 330 { 331 nir_alu_instr *nalu = nir_alu_instr_create(state->ns, alu->op); 332 nalu->exact = alu->exact; 333 334 __clone_dst(state, &nalu->instr, &nalu->dest.dest, &alu->dest.dest); 335 nalu->dest.saturate = alu->dest.saturate; 336 nalu->dest.write_mask = alu->dest.write_mask; 337 338 for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { 339 __clone_src(state, &nalu->instr, &nalu->src[i].src, &alu->src[i].src); 340 nalu->src[i].negate = alu->src[i].negate; 341 nalu->src[i].abs = alu->src[i].abs; 342 memcpy(nalu->src[i].swizzle, alu->src[i].swizzle, 343 sizeof(nalu->src[i].swizzle)); 344 } 345 346 return nalu; 347 } 348 349 static nir_intrinsic_instr * 350 clone_intrinsic(clone_state *state, const nir_intrinsic_instr *itr) 351 { 352 nir_intrinsic_instr *nitr = 353 nir_intrinsic_instr_create(state->ns, itr->intrinsic); 354 355 unsigned num_variables = nir_intrinsic_infos[itr->intrinsic].num_variables; 356 unsigned num_srcs = nir_intrinsic_infos[itr->intrinsic].num_srcs; 357 358 if (nir_intrinsic_infos[itr->intrinsic].has_dest) 359 __clone_dst(state, &nitr->instr, &nitr->dest, &itr->dest); 360 361 nitr->num_components = itr->num_components; 362 memcpy(nitr->const_index, itr->const_index, sizeof(nitr->const_index)); 363 364 for (unsigned i = 0; i < num_variables; i++) { 365 nitr->variables[i] = clone_deref_var(state, itr->variables[i], 366 &nitr->instr); 367 } 368 369 for (unsigned i = 0; i < num_srcs; i++) 370 __clone_src(state, &nitr->instr, &nitr->src[i], &itr->src[i]); 371 372 return nitr; 373 } 374 375 static nir_load_const_instr * 376 clone_load_const(clone_state *state, const nir_load_const_instr *lc) 377 { 378 nir_load_const_instr *nlc = 379 nir_load_const_instr_create(state->ns, lc->def.num_components, 380 lc->def.bit_size); 381 382 memcpy(&nlc->value, &lc->value, sizeof(nlc->value)); 383 384 add_remap(state, &nlc->def, &lc->def); 385 386 return nlc; 387 } 388 389 static nir_ssa_undef_instr * 390 clone_ssa_undef(clone_state *state, const nir_ssa_undef_instr *sa) 391 { 392 nir_ssa_undef_instr *nsa = 393 nir_ssa_undef_instr_create(state->ns, sa->def.num_components, 394 sa->def.bit_size); 395 396 add_remap(state, &nsa->def, &sa->def); 397 398 return nsa; 399 } 400 401 static nir_tex_instr * 402 clone_tex(clone_state *state, const nir_tex_instr *tex) 403 { 404 nir_tex_instr *ntex = nir_tex_instr_create(state->ns, tex->num_srcs); 405 406 ntex->sampler_dim = tex->sampler_dim; 407 ntex->dest_type = tex->dest_type; 408 ntex->op = tex->op; 409 __clone_dst(state, &ntex->instr, &ntex->dest, &tex->dest); 410 for (unsigned i = 0; i < ntex->num_srcs; i++) { 411 ntex->src[i].src_type = tex->src[i].src_type; 412 __clone_src(state, &ntex->instr, &ntex->src[i].src, &tex->src[i].src); 413 } 414 ntex->coord_components = tex->coord_components; 415 ntex->is_array = tex->is_array; 416 ntex->is_shadow = tex->is_shadow; 417 ntex->is_new_style_shadow = tex->is_new_style_shadow; 418 ntex->component = tex->component; 419 420 ntex->texture_index = tex->texture_index; 421 if (tex->texture) 422 ntex->texture = clone_deref_var(state, tex->texture, &ntex->instr); 423 ntex->texture_array_size = tex->texture_array_size; 424 425 ntex->sampler_index = tex->sampler_index; 426 if (tex->sampler) 427 ntex->sampler = clone_deref_var(state, tex->sampler, &ntex->instr); 428 429 return ntex; 430 } 431 432 static nir_phi_instr * 433 clone_phi(clone_state *state, const nir_phi_instr *phi, nir_block *nblk) 434 { 435 nir_phi_instr *nphi = nir_phi_instr_create(state->ns); 436 437 __clone_dst(state, &nphi->instr, &nphi->dest, &phi->dest); 438 439 /* Cloning a phi node is a bit different from other instructions. The 440 * sources of phi instructions are the only time where we can use an SSA 441 * def before it is defined. In order to handle this, we just copy over 442 * the sources from the old phi instruction directly and then fix them up 443 * in a second pass once all the instrutions in the function have been 444 * properly cloned. 445 * 446 * In order to ensure that the copied sources (which are the same as the 447 * old phi instruction's sources for now) don't get inserted into the old 448 * shader's use-def lists, we have to add the phi instruction *before* we 449 * set up its sources. 450 */ 451 nir_instr_insert_after_block(nblk, &nphi->instr); 452 453 foreach_list_typed(nir_phi_src, src, node, &phi->srcs) { 454 nir_phi_src *nsrc = ralloc(nphi, nir_phi_src); 455 456 /* Just copy the old source for now. */ 457 memcpy(nsrc, src, sizeof(*src)); 458 459 /* Since we're not letting nir_insert_instr handle use/def stuff for us, 460 * we have to set the parent_instr manually. It doesn't really matter 461 * when we do it, so we might as well do it here. 462 */ 463 nsrc->src.parent_instr = &nphi->instr; 464 465 /* Stash it in the list of phi sources. We'll walk this list and fix up 466 * sources at the very end of clone_function_impl. 467 */ 468 list_add(&nsrc->src.use_link, &state->phi_srcs); 469 470 exec_list_push_tail(&nphi->srcs, &nsrc->node); 471 } 472 473 return nphi; 474 } 475 476 static nir_jump_instr * 477 clone_jump(clone_state *state, const nir_jump_instr *jmp) 478 { 479 nir_jump_instr *njmp = nir_jump_instr_create(state->ns, jmp->type); 480 481 return njmp; 482 } 483 484 static nir_call_instr * 485 clone_call(clone_state *state, const nir_call_instr *call) 486 { 487 nir_function *ncallee = remap_global(state, call->callee); 488 nir_call_instr *ncall = nir_call_instr_create(state->ns, ncallee); 489 490 for (unsigned i = 0; i < ncall->num_params; i++) 491 ncall->params[i] = clone_deref_var(state, call->params[i], &ncall->instr); 492 493 ncall->return_deref = clone_deref_var(state, call->return_deref, 494 &ncall->instr); 495 496 return ncall; 497 } 498 499 static nir_instr * 500 clone_instr(clone_state *state, const nir_instr *instr) 501 { 502 switch (instr->type) { 503 case nir_instr_type_alu: 504 return &clone_alu(state, nir_instr_as_alu(instr))->instr; 505 case nir_instr_type_intrinsic: 506 return &clone_intrinsic(state, nir_instr_as_intrinsic(instr))->instr; 507 case nir_instr_type_load_const: 508 return &clone_load_const(state, nir_instr_as_load_const(instr))->instr; 509 case nir_instr_type_ssa_undef: 510 return &clone_ssa_undef(state, nir_instr_as_ssa_undef(instr))->instr; 511 case nir_instr_type_tex: 512 return &clone_tex(state, nir_instr_as_tex(instr))->instr; 513 case nir_instr_type_phi: 514 unreachable("Cannot clone phis with clone_instr"); 515 case nir_instr_type_jump: 516 return &clone_jump(state, nir_instr_as_jump(instr))->instr; 517 case nir_instr_type_call: 518 return &clone_call(state, nir_instr_as_call(instr))->instr; 519 case nir_instr_type_parallel_copy: 520 unreachable("Cannot clone parallel copies"); 521 default: 522 unreachable("bad instr type"); 523 return NULL; 524 } 525 } 526 527 static nir_block * 528 clone_block(clone_state *state, struct exec_list *cf_list, const nir_block *blk) 529 { 530 /* Don't actually create a new block. Just use the one from the tail of 531 * the list. NIR guarantees that the tail of the list is a block and that 532 * no two blocks are side-by-side in the IR; It should be empty. 533 */ 534 nir_block *nblk = 535 exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node); 536 assert(nblk->cf_node.type == nir_cf_node_block); 537 assert(exec_list_is_empty(&nblk->instr_list)); 538 539 /* We need this for phi sources */ 540 add_remap(state, nblk, blk); 541 542 nir_foreach_instr(instr, blk) { 543 if (instr->type == nir_instr_type_phi) { 544 /* Phi instructions are a bit of a special case when cloning because 545 * we don't want inserting the instruction to automatically handle 546 * use/defs for us. Instead, we need to wait until all the 547 * blocks/instructions are in so that we can set their sources up. 548 */ 549 clone_phi(state, nir_instr_as_phi(instr), nblk); 550 } else { 551 nir_instr *ninstr = clone_instr(state, instr); 552 nir_instr_insert_after_block(nblk, ninstr); 553 } 554 } 555 556 return nblk; 557 } 558 559 static void 560 clone_cf_list(clone_state *state, struct exec_list *dst, 561 const struct exec_list *list); 562 563 static nir_if * 564 clone_if(clone_state *state, struct exec_list *cf_list, const nir_if *i) 565 { 566 nir_if *ni = nir_if_create(state->ns); 567 568 __clone_src(state, ni, &ni->condition, &i->condition); 569 570 nir_cf_node_insert_end(cf_list, &ni->cf_node); 571 572 clone_cf_list(state, &ni->then_list, &i->then_list); 573 clone_cf_list(state, &ni->else_list, &i->else_list); 574 575 return ni; 576 } 577 578 static nir_loop * 579 clone_loop(clone_state *state, struct exec_list *cf_list, const nir_loop *loop) 580 { 581 nir_loop *nloop = nir_loop_create(state->ns); 582 583 nir_cf_node_insert_end(cf_list, &nloop->cf_node); 584 585 clone_cf_list(state, &nloop->body, &loop->body); 586 587 return nloop; 588 } 589 590 /* clone list of nir_cf_node: */ 591 static void 592 clone_cf_list(clone_state *state, struct exec_list *dst, 593 const struct exec_list *list) 594 { 595 foreach_list_typed(nir_cf_node, cf, node, list) { 596 switch (cf->type) { 597 case nir_cf_node_block: 598 clone_block(state, dst, nir_cf_node_as_block(cf)); 599 break; 600 case nir_cf_node_if: 601 clone_if(state, dst, nir_cf_node_as_if(cf)); 602 break; 603 case nir_cf_node_loop: 604 clone_loop(state, dst, nir_cf_node_as_loop(cf)); 605 break; 606 default: 607 unreachable("bad cf type"); 608 } 609 } 610 } 611 612 /* After we've cloned almost everything, we have to walk the list of phi 613 * sources and fix them up. Thanks to loops, the block and SSA value for a 614 * phi source may not be defined when we first encounter it. Instead, we 615 * add it to the phi_srcs list and we fix it up here. 616 */ 617 static void 618 fixup_phi_srcs(clone_state *state) 619 { 620 list_for_each_entry_safe(nir_phi_src, src, &state->phi_srcs, src.use_link) { 621 src->pred = remap_local(state, src->pred); 622 623 /* Remove from this list */ 624 list_del(&src->src.use_link); 625 626 if (src->src.is_ssa) { 627 src->src.ssa = remap_local(state, src->src.ssa); 628 list_addtail(&src->src.use_link, &src->src.ssa->uses); 629 } else { 630 src->src.reg.reg = remap_reg(state, src->src.reg.reg); 631 list_addtail(&src->src.use_link, &src->src.reg.reg->uses); 632 } 633 } 634 assert(list_empty(&state->phi_srcs)); 635 } 636 637 void 638 nir_cf_list_clone(nir_cf_list *dst, nir_cf_list *src, nir_cf_node *parent, 639 struct hash_table *remap_table) 640 { 641 exec_list_make_empty(&dst->list); 642 dst->impl = src->impl; 643 644 if (exec_list_is_empty(&src->list)) 645 return; 646 647 clone_state state; 648 init_clone_state(&state, remap_table, false, true); 649 650 /* We use the same shader */ 651 state.ns = src->impl->function->shader; 652 653 /* The control-flow code assumes that the list of cf_nodes always starts 654 * and ends with a block. We start by adding an empty block. 655 */ 656 nir_block *nblk = nir_block_create(state.ns); 657 nblk->cf_node.parent = parent; 658 exec_list_push_tail(&dst->list, &nblk->cf_node.node); 659 660 clone_cf_list(&state, &dst->list, &src->list); 661 662 fixup_phi_srcs(&state); 663 } 664 665 static nir_function_impl * 666 clone_function_impl(clone_state *state, const nir_function_impl *fi) 667 { 668 nir_function_impl *nfi = nir_function_impl_create_bare(state->ns); 669 670 clone_var_list(state, &nfi->locals, &fi->locals); 671 clone_reg_list(state, &nfi->registers, &fi->registers); 672 nfi->reg_alloc = fi->reg_alloc; 673 674 nfi->num_params = fi->num_params; 675 nfi->params = ralloc_array(state->ns, nir_variable *, fi->num_params); 676 for (unsigned i = 0; i < fi->num_params; i++) { 677 nfi->params[i] = clone_variable(state, fi->params[i]); 678 } 679 if (fi->return_var) 680 nfi->return_var = clone_variable(state, fi->return_var); 681 682 assert(list_empty(&state->phi_srcs)); 683 684 clone_cf_list(state, &nfi->body, &fi->body); 685 686 fixup_phi_srcs(state); 687 688 /* All metadata is invalidated in the cloning process */ 689 nfi->valid_metadata = 0; 690 691 return nfi; 692 } 693 694 nir_function_impl * 695 nir_function_impl_clone(const nir_function_impl *fi) 696 { 697 clone_state state; 698 init_clone_state(&state, NULL, false, false); 699 700 /* We use the same shader */ 701 state.ns = fi->function->shader; 702 703 nir_function_impl *nfi = clone_function_impl(&state, fi); 704 705 free_clone_state(&state); 706 707 return nfi; 708 } 709 710 static nir_function * 711 clone_function(clone_state *state, const nir_function *fxn, nir_shader *ns) 712 { 713 assert(ns == state->ns); 714 nir_function *nfxn = nir_function_create(ns, fxn->name); 715 716 /* Needed for call instructions */ 717 add_remap(state, nfxn, fxn); 718 719 nfxn->num_params = fxn->num_params; 720 nfxn->params = ralloc_array(state->ns, nir_parameter, fxn->num_params); 721 memcpy(nfxn->params, fxn->params, sizeof(nir_parameter) * fxn->num_params); 722 723 nfxn->return_type = fxn->return_type; 724 725 /* At first glance, it looks like we should clone the function_impl here. 726 * However, call instructions need to be able to reference at least the 727 * function and those will get processed as we clone the function_impl's. 728 * We stop here and do function_impls as a second pass. 729 */ 730 731 return nfxn; 732 } 733 734 nir_shader * 735 nir_shader_clone(void *mem_ctx, const nir_shader *s) 736 { 737 clone_state state; 738 init_clone_state(&state, NULL, true, false); 739 740 nir_shader *ns = nir_shader_create(mem_ctx, s->stage, s->options, NULL); 741 state.ns = ns; 742 743 clone_var_list(&state, &ns->uniforms, &s->uniforms); 744 clone_var_list(&state, &ns->inputs, &s->inputs); 745 clone_var_list(&state, &ns->outputs, &s->outputs); 746 clone_var_list(&state, &ns->shared, &s->shared); 747 clone_var_list(&state, &ns->globals, &s->globals); 748 clone_var_list(&state, &ns->system_values, &s->system_values); 749 750 /* Go through and clone functions */ 751 foreach_list_typed(nir_function, fxn, node, &s->functions) 752 clone_function(&state, fxn, ns); 753 754 /* Only after all functions are cloned can we clone the actual function 755 * implementations. This is because nir_call_instr's need to reference the 756 * functions of other functions and we don't know what order the functions 757 * will have in the list. 758 */ 759 nir_foreach_function(fxn, s) { 760 nir_function *nfxn = remap_global(&state, fxn); 761 nfxn->impl = clone_function_impl(&state, fxn->impl); 762 nfxn->impl->function = nfxn; 763 } 764 765 clone_reg_list(&state, &ns->registers, &s->registers); 766 ns->reg_alloc = s->reg_alloc; 767 768 *ns->info = *s->info; 769 ns->info->name = ralloc_strdup(ns, ns->info->name); 770 if (ns->info->label) 771 ns->info->label = ralloc_strdup(ns, ns->info->label); 772 773 ns->num_inputs = s->num_inputs; 774 ns->num_uniforms = s->num_uniforms; 775 ns->num_outputs = s->num_outputs; 776 ns->num_shared = s->num_shared; 777 778 free_clone_state(&state); 779 780 return ns; 781 } 782