/*
 * Copyright 2014 Connor Abbott
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir_instr_set.h"
#include "nir_vla.h"

#define HASH(hash, data) _mesa_fnv32_1a_accumulate((hash), (data))

static uint32_t
hash_src(uint32_t hash, const nir_src *src)
{
   assert(src->is_ssa);
   hash = HASH(hash, src->ssa);
   return hash;
}

static uint32_t
hash_alu_src(uint32_t hash, const nir_alu_src *src, unsigned num_components)
{
   hash = HASH(hash, src->abs);
   hash = HASH(hash, src->negate);

   for (unsigned i = 0; i < num_components; i++)
      hash = HASH(hash, src->swizzle[i]);

   hash = hash_src(hash, &src->src);
   return hash;
}

static uint32_t
hash_alu(uint32_t hash, const nir_alu_instr *instr)
{
   hash = HASH(hash, instr->op);
   hash = HASH(hash, instr->dest.dest.ssa.num_components);
   hash = HASH(hash, instr->dest.dest.ssa.bit_size);
   /* We explicitly don't hash instr->exact. */

   if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
      assert(nir_op_infos[instr->op].num_inputs == 2);
      uint32_t hash0 = hash_alu_src(hash, &instr->src[0],
                                    nir_ssa_alu_instr_src_components(instr, 0));
      uint32_t hash1 = hash_alu_src(hash, &instr->src[1],
                                    nir_ssa_alu_instr_src_components(instr, 1));
      /* For commutative operations, we need some commutative way of
       * combining the hashes. One option would be to XOR them but that
       * means that anything with two identical sources will hash to 0 and
       * that's common enough we probably don't want the guaranteed
       * collision. Either addition or multiplication will also work.
       */
      hash = hash0 * hash1;
   } else {
      for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
         hash = hash_alu_src(hash, &instr->src[i],
                             nir_ssa_alu_instr_src_components(instr, i));
      }
   }

   return hash;
}
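
/* Illustration (added note, not from the original sources): because the two
 * per-source hashes of a commutative operation are combined with a
 * commutative multiply, swapping the sources does not change the result.
 * For example,
 *
 *    %x = fadd %a, %b      and      %y = fadd %b, %a
 *
 * hash to the same value, so both forms land in the same bucket and
 * nir_instrs_equal() below can match them in either source order.
 */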

static uint32_t
hash_load_const(uint32_t hash, const nir_load_const_instr *instr)
{
   hash = HASH(hash, instr->def.num_components);

   unsigned size = instr->def.num_components * (instr->def.bit_size / 8);
   hash = _mesa_fnv32_1a_accumulate_block(hash, instr->value.f32, size);

   return hash;
}

static int
cmp_phi_src(const void *data1, const void *data2)
{
   nir_phi_src *src1 = *(nir_phi_src **)data1;
   nir_phi_src *src2 = *(nir_phi_src **)data2;
   return src1->pred - src2->pred;
}

static uint32_t
hash_phi(uint32_t hash, const nir_phi_instr *instr)
{
   hash = HASH(hash, instr->instr.block);

   /* sort sources by predecessor, since the order shouldn't matter */
   unsigned num_preds = instr->instr.block->predecessors->entries;
   NIR_VLA(nir_phi_src *, srcs, num_preds);
   unsigned i = 0;
   nir_foreach_phi_src(src, instr) {
      srcs[i++] = src;
   }

   qsort(srcs, num_preds, sizeof(nir_phi_src *), cmp_phi_src);

   for (i = 0; i < num_preds; i++) {
      hash = hash_src(hash, &srcs[i]->src);
      hash = HASH(hash, srcs[i]->pred);
   }

   return hash;
}

static uint32_t
hash_intrinsic(uint32_t hash, const nir_intrinsic_instr *instr)
{
   const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
   hash = HASH(hash, instr->intrinsic);

   if (info->has_dest) {
      hash = HASH(hash, instr->dest.ssa.num_components);
      hash = HASH(hash, instr->dest.ssa.bit_size);
   }

   assert(info->num_variables == 0);

   hash = _mesa_fnv32_1a_accumulate_block(hash, instr->const_index,
                                          info->num_indices
                                          * sizeof(instr->const_index[0]));
   return hash;
}

static uint32_t
hash_tex(uint32_t hash, const nir_tex_instr *instr)
{
   hash = HASH(hash, instr->op);
   hash = HASH(hash, instr->num_srcs);

   for (unsigned i = 0; i < instr->num_srcs; i++) {
      hash = HASH(hash, instr->src[i].src_type);
      hash = hash_src(hash, &instr->src[i].src);
   }

   hash = HASH(hash, instr->coord_components);
   hash = HASH(hash, instr->sampler_dim);
   hash = HASH(hash, instr->is_array);
   hash = HASH(hash, instr->is_shadow);
   hash = HASH(hash, instr->is_new_style_shadow);
   unsigned component = instr->component;
   hash = HASH(hash, component);
   hash = HASH(hash, instr->texture_index);
   hash = HASH(hash, instr->texture_array_size);
   hash = HASH(hash, instr->sampler_index);

   assert(!instr->texture && !instr->sampler);

   return hash;
}

/* Computes a hash of an instruction for use in a hash table. Note that this
 * will only work for instructions where instr_can_rewrite() returns true, and
 * it should return identical hashes for two instructions that are the same
 * according to nir_instrs_equal().
 */

static uint32_t
hash_instr(const void *data)
{
   const nir_instr *instr = data;
   uint32_t hash = _mesa_fnv32_1a_offset_bias;

   switch (instr->type) {
   case nir_instr_type_alu:
      hash = hash_alu(hash, nir_instr_as_alu(instr));
      break;
   case nir_instr_type_load_const:
      hash = hash_load_const(hash, nir_instr_as_load_const(instr));
      break;
   case nir_instr_type_phi:
      hash = hash_phi(hash, nir_instr_as_phi(instr));
      break;
   case nir_instr_type_intrinsic:
      hash = hash_intrinsic(hash, nir_instr_as_intrinsic(instr));
      break;
   case nir_instr_type_tex:
      hash = hash_tex(hash, nir_instr_as_tex(instr));
      break;
   default:
      unreachable("Invalid instruction type");
   }

   return hash;
}

bool
nir_srcs_equal(nir_src src1, nir_src src2)
{
   if (src1.is_ssa) {
      if (src2.is_ssa) {
         return src1.ssa == src2.ssa;
      } else {
         return false;
      }
   } else {
      if (src2.is_ssa) {
         return false;
      } else {
         if ((src1.reg.indirect == NULL) != (src2.reg.indirect == NULL))
            return false;

         if (src1.reg.indirect) {
            if (!nir_srcs_equal(*src1.reg.indirect, *src2.reg.indirect))
               return false;
         }

         return src1.reg.reg == src2.reg.reg &&
                src1.reg.base_offset == src2.reg.base_offset;
      }
   }
}

bool
nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2,
                   unsigned src1, unsigned src2)
{
   if (alu1->src[src1].abs != alu2->src[src2].abs ||
       alu1->src[src1].negate != alu2->src[src2].negate)
      return false;

   for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src1); i++) {
      if (alu1->src[src1].swizzle[i] != alu2->src[src2].swizzle[i])
         return false;
   }

   return nir_srcs_equal(alu1->src[src1].src, alu2->src[src2].src);
}

/* Returns "true" if two instructions are equal. Note that this will only
 * work for the subset of instructions defined by instr_can_rewrite(). Also,
 * it should only return "true" for instructions that hash_instr() will return
 * the same hash for (ignoring collisions, of course).
 */

static bool
nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2)
{
   if (instr1->type != instr2->type)
      return false;

   switch (instr1->type) {
   case nir_instr_type_alu: {
      nir_alu_instr *alu1 = nir_instr_as_alu(instr1);
      nir_alu_instr *alu2 = nir_instr_as_alu(instr2);

      if (alu1->op != alu2->op)
         return false;

      /* TODO: We can probably actually do something more intelligent such
       * as allowing different numbers and taking a maximum or something
       * here */
      if (alu1->dest.dest.ssa.num_components != alu2->dest.dest.ssa.num_components)
         return false;

      if (alu1->dest.dest.ssa.bit_size != alu2->dest.dest.ssa.bit_size)
         return false;

      /* We explicitly don't compare instr->exact. */

      if (nir_op_infos[alu1->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
         assert(nir_op_infos[alu1->op].num_inputs == 2);
         return (nir_alu_srcs_equal(alu1, alu2, 0, 0) &&
                 nir_alu_srcs_equal(alu1, alu2, 1, 1)) ||
                (nir_alu_srcs_equal(alu1, alu2, 0, 1) &&
                 nir_alu_srcs_equal(alu1, alu2, 1, 0));
      } else {
         for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) {
            if (!nir_alu_srcs_equal(alu1, alu2, i, i))
               return false;
         }
      }
      return true;
   }
   case nir_instr_type_tex: {
      nir_tex_instr *tex1 = nir_instr_as_tex(instr1);
      nir_tex_instr *tex2 = nir_instr_as_tex(instr2);

      if (tex1->op != tex2->op)
         return false;

      if (tex1->num_srcs != tex2->num_srcs)
         return false;
      for (unsigned i = 0; i < tex1->num_srcs; i++) {
         if (tex1->src[i].src_type != tex2->src[i].src_type ||
             !nir_srcs_equal(tex1->src[i].src, tex2->src[i].src)) {
            return false;
         }
      }

      if (tex1->coord_components != tex2->coord_components ||
          tex1->sampler_dim != tex2->sampler_dim ||
          tex1->is_array != tex2->is_array ||
          tex1->is_shadow != tex2->is_shadow ||
          tex1->is_new_style_shadow != tex2->is_new_style_shadow ||
          tex1->component != tex2->component ||
          tex1->texture_index != tex2->texture_index ||
          tex1->texture_array_size != tex2->texture_array_size ||
          tex1->sampler_index != tex2->sampler_index) {
         return false;
      }

      /* Don't support un-lowered sampler derefs currently. */
      assert(!tex1->texture && !tex1->sampler &&
             !tex2->texture && !tex2->sampler);

      return true;
   }
   case nir_instr_type_load_const: {
      nir_load_const_instr *load1 = nir_instr_as_load_const(instr1);
      nir_load_const_instr *load2 = nir_instr_as_load_const(instr2);

      if (load1->def.num_components != load2->def.num_components)
         return false;

      if (load1->def.bit_size != load2->def.bit_size)
         return false;

      return memcmp(load1->value.f32, load2->value.f32,
                    load1->def.num_components * (load1->def.bit_size / 8u)) == 0;
   }
   case nir_instr_type_phi: {
      nir_phi_instr *phi1 = nir_instr_as_phi(instr1);
      nir_phi_instr *phi2 = nir_instr_as_phi(instr2);

      if (phi1->instr.block != phi2->instr.block)
         return false;

      nir_foreach_phi_src(src1, phi1) {
         nir_foreach_phi_src(src2, phi2) {
            if (src1->pred == src2->pred) {
               if (!nir_srcs_equal(src1->src, src2->src))
                  return false;

               break;
            }
         }
      }

      return true;
   }
   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intrinsic1 = nir_instr_as_intrinsic(instr1);
      nir_intrinsic_instr *intrinsic2 = nir_instr_as_intrinsic(instr2);
      const nir_intrinsic_info *info =
         &nir_intrinsic_infos[intrinsic1->intrinsic];

      if (intrinsic1->intrinsic != intrinsic2->intrinsic ||
          intrinsic1->num_components != intrinsic2->num_components)
         return false;

      if (info->has_dest && intrinsic1->dest.ssa.num_components !=
                            intrinsic2->dest.ssa.num_components)
         return false;

      if (info->has_dest && intrinsic1->dest.ssa.bit_size !=
                            intrinsic2->dest.ssa.bit_size)
         return false;

      for (unsigned i = 0; i < info->num_srcs; i++) {
         if (!nir_srcs_equal(intrinsic1->src[i], intrinsic2->src[i]))
            return false;
      }

      assert(info->num_variables == 0);

      for (unsigned i = 0; i < info->num_indices; i++) {
         if (intrinsic1->const_index[i] != intrinsic2->const_index[i])
            return false;
      }

      return true;
   }
   case nir_instr_type_call:
   case nir_instr_type_jump:
   case nir_instr_type_ssa_undef:
   case nir_instr_type_parallel_copy:
   default:
      unreachable("Invalid instruction type");
   }

   return false;
}

static bool
src_is_ssa(nir_src *src, void *data)
{
   (void) data;
   return src->is_ssa;
}

static bool
dest_is_ssa(nir_dest *dest, void *data)
{
   (void) data;
   return dest->is_ssa;
}

/* This function determines if uses of an instruction can safely be rewritten
 * to use another identical instruction instead. Note that this function must
 * be kept in sync with hash_instr() and nir_instrs_equal() -- only
 * instructions that pass this test will be handed on to those functions, and
 * conversely they must handle everything that this function returns true for.
 */

static bool
instr_can_rewrite(nir_instr *instr)
{
   /* We only handle SSA. */
   if (!nir_foreach_dest(instr, dest_is_ssa, NULL) ||
       !nir_foreach_src(instr, src_is_ssa, NULL))
      return false;

   switch (instr->type) {
   case nir_instr_type_alu:
   case nir_instr_type_load_const:
   case nir_instr_type_phi:
      return true;
   case nir_instr_type_tex: {
      nir_tex_instr *tex = nir_instr_as_tex(instr);

      /* Don't support un-lowered sampler derefs currently. */
      if (tex->texture || tex->sampler)
         return false;

      return true;
   }
   case nir_instr_type_intrinsic: {
      const nir_intrinsic_info *info =
         &nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic];
      return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
             (info->flags & NIR_INTRINSIC_CAN_REORDER) &&
             info->num_variables == 0; /* not implemented yet */
   }
   case nir_instr_type_call:
   case nir_instr_type_jump:
   case nir_instr_type_ssa_undef:
      return false;
   case nir_instr_type_parallel_copy:
   default:
      unreachable("Invalid instruction type");
   }

   return false;
}

static nir_ssa_def *
nir_instr_get_dest_ssa_def(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      assert(nir_instr_as_alu(instr)->dest.dest.is_ssa);
      return &nir_instr_as_alu(instr)->dest.dest.ssa;
   case nir_instr_type_load_const:
      return &nir_instr_as_load_const(instr)->def;
   case nir_instr_type_phi:
      assert(nir_instr_as_phi(instr)->dest.is_ssa);
      return &nir_instr_as_phi(instr)->dest.ssa;
   case nir_instr_type_intrinsic:
      assert(nir_instr_as_intrinsic(instr)->dest.is_ssa);
      return &nir_instr_as_intrinsic(instr)->dest.ssa;
   case nir_instr_type_tex:
      assert(nir_instr_as_tex(instr)->dest.is_ssa);
      return &nir_instr_as_tex(instr)->dest.ssa;
   default:
      unreachable("We never ask for any of these");
   }
}

static bool
cmp_func(const void *data1, const void *data2)
{
   return nir_instrs_equal(data1, data2);
}

struct set *
nir_instr_set_create(void *mem_ctx)
{
   return _mesa_set_create(mem_ctx, hash_instr, cmp_func);
}

void
nir_instr_set_destroy(struct set *instr_set)
{
   _mesa_set_destroy(instr_set, NULL);
}

bool
nir_instr_set_add_or_rewrite(struct set *instr_set, nir_instr *instr)
{
   if (!instr_can_rewrite(instr))
      return false;

   struct set_entry *entry = _mesa_set_search(instr_set, instr);
   if (entry) {
      nir_ssa_def *def = nir_instr_get_dest_ssa_def(instr);
      nir_instr *match = (nir_instr *) entry->key;
      nir_ssa_def *new_def = nir_instr_get_dest_ssa_def(match);

      /* It's safe to replace an exact instruction with an inexact one as
       * long as we make it exact. If we got here, the two instructions are
       * exactly identical in every other way so, once we've set the exact
       * bit, they are the same.
       */
      if (instr->type == nir_instr_type_alu && nir_instr_as_alu(instr)->exact)
         nir_instr_as_alu(match)->exact = true;

      nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(new_def));
      return true;
   }

   _mesa_set_add(instr_set, instr);
   return false;
}

void
nir_instr_set_remove(struct set *instr_set, nir_instr *instr)
{
   if (!instr_can_rewrite(instr))
      return;

   struct set_entry *entry = _mesa_set_search(instr_set, instr);
   if (entry)
      _mesa_set_remove(instr_set, entry);
}
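
/* Usage sketch (added note, illustrative only and not compiled here): a
 * CSE-style pass can drive this set by walking the dominance tree, adding
 * each instruction on the way down and removing a block's instructions again
 * on the way back up, so that only dominating definitions are ever candidates
 * for rewriting.  The real consumer of this interface is nir_opt_cse.c; the
 * helper names below (nir_foreach_instr_safe, nir_instr_remove,
 * block->dom_children) are assumed to match the NIR API of this vintage.
 *
 *    static bool
 *    cse_block(nir_block *block, struct set *instr_set)
 *    {
 *       bool progress = false;
 *
 *       nir_foreach_instr_safe(instr, block) {
 *          if (nir_instr_set_add_or_rewrite(instr_set, instr)) {
 *             // A dominating duplicate was found; its uses were rewritten,
 *             // so the redundant instruction can simply be removed.
 *             progress = true;
 *             nir_instr_remove(instr);
 *          }
 *       }
 *
 *       for (unsigned i = 0; i < block->num_dom_children; i++)
 *          progress |= cse_block(block->dom_children[i], instr_set);
 *
 *       // Leaving this block's dominance subtree: its instructions no
 *       // longer dominate anything we will visit, so drop them again.
 *       nir_foreach_instr(instr, block)
 *          nir_instr_set_remove(instr_set, instr);
 *
 *       return progress;
 *    }
 */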