1 /* 2 * Copyright 2013 Vadim Girlin <vadimgirlin (at) gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Vadim Girlin 25 */ 26 27 #define VT_DEBUG 0 28 29 #if VT_DEBUG 30 #define VT_DUMP(q) do { q } while (0) 31 #else 32 #define VT_DUMP(q) 33 #endif 34 35 #include <cstring> 36 37 #include "sb_shader.h" 38 #include "sb_pass.h" 39 40 namespace r600_sb { 41 42 static const char * chans = "xyzw01?_"; 43 44 sb_ostream& operator << (sb_ostream &o, value &v) { 45 46 bool dead = v.flags & VLF_DEAD; 47 48 if (dead) 49 o << "{"; 50 51 switch (v.kind) { 52 case VLK_SPECIAL_REG: { 53 switch (v.select.sel()) { 54 case SV_AR_INDEX: o << "AR"; break; 55 case SV_ALU_PRED: o << "PR"; break; 56 case SV_EXEC_MASK: o << "EM"; break; 57 case SV_VALID_MASK: o << "VM"; break; 58 case SV_GEOMETRY_EMIT: o << "GEOMETRY_EMIT"; break; 59 case SV_LDS_RW: o << "LDS_RW"; break; 60 case SV_LDS_OQA: o << "LDS_OQA"; break; 61 case SV_LDS_OQB: o << "LDS_OQB"; break; 62 default: o << "???specialreg"; break; 63 } 64 break; 65 } 66 67 case VLK_REG: 68 o << "R" << v.select.sel() << "." 69 << chans[v.select.chan()]; 70 71 break; 72 case VLK_KCACHE: { 73 o << "C" << v.select.sel() << "." << chans[v.select.chan()]; 74 } 75 break; 76 case VLK_CONST: 77 o << v.literal_value.f << "|"; 78 o.print_zw_hex(v.literal_value.u, 8); 79 break; 80 case VLK_PARAM: 81 o << "Param" << (v.select.sel() - ALU_SRC_PARAM_OFFSET) 82 << chans[v.select.chan()]; 83 break; 84 case VLK_TEMP: 85 o << "t" << v.select.sel() - shader::temp_regid_offset; 86 break; 87 case VLK_REL_REG: 88 89 o << "A" << v.select; 90 o << "["; 91 o << *v.rel; 92 o << "]"; 93 94 o << "_" << v.uid; 95 96 break; 97 case VLK_UNDEF: 98 o << "undef"; 99 break; 100 default: 101 o << v.kind << "?????"; 102 break; 103 } 104 105 if (v.version) 106 o << "." << v.version; 107 108 if (dead) 109 o << "}"; 110 111 if (v.is_global()) 112 o << "||"; 113 if (v.is_fixed()) 114 o << "F"; 115 if (v.is_prealloc()) 116 o << "P"; 117 118 sel_chan g; 119 120 if (v.is_rel()) { 121 g = v.array->gpr; 122 } else { 123 g = v.gpr; 124 } 125 126 if (g) { 127 o << "@R" << g.sel() << "." << chans[g.chan()]; 128 } 129 130 return o; 131 } 132 133 void value_table::add_value(value* v) { 134 135 if (v->gvn_source) { 136 return; 137 } 138 139 VT_DUMP( 140 sblog << "gvn add_value "; 141 dump::dump_val(v); 142 ); 143 144 value_hash hash = v->hash(); 145 vt_item & vti = hashtable[hash & size_mask]; 146 vti.push_back(v); 147 ++cnt; 148 149 if (v->def && ex.try_fold(v)) { 150 VT_DUMP( 151 sblog << " folded: "; 152 dump::dump_val(v->gvn_source); 153 sblog << "\n"; 154 ); 155 return; 156 } 157 158 int n = 0; 159 for (vt_item::iterator I = vti.begin(), E = vti.end(); I != E; ++I, ++n) { 160 value *c = *I; 161 162 if (c == v) 163 break; 164 165 if (expr_equal(c, v)) { 166 v->gvn_source = c->gvn_source; 167 168 VT_DUMP( 169 sblog << " found : equal to "; 170 dump::dump_val(v->gvn_source); 171 sblog << "\n"; 172 ); 173 return; 174 } 175 } 176 177 v->gvn_source = v; 178 VT_DUMP( 179 sblog << " added new\n"; 180 ); 181 } 182 183 value_hash value::hash() { 184 if (ghash) 185 return ghash; 186 if (is_rel()) 187 ghash = rel_hash(); 188 else if (def) 189 ghash = def->hash(); 190 else 191 ghash = ((uintptr_t)this) | 1; 192 193 return ghash; 194 } 195 196 value_hash value::rel_hash() { 197 value_hash h = rel ? rel->hash() : 0; 198 h |= select << 10; 199 h |= array->hash(); 200 return h; 201 } 202 203 bool value_table::expr_equal(value* l, value* r) { 204 return ex.equal(l, r); 205 } 206 207 void value_table::get_values(vvec& v) { 208 v.resize(cnt); 209 210 vvec::iterator T = v.begin(); 211 212 for(vt_table::iterator I = hashtable.begin(), E = hashtable.end(); 213 I != E; ++I) { 214 T = std::copy(I->begin(), I->end(), T); 215 } 216 } 217 218 void value::add_use(node* n) { 219 if (0) { 220 sblog << "add_use "; 221 dump::dump_val(this); 222 sblog << " => "; 223 dump::dump_op(n); 224 } 225 uses.push_back(n); 226 } 227 228 struct use_node_comp { 229 explicit use_node_comp(const node *n) : n(n) {} 230 bool operator() (const node *o) { 231 return o->hash() == n->hash(); 232 } 233 234 private: 235 const node *n; 236 }; 237 238 void value::remove_use(const node *n) { 239 uselist::iterator it = 240 std::find_if(uses.begin(), uses.end(), use_node_comp(n)); 241 242 if (it != uses.end()) 243 { 244 // We only ever had a pointer, so don't delete it here 245 uses.erase(it); 246 } 247 } 248 249 unsigned value::use_count() { 250 return uses.size(); 251 } 252 253 bool value::is_global() { 254 if (chunk) 255 return chunk->is_global(); 256 return flags & VLF_GLOBAL; 257 } 258 259 void value::set_global() { 260 assert(is_sgpr()); 261 flags |= VLF_GLOBAL; 262 if (chunk) 263 chunk->set_global(); 264 } 265 266 void value::set_prealloc() { 267 assert(is_sgpr()); 268 flags |= VLF_PREALLOC; 269 if (chunk) 270 chunk->set_prealloc(); 271 } 272 273 bool value::is_fixed() { 274 if (array && array->gpr) 275 return true; 276 if (chunk && chunk->is_fixed()) 277 return true; 278 return flags & VLF_FIXED; 279 } 280 281 void value::fix() { 282 if (chunk) 283 chunk->fix(); 284 flags |= VLF_FIXED; 285 } 286 287 bool value::is_prealloc() { 288 if (chunk) 289 return chunk->is_prealloc(); 290 return flags & VLF_PREALLOC; 291 } 292 293 void value::delete_uses() { 294 // We only ever had pointers, so don't delete them here 295 uses.erase(uses.begin(), uses.end()); 296 } 297 298 void ra_constraint::update_values() { 299 for (vvec::iterator I = values.begin(), E = values.end(); I != E; ++I) { 300 assert(!(*I)->constraint); 301 (*I)->constraint = this; 302 } 303 } 304 305 void* sb_pool::allocate(unsigned sz) { 306 sz = (sz + SB_POOL_ALIGN - 1) & ~(SB_POOL_ALIGN - 1); 307 assert (sz < (block_size >> 6) && "too big allocation size for sb_pool"); 308 309 unsigned offset = total_size % block_size; 310 unsigned capacity = block_size * blocks.size(); 311 312 if (total_size + sz > capacity) { 313 total_size = capacity; 314 void * nb = malloc(block_size); 315 blocks.push_back(nb); 316 offset = 0; 317 } 318 319 total_size += sz; 320 return ((char*)blocks.back() + offset); 321 } 322 323 void sb_pool::free_all() { 324 for (block_vector::iterator I = blocks.begin(), E = blocks.end(); I != E; 325 ++I) { 326 free(*I); 327 } 328 } 329 330 value* sb_value_pool::create(value_kind k, sel_chan regid, 331 unsigned ver) { 332 void* np = allocate(aligned_elt_size); 333 value *v = new (np) value(size(), k, regid, ver); 334 return v; 335 } 336 337 void sb_value_pool::delete_all() { 338 unsigned bcnt = blocks.size(); 339 unsigned toffset = 0; 340 for (unsigned b = 0; b < bcnt; ++b) { 341 char *bstart = (char*)blocks[b]; 342 for (unsigned offset = 0; offset < block_size; 343 offset += aligned_elt_size) { 344 ((value*)(bstart + offset))->~value(); 345 toffset += aligned_elt_size; 346 if (toffset >= total_size) 347 return; 348 } 349 } 350 } 351 352 bool sb_bitset::get(unsigned id) { 353 assert(id < bit_size); 354 unsigned w = id / bt_bits; 355 unsigned b = id % bt_bits; 356 return (data[w] >> b) & 1; 357 } 358 359 void sb_bitset::set(unsigned id, bool bit) { 360 assert(id < bit_size); 361 unsigned w = id / bt_bits; 362 unsigned b = id % bt_bits; 363 if (w >= data.size()) 364 data.resize(w + 1); 365 366 if (bit) 367 data[w] |= (1 << b); 368 else 369 data[w] &= ~(1 << b); 370 } 371 372 inline bool sb_bitset::set_chk(unsigned id, bool bit) { 373 assert(id < bit_size); 374 unsigned w = id / bt_bits; 375 unsigned b = id % bt_bits; 376 basetype d = data[w]; 377 basetype dn = (d & ~(1 << b)) | (bit << b); 378 bool r = (d != dn); 379 data[w] = r ? dn : data[w]; 380 return r; 381 } 382 383 void sb_bitset::clear() { 384 std::fill(data.begin(), data.end(), 0); 385 } 386 387 void sb_bitset::resize(unsigned size) { 388 unsigned cur_data_size = data.size(); 389 unsigned new_data_size = (size + bt_bits - 1) / bt_bits; 390 391 392 if (new_data_size != cur_data_size) 393 data.resize(new_data_size); 394 395 // make sure that new bits in the existing word are cleared 396 if (cur_data_size && size > bit_size && bit_size % bt_bits) { 397 basetype clear_mask = (~(basetype)0u) << (bit_size % bt_bits); 398 data[cur_data_size - 1] &= ~clear_mask; 399 } 400 401 bit_size = size; 402 } 403 404 unsigned sb_bitset::find_bit(unsigned start) { 405 assert(start < bit_size); 406 unsigned w = start / bt_bits; 407 unsigned b = start % bt_bits; 408 unsigned sz = data.size(); 409 410 while (w < sz) { 411 basetype d = data[w] >> b; 412 if (d != 0) { 413 unsigned pos = __builtin_ctz(d) + b + w * bt_bits; 414 return pos; 415 } 416 417 b = 0; 418 ++w; 419 } 420 421 return bit_size; 422 } 423 424 sb_value_set::iterator::iterator(shader& sh, sb_value_set* s, unsigned nb) 425 : vp(sh.get_value_pool()), s(s), nb(nb) {} 426 427 bool sb_value_set::add_set_checked(sb_value_set& s2) { 428 if (bs.size() < s2.bs.size()) 429 bs.resize(s2.bs.size()); 430 sb_bitset nbs = bs | s2.bs; 431 if (bs != nbs) { 432 bs.swap(nbs); 433 return true; 434 } 435 return false; 436 } 437 438 void r600_sb::sb_value_set::remove_set(sb_value_set& s2) { 439 bs.mask(s2.bs); 440 } 441 442 bool sb_value_set::add_val(value* v) { 443 assert(v); 444 if (bs.size() < v->uid) 445 bs.resize(v->uid + 32); 446 447 return bs.set_chk(v->uid - 1, 1); 448 } 449 450 bool sb_value_set::remove_vec(vvec& vv) { 451 bool modified = false; 452 for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { 453 if (*I) 454 modified |= remove_val(*I); 455 } 456 return modified; 457 } 458 459 void sb_value_set::clear() { 460 bs.clear(); 461 } 462 463 bool sb_value_set::remove_val(value* v) { 464 assert(v); 465 if (bs.size() < v->uid) 466 return false; 467 return bs.set_chk(v->uid - 1, 0); 468 } 469 470 bool r600_sb::sb_value_set::add_vec(vvec& vv) { 471 bool modified = false; 472 for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { 473 value *v = *I; 474 if (v) 475 modified |= add_val(v); 476 } 477 return modified; 478 } 479 480 bool r600_sb::sb_value_set::contains(value* v) { 481 unsigned b = v->uid - 1; 482 if (b < bs.size()) 483 return bs.get(b); 484 else 485 return false; 486 } 487 488 bool sb_value_set::empty() { 489 return bs.size() == 0 || bs.find_bit(0) == bs.size(); 490 } 491 492 void sb_bitset::swap(sb_bitset& bs2) { 493 std::swap(data, bs2.data); 494 std::swap(bit_size, bs2.bit_size); 495 } 496 497 bool sb_bitset::operator ==(const sb_bitset& bs2) { 498 if (bit_size != bs2.bit_size) 499 return false; 500 501 for (unsigned i = 0, c = data.size(); i < c; ++i) { 502 if (data[i] != bs2.data[i]) 503 return false; 504 } 505 return true; 506 } 507 508 sb_bitset& sb_bitset::operator &=(const sb_bitset& bs2) { 509 if (bit_size > bs2.bit_size) { 510 resize(bs2.bit_size); 511 } 512 513 for (unsigned i = 0, c = std::min(data.size(), bs2.data.size()); i < c; 514 ++i) { 515 data[i] &= bs2.data[i]; 516 } 517 return *this; 518 } 519 520 sb_bitset& sb_bitset::mask(const sb_bitset& bs2) { 521 if (bit_size < bs2.bit_size) { 522 resize(bs2.bit_size); 523 } 524 525 for (unsigned i = 0, c = data.size(); i < c; 526 ++i) { 527 data[i] &= ~bs2.data[i]; 528 } 529 return *this; 530 } 531 532 bool ra_constraint::check() { 533 assert(kind == CK_SAME_REG); 534 535 unsigned reg = 0; 536 537 for (vvec::iterator I = values.begin(), E = values.end(); I != E; ++I) { 538 value *v = *I; 539 if (!v) 540 continue; 541 542 if (!v->gpr) 543 return false; 544 545 if (reg == 0) 546 reg = v->gpr.sel() + 1; 547 else if (reg != v->gpr.sel() + 1) 548 return false; 549 550 if (v->is_chan_pinned()) { 551 if (v->pin_gpr.chan() != v->gpr.chan()) 552 return false; 553 } 554 } 555 return true; 556 } 557 558 bool gpr_array::is_dead() { 559 return false; 560 } 561 562 } // namespace r600_sb 563