1 /* 2 * Copyright (C) 2009 Nicolai Haehnle. 3 * Copyright 2010 Tom Stellard <tstellar (at) gmail.com> 4 * 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining 8 * a copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sublicense, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial 17 * portions of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 */ 28 29 #include "radeon_dataflow.h" 30 31 #include "radeon_compiler.h" 32 #include "radeon_compiler_util.h" 33 #include "radeon_list.h" 34 #include "radeon_swizzle.h" 35 #include "radeon_variable.h" 36 37 struct src_clobbered_reads_cb_data { 38 rc_register_file File; 39 unsigned int Index; 40 unsigned int Mask; 41 struct rc_reader_data * ReaderData; 42 }; 43 44 typedef void (*rc_presub_replace_fn)(struct rc_instruction *, 45 struct rc_instruction *, 46 unsigned int); 47 48 static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner) 49 { 50 struct rc_src_register combine; 51 combine.File = inner.File; 52 combine.Index = inner.Index; 53 combine.RelAddr = inner.RelAddr; 54 if (outer.Abs) { 55 combine.Abs = 1; 56 combine.Negate = outer.Negate; 57 } else { 58 combine.Abs = inner.Abs; 59 combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate); 60 combine.Negate ^= outer.Negate; 61 } 62 combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle); 63 return combine; 64 } 65 66 static void copy_propagate_scan_read(void * data, struct rc_instruction * inst, 67 struct rc_src_register * src) 68 { 69 rc_register_file file = src->File; 70 struct rc_reader_data * reader_data = data; 71 72 if(!rc_inst_can_use_presub(inst, 73 reader_data->Writer->U.I.PreSub.Opcode, 74 rc_swizzle_to_writemask(src->Swizzle), 75 src, 76 &reader_data->Writer->U.I.PreSub.SrcReg[0], 77 &reader_data->Writer->U.I.PreSub.SrcReg[1])) { 78 reader_data->Abort = 1; 79 return; 80 } 81 82 /* XXX This could probably be handled better. */ 83 if (file == RC_FILE_ADDRESS) { 84 reader_data->Abort = 1; 85 return; 86 } 87 88 /* These instructions cannot read from the constants file. 89 * see radeonTransformTEX() 90 */ 91 if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && 92 reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT && 93 (inst->U.I.Opcode == RC_OPCODE_TEX || 94 inst->U.I.Opcode == RC_OPCODE_TXB || 95 inst->U.I.Opcode == RC_OPCODE_TXP || 96 inst->U.I.Opcode == RC_OPCODE_TXD || 97 inst->U.I.Opcode == RC_OPCODE_TXL || 98 inst->U.I.Opcode == RC_OPCODE_KIL)){ 99 reader_data->Abort = 1; 100 return; 101 } 102 } 103 104 static void src_clobbered_reads_cb( 105 void * data, 106 struct rc_instruction * inst, 107 struct rc_src_register * src) 108 { 109 struct src_clobbered_reads_cb_data * sc_data = data; 110 111 if (src->File == sc_data->File 112 && src->Index == sc_data->Index 113 && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) { 114 115 sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; 116 } 117 118 if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) { 119 sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; 120 } 121 } 122 123 static void is_src_clobbered_scan_write( 124 void * data, 125 struct rc_instruction * inst, 126 rc_register_file file, 127 unsigned int index, 128 unsigned int mask) 129 { 130 struct src_clobbered_reads_cb_data sc_data; 131 struct rc_reader_data * reader_data = data; 132 sc_data.File = file; 133 sc_data.Index = index; 134 sc_data.Mask = mask; 135 sc_data.ReaderData = reader_data; 136 rc_for_all_reads_src(reader_data->Writer, 137 src_clobbered_reads_cb, &sc_data); 138 } 139 140 static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov) 141 { 142 struct rc_reader_data reader_data; 143 unsigned int i; 144 145 if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || 146 inst_mov->U.I.WriteALUResult || 147 inst_mov->U.I.SaturateMode) 148 return; 149 150 /* Get a list of all the readers of this MOV instruction. */ 151 reader_data.ExitOnAbort = 1; 152 rc_get_readers(c, inst_mov, &reader_data, 153 copy_propagate_scan_read, NULL, 154 is_src_clobbered_scan_write); 155 156 if (reader_data.Abort || reader_data.ReaderCount == 0) 157 return; 158 159 /* Propagate the MOV instruction. */ 160 for (i = 0; i < reader_data.ReaderCount; i++) { 161 struct rc_instruction * inst = reader_data.Readers[i].Inst; 162 *reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]); 163 164 if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB) 165 inst->U.I.PreSub = inst_mov->U.I.PreSub; 166 } 167 168 /* Finally, remove the original MOV instruction */ 169 rc_remove_instruction(inst_mov); 170 } 171 172 /** 173 * Check if a source register is actually always the same 174 * swizzle constant. 175 */ 176 static int is_src_uniform_constant(struct rc_src_register src, 177 rc_swizzle * pswz, unsigned int * pnegate) 178 { 179 int have_used = 0; 180 181 if (src.File != RC_FILE_NONE) { 182 *pswz = 0; 183 return 0; 184 } 185 186 for(unsigned int chan = 0; chan < 4; ++chan) { 187 unsigned int swz = GET_SWZ(src.Swizzle, chan); 188 if (swz < 4) { 189 *pswz = 0; 190 return 0; 191 } 192 if (swz == RC_SWIZZLE_UNUSED) 193 continue; 194 195 if (!have_used) { 196 *pswz = swz; 197 *pnegate = GET_BIT(src.Negate, chan); 198 have_used = 1; 199 } else { 200 if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) { 201 *pswz = 0; 202 return 0; 203 } 204 } 205 } 206 207 return 1; 208 } 209 210 static void constant_folding_mad(struct rc_instruction * inst) 211 { 212 rc_swizzle swz = 0; 213 unsigned int negate= 0; 214 215 if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) { 216 if (swz == RC_SWIZZLE_ZERO) { 217 inst->U.I.Opcode = RC_OPCODE_MUL; 218 return; 219 } 220 } 221 222 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { 223 if (swz == RC_SWIZZLE_ONE) { 224 inst->U.I.Opcode = RC_OPCODE_ADD; 225 if (negate) 226 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; 227 inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2]; 228 return; 229 } else if (swz == RC_SWIZZLE_ZERO) { 230 inst->U.I.Opcode = RC_OPCODE_MOV; 231 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; 232 return; 233 } 234 } 235 236 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { 237 if (swz == RC_SWIZZLE_ONE) { 238 inst->U.I.Opcode = RC_OPCODE_ADD; 239 if (negate) 240 inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; 241 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; 242 return; 243 } else if (swz == RC_SWIZZLE_ZERO) { 244 inst->U.I.Opcode = RC_OPCODE_MOV; 245 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; 246 return; 247 } 248 } 249 } 250 251 static void constant_folding_mul(struct rc_instruction * inst) 252 { 253 rc_swizzle swz = 0; 254 unsigned int negate = 0; 255 256 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { 257 if (swz == RC_SWIZZLE_ONE) { 258 inst->U.I.Opcode = RC_OPCODE_MOV; 259 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; 260 if (negate) 261 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; 262 return; 263 } else if (swz == RC_SWIZZLE_ZERO) { 264 inst->U.I.Opcode = RC_OPCODE_MOV; 265 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; 266 return; 267 } 268 } 269 270 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { 271 if (swz == RC_SWIZZLE_ONE) { 272 inst->U.I.Opcode = RC_OPCODE_MOV; 273 if (negate) 274 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; 275 return; 276 } else if (swz == RC_SWIZZLE_ZERO) { 277 inst->U.I.Opcode = RC_OPCODE_MOV; 278 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; 279 return; 280 } 281 } 282 } 283 284 static void constant_folding_add(struct rc_instruction * inst) 285 { 286 rc_swizzle swz = 0; 287 unsigned int negate = 0; 288 289 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { 290 if (swz == RC_SWIZZLE_ZERO) { 291 inst->U.I.Opcode = RC_OPCODE_MOV; 292 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; 293 return; 294 } 295 } 296 297 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { 298 if (swz == RC_SWIZZLE_ZERO) { 299 inst->U.I.Opcode = RC_OPCODE_MOV; 300 return; 301 } 302 } 303 } 304 305 /** 306 * Replace 0.0, 1.0 and 0.5 immediate constants by their 307 * respective swizzles. Simplify instructions like ADD dst, src, 0; 308 */ 309 static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst) 310 { 311 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); 312 unsigned int i; 313 314 /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */ 315 for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { 316 struct rc_constant * constant; 317 struct rc_src_register newsrc; 318 int have_real_reference; 319 unsigned int chan; 320 321 /* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */ 322 for (chan = 0; chan < 4; ++chan) 323 if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3) 324 break; 325 if (chan == 4) { 326 inst->U.I.SrcReg[src].File = RC_FILE_NONE; 327 continue; 328 } 329 330 /* Convert immediates to swizzles. */ 331 if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT || 332 inst->U.I.SrcReg[src].RelAddr || 333 inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count) 334 continue; 335 336 constant = 337 &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index]; 338 339 if (constant->Type != RC_CONSTANT_IMMEDIATE) 340 continue; 341 342 newsrc = inst->U.I.SrcReg[src]; 343 have_real_reference = 0; 344 for (chan = 0; chan < 4; ++chan) { 345 unsigned int swz = GET_SWZ(newsrc.Swizzle, chan); 346 unsigned int newswz; 347 float imm; 348 float baseimm; 349 350 if (swz >= 4) 351 continue; 352 353 imm = constant->u.Immediate[swz]; 354 baseimm = imm; 355 if (imm < 0.0) 356 baseimm = -baseimm; 357 358 if (baseimm == 0.0) { 359 newswz = RC_SWIZZLE_ZERO; 360 } else if (baseimm == 1.0) { 361 newswz = RC_SWIZZLE_ONE; 362 } else if (baseimm == 0.5 && c->has_half_swizzles) { 363 newswz = RC_SWIZZLE_HALF; 364 } else { 365 have_real_reference = 1; 366 continue; 367 } 368 369 SET_SWZ(newsrc.Swizzle, chan, newswz); 370 if (imm < 0.0 && !newsrc.Abs) 371 newsrc.Negate ^= 1 << chan; 372 } 373 374 if (!have_real_reference) { 375 newsrc.File = RC_FILE_NONE; 376 newsrc.Index = 0; 377 } 378 379 /* don't make the swizzle worse */ 380 if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) && 381 c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src])) 382 continue; 383 384 inst->U.I.SrcReg[src] = newsrc; 385 } 386 387 /* Simplify instructions based on constants */ 388 if (inst->U.I.Opcode == RC_OPCODE_MAD) 389 constant_folding_mad(inst); 390 391 /* note: MAD can simplify to MUL or ADD */ 392 if (inst->U.I.Opcode == RC_OPCODE_MUL) 393 constant_folding_mul(inst); 394 else if (inst->U.I.Opcode == RC_OPCODE_ADD) 395 constant_folding_add(inst); 396 397 /* In case this instruction has been converted, make sure all of the 398 * registers that are no longer used are empty. */ 399 opcode = rc_get_opcode_info(inst->U.I.Opcode); 400 for(i = opcode->NumSrcRegs; i < 3; i++) { 401 memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register)); 402 } 403 } 404 405 /** 406 * If src and dst use the same register, this function returns a writemask that 407 * indicates wich components are read by src. Otherwise zero is returned. 408 */ 409 static unsigned int src_reads_dst_mask(struct rc_src_register src, 410 struct rc_dst_register dst) 411 { 412 if (dst.File != src.File || dst.Index != src.Index) { 413 return 0; 414 } 415 return rc_swizzle_to_writemask(src.Swizzle); 416 } 417 418 /* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0) 419 * in any of its channels. Return 0 otherwise. */ 420 static int src_has_const_swz(struct rc_src_register src) { 421 int chan; 422 for(chan = 0; chan < 4; chan++) { 423 unsigned int swz = GET_SWZ(src.Swizzle, chan); 424 if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF 425 || swz == RC_SWIZZLE_ONE) { 426 return 1; 427 } 428 } 429 return 0; 430 } 431 432 static void presub_scan_read( 433 void * data, 434 struct rc_instruction * inst, 435 struct rc_src_register * src) 436 { 437 struct rc_reader_data * reader_data = data; 438 rc_presubtract_op * presub_opcode = reader_data->CbData; 439 440 if (!rc_inst_can_use_presub(inst, *presub_opcode, 441 reader_data->Writer->U.I.DstReg.WriteMask, 442 src, 443 &reader_data->Writer->U.I.SrcReg[0], 444 &reader_data->Writer->U.I.SrcReg[1])) { 445 reader_data->Abort = 1; 446 return; 447 } 448 } 449 450 static int presub_helper( 451 struct radeon_compiler * c, 452 struct rc_instruction * inst_add, 453 rc_presubtract_op presub_opcode, 454 rc_presub_replace_fn presub_replace) 455 { 456 struct rc_reader_data reader_data; 457 unsigned int i; 458 rc_presubtract_op cb_op = presub_opcode; 459 460 reader_data.CbData = &cb_op; 461 reader_data.ExitOnAbort = 1; 462 rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL, 463 is_src_clobbered_scan_write); 464 465 if (reader_data.Abort || reader_data.ReaderCount == 0) 466 return 0; 467 468 for(i = 0; i < reader_data.ReaderCount; i++) { 469 unsigned int src_index; 470 struct rc_reader reader = reader_data.Readers[i]; 471 const struct rc_opcode_info * info = 472 rc_get_opcode_info(reader.Inst->U.I.Opcode); 473 474 for (src_index = 0; src_index < info->NumSrcRegs; src_index++) { 475 if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src) 476 presub_replace(inst_add, reader.Inst, src_index); 477 } 478 } 479 return 1; 480 } 481 482 /* This function assumes that inst_add->U.I.SrcReg[0] and 483 * inst_add->U.I.SrcReg[1] aren't both negative. */ 484 static void presub_replace_add( 485 struct rc_instruction * inst_add, 486 struct rc_instruction * inst_reader, 487 unsigned int src_index) 488 { 489 rc_presubtract_op presub_opcode; 490 if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate) 491 presub_opcode = RC_PRESUB_SUB; 492 else 493 presub_opcode = RC_PRESUB_ADD; 494 495 if (inst_add->U.I.SrcReg[1].Negate) { 496 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; 497 inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0]; 498 } else { 499 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0]; 500 inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1]; 501 } 502 inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; 503 inst_reader->U.I.PreSub.SrcReg[1].Negate = 0; 504 inst_reader->U.I.PreSub.Opcode = presub_opcode; 505 inst_reader->U.I.SrcReg[src_index] = 506 chain_srcregs(inst_reader->U.I.SrcReg[src_index], 507 inst_reader->U.I.PreSub.SrcReg[0]); 508 inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; 509 inst_reader->U.I.SrcReg[src_index].Index = presub_opcode; 510 } 511 512 static int is_presub_candidate( 513 struct radeon_compiler * c, 514 struct rc_instruction * inst) 515 { 516 const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); 517 unsigned int i; 518 unsigned int is_constant[2] = {0, 0}; 519 520 assert(inst->U.I.Opcode == RC_OPCODE_ADD); 521 522 if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE 523 || inst->U.I.SaturateMode 524 || inst->U.I.WriteALUResult 525 || inst->U.I.Omod) { 526 return 0; 527 } 528 529 /* If both sources use a constant swizzle, then we can't convert it to 530 * a presubtract operation. In fact for the ADD and SUB presubtract 531 * operations neither source can contain a constant swizzle. This 532 * specific case is checked in peephole_add_presub_add() when 533 * we make sure the swizzles for both sources are equal, so we 534 * don't need to worry about it here. */ 535 for (i = 0; i < 2; i++) { 536 int chan; 537 for (chan = 0; chan < 4; chan++) { 538 rc_swizzle swz = 539 get_swz(inst->U.I.SrcReg[i].Swizzle, chan); 540 if (swz == RC_SWIZZLE_ONE 541 || swz == RC_SWIZZLE_ZERO 542 || swz == RC_SWIZZLE_HALF) { 543 is_constant[i] = 1; 544 } 545 } 546 } 547 if (is_constant[0] && is_constant[1]) 548 return 0; 549 550 for(i = 0; i < info->NumSrcRegs; i++) { 551 struct rc_src_register src = inst->U.I.SrcReg[i]; 552 if (src_reads_dst_mask(src, inst->U.I.DstReg)) 553 return 0; 554 555 src.File = RC_FILE_PRESUB; 556 if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src)) 557 return 0; 558 } 559 return 1; 560 } 561 562 static int peephole_add_presub_add( 563 struct radeon_compiler * c, 564 struct rc_instruction * inst_add) 565 { 566 unsigned dstmask = inst_add->U.I.DstReg.WriteMask; 567 unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask; 568 unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask; 569 570 if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle) 571 return 0; 572 573 /* src0 and src1 can't have absolute values */ 574 if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs) 575 return 0; 576 577 /* presub_replace_add() assumes only one is negative */ 578 if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate) 579 return 0; 580 581 /* if src0 is negative, at least all bits of dstmask have to be set */ 582 if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask) 583 return 0; 584 585 /* if src1 is negative, at least all bits of dstmask have to be set */ 586 if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask) 587 return 0; 588 589 if (!is_presub_candidate(c, inst_add)) 590 return 0; 591 592 if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) { 593 rc_remove_instruction(inst_add); 594 return 1; 595 } 596 return 0; 597 } 598 599 static void presub_replace_inv( 600 struct rc_instruction * inst_add, 601 struct rc_instruction * inst_reader, 602 unsigned int src_index) 603 { 604 /* We must be careful not to modify inst_add, since it 605 * is possible it will remain part of the program.*/ 606 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; 607 inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; 608 inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV; 609 inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index], 610 inst_reader->U.I.PreSub.SrcReg[0]); 611 612 inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; 613 inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV; 614 } 615 616 /** 617 * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1] 618 * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source 619 * of the add instruction must have the constatnt 1 swizzle. This function 620 * does not check const registers to see if their value is 1.0, so it should 621 * be called after the constant_folding optimization. 622 * @return 623 * 0 if the ADD instruction is still part of the program. 624 * 1 if the ADD instruction is no longer part of the program. 625 */ 626 static int peephole_add_presub_inv( 627 struct radeon_compiler * c, 628 struct rc_instruction * inst_add) 629 { 630 unsigned int i, swz; 631 632 if (!is_presub_candidate(c, inst_add)) 633 return 0; 634 635 /* Check if src0 is 1. */ 636 /* XXX It would be nice to use is_src_uniform_constant here, but that 637 * function only works if the register's file is RC_FILE_NONE */ 638 for(i = 0; i < 4; i++ ) { 639 swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i); 640 if(((1 << i) & inst_add->U.I.DstReg.WriteMask) 641 && swz != RC_SWIZZLE_ONE) { 642 return 0; 643 } 644 } 645 646 /* Check src1. */ 647 if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) != 648 inst_add->U.I.DstReg.WriteMask 649 || inst_add->U.I.SrcReg[1].Abs 650 || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY 651 && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT) 652 || src_has_const_swz(inst_add->U.I.SrcReg[1])) { 653 654 return 0; 655 } 656 657 if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) { 658 rc_remove_instruction(inst_add); 659 return 1; 660 } 661 return 0; 662 } 663 664 struct peephole_mul_cb_data { 665 struct rc_dst_register * Writer; 666 unsigned int Clobbered; 667 }; 668 669 static void omod_filter_reader_cb( 670 void * userdata, 671 struct rc_instruction * inst, 672 rc_register_file file, 673 unsigned int index, 674 unsigned int mask) 675 { 676 struct peephole_mul_cb_data * d = userdata; 677 if (rc_src_reads_dst_mask(file, mask, index, 678 d->Writer->File, d->Writer->Index, d->Writer->WriteMask)) { 679 680 d->Clobbered = 1; 681 } 682 } 683 684 static void omod_filter_writer_cb( 685 void * userdata, 686 struct rc_instruction * inst, 687 rc_register_file file, 688 unsigned int index, 689 unsigned int mask) 690 { 691 struct peephole_mul_cb_data * d = userdata; 692 if (file == d->Writer->File && index == d->Writer->Index && 693 (mask & d->Writer->WriteMask)) { 694 d->Clobbered = 1; 695 } 696 } 697 698 static int peephole_mul_omod( 699 struct radeon_compiler * c, 700 struct rc_instruction * inst_mul, 701 struct rc_list * var_list) 702 { 703 unsigned int chan = 0, swz, i; 704 int const_index = -1; 705 int temp_index = -1; 706 float const_value; 707 rc_omod_op omod_op = RC_OMOD_DISABLE; 708 struct rc_list * writer_list; 709 struct rc_variable * var; 710 struct peephole_mul_cb_data cb_data; 711 712 for (i = 0; i < 2; i++) { 713 unsigned int j; 714 if (inst_mul->U.I.SrcReg[i].File != RC_FILE_CONSTANT 715 && inst_mul->U.I.SrcReg[i].File != RC_FILE_TEMPORARY) { 716 return 0; 717 } 718 if (inst_mul->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { 719 if (temp_index != -1) { 720 /* The instruction has two temp sources */ 721 return 0; 722 } else { 723 temp_index = i; 724 continue; 725 } 726 } 727 /* If we get this far Src[i] must be a constant src */ 728 if (inst_mul->U.I.SrcReg[i].Negate) { 729 return 0; 730 } 731 /* The constant src needs to read from the same swizzle */ 732 swz = RC_SWIZZLE_UNUSED; 733 chan = 0; 734 for (j = 0; j < 4; j++) { 735 unsigned int j_swz = 736 GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j); 737 if (j_swz == RC_SWIZZLE_UNUSED) { 738 continue; 739 } 740 if (swz == RC_SWIZZLE_UNUSED) { 741 swz = j_swz; 742 chan = j; 743 } else if (j_swz != swz) { 744 return 0; 745 } 746 } 747 748 if (const_index != -1) { 749 /* The instruction has two constant sources */ 750 return 0; 751 } else { 752 const_index = i; 753 } 754 } 755 756 if (!rc_src_reg_is_immediate(c, inst_mul->U.I.SrcReg[const_index].File, 757 inst_mul->U.I.SrcReg[const_index].Index)) { 758 return 0; 759 } 760 const_value = rc_get_constant_value(c, 761 inst_mul->U.I.SrcReg[const_index].Index, 762 inst_mul->U.I.SrcReg[const_index].Swizzle, 763 inst_mul->U.I.SrcReg[const_index].Negate, 764 chan); 765 766 if (const_value == 2.0f) { 767 omod_op = RC_OMOD_MUL_2; 768 } else if (const_value == 4.0f) { 769 omod_op = RC_OMOD_MUL_4; 770 } else if (const_value == 8.0f) { 771 omod_op = RC_OMOD_MUL_8; 772 } else if (const_value == (1.0f / 2.0f)) { 773 omod_op = RC_OMOD_DIV_2; 774 } else if (const_value == (1.0f / 4.0f)) { 775 omod_op = RC_OMOD_DIV_4; 776 } else if (const_value == (1.0f / 8.0f)) { 777 omod_op = RC_OMOD_DIV_8; 778 } else { 779 return 0; 780 } 781 782 writer_list = rc_variable_list_get_writers_one_reader(var_list, 783 RC_INSTRUCTION_NORMAL, &inst_mul->U.I.SrcReg[temp_index]); 784 785 if (!writer_list) { 786 return 0; 787 } 788 789 cb_data.Clobbered = 0; 790 cb_data.Writer = &inst_mul->U.I.DstReg; 791 for (var = writer_list->Item; var; var = var->Friend) { 792 struct rc_instruction * inst; 793 const struct rc_opcode_info * info = rc_get_opcode_info( 794 var->Inst->U.I.Opcode); 795 if (info->HasTexture) { 796 return 0; 797 } 798 if (var->Inst->U.I.SaturateMode != RC_SATURATE_NONE) { 799 return 0; 800 } 801 for (inst = inst_mul->Prev; inst != var->Inst; 802 inst = inst->Prev) { 803 rc_for_all_reads_mask(inst, omod_filter_reader_cb, 804 &cb_data); 805 rc_for_all_writes_mask(inst, omod_filter_writer_cb, 806 &cb_data); 807 if (cb_data.Clobbered) { 808 break; 809 } 810 } 811 } 812 813 if (cb_data.Clobbered) { 814 return 0; 815 } 816 817 /* Rewrite the instructions */ 818 for (var = writer_list->Item; var; var = var->Friend) { 819 struct rc_variable * writer = writer_list->Item; 820 unsigned conversion_swizzle = rc_make_conversion_swizzle( 821 writer->Inst->U.I.DstReg.WriteMask, 822 inst_mul->U.I.DstReg.WriteMask); 823 writer->Inst->U.I.Omod = omod_op; 824 writer->Inst->U.I.DstReg.File = inst_mul->U.I.DstReg.File; 825 writer->Inst->U.I.DstReg.Index = inst_mul->U.I.DstReg.Index; 826 rc_normal_rewrite_writemask(writer->Inst, conversion_swizzle); 827 writer->Inst->U.I.SaturateMode = inst_mul->U.I.SaturateMode; 828 } 829 830 rc_remove_instruction(inst_mul); 831 832 return 1; 833 } 834 835 /** 836 * @return 837 * 0 if inst is still part of the program. 838 * 1 if inst is no longer part of the program. 839 */ 840 static int peephole(struct radeon_compiler * c, struct rc_instruction * inst) 841 { 842 switch(inst->U.I.Opcode){ 843 case RC_OPCODE_ADD: 844 if (c->has_presub) { 845 if(peephole_add_presub_inv(c, inst)) 846 return 1; 847 if(peephole_add_presub_add(c, inst)) 848 return 1; 849 } 850 break; 851 default: 852 break; 853 } 854 return 0; 855 } 856 857 void rc_optimize(struct radeon_compiler * c, void *user) 858 { 859 struct rc_instruction * inst = c->Program.Instructions.Next; 860 struct rc_list * var_list; 861 while(inst != &c->Program.Instructions) { 862 struct rc_instruction * cur = inst; 863 inst = inst->Next; 864 865 constant_folding(c, cur); 866 867 if(peephole(c, cur)) 868 continue; 869 870 if (cur->U.I.Opcode == RC_OPCODE_MOV) { 871 copy_propagate(c, cur); 872 /* cur may no longer be part of the program */ 873 } 874 } 875 876 if (!c->has_omod) { 877 return; 878 } 879 880 inst = c->Program.Instructions.Next; 881 while(inst != &c->Program.Instructions) { 882 struct rc_instruction * cur = inst; 883 inst = inst->Next; 884 if (cur->U.I.Opcode == RC_OPCODE_MUL) { 885 var_list = rc_get_variables(c); 886 peephole_mul_omod(c, cur, var_list); 887 } 888 } 889 } 890