1 /* 2 * Copyright (C) 2009 Nicolai Haehnle. 3 * Copyright 2010 Tom Stellard <tstellar (at) gmail.com> 4 * 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining 8 * a copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sublicense, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial 17 * portions of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 */ 28 29 #include "radeon_dataflow.h" 30 31 #include "radeon_compiler.h" 32 #include "radeon_compiler_util.h" 33 #include "radeon_list.h" 34 #include "radeon_swizzle.h" 35 #include "radeon_variable.h" 36 37 struct src_clobbered_reads_cb_data { 38 rc_register_file File; 39 unsigned int Index; 40 unsigned int Mask; 41 struct rc_reader_data * ReaderData; 42 }; 43 44 typedef void (*rc_presub_replace_fn)(struct rc_instruction *, 45 struct rc_instruction *, 46 unsigned int); 47 48 static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner) 49 { 50 struct rc_src_register combine; 51 combine.File = inner.File; 52 combine.Index = inner.Index; 53 combine.RelAddr = inner.RelAddr; 54 if (outer.Abs) { 55 combine.Abs = 1; 56 combine.Negate = outer.Negate; 57 } else { 58 combine.Abs = inner.Abs; 59 combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate); 60 combine.Negate ^= outer.Negate; 61 } 62 combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle); 63 return combine; 64 } 65 66 static void copy_propagate_scan_read(void * data, struct rc_instruction * inst, 67 struct rc_src_register * src) 68 { 69 rc_register_file file = src->File; 70 struct rc_reader_data * reader_data = data; 71 72 if(!rc_inst_can_use_presub(inst, 73 reader_data->Writer->U.I.PreSub.Opcode, 74 rc_swizzle_to_writemask(src->Swizzle), 75 src, 76 &reader_data->Writer->U.I.PreSub.SrcReg[0], 77 &reader_data->Writer->U.I.PreSub.SrcReg[1])) { 78 reader_data->Abort = 1; 79 return; 80 } 81 82 /* XXX This could probably be handled better. */ 83 if (file == RC_FILE_ADDRESS) { 84 reader_data->Abort = 1; 85 return; 86 } 87 88 /* These instructions cannot read from the constants file. 89 * see radeonTransformTEX() 90 */ 91 if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && 92 reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT && 93 (inst->U.I.Opcode == RC_OPCODE_TEX || 94 inst->U.I.Opcode == RC_OPCODE_TXB || 95 inst->U.I.Opcode == RC_OPCODE_TXP || 96 inst->U.I.Opcode == RC_OPCODE_TXD || 97 inst->U.I.Opcode == RC_OPCODE_TXL || 98 inst->U.I.Opcode == RC_OPCODE_KIL)){ 99 reader_data->Abort = 1; 100 return; 101 } 102 } 103 104 static void src_clobbered_reads_cb( 105 void * data, 106 struct rc_instruction * inst, 107 struct rc_src_register * src) 108 { 109 struct src_clobbered_reads_cb_data * sc_data = data; 110 111 if (src->File == sc_data->File 112 && src->Index == sc_data->Index 113 && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) { 114 115 sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; 116 } 117 118 if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) { 119 sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; 120 } 121 } 122 123 static void is_src_clobbered_scan_write( 124 void * data, 125 struct rc_instruction * inst, 126 rc_register_file file, 127 unsigned int index, 128 unsigned int mask) 129 { 130 struct src_clobbered_reads_cb_data sc_data; 131 struct rc_reader_data * reader_data = data; 132 sc_data.File = file; 133 sc_data.Index = index; 134 sc_data.Mask = mask; 135 sc_data.ReaderData = reader_data; 136 rc_for_all_reads_src(reader_data->Writer, 137 src_clobbered_reads_cb, &sc_data); 138 } 139 140 static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov) 141 { 142 struct rc_reader_data reader_data; 143 unsigned int i; 144 145 if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || 146 inst_mov->U.I.WriteALUResult) 147 return; 148 149 /* Get a list of all the readers of this MOV instruction. */ 150 reader_data.ExitOnAbort = 1; 151 rc_get_readers(c, inst_mov, &reader_data, 152 copy_propagate_scan_read, NULL, 153 is_src_clobbered_scan_write); 154 155 if (reader_data.Abort || reader_data.ReaderCount == 0) 156 return; 157 158 /* We can propagate SaturateMode if all the readers are MOV instructions 159 * without a presubtract operation, source negation and absolute. 160 * In that case, we just move SaturateMode to all readers. */ 161 if (inst_mov->U.I.SaturateMode) { 162 for (i = 0; i < reader_data.ReaderCount; i++) { 163 struct rc_instruction * inst = reader_data.Readers[i].Inst; 164 165 if (inst->U.I.Opcode != RC_OPCODE_MOV || 166 inst->U.I.SrcReg[0].File == RC_FILE_PRESUB || 167 inst->U.I.SrcReg[0].Abs || 168 inst->U.I.SrcReg[0].Negate) { 169 return; 170 } 171 } 172 } 173 174 /* Propagate the MOV instruction. */ 175 for (i = 0; i < reader_data.ReaderCount; i++) { 176 struct rc_instruction * inst = reader_data.Readers[i].Inst; 177 *reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]); 178 179 if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB) 180 inst->U.I.PreSub = inst_mov->U.I.PreSub; 181 if (!inst->U.I.SaturateMode) 182 inst->U.I.SaturateMode = inst_mov->U.I.SaturateMode; 183 } 184 185 /* Finally, remove the original MOV instruction */ 186 rc_remove_instruction(inst_mov); 187 } 188 189 /** 190 * Check if a source register is actually always the same 191 * swizzle constant. 192 */ 193 static int is_src_uniform_constant(struct rc_src_register src, 194 rc_swizzle * pswz, unsigned int * pnegate) 195 { 196 int have_used = 0; 197 198 if (src.File != RC_FILE_NONE) { 199 *pswz = 0; 200 return 0; 201 } 202 203 for(unsigned int chan = 0; chan < 4; ++chan) { 204 unsigned int swz = GET_SWZ(src.Swizzle, chan); 205 if (swz < 4) { 206 *pswz = 0; 207 return 0; 208 } 209 if (swz == RC_SWIZZLE_UNUSED) 210 continue; 211 212 if (!have_used) { 213 *pswz = swz; 214 *pnegate = GET_BIT(src.Negate, chan); 215 have_used = 1; 216 } else { 217 if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) { 218 *pswz = 0; 219 return 0; 220 } 221 } 222 } 223 224 return 1; 225 } 226 227 static void constant_folding_mad(struct rc_instruction * inst) 228 { 229 rc_swizzle swz = 0; 230 unsigned int negate= 0; 231 232 if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) { 233 if (swz == RC_SWIZZLE_ZERO) { 234 inst->U.I.Opcode = RC_OPCODE_MUL; 235 return; 236 } 237 } 238 239 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { 240 if (swz == RC_SWIZZLE_ONE) { 241 inst->U.I.Opcode = RC_OPCODE_ADD; 242 if (negate) 243 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; 244 inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2]; 245 return; 246 } else if (swz == RC_SWIZZLE_ZERO) { 247 inst->U.I.Opcode = RC_OPCODE_MOV; 248 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; 249 return; 250 } 251 } 252 253 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { 254 if (swz == RC_SWIZZLE_ONE) { 255 inst->U.I.Opcode = RC_OPCODE_ADD; 256 if (negate) 257 inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; 258 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; 259 return; 260 } else if (swz == RC_SWIZZLE_ZERO) { 261 inst->U.I.Opcode = RC_OPCODE_MOV; 262 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; 263 return; 264 } 265 } 266 } 267 268 static void constant_folding_mul(struct rc_instruction * inst) 269 { 270 rc_swizzle swz = 0; 271 unsigned int negate = 0; 272 273 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { 274 if (swz == RC_SWIZZLE_ONE) { 275 inst->U.I.Opcode = RC_OPCODE_MOV; 276 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; 277 if (negate) 278 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; 279 return; 280 } else if (swz == RC_SWIZZLE_ZERO) { 281 inst->U.I.Opcode = RC_OPCODE_MOV; 282 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; 283 return; 284 } 285 } 286 287 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { 288 if (swz == RC_SWIZZLE_ONE) { 289 inst->U.I.Opcode = RC_OPCODE_MOV; 290 if (negate) 291 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; 292 return; 293 } else if (swz == RC_SWIZZLE_ZERO) { 294 inst->U.I.Opcode = RC_OPCODE_MOV; 295 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; 296 return; 297 } 298 } 299 } 300 301 static void constant_folding_add(struct rc_instruction * inst) 302 { 303 rc_swizzle swz = 0; 304 unsigned int negate = 0; 305 306 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { 307 if (swz == RC_SWIZZLE_ZERO) { 308 inst->U.I.Opcode = RC_OPCODE_MOV; 309 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; 310 return; 311 } 312 } 313 314 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { 315 if (swz == RC_SWIZZLE_ZERO) { 316 inst->U.I.Opcode = RC_OPCODE_MOV; 317 return; 318 } 319 } 320 } 321 322 /** 323 * Replace 0.0, 1.0 and 0.5 immediate constants by their 324 * respective swizzles. Simplify instructions like ADD dst, src, 0; 325 */ 326 static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst) 327 { 328 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); 329 unsigned int i; 330 331 /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */ 332 for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { 333 struct rc_constant * constant; 334 struct rc_src_register newsrc; 335 int have_real_reference; 336 unsigned int chan; 337 338 /* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */ 339 for (chan = 0; chan < 4; ++chan) 340 if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3) 341 break; 342 if (chan == 4) { 343 inst->U.I.SrcReg[src].File = RC_FILE_NONE; 344 continue; 345 } 346 347 /* Convert immediates to swizzles. */ 348 if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT || 349 inst->U.I.SrcReg[src].RelAddr || 350 inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count) 351 continue; 352 353 constant = 354 &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index]; 355 356 if (constant->Type != RC_CONSTANT_IMMEDIATE) 357 continue; 358 359 newsrc = inst->U.I.SrcReg[src]; 360 have_real_reference = 0; 361 for (chan = 0; chan < 4; ++chan) { 362 unsigned int swz = GET_SWZ(newsrc.Swizzle, chan); 363 unsigned int newswz; 364 float imm; 365 float baseimm; 366 367 if (swz >= 4) 368 continue; 369 370 imm = constant->u.Immediate[swz]; 371 baseimm = imm; 372 if (imm < 0.0) 373 baseimm = -baseimm; 374 375 if (baseimm == 0.0) { 376 newswz = RC_SWIZZLE_ZERO; 377 } else if (baseimm == 1.0) { 378 newswz = RC_SWIZZLE_ONE; 379 } else if (baseimm == 0.5 && c->has_half_swizzles) { 380 newswz = RC_SWIZZLE_HALF; 381 } else { 382 have_real_reference = 1; 383 continue; 384 } 385 386 SET_SWZ(newsrc.Swizzle, chan, newswz); 387 if (imm < 0.0 && !newsrc.Abs) 388 newsrc.Negate ^= 1 << chan; 389 } 390 391 if (!have_real_reference) { 392 newsrc.File = RC_FILE_NONE; 393 newsrc.Index = 0; 394 } 395 396 /* don't make the swizzle worse */ 397 if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) && 398 c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src])) 399 continue; 400 401 inst->U.I.SrcReg[src] = newsrc; 402 } 403 404 /* Simplify instructions based on constants */ 405 if (inst->U.I.Opcode == RC_OPCODE_MAD) 406 constant_folding_mad(inst); 407 408 /* note: MAD can simplify to MUL or ADD */ 409 if (inst->U.I.Opcode == RC_OPCODE_MUL) 410 constant_folding_mul(inst); 411 else if (inst->U.I.Opcode == RC_OPCODE_ADD) 412 constant_folding_add(inst); 413 414 /* In case this instruction has been converted, make sure all of the 415 * registers that are no longer used are empty. */ 416 opcode = rc_get_opcode_info(inst->U.I.Opcode); 417 for(i = opcode->NumSrcRegs; i < 3; i++) { 418 memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register)); 419 } 420 } 421 422 /** 423 * If src and dst use the same register, this function returns a writemask that 424 * indicates wich components are read by src. Otherwise zero is returned. 425 */ 426 static unsigned int src_reads_dst_mask(struct rc_src_register src, 427 struct rc_dst_register dst) 428 { 429 if (dst.File != src.File || dst.Index != src.Index) { 430 return 0; 431 } 432 return rc_swizzle_to_writemask(src.Swizzle); 433 } 434 435 /* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0) 436 * in any of its channels. Return 0 otherwise. */ 437 static int src_has_const_swz(struct rc_src_register src) { 438 int chan; 439 for(chan = 0; chan < 4; chan++) { 440 unsigned int swz = GET_SWZ(src.Swizzle, chan); 441 if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF 442 || swz == RC_SWIZZLE_ONE) { 443 return 1; 444 } 445 } 446 return 0; 447 } 448 449 static void presub_scan_read( 450 void * data, 451 struct rc_instruction * inst, 452 struct rc_src_register * src) 453 { 454 struct rc_reader_data * reader_data = data; 455 rc_presubtract_op * presub_opcode = reader_data->CbData; 456 457 if (!rc_inst_can_use_presub(inst, *presub_opcode, 458 reader_data->Writer->U.I.DstReg.WriteMask, 459 src, 460 &reader_data->Writer->U.I.SrcReg[0], 461 &reader_data->Writer->U.I.SrcReg[1])) { 462 reader_data->Abort = 1; 463 return; 464 } 465 } 466 467 static int presub_helper( 468 struct radeon_compiler * c, 469 struct rc_instruction * inst_add, 470 rc_presubtract_op presub_opcode, 471 rc_presub_replace_fn presub_replace) 472 { 473 struct rc_reader_data reader_data; 474 unsigned int i; 475 rc_presubtract_op cb_op = presub_opcode; 476 477 reader_data.CbData = &cb_op; 478 reader_data.ExitOnAbort = 1; 479 rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL, 480 is_src_clobbered_scan_write); 481 482 if (reader_data.Abort || reader_data.ReaderCount == 0) 483 return 0; 484 485 for(i = 0; i < reader_data.ReaderCount; i++) { 486 unsigned int src_index; 487 struct rc_reader reader = reader_data.Readers[i]; 488 const struct rc_opcode_info * info = 489 rc_get_opcode_info(reader.Inst->U.I.Opcode); 490 491 for (src_index = 0; src_index < info->NumSrcRegs; src_index++) { 492 if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src) 493 presub_replace(inst_add, reader.Inst, src_index); 494 } 495 } 496 return 1; 497 } 498 499 /* This function assumes that inst_add->U.I.SrcReg[0] and 500 * inst_add->U.I.SrcReg[1] aren't both negative. */ 501 static void presub_replace_add( 502 struct rc_instruction * inst_add, 503 struct rc_instruction * inst_reader, 504 unsigned int src_index) 505 { 506 rc_presubtract_op presub_opcode; 507 if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate) 508 presub_opcode = RC_PRESUB_SUB; 509 else 510 presub_opcode = RC_PRESUB_ADD; 511 512 if (inst_add->U.I.SrcReg[1].Negate) { 513 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; 514 inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0]; 515 } else { 516 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0]; 517 inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1]; 518 } 519 inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; 520 inst_reader->U.I.PreSub.SrcReg[1].Negate = 0; 521 inst_reader->U.I.PreSub.Opcode = presub_opcode; 522 inst_reader->U.I.SrcReg[src_index] = 523 chain_srcregs(inst_reader->U.I.SrcReg[src_index], 524 inst_reader->U.I.PreSub.SrcReg[0]); 525 inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; 526 inst_reader->U.I.SrcReg[src_index].Index = presub_opcode; 527 } 528 529 static int is_presub_candidate( 530 struct radeon_compiler * c, 531 struct rc_instruction * inst) 532 { 533 const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); 534 unsigned int i; 535 unsigned int is_constant[2] = {0, 0}; 536 537 assert(inst->U.I.Opcode == RC_OPCODE_ADD); 538 539 if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE 540 || inst->U.I.SaturateMode 541 || inst->U.I.WriteALUResult 542 || inst->U.I.Omod) { 543 return 0; 544 } 545 546 /* If both sources use a constant swizzle, then we can't convert it to 547 * a presubtract operation. In fact for the ADD and SUB presubtract 548 * operations neither source can contain a constant swizzle. This 549 * specific case is checked in peephole_add_presub_add() when 550 * we make sure the swizzles for both sources are equal, so we 551 * don't need to worry about it here. */ 552 for (i = 0; i < 2; i++) { 553 int chan; 554 for (chan = 0; chan < 4; chan++) { 555 rc_swizzle swz = 556 get_swz(inst->U.I.SrcReg[i].Swizzle, chan); 557 if (swz == RC_SWIZZLE_ONE 558 || swz == RC_SWIZZLE_ZERO 559 || swz == RC_SWIZZLE_HALF) { 560 is_constant[i] = 1; 561 } 562 } 563 } 564 if (is_constant[0] && is_constant[1]) 565 return 0; 566 567 for(i = 0; i < info->NumSrcRegs; i++) { 568 struct rc_src_register src = inst->U.I.SrcReg[i]; 569 if (src_reads_dst_mask(src, inst->U.I.DstReg)) 570 return 0; 571 572 src.File = RC_FILE_PRESUB; 573 if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src)) 574 return 0; 575 } 576 return 1; 577 } 578 579 static int peephole_add_presub_add( 580 struct radeon_compiler * c, 581 struct rc_instruction * inst_add) 582 { 583 unsigned dstmask = inst_add->U.I.DstReg.WriteMask; 584 unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask; 585 unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask; 586 587 if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle) 588 return 0; 589 590 /* src0 and src1 can't have absolute values */ 591 if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs) 592 return 0; 593 594 /* presub_replace_add() assumes only one is negative */ 595 if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate) 596 return 0; 597 598 /* if src0 is negative, at least all bits of dstmask have to be set */ 599 if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask) 600 return 0; 601 602 /* if src1 is negative, at least all bits of dstmask have to be set */ 603 if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask) 604 return 0; 605 606 if (!is_presub_candidate(c, inst_add)) 607 return 0; 608 609 if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) { 610 rc_remove_instruction(inst_add); 611 return 1; 612 } 613 return 0; 614 } 615 616 static void presub_replace_inv( 617 struct rc_instruction * inst_add, 618 struct rc_instruction * inst_reader, 619 unsigned int src_index) 620 { 621 /* We must be careful not to modify inst_add, since it 622 * is possible it will remain part of the program.*/ 623 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; 624 inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; 625 inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV; 626 inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index], 627 inst_reader->U.I.PreSub.SrcReg[0]); 628 629 inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; 630 inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV; 631 } 632 633 /** 634 * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1] 635 * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source 636 * of the add instruction must have the constatnt 1 swizzle. This function 637 * does not check const registers to see if their value is 1.0, so it should 638 * be called after the constant_folding optimization. 639 * @return 640 * 0 if the ADD instruction is still part of the program. 641 * 1 if the ADD instruction is no longer part of the program. 642 */ 643 static int peephole_add_presub_inv( 644 struct radeon_compiler * c, 645 struct rc_instruction * inst_add) 646 { 647 unsigned int i, swz; 648 649 if (!is_presub_candidate(c, inst_add)) 650 return 0; 651 652 /* Check if src0 is 1. */ 653 /* XXX It would be nice to use is_src_uniform_constant here, but that 654 * function only works if the register's file is RC_FILE_NONE */ 655 for(i = 0; i < 4; i++ ) { 656 swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i); 657 if(((1 << i) & inst_add->U.I.DstReg.WriteMask) 658 && swz != RC_SWIZZLE_ONE) { 659 return 0; 660 } 661 } 662 663 /* Check src1. */ 664 if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) != 665 inst_add->U.I.DstReg.WriteMask 666 || inst_add->U.I.SrcReg[1].Abs 667 || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY 668 && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT) 669 || src_has_const_swz(inst_add->U.I.SrcReg[1])) { 670 671 return 0; 672 } 673 674 if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) { 675 rc_remove_instruction(inst_add); 676 return 1; 677 } 678 return 0; 679 } 680 681 struct peephole_mul_cb_data { 682 struct rc_dst_register * Writer; 683 unsigned int Clobbered; 684 }; 685 686 static void omod_filter_reader_cb( 687 void * userdata, 688 struct rc_instruction * inst, 689 rc_register_file file, 690 unsigned int index, 691 unsigned int mask) 692 { 693 struct peephole_mul_cb_data * d = userdata; 694 if (rc_src_reads_dst_mask(file, mask, index, 695 d->Writer->File, d->Writer->Index, d->Writer->WriteMask)) { 696 697 d->Clobbered = 1; 698 } 699 } 700 701 static void omod_filter_writer_cb( 702 void * userdata, 703 struct rc_instruction * inst, 704 rc_register_file file, 705 unsigned int index, 706 unsigned int mask) 707 { 708 struct peephole_mul_cb_data * d = userdata; 709 if (file == d->Writer->File && index == d->Writer->Index && 710 (mask & d->Writer->WriteMask)) { 711 d->Clobbered = 1; 712 } 713 } 714 715 static int peephole_mul_omod( 716 struct radeon_compiler * c, 717 struct rc_instruction * inst_mul, 718 struct rc_list * var_list) 719 { 720 unsigned int chan = 0, swz, i; 721 int const_index = -1; 722 int temp_index = -1; 723 float const_value; 724 rc_omod_op omod_op = RC_OMOD_DISABLE; 725 struct rc_list * writer_list; 726 struct rc_variable * var; 727 struct peephole_mul_cb_data cb_data; 728 unsigned writemask_sum; 729 730 for (i = 0; i < 2; i++) { 731 unsigned int j; 732 if (inst_mul->U.I.SrcReg[i].File != RC_FILE_CONSTANT 733 && inst_mul->U.I.SrcReg[i].File != RC_FILE_TEMPORARY) { 734 return 0; 735 } 736 if (inst_mul->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { 737 if (temp_index != -1) { 738 /* The instruction has two temp sources */ 739 return 0; 740 } else { 741 temp_index = i; 742 continue; 743 } 744 } 745 /* If we get this far Src[i] must be a constant src */ 746 if (inst_mul->U.I.SrcReg[i].Negate) { 747 return 0; 748 } 749 /* The constant src needs to read from the same swizzle */ 750 swz = RC_SWIZZLE_UNUSED; 751 chan = 0; 752 for (j = 0; j < 4; j++) { 753 unsigned int j_swz = 754 GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j); 755 if (j_swz == RC_SWIZZLE_UNUSED) { 756 continue; 757 } 758 if (swz == RC_SWIZZLE_UNUSED) { 759 swz = j_swz; 760 chan = j; 761 } else if (j_swz != swz) { 762 return 0; 763 } 764 } 765 766 if (const_index != -1) { 767 /* The instruction has two constant sources */ 768 return 0; 769 } else { 770 const_index = i; 771 } 772 } 773 774 if (!rc_src_reg_is_immediate(c, inst_mul->U.I.SrcReg[const_index].File, 775 inst_mul->U.I.SrcReg[const_index].Index)) { 776 return 0; 777 } 778 const_value = rc_get_constant_value(c, 779 inst_mul->U.I.SrcReg[const_index].Index, 780 inst_mul->U.I.SrcReg[const_index].Swizzle, 781 inst_mul->U.I.SrcReg[const_index].Negate, 782 chan); 783 784 if (const_value == 2.0f) { 785 omod_op = RC_OMOD_MUL_2; 786 } else if (const_value == 4.0f) { 787 omod_op = RC_OMOD_MUL_4; 788 } else if (const_value == 8.0f) { 789 omod_op = RC_OMOD_MUL_8; 790 } else if (const_value == (1.0f / 2.0f)) { 791 omod_op = RC_OMOD_DIV_2; 792 } else if (const_value == (1.0f / 4.0f)) { 793 omod_op = RC_OMOD_DIV_4; 794 } else if (const_value == (1.0f / 8.0f)) { 795 omod_op = RC_OMOD_DIV_8; 796 } else { 797 return 0; 798 } 799 800 writer_list = rc_variable_list_get_writers_one_reader(var_list, 801 RC_INSTRUCTION_NORMAL, &inst_mul->U.I.SrcReg[temp_index]); 802 803 if (!writer_list) { 804 return 0; 805 } 806 807 cb_data.Clobbered = 0; 808 cb_data.Writer = &inst_mul->U.I.DstReg; 809 for (var = writer_list->Item; var; var = var->Friend) { 810 struct rc_instruction * inst; 811 const struct rc_opcode_info * info = rc_get_opcode_info( 812 var->Inst->U.I.Opcode); 813 if (info->HasTexture) { 814 return 0; 815 } 816 if (var->Inst->U.I.SaturateMode != RC_SATURATE_NONE) { 817 return 0; 818 } 819 for (inst = inst_mul->Prev; inst != var->Inst; 820 inst = inst->Prev) { 821 rc_for_all_reads_mask(inst, omod_filter_reader_cb, 822 &cb_data); 823 rc_for_all_writes_mask(inst, omod_filter_writer_cb, 824 &cb_data); 825 if (cb_data.Clobbered) { 826 break; 827 } 828 } 829 } 830 831 if (cb_data.Clobbered) { 832 return 0; 833 } 834 835 /* Rewrite the instructions */ 836 writemask_sum = rc_variable_writemask_sum(writer_list->Item); 837 for (var = writer_list->Item; var; var = var->Friend) { 838 struct rc_variable * writer = var; 839 unsigned conversion_swizzle = rc_make_conversion_swizzle( 840 writemask_sum, 841 inst_mul->U.I.DstReg.WriteMask); 842 writer->Inst->U.I.Omod = omod_op; 843 writer->Inst->U.I.DstReg.File = inst_mul->U.I.DstReg.File; 844 writer->Inst->U.I.DstReg.Index = inst_mul->U.I.DstReg.Index; 845 rc_normal_rewrite_writemask(writer->Inst, conversion_swizzle); 846 writer->Inst->U.I.SaturateMode = inst_mul->U.I.SaturateMode; 847 } 848 849 rc_remove_instruction(inst_mul); 850 851 return 1; 852 } 853 854 /** 855 * @return 856 * 0 if inst is still part of the program. 857 * 1 if inst is no longer part of the program. 858 */ 859 static int peephole(struct radeon_compiler * c, struct rc_instruction * inst) 860 { 861 switch(inst->U.I.Opcode){ 862 case RC_OPCODE_ADD: 863 if (c->has_presub) { 864 if(peephole_add_presub_inv(c, inst)) 865 return 1; 866 if(peephole_add_presub_add(c, inst)) 867 return 1; 868 } 869 break; 870 default: 871 break; 872 } 873 return 0; 874 } 875 876 void rc_optimize(struct radeon_compiler * c, void *user) 877 { 878 struct rc_instruction * inst = c->Program.Instructions.Next; 879 struct rc_list * var_list; 880 while(inst != &c->Program.Instructions) { 881 struct rc_instruction * cur = inst; 882 inst = inst->Next; 883 884 constant_folding(c, cur); 885 886 if(peephole(c, cur)) 887 continue; 888 889 if (cur->U.I.Opcode == RC_OPCODE_MOV) { 890 copy_propagate(c, cur); 891 /* cur may no longer be part of the program */ 892 } 893 } 894 895 if (!c->has_omod) { 896 return; 897 } 898 899 inst = c->Program.Instructions.Next; 900 while(inst != &c->Program.Instructions) { 901 struct rc_instruction * cur = inst; 902 inst = inst->Next; 903 if (cur->U.I.Opcode == RC_OPCODE_MUL) { 904 var_list = rc_get_variables(c); 905 peephole_mul_omod(c, cur, var_list); 906 } 907 } 908 } 909