1 /* 2 * Copyright (C) 2009 Nicolai Haehnle. 3 * 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining 7 * a copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sublicense, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial 16 * portions of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 */ 27 28 #include "radeon_program_pair.h" 29 30 #include <stdio.h> 31 32 #include "radeon_compiler.h" 33 #include "radeon_compiler_util.h" 34 #include "radeon_dataflow.h" 35 #include "radeon_list.h" 36 #include "radeon_variable.h" 37 38 #include "util/u_debug.h" 39 40 #define VERBOSE 0 41 42 #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) 43 44 struct schedule_instruction { 45 struct rc_instruction * Instruction; 46 47 /** Next instruction in the linked list of ready instructions. */ 48 struct schedule_instruction *NextReady; 49 50 /** Values that this instruction reads and writes */ 51 struct reg_value * WriteValues[4]; 52 struct reg_value * ReadValues[12]; 53 unsigned int NumWriteValues:3; 54 unsigned int NumReadValues:4; 55 56 /** 57 * Number of (read and write) dependencies that must be resolved before 58 * this instruction can be scheduled. 59 */ 60 unsigned int NumDependencies:5; 61 62 /** List of all readers (see rc_get_readers() for the definition of 63 * "all readers"), even those outside the basic block this instruction 64 * lives in. */ 65 struct rc_reader_data GlobalReaders; 66 67 /** If the scheduler has paired an RGB and an Alpha instruction together, 68 * PairedInst references the alpha insturction's dependency information. 69 */ 70 struct schedule_instruction * PairedInst; 71 72 /** This scheduler uses the value of Score to determine which 73 * instruction to schedule. Instructions with a higher value of Score 74 * will be scheduled first. */ 75 int Score; 76 77 /** The number of components that read from a TEX instruction. */ 78 unsigned TexReadCount; 79 80 /** For TEX instructions a list of readers */ 81 struct rc_list * TexReaders; 82 }; 83 84 85 /** 86 * Used to keep track of which instructions read a value. 87 */ 88 struct reg_value_reader { 89 struct schedule_instruction *Reader; 90 struct reg_value_reader *Next; 91 }; 92 93 /** 94 * Used to keep track which values are stored in each component of a 95 * RC_FILE_TEMPORARY. 96 */ 97 struct reg_value { 98 struct schedule_instruction * Writer; 99 100 /** 101 * Unordered linked list of instructions that read from this value. 102 * When this value becomes available, we increase all readers' 103 * dependency count. 104 */ 105 struct reg_value_reader *Readers; 106 107 /** 108 * Number of readers of this value. This is decremented each time 109 * a reader of the value is committed. 110 * When the reader cound reaches zero, the dependency count 111 * of the instruction writing \ref Next is decremented. 112 */ 113 unsigned int NumReaders; 114 115 struct reg_value *Next; /**< Pointer to the next value to be written to the same register */ 116 }; 117 118 struct register_state { 119 struct reg_value * Values[4]; 120 }; 121 122 struct remap_reg { 123 struct rc_instruciont * Inst; 124 unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1); 125 unsigned int OldSwizzle:3; 126 unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1); 127 unsigned int NewSwizzle:3; 128 unsigned int OnlyTexReads:1; 129 struct remap_reg * Next; 130 }; 131 132 struct schedule_state { 133 struct radeon_compiler * C; 134 struct schedule_instruction * Current; 135 /** Array of the previous writers of Current's destination register 136 * indexed by channel. */ 137 struct schedule_instruction * PrevWriter[4]; 138 139 struct register_state Temporary[RC_REGISTER_MAX_INDEX]; 140 141 /** 142 * Linked lists of instructions that can be scheduled right now, 143 * based on which ALU/TEX resources they require. 144 */ 145 /*@{*/ 146 struct schedule_instruction *ReadyFullALU; 147 struct schedule_instruction *ReadyRGB; 148 struct schedule_instruction *ReadyAlpha; 149 struct schedule_instruction *ReadyTEX; 150 /*@}*/ 151 struct rc_list *PendingTEX; 152 153 void (*CalcScore)(struct schedule_instruction *); 154 long max_tex_group; 155 unsigned PrevBlockHasTex:1; 156 unsigned TEXCount; 157 unsigned Opt:1; 158 }; 159 160 static struct reg_value ** get_reg_valuep(struct schedule_state * s, 161 rc_register_file file, unsigned int index, unsigned int chan) 162 { 163 if (file != RC_FILE_TEMPORARY) 164 return 0; 165 166 if (index >= RC_REGISTER_MAX_INDEX) { 167 rc_error(s->C, "%s: index %i out of bounds\n", __FUNCTION__, index); 168 return 0; 169 } 170 171 return &s->Temporary[index].Values[chan]; 172 } 173 174 static unsigned get_tex_read_count(struct schedule_instruction * sinst) 175 { 176 unsigned tex_read_count = sinst->TexReadCount; 177 if (sinst->PairedInst) { 178 tex_read_count += sinst->PairedInst->TexReadCount; 179 } 180 return tex_read_count; 181 } 182 183 #if VERBOSE 184 static void print_list(struct schedule_instruction * sinst) 185 { 186 struct schedule_instruction * ptr; 187 for (ptr = sinst; ptr; ptr=ptr->NextReady) { 188 unsigned tex_read_count = get_tex_read_count(ptr); 189 unsigned score = sinst->Score; 190 fprintf(stderr,"%u (%d) [%u],", ptr->Instruction->IP, score, 191 tex_read_count); 192 } 193 fprintf(stderr, "\n"); 194 } 195 #endif 196 197 static void remove_inst_from_list(struct schedule_instruction ** list, 198 struct schedule_instruction * inst) 199 { 200 struct schedule_instruction * prev = NULL; 201 struct schedule_instruction * list_ptr; 202 for (list_ptr = *list; list_ptr; prev = list_ptr, 203 list_ptr = list_ptr->NextReady) { 204 if (list_ptr == inst) { 205 if (prev) { 206 prev->NextReady = inst->NextReady; 207 } else { 208 *list = inst->NextReady; 209 } 210 inst->NextReady = NULL; 211 break; 212 } 213 } 214 } 215 216 static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst) 217 { 218 inst->NextReady = *list; 219 *list = inst; 220 } 221 222 static void add_inst_to_list_score(struct schedule_instruction ** list, 223 struct schedule_instruction * inst) 224 { 225 struct schedule_instruction * temp; 226 struct schedule_instruction * prev; 227 if (!*list) { 228 *list = inst; 229 return; 230 } 231 temp = *list; 232 prev = NULL; 233 while(temp && inst->Score <= temp->Score) { 234 prev = temp; 235 temp = temp->NextReady; 236 } 237 238 if (!prev) { 239 inst->NextReady = temp; 240 *list = inst; 241 } else { 242 prev->NextReady = inst; 243 inst->NextReady = temp; 244 } 245 } 246 247 static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst) 248 { 249 DBG("%i is now ready\n", sinst->Instruction->IP); 250 251 /* Adding Ready TEX instructions to the end of the "Ready List" helps 252 * us emit TEX instructions in blocks without losing our place. */ 253 if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) 254 add_inst_to_list_score(&s->ReadyTEX, sinst); 255 else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP) 256 add_inst_to_list_score(&s->ReadyRGB, sinst); 257 else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP) 258 add_inst_to_list_score(&s->ReadyAlpha, sinst); 259 else 260 add_inst_to_list_score(&s->ReadyFullALU, sinst); 261 } 262 263 static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst) 264 { 265 assert(sinst->NumDependencies > 0); 266 sinst->NumDependencies--; 267 if (!sinst->NumDependencies) 268 instruction_ready(s, sinst); 269 } 270 271 /* These functions provide different heuristics for scheduling instructions. 272 * The default is calc_score_readers. */ 273 274 #if 0 275 276 static void calc_score_zero(struct schedule_instruction * sinst) 277 { 278 sinst->Score = 0; 279 } 280 281 static void calc_score_deps(struct schedule_instruction * sinst) 282 { 283 int i; 284 sinst->Score = 0; 285 for (i = 0; i < sinst->NumWriteValues; i++) { 286 struct reg_value * v = sinst->WriteValues[i]; 287 if (v->NumReaders) { 288 struct reg_value_reader * r; 289 for (r = v->Readers; r; r = r->Next) { 290 if (r->Reader->NumDependencies == 1) { 291 sinst->Score += 100; 292 } 293 sinst->Score += r->Reader->NumDependencies; 294 } 295 } 296 } 297 } 298 299 #endif 300 301 #define NO_OUTPUT_SCORE (1 << 24) 302 303 static void score_no_output(struct schedule_instruction * sinst) 304 { 305 assert(sinst->Instruction->Type != RC_INSTRUCTION_NORMAL); 306 if (!sinst->Instruction->U.P.RGB.OutputWriteMask && 307 !sinst->Instruction->U.P.Alpha.OutputWriteMask) { 308 if (sinst->PairedInst) { 309 if (!sinst->PairedInst->Instruction->U.P. 310 RGB.OutputWriteMask 311 && !sinst->PairedInst->Instruction->U.P. 312 Alpha.OutputWriteMask) { 313 sinst->Score |= NO_OUTPUT_SCORE; 314 } 315 316 } else { 317 sinst->Score |= NO_OUTPUT_SCORE; 318 } 319 } 320 } 321 322 #define PAIRED_SCORE (1 << 16) 323 324 static void calc_score_r300(struct schedule_instruction * sinst) 325 { 326 unsigned src_idx; 327 328 if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) { 329 sinst->Score = 0; 330 return; 331 } 332 333 score_no_output(sinst); 334 335 if (sinst->PairedInst) { 336 sinst->Score |= PAIRED_SCORE; 337 return; 338 } 339 340 for (src_idx = 0; src_idx < 4; src_idx++) { 341 sinst->Score += sinst->Instruction->U.P.RGB.Src[src_idx].Used + 342 sinst->Instruction->U.P.Alpha.Src[src_idx].Used; 343 } 344 } 345 346 #define NO_READ_TEX_SCORE (1 << 16) 347 348 static void calc_score_readers(struct schedule_instruction * sinst) 349 { 350 if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) { 351 sinst->Score = 0; 352 } else { 353 sinst->Score = sinst->NumReadValues; 354 if (sinst->PairedInst) { 355 sinst->Score += sinst->PairedInst->NumReadValues; 356 } 357 if (get_tex_read_count(sinst) == 0) { 358 sinst->Score |= NO_READ_TEX_SCORE; 359 } 360 score_no_output(sinst); 361 } 362 } 363 364 /** 365 * This function decreases the dependencies of the next instruction that 366 * wants to write to each of sinst's read values. 367 */ 368 static void commit_update_reads(struct schedule_state * s, 369 struct schedule_instruction * sinst){ 370 unsigned int i; 371 for(i = 0; i < sinst->NumReadValues; ++i) { 372 struct reg_value * v = sinst->ReadValues[i]; 373 assert(v->NumReaders > 0); 374 v->NumReaders--; 375 if (!v->NumReaders) { 376 if (v->Next) { 377 decrease_dependencies(s, v->Next->Writer); 378 } 379 } 380 } 381 if (sinst->PairedInst) { 382 commit_update_reads(s, sinst->PairedInst); 383 } 384 } 385 386 static void commit_update_writes(struct schedule_state * s, 387 struct schedule_instruction * sinst){ 388 unsigned int i; 389 for(i = 0; i < sinst->NumWriteValues; ++i) { 390 struct reg_value * v = sinst->WriteValues[i]; 391 if (v->NumReaders) { 392 for(struct reg_value_reader * r = v->Readers; r; r = r->Next) { 393 decrease_dependencies(s, r->Reader); 394 } 395 } else { 396 /* This happens in instruction sequences of the type 397 * OP r.x, ...; 398 * OP r.x, r.x, ...; 399 * See also the subtlety in how instructions that both 400 * read and write the same register are scanned. 401 */ 402 if (v->Next) 403 decrease_dependencies(s, v->Next->Writer); 404 } 405 } 406 if (sinst->PairedInst) { 407 commit_update_writes(s, sinst->PairedInst); 408 } 409 } 410 411 static void notify_sem_wait(struct schedule_state *s) 412 { 413 struct rc_list * pend_ptr; 414 for (pend_ptr = s->PendingTEX; pend_ptr; pend_ptr = pend_ptr->Next) { 415 struct rc_list * read_ptr; 416 struct schedule_instruction * pending = pend_ptr->Item; 417 for (read_ptr = pending->TexReaders; read_ptr; 418 read_ptr = read_ptr->Next) { 419 struct schedule_instruction * reader = read_ptr->Item; 420 reader->TexReadCount--; 421 } 422 } 423 s->PendingTEX = NULL; 424 } 425 426 static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst) 427 { 428 DBG("%i: commit score = %d\n", sinst->Instruction->IP, sinst->Score); 429 430 commit_update_reads(s, sinst); 431 432 commit_update_writes(s, sinst); 433 434 if (get_tex_read_count(sinst) > 0) { 435 sinst->Instruction->U.P.SemWait = 1; 436 notify_sem_wait(s); 437 } 438 } 439 440 /** 441 * Emit all ready texture instructions in a single block. 442 * 443 * Emit as a single block to (hopefully) sample many textures in parallel, 444 * and to avoid hardware indirections on R300. 445 */ 446 static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before) 447 { 448 struct schedule_instruction *readytex; 449 struct rc_instruction * inst_begin; 450 451 assert(s->ReadyTEX); 452 notify_sem_wait(s); 453 454 /* Node marker for R300 */ 455 inst_begin = rc_insert_new_instruction(s->C, before->Prev); 456 inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX; 457 458 /* Link texture instructions back in */ 459 readytex = s->ReadyTEX; 460 while(readytex) { 461 rc_insert_instruction(before->Prev, readytex->Instruction); 462 DBG("%i: commit TEX reads\n", readytex->Instruction->IP); 463 464 /* All of the TEX instructions in the same TEX block have 465 * their source registers read from before any of the 466 * instructions in that block write to their destination 467 * registers. This means that when we commit a TEX 468 * instruction, any other TEX instruction that wants to write 469 * to one of the committed instruction's source register can be 470 * marked as ready and should be emitted in the same TEX 471 * block. This prevents the following sequence from being 472 * emitted in two different TEX blocks: 473 * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0]; 474 * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0]; 475 */ 476 commit_update_reads(s, readytex); 477 readytex = readytex->NextReady; 478 } 479 readytex = s->ReadyTEX; 480 s->ReadyTEX = 0; 481 while(readytex){ 482 DBG("%i: commit TEX writes\n", readytex->Instruction->IP); 483 commit_update_writes(s, readytex); 484 /* Set semaphore bits for last TEX instruction in the block */ 485 if (!readytex->NextReady) { 486 readytex->Instruction->U.I.TexSemAcquire = 1; 487 readytex->Instruction->U.I.TexSemWait = 1; 488 } 489 rc_list_add(&s->PendingTEX, rc_list(&s->C->Pool, readytex)); 490 readytex = readytex->NextReady; 491 } 492 } 493 494 /* This is a helper function for destructive_merge_instructions(). It helps 495 * merge presubtract sources from two instructions and makes sure the 496 * presubtract sources end up in the correct spot. This function assumes that 497 * dst_full is an rgb instruction, meaning that it has a vector instruction(rgb) 498 * but no scalar instruction (alpha). 499 * @return 0 if merging the presubtract sources fails. 500 * @retrun 1 if merging the presubtract sources succeeds. 501 */ 502 static int merge_presub_sources( 503 struct rc_pair_instruction * dst_full, 504 struct rc_pair_sub_instruction src, 505 unsigned int type) 506 { 507 unsigned int srcp_src, srcp_regs, is_rgb, is_alpha; 508 struct rc_pair_sub_instruction * dst_sub; 509 const struct rc_opcode_info * info; 510 511 assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP); 512 513 switch(type) { 514 case RC_SOURCE_RGB: 515 is_rgb = 1; 516 is_alpha = 0; 517 dst_sub = &dst_full->RGB; 518 break; 519 case RC_SOURCE_ALPHA: 520 is_rgb = 0; 521 is_alpha = 1; 522 dst_sub = &dst_full->Alpha; 523 break; 524 default: 525 assert(0); 526 return 0; 527 } 528 529 info = rc_get_opcode_info(dst_full->RGB.Opcode); 530 531 if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used) 532 return 0; 533 534 srcp_regs = rc_presubtract_src_reg_count( 535 src.Src[RC_PAIR_PRESUB_SRC].Index); 536 for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) { 537 unsigned int arg; 538 int free_source; 539 unsigned int one_way = 0; 540 struct rc_pair_instruction_source srcp = src.Src[srcp_src]; 541 struct rc_pair_instruction_source temp; 542 543 free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha, 544 srcp.File, srcp.Index); 545 546 /* If free_source < 0 then there are no free source 547 * slots. */ 548 if (free_source < 0) 549 return 0; 550 551 temp = dst_sub->Src[srcp_src]; 552 dst_sub->Src[srcp_src] = dst_sub->Src[free_source]; 553 554 /* srcp needs src0 and src1 to be the same */ 555 if (free_source < srcp_src) { 556 if (!temp.Used) 557 continue; 558 free_source = rc_pair_alloc_source(dst_full, is_rgb, 559 is_alpha, temp.File, temp.Index); 560 if (free_source < 0) 561 return 0; 562 one_way = 1; 563 } else { 564 dst_sub->Src[free_source] = temp; 565 } 566 567 /* If free_source == srcp_src, then the presubtract 568 * source is already in the correct place. */ 569 if (free_source == srcp_src) 570 continue; 571 572 /* Shuffle the sources, so we can put the 573 * presubtract source in the correct place. */ 574 for(arg = 0; arg < info->NumSrcRegs; arg++) { 575 /*If this arg does not read from an rgb source, 576 * do nothing. */ 577 if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle) 578 & type)) { 579 continue; 580 } 581 582 if (dst_full->RGB.Arg[arg].Source == srcp_src) 583 dst_full->RGB.Arg[arg].Source = free_source; 584 /* We need to do this just in case register 585 * is one of the sources already, but in the 586 * wrong spot. */ 587 else if(dst_full->RGB.Arg[arg].Source == free_source 588 && !one_way) { 589 dst_full->RGB.Arg[arg].Source = srcp_src; 590 } 591 } 592 } 593 return 1; 594 } 595 596 597 /* This function assumes that rgb.Alpha and alpha.RGB are unused */ 598 static int destructive_merge_instructions( 599 struct rc_pair_instruction * rgb, 600 struct rc_pair_instruction * alpha) 601 { 602 const struct rc_opcode_info * opcode; 603 604 assert(rgb->Alpha.Opcode == RC_OPCODE_NOP); 605 assert(alpha->RGB.Opcode == RC_OPCODE_NOP); 606 607 /* Presubtract registers need to be merged first so that registers 608 * needed by the presubtract operation can be placed in src0 and/or 609 * src1. */ 610 611 /* Merge the rgb presubtract registers. */ 612 if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { 613 if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) { 614 return 0; 615 } 616 } 617 /* Merge the alpha presubtract registers */ 618 if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { 619 if(!merge_presub_sources(rgb, alpha->Alpha, RC_SOURCE_ALPHA)){ 620 return 0; 621 } 622 } 623 624 /* Copy alpha args into rgb */ 625 opcode = rc_get_opcode_info(alpha->Alpha.Opcode); 626 627 for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { 628 unsigned int srcrgb = 0; 629 unsigned int srcalpha = 0; 630 unsigned int oldsrc = alpha->Alpha.Arg[arg].Source; 631 rc_register_file file = 0; 632 unsigned int index = 0; 633 int source; 634 635 if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) { 636 srcrgb = 1; 637 file = alpha->RGB.Src[oldsrc].File; 638 index = alpha->RGB.Src[oldsrc].Index; 639 } else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) { 640 srcalpha = 1; 641 file = alpha->Alpha.Src[oldsrc].File; 642 index = alpha->Alpha.Src[oldsrc].Index; 643 } 644 645 source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index); 646 if (source < 0) 647 return 0; 648 649 rgb->Alpha.Arg[arg].Source = source; 650 rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle; 651 rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs; 652 rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate; 653 } 654 655 /* Copy alpha opcode into rgb */ 656 rgb->Alpha.Opcode = alpha->Alpha.Opcode; 657 rgb->Alpha.DestIndex = alpha->Alpha.DestIndex; 658 rgb->Alpha.WriteMask = alpha->Alpha.WriteMask; 659 rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask; 660 rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask; 661 rgb->Alpha.Saturate = alpha->Alpha.Saturate; 662 rgb->Alpha.Omod = alpha->Alpha.Omod; 663 664 /* Merge ALU result writing */ 665 if (alpha->WriteALUResult) { 666 if (rgb->WriteALUResult) 667 return 0; 668 669 rgb->WriteALUResult = alpha->WriteALUResult; 670 rgb->ALUResultCompare = alpha->ALUResultCompare; 671 } 672 673 /* Copy SemWait */ 674 rgb->SemWait |= alpha->SemWait; 675 676 return 1; 677 } 678 679 /** 680 * Try to merge the given instructions into the rgb instructions. 681 * 682 * Return true on success; on failure, return false, and keep 683 * the instructions untouched. 684 */ 685 static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha) 686 { 687 struct rc_pair_instruction backup; 688 689 /*Instructions can't write output registers and ALU result at the 690 * same time. */ 691 if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask) 692 || (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) { 693 return 0; 694 } 695 696 /* Writing output registers in the middle of shaders is slow, so 697 * we don't want to pair output writes with temp writes. */ 698 if ((rgb->RGB.OutputWriteMask && !alpha->Alpha.OutputWriteMask) 699 || (!rgb->RGB.OutputWriteMask && alpha->Alpha.OutputWriteMask)) { 700 return 0; 701 } 702 703 memcpy(&backup, rgb, sizeof(struct rc_pair_instruction)); 704 705 if (destructive_merge_instructions(rgb, alpha)) 706 return 1; 707 708 memcpy(rgb, &backup, sizeof(struct rc_pair_instruction)); 709 return 0; 710 } 711 712 static void presub_nop(struct rc_instruction * emitted) { 713 int prev_rgb_index, prev_alpha_index, i, num_src; 714 715 /* We don't need a nop if the previous instruction is a TEX. */ 716 if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) { 717 return; 718 } 719 if (emitted->Prev->U.P.RGB.WriteMask) 720 prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex; 721 else 722 prev_rgb_index = -1; 723 if (emitted->Prev->U.P.Alpha.WriteMask) 724 prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex; 725 else 726 prev_alpha_index = 1; 727 728 /* Check the previous rgb instruction */ 729 if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) { 730 num_src = rc_presubtract_src_reg_count( 731 emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index); 732 for (i = 0; i < num_src; i++) { 733 unsigned int index = emitted->U.P.RGB.Src[i].Index; 734 if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY 735 && (index == prev_rgb_index 736 || index == prev_alpha_index)) { 737 emitted->Prev->U.P.Nop = 1; 738 return; 739 } 740 } 741 } 742 743 /* Check the previous alpha instruction. */ 744 if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used) 745 return; 746 747 num_src = rc_presubtract_src_reg_count( 748 emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index); 749 for (i = 0; i < num_src; i++) { 750 unsigned int index = emitted->U.P.Alpha.Src[i].Index; 751 if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY 752 && (index == prev_rgb_index || index == prev_alpha_index)) { 753 emitted->Prev->U.P.Nop = 1; 754 return; 755 } 756 } 757 } 758 759 static void rgb_to_alpha_remap ( 760 struct rc_instruction * inst, 761 struct rc_pair_instruction_arg * arg, 762 rc_register_file old_file, 763 rc_swizzle old_swz, 764 unsigned int new_index) 765 { 766 int new_src_index; 767 unsigned int i; 768 769 for (i = 0; i < 3; i++) { 770 if (get_swz(arg->Swizzle, i) == old_swz) { 771 SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W); 772 } 773 } 774 new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1, 775 old_file, new_index); 776 /* This conversion is not possible, we must have made a mistake in 777 * is_rgb_to_alpha_possible. */ 778 if (new_src_index < 0) { 779 assert(0); 780 return; 781 } 782 783 arg->Source = new_src_index; 784 } 785 786 static int can_remap(unsigned int opcode) 787 { 788 switch(opcode) { 789 case RC_OPCODE_DDX: 790 case RC_OPCODE_DDY: 791 return 0; 792 default: 793 return 1; 794 } 795 } 796 797 static int can_convert_opcode_to_alpha(unsigned int opcode) 798 { 799 switch(opcode) { 800 case RC_OPCODE_DDX: 801 case RC_OPCODE_DDY: 802 case RC_OPCODE_DP2: 803 case RC_OPCODE_DP3: 804 case RC_OPCODE_DP4: 805 case RC_OPCODE_DPH: 806 return 0; 807 default: 808 return 1; 809 } 810 } 811 812 static void is_rgb_to_alpha_possible( 813 void * userdata, 814 struct rc_instruction * inst, 815 struct rc_pair_instruction_arg * arg, 816 struct rc_pair_instruction_source * src) 817 { 818 unsigned int read_chan = RC_SWIZZLE_UNUSED; 819 unsigned int alpha_sources = 0; 820 unsigned int i; 821 struct rc_reader_data * reader_data = userdata; 822 823 if (!can_remap(inst->U.P.RGB.Opcode) 824 || !can_remap(inst->U.P.Alpha.Opcode)) { 825 reader_data->Abort = 1; 826 return; 827 } 828 829 if (!src) 830 return; 831 832 /* XXX There are some cases where we can still do the conversion if 833 * a reader reads from a presubtract source, but for now we'll prevent 834 * it. */ 835 if (arg->Source == RC_PAIR_PRESUB_SRC) { 836 reader_data->Abort = 1; 837 return; 838 } 839 840 /* Make sure the source only reads the register component that we 841 * are going to be convering from. It is OK if the instruction uses 842 * this component more than once. 843 * XXX If the index we will be converting to is the same as the 844 * current index, then it is OK to read from more than one component. 845 */ 846 for (i = 0; i < 3; i++) { 847 rc_swizzle swz = get_swz(arg->Swizzle, i); 848 switch(swz) { 849 case RC_SWIZZLE_X: 850 case RC_SWIZZLE_Y: 851 case RC_SWIZZLE_Z: 852 case RC_SWIZZLE_W: 853 if (read_chan == RC_SWIZZLE_UNUSED) { 854 read_chan = swz; 855 } else if (read_chan != swz) { 856 reader_data->Abort = 1; 857 return; 858 } 859 break; 860 default: 861 break; 862 } 863 } 864 865 /* Make sure there are enough alpha sources. 866 * XXX If we know what register all the readers are going 867 * to be remapped to, then in some situations we can still do 868 * the subsitution, even if all 3 alpha sources are being used.*/ 869 for (i = 0; i < 3; i++) { 870 if (inst->U.P.Alpha.Src[i].Used) { 871 alpha_sources++; 872 } 873 } 874 if (alpha_sources > 2) { 875 reader_data->Abort = 1; 876 return; 877 } 878 } 879 880 static int convert_rgb_to_alpha( 881 struct schedule_state * s, 882 struct schedule_instruction * sched_inst) 883 { 884 struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P; 885 unsigned int old_mask = pair_inst->RGB.WriteMask; 886 unsigned int old_swz = rc_mask_to_swizzle(old_mask); 887 const struct rc_opcode_info * info = 888 rc_get_opcode_info(pair_inst->RGB.Opcode); 889 int new_index = -1; 890 unsigned int i; 891 892 if (sched_inst->GlobalReaders.Abort) 893 return 0; 894 895 if (!pair_inst->RGB.WriteMask) 896 return 0; 897 898 if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode) 899 || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) { 900 return 0; 901 } 902 903 assert(sched_inst->NumWriteValues == 1); 904 905 if (!sched_inst->WriteValues[0]) { 906 assert(0); 907 return 0; 908 } 909 910 /* We start at the old index, because if we can reuse the same 911 * register and just change the swizzle then it is more likely we 912 * will be able to convert all the readers. */ 913 for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) { 914 struct reg_value ** new_regvalp = get_reg_valuep( 915 s, RC_FILE_TEMPORARY, i, 3); 916 if (!*new_regvalp) { 917 struct reg_value ** old_regvalp = 918 get_reg_valuep(s, 919 RC_FILE_TEMPORARY, 920 pair_inst->RGB.DestIndex, 921 rc_mask_to_swizzle(old_mask)); 922 new_index = i; 923 *new_regvalp = *old_regvalp; 924 *old_regvalp = NULL; 925 new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3); 926 break; 927 } 928 } 929 if (new_index < 0) { 930 return 0; 931 } 932 933 /* If we are converting a full instruction with RC_OPCODE_REPL_ALPHA 934 * as the RGB opcode, then the Alpha instruction will already contain 935 * the correct opcode and instruction args, so we do not want to 936 * overwrite them. 937 */ 938 if (pair_inst->RGB.Opcode != RC_OPCODE_REPL_ALPHA) { 939 pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode; 940 memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg, 941 sizeof(pair_inst->Alpha.Arg)); 942 } 943 pair_inst->Alpha.DestIndex = new_index; 944 pair_inst->Alpha.WriteMask = RC_MASK_W; 945 pair_inst->Alpha.Target = pair_inst->RGB.Target; 946 pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask; 947 pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask; 948 pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate; 949 pair_inst->Alpha.Omod = pair_inst->RGB.Omod; 950 /* Move the swizzles into the first chan */ 951 for (i = 0; i < info->NumSrcRegs; i++) { 952 unsigned int j; 953 for (j = 0; j < 3; j++) { 954 unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j); 955 if (swz != RC_SWIZZLE_UNUSED) { 956 pair_inst->Alpha.Arg[i].Swizzle = 957 rc_init_swizzle(swz, 1); 958 break; 959 } 960 } 961 } 962 pair_inst->RGB.Opcode = RC_OPCODE_NOP; 963 pair_inst->RGB.DestIndex = 0; 964 pair_inst->RGB.WriteMask = 0; 965 pair_inst->RGB.Target = 0; 966 pair_inst->RGB.OutputWriteMask = 0; 967 pair_inst->RGB.DepthWriteMask = 0; 968 pair_inst->RGB.Saturate = 0; 969 memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg)); 970 971 for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) { 972 struct rc_reader reader = sched_inst->GlobalReaders.Readers[i]; 973 rgb_to_alpha_remap(reader.Inst, reader.U.P.Arg, 974 RC_FILE_TEMPORARY, old_swz, new_index); 975 } 976 return 1; 977 } 978 979 static void try_convert_and_pair( 980 struct schedule_state *s, 981 struct schedule_instruction ** inst_list) 982 { 983 struct schedule_instruction * list_ptr = *inst_list; 984 while (list_ptr && *inst_list && (*inst_list)->NextReady) { 985 int paired = 0; 986 if (list_ptr->Instruction->U.P.Alpha.Opcode != RC_OPCODE_NOP 987 && list_ptr->Instruction->U.P.RGB.Opcode 988 != RC_OPCODE_REPL_ALPHA) { 989 goto next; 990 } 991 if (list_ptr->NumWriteValues == 1 992 && convert_rgb_to_alpha(s, list_ptr)) { 993 994 struct schedule_instruction * pair_ptr; 995 remove_inst_from_list(inst_list, list_ptr); 996 add_inst_to_list_score(&s->ReadyAlpha, list_ptr); 997 998 for (pair_ptr = s->ReadyRGB; pair_ptr; 999 pair_ptr = pair_ptr->NextReady) { 1000 if (merge_instructions(&pair_ptr->Instruction->U.P, 1001 &list_ptr->Instruction->U.P)) { 1002 remove_inst_from_list(&s->ReadyAlpha, list_ptr); 1003 remove_inst_from_list(&s->ReadyRGB, pair_ptr); 1004 pair_ptr->PairedInst = list_ptr; 1005 1006 add_inst_to_list(&s->ReadyFullALU, pair_ptr); 1007 list_ptr = *inst_list; 1008 paired = 1; 1009 break; 1010 } 1011 1012 } 1013 } 1014 if (!paired) { 1015 next: 1016 list_ptr = list_ptr->NextReady; 1017 } 1018 } 1019 } 1020 1021 /** 1022 * This function attempts to merge RGB and Alpha instructions together. 1023 */ 1024 static void pair_instructions(struct schedule_state * s) 1025 { 1026 struct schedule_instruction *rgb_ptr; 1027 struct schedule_instruction *alpha_ptr; 1028 1029 /* Some pairings might fail because they require too 1030 * many source slots; try all possible pairings if necessary */ 1031 rgb_ptr = s->ReadyRGB; 1032 while(rgb_ptr) { 1033 struct schedule_instruction * rgb_next = rgb_ptr->NextReady; 1034 alpha_ptr = s->ReadyAlpha; 1035 while(alpha_ptr) { 1036 struct schedule_instruction * alpha_next = alpha_ptr->NextReady; 1037 if (merge_instructions(&rgb_ptr->Instruction->U.P, &alpha_ptr->Instruction->U.P)) { 1038 /* Remove RGB and Alpha from their ready lists. 1039 */ 1040 remove_inst_from_list(&s->ReadyRGB, rgb_ptr); 1041 remove_inst_from_list(&s->ReadyAlpha, alpha_ptr); 1042 rgb_ptr->PairedInst = alpha_ptr; 1043 add_inst_to_list(&s->ReadyFullALU, rgb_ptr); 1044 break; 1045 } 1046 alpha_ptr = alpha_next; 1047 } 1048 rgb_ptr = rgb_next; 1049 } 1050 1051 if (!s->Opt) { 1052 return; 1053 } 1054 1055 /* Full instructions that have RC_OPCODE_REPL_ALPHA in the RGB 1056 * slot can be converted into Alpha instructions. */ 1057 try_convert_and_pair(s, &s->ReadyFullALU); 1058 1059 /* Try to convert some of the RGB instructions to Alpha and 1060 * try to pair it with another RGB. */ 1061 try_convert_and_pair(s, &s->ReadyRGB); 1062 } 1063 1064 static void update_max_score( 1065 struct schedule_state * s, 1066 struct schedule_instruction ** list, 1067 int * max_score, 1068 struct schedule_instruction ** max_inst_out, 1069 struct schedule_instruction *** list_out) 1070 { 1071 struct schedule_instruction * list_ptr; 1072 for (list_ptr = *list; list_ptr; list_ptr = list_ptr->NextReady) { 1073 int score; 1074 s->CalcScore(list_ptr); 1075 score = list_ptr->Score; 1076 if (!*max_inst_out || score > *max_score) { 1077 *max_score = score; 1078 *max_inst_out = list_ptr; 1079 *list_out = list; 1080 } 1081 } 1082 } 1083 1084 static void emit_instruction( 1085 struct schedule_state * s, 1086 struct rc_instruction * before) 1087 { 1088 int max_score = -1; 1089 struct schedule_instruction * max_inst = NULL; 1090 struct schedule_instruction ** max_list = NULL; 1091 unsigned tex_count = 0; 1092 struct schedule_instruction * tex_ptr; 1093 1094 pair_instructions(s); 1095 #if VERBOSE 1096 fprintf(stderr, "Full:\n"); 1097 print_list(s->ReadyFullALU); 1098 fprintf(stderr, "RGB:\n"); 1099 print_list(s->ReadyRGB); 1100 fprintf(stderr, "Alpha:\n"); 1101 print_list(s->ReadyAlpha); 1102 fprintf(stderr, "TEX:\n"); 1103 print_list(s->ReadyTEX); 1104 #endif 1105 1106 for (tex_ptr = s->ReadyTEX; tex_ptr; tex_ptr = tex_ptr->NextReady) { 1107 if (tex_ptr->Instruction->U.I.Opcode == RC_OPCODE_KIL) { 1108 emit_all_tex(s, before); 1109 return; 1110 } 1111 tex_count++; 1112 } 1113 update_max_score(s, &s->ReadyFullALU, &max_score, &max_inst, &max_list); 1114 update_max_score(s, &s->ReadyRGB, &max_score, &max_inst, &max_list); 1115 update_max_score(s, &s->ReadyAlpha, &max_score, &max_inst, &max_list); 1116 1117 if (tex_count >= s->max_tex_group || max_score == -1 1118 || (s->TEXCount > 0 && tex_count == s->TEXCount) 1119 || (!s->C->is_r500 && tex_count > 0 && max_score == -1)) { 1120 emit_all_tex(s, before); 1121 } else { 1122 1123 1124 remove_inst_from_list(max_list, max_inst); 1125 rc_insert_instruction(before->Prev, max_inst->Instruction); 1126 commit_alu_instruction(s, max_inst); 1127 1128 presub_nop(before->Prev); 1129 } 1130 } 1131 1132 static void add_tex_reader( 1133 struct schedule_state * s, 1134 struct schedule_instruction * writer, 1135 struct schedule_instruction * reader) 1136 { 1137 if (!writer || writer->Instruction->Type != RC_INSTRUCTION_NORMAL) { 1138 /*Not a TEX instructions */ 1139 return; 1140 } 1141 reader->TexReadCount++; 1142 rc_list_add(&writer->TexReaders, rc_list(&s->C->Pool, reader)); 1143 } 1144 1145 static void scan_read(void * data, struct rc_instruction * inst, 1146 rc_register_file file, unsigned int index, unsigned int chan) 1147 { 1148 struct schedule_state * s = data; 1149 struct reg_value ** v = get_reg_valuep(s, file, index, chan); 1150 struct reg_value_reader * reader; 1151 1152 if (!v) 1153 return; 1154 1155 if (*v && (*v)->Writer == s->Current) { 1156 /* The instruction reads and writes to a register component. 1157 * In this case, we only want to increment dependencies by one. 1158 * Why? 1159 * Because each instruction depends on the writers of its source 1160 * registers _and_ the most recent writer of its destination 1161 * register. In this case, the current instruction (s->Current) 1162 * has a dependency that both writes to one of its source 1163 * registers and was the most recent writer to its destination 1164 * register. We have already marked this dependency in 1165 * scan_write(), so we don't need to do it again. 1166 */ 1167 1168 /* We need to make sure we are adding s->Current to the 1169 * previous writer's list of TexReaders, if the previous writer 1170 * was a TEX instruction. 1171 */ 1172 add_tex_reader(s, s->PrevWriter[chan], s->Current); 1173 1174 return; 1175 } 1176 1177 DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan); 1178 1179 reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader)); 1180 reader->Reader = s->Current; 1181 if (!*v) { 1182 /* In this situation, the instruction reads from a register 1183 * that hasn't been written to or read from in the current 1184 * block. */ 1185 *v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value)); 1186 memset(*v, 0, sizeof(struct reg_value)); 1187 (*v)->Readers = reader; 1188 } else { 1189 reader->Next = (*v)->Readers; 1190 (*v)->Readers = reader; 1191 /* Only update the current instruction's dependencies if the 1192 * register it reads from has been written to in this block. */ 1193 if ((*v)->Writer) { 1194 add_tex_reader(s, (*v)->Writer, s->Current); 1195 s->Current->NumDependencies++; 1196 } 1197 } 1198 (*v)->NumReaders++; 1199 1200 if (s->Current->NumReadValues >= 12) { 1201 rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__); 1202 } else { 1203 s->Current->ReadValues[s->Current->NumReadValues++] = *v; 1204 } 1205 } 1206 1207 static void scan_write(void * data, struct rc_instruction * inst, 1208 rc_register_file file, unsigned int index, unsigned int chan) 1209 { 1210 struct schedule_state * s = data; 1211 struct reg_value ** pv = get_reg_valuep(s, file, index, chan); 1212 struct reg_value * newv; 1213 1214 if (!pv) 1215 return; 1216 1217 DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan); 1218 1219 newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv)); 1220 memset(newv, 0, sizeof(*newv)); 1221 1222 newv->Writer = s->Current; 1223 1224 if (*pv) { 1225 (*pv)->Next = newv; 1226 s->Current->NumDependencies++; 1227 /* Keep track of the previous writer to s->Current's destination 1228 * register */ 1229 s->PrevWriter[chan] = (*pv)->Writer; 1230 } 1231 1232 *pv = newv; 1233 1234 if (s->Current->NumWriteValues >= 4) { 1235 rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__); 1236 } else { 1237 s->Current->WriteValues[s->Current->NumWriteValues++] = newv; 1238 } 1239 } 1240 1241 static void is_rgb_to_alpha_possible_normal( 1242 void * userdata, 1243 struct rc_instruction * inst, 1244 struct rc_src_register * src) 1245 { 1246 struct rc_reader_data * reader_data = userdata; 1247 reader_data->Abort = 1; 1248 1249 } 1250 1251 static void schedule_block(struct schedule_state * s, 1252 struct rc_instruction * begin, struct rc_instruction * end) 1253 { 1254 unsigned int ip; 1255 1256 /* Scan instructions for data dependencies */ 1257 ip = 0; 1258 for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) { 1259 s->Current = memory_pool_malloc(&s->C->Pool, sizeof(*s->Current)); 1260 memset(s->Current, 0, sizeof(struct schedule_instruction)); 1261 1262 if (inst->Type == RC_INSTRUCTION_NORMAL) { 1263 const struct rc_opcode_info * info = 1264 rc_get_opcode_info(inst->U.I.Opcode); 1265 if (info->HasTexture) { 1266 s->TEXCount++; 1267 } 1268 } 1269 1270 /* XXX: This causes SemWait to be set for all instructions in 1271 * a block if the previous block contained a TEX instruction. 1272 * We can do better here, but it will take a lot of work. */ 1273 if (s->PrevBlockHasTex) { 1274 s->Current->TexReadCount = 1; 1275 } 1276 1277 s->Current->Instruction = inst; 1278 inst->IP = ip++; 1279 1280 DBG("%i: Scanning\n", inst->IP); 1281 1282 /* The order of things here is subtle and maybe slightly 1283 * counter-intuitive, to account for the case where an 1284 * instruction writes to the same register as it reads 1285 * from. */ 1286 rc_for_all_writes_chan(inst, &scan_write, s); 1287 rc_for_all_reads_chan(inst, &scan_read, s); 1288 1289 DBG("%i: Has %i dependencies\n", inst->IP, s->Current->NumDependencies); 1290 1291 if (!s->Current->NumDependencies) { 1292 instruction_ready(s, s->Current); 1293 } 1294 1295 /* Get global readers for possible RGB->Alpha conversion. */ 1296 s->Current->GlobalReaders.ExitOnAbort = 1; 1297 rc_get_readers(s->C, inst, &s->Current->GlobalReaders, 1298 is_rgb_to_alpha_possible_normal, 1299 is_rgb_to_alpha_possible, NULL); 1300 } 1301 1302 /* Temporarily unlink all instructions */ 1303 begin->Prev->Next = end; 1304 end->Prev = begin->Prev; 1305 1306 /* Schedule instructions back */ 1307 while(!s->C->Error && 1308 (s->ReadyTEX || s->ReadyRGB || s->ReadyAlpha || s->ReadyFullALU)) { 1309 emit_instruction(s, end); 1310 } 1311 } 1312 1313 static int is_controlflow(struct rc_instruction * inst) 1314 { 1315 if (inst->Type == RC_INSTRUCTION_NORMAL) { 1316 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); 1317 return opcode->IsFlowControl; 1318 } 1319 return 0; 1320 } 1321 1322 void rc_pair_schedule(struct radeon_compiler *cc, void *user) 1323 { 1324 struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; 1325 struct schedule_state s; 1326 struct rc_instruction * inst = c->Base.Program.Instructions.Next; 1327 unsigned int * opt = user; 1328 1329 memset(&s, 0, sizeof(s)); 1330 s.Opt = *opt; 1331 s.C = &c->Base; 1332 if (s.C->is_r500) { 1333 s.CalcScore = calc_score_readers; 1334 } else { 1335 s.CalcScore = calc_score_r300; 1336 } 1337 s.max_tex_group = debug_get_num_option("RADEON_TEX_GROUP", 8); 1338 while(inst != &c->Base.Program.Instructions) { 1339 struct rc_instruction * first; 1340 1341 if (is_controlflow(inst)) { 1342 inst = inst->Next; 1343 continue; 1344 } 1345 1346 first = inst; 1347 1348 while(inst != &c->Base.Program.Instructions && !is_controlflow(inst)) 1349 inst = inst->Next; 1350 1351 DBG("Schedule one block\n"); 1352 memset(s.Temporary, 0, sizeof(s.Temporary)); 1353 s.TEXCount = 0; 1354 schedule_block(&s, first, inst); 1355 if (s.PendingTEX) { 1356 s.PrevBlockHasTex = 1; 1357 } 1358 } 1359 } 1360