1 /************************************************************************** 2 * 3 * Copyright 2011 The Chromium OS authors. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 #include "i915_reg.h" 29 #include "i915_context.h" 30 #include "i915_fpc.h" 31 32 #include "pipe/p_shader_tokens.h" 33 #include "util/u_math.h" 34 #include "util/u_memory.h" 35 #include "util/u_string.h" 36 #include "tgsi/tgsi_parse.h" 37 #include "tgsi/tgsi_dump.h" 38 #include "tgsi/tgsi_exec.h" 39 40 struct i915_optimize_context 41 { 42 int first_write[TGSI_EXEC_NUM_TEMPS]; 43 int last_read[TGSI_EXEC_NUM_TEMPS]; 44 }; 45 46 static boolean same_src_dst_reg(struct i915_full_src_register *s1, struct i915_full_dst_register *d1) 47 { 48 return (s1->Register.File == d1->Register.File && 49 s1->Register.Indirect == d1->Register.Indirect && 50 s1->Register.Dimension == d1->Register.Dimension && 51 s1->Register.Index == d1->Register.Index); 52 } 53 54 static boolean same_dst_reg(struct i915_full_dst_register *d1, struct i915_full_dst_register *d2) 55 { 56 return (d1->Register.File == d2->Register.File && 57 d1->Register.Indirect == d2->Register.Indirect && 58 d1->Register.Dimension == d2->Register.Dimension && 59 d1->Register.Index == d2->Register.Index); 60 } 61 62 static boolean same_src_reg(struct i915_full_src_register *d1, struct i915_full_src_register *d2) 63 { 64 return (d1->Register.File == d2->Register.File && 65 d1->Register.Indirect == d2->Register.Indirect && 66 d1->Register.Dimension == d2->Register.Dimension && 67 d1->Register.Index == d2->Register.Index && 68 d1->Register.Absolute == d2->Register.Absolute && 69 d1->Register.Negate == d2->Register.Negate); 70 } 71 72 static const struct { 73 boolean is_texture; 74 boolean commutes; 75 unsigned neutral_element; 76 unsigned num_dst; 77 unsigned num_src; 78 } op_table [TGSI_OPCODE_LAST] = { 79 [ TGSI_OPCODE_ADD ] = { false, true, TGSI_SWIZZLE_ZERO, 1, 2 }, 80 [ TGSI_OPCODE_CEIL ] = { false, false, 0, 1, 1 }, 81 [ TGSI_OPCODE_CMP ] = { false, false, 0, 1, 2 }, 82 [ TGSI_OPCODE_COS ] = { false, false, 0, 1, 1 }, 83 [ TGSI_OPCODE_DDX ] = { false, false, 0, 1, 0 }, 84 [ TGSI_OPCODE_DDY ] = { false, false, 0, 1, 0 }, 85 [ TGSI_OPCODE_DP2 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 }, 86 [ TGSI_OPCODE_DP3 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 }, 87 [ TGSI_OPCODE_DP4 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 }, 88 [ TGSI_OPCODE_DPH ] = { false, false, 0, 1, 2 }, 89 [ TGSI_OPCODE_DST ] = { false, false, 0, 1, 2 }, 90 [ TGSI_OPCODE_END ] = { false, false, 0, 0, 0 }, 91 [ TGSI_OPCODE_EX2 ] = { false, false, 0, 1, 1 }, 92 [ TGSI_OPCODE_FLR ] = { false, false, 0, 1, 1 }, 93 [ TGSI_OPCODE_FRC ] = { false, false, 0, 1, 1 }, 94 [ TGSI_OPCODE_KILL_IF ] = { false, false, 0, 0, 1 }, 95 [ TGSI_OPCODE_KILL ] = { false, false, 0, 0, 0 }, 96 [ TGSI_OPCODE_LG2 ] = { false, false, 0, 1, 1 }, 97 [ TGSI_OPCODE_LIT ] = { false, false, 0, 1, 1 }, 98 [ TGSI_OPCODE_LRP ] = { false, false, 0, 1, 3 }, 99 [ TGSI_OPCODE_MAX ] = { false, false, 0, 1, 2 }, 100 [ TGSI_OPCODE_MAD ] = { false, false, 0, 1, 3 }, 101 [ TGSI_OPCODE_MIN ] = { false, false, 0, 1, 2 }, 102 [ TGSI_OPCODE_MOV ] = { false, false, 0, 1, 1 }, 103 [ TGSI_OPCODE_MUL ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 }, 104 [ TGSI_OPCODE_NOP ] = { false, false, 0, 0, 0 }, 105 [ TGSI_OPCODE_POW ] = { false, false, 0, 1, 2 }, 106 [ TGSI_OPCODE_RCP ] = { false, false, 0, 1, 1 }, 107 [ TGSI_OPCODE_RET ] = { false, false, 0, 0, 0 }, 108 [ TGSI_OPCODE_RSQ ] = { false, false, 0, 1, 1 }, 109 [ TGSI_OPCODE_SCS ] = { false, false, 0, 1, 1 }, 110 [ TGSI_OPCODE_SEQ ] = { false, false, 0, 1, 2 }, 111 [ TGSI_OPCODE_SGE ] = { false, false, 0, 1, 2 }, 112 [ TGSI_OPCODE_SGT ] = { false, false, 0, 1, 2 }, 113 [ TGSI_OPCODE_SIN ] = { false, false, 0, 1, 1 }, 114 [ TGSI_OPCODE_SLE ] = { false, false, 0, 1, 2 }, 115 [ TGSI_OPCODE_SLT ] = { false, false, 0, 1, 2 }, 116 [ TGSI_OPCODE_SNE ] = { false, false, 0, 1, 2 }, 117 [ TGSI_OPCODE_SSG ] = { false, false, 0, 1, 1 }, 118 [ TGSI_OPCODE_TEX ] = { true, false, 0, 1, 2 }, 119 [ TGSI_OPCODE_TRUNC ] = { false, false, 0, 1, 1 }, 120 [ TGSI_OPCODE_TXB ] = { true, false, 0, 1, 2 }, 121 [ TGSI_OPCODE_TXP ] = { true, false, 0, 1, 2 }, 122 [ TGSI_OPCODE_XPD ] = { false, false, 0, 1, 2 }, 123 }; 124 125 static boolean op_has_dst(unsigned opcode) 126 { 127 return (op_table[opcode].num_dst > 0); 128 } 129 130 static int op_num_dst(unsigned opcode) 131 { 132 return op_table[opcode].num_dst; 133 } 134 135 static int op_num_src(unsigned opcode) 136 { 137 return op_table[opcode].num_src; 138 } 139 140 static boolean op_commutes(unsigned opcode) 141 { 142 return op_table[opcode].commutes; 143 } 144 145 static unsigned mask_for_unswizzled(int num_components) 146 { 147 unsigned mask = 0; 148 switch(num_components) 149 { 150 case 4: 151 mask |= TGSI_WRITEMASK_W; 152 case 3: 153 mask |= TGSI_WRITEMASK_Z; 154 case 2: 155 mask |= TGSI_WRITEMASK_Y; 156 case 1: 157 mask |= TGSI_WRITEMASK_X; 158 } 159 return mask; 160 } 161 162 static boolean is_unswizzled(struct i915_full_src_register *r, 163 unsigned write_mask) 164 { 165 if ( write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X) 166 return FALSE; 167 if ( write_mask & TGSI_WRITEMASK_Y && r->Register.SwizzleY != TGSI_SWIZZLE_Y) 168 return FALSE; 169 if ( write_mask & TGSI_WRITEMASK_Z && r->Register.SwizzleZ != TGSI_SWIZZLE_Z) 170 return FALSE; 171 if ( write_mask & TGSI_WRITEMASK_W && r->Register.SwizzleW != TGSI_SWIZZLE_W) 172 return FALSE; 173 return TRUE; 174 } 175 176 static boolean op_is_texture(unsigned opcode) 177 { 178 return op_table[opcode].is_texture; 179 } 180 181 static unsigned op_neutral_element(unsigned opcode) 182 { 183 unsigned ne = op_table[opcode].neutral_element; 184 if (!ne) { 185 debug_printf("No neutral element for opcode %d\n",opcode); 186 ne = TGSI_SWIZZLE_ZERO; 187 } 188 return ne; 189 } 190 191 /* 192 * Sets the swizzle to the neutral element for the operation for the bits 193 * of writemask which are set, swizzle to identity otherwise. 194 */ 195 static void set_neutral_element_swizzle(struct i915_full_src_register *r, 196 unsigned write_mask, 197 unsigned neutral) 198 { 199 if ( write_mask & TGSI_WRITEMASK_X ) 200 r->Register.SwizzleX = neutral; 201 else 202 r->Register.SwizzleX = TGSI_SWIZZLE_X; 203 204 if ( write_mask & TGSI_WRITEMASK_Y ) 205 r->Register.SwizzleY = neutral; 206 else 207 r->Register.SwizzleY = TGSI_SWIZZLE_Y; 208 209 if ( write_mask & TGSI_WRITEMASK_Z ) 210 r->Register.SwizzleZ = neutral; 211 else 212 r->Register.SwizzleZ = TGSI_SWIZZLE_Z; 213 214 if ( write_mask & TGSI_WRITEMASK_W ) 215 r->Register.SwizzleW = neutral; 216 else 217 r->Register.SwizzleW = TGSI_SWIZZLE_W; 218 } 219 220 static void copy_src_reg(struct i915_src_register *o, const struct tgsi_src_register *i) 221 { 222 o->File = i->File; 223 o->Indirect = i->Indirect; 224 o->Dimension = i->Dimension; 225 o->Index = i->Index; 226 o->SwizzleX = i->SwizzleX; 227 o->SwizzleY = i->SwizzleY; 228 o->SwizzleZ = i->SwizzleZ; 229 o->SwizzleW = i->SwizzleW; 230 o->Absolute = i->Absolute; 231 o->Negate = i->Negate; 232 } 233 234 static void copy_dst_reg(struct i915_dst_register *o, const struct tgsi_dst_register *i) 235 { 236 o->File = i->File; 237 o->WriteMask = i->WriteMask; 238 o->Indirect = i->Indirect; 239 o->Dimension = i->Dimension; 240 o->Index = i->Index; 241 } 242 243 static void copy_instruction(struct i915_full_instruction *o, const struct tgsi_full_instruction *i) 244 { 245 memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction)); 246 memcpy(&o->Texture, &i->Texture, sizeof(o->Texture)); 247 248 copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register); 249 250 copy_src_reg(&o->Src[0].Register, &i->Src[0].Register); 251 copy_src_reg(&o->Src[1].Register, &i->Src[1].Register); 252 copy_src_reg(&o->Src[2].Register, &i->Src[2].Register); 253 } 254 255 static void copy_token(union i915_full_token *o, union tgsi_full_token *i) 256 { 257 if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION) 258 memcpy(o, i, sizeof(*o)); 259 else 260 copy_instruction(&o->FullInstruction, &i->FullInstruction); 261 262 } 263 264 static void liveness_mark_written(struct i915_optimize_context *ctx, 265 struct i915_full_dst_register *dst_reg, 266 int pos) 267 { 268 int dst_reg_index; 269 if (dst_reg->Register.File == TGSI_FILE_TEMPORARY) { 270 dst_reg_index = dst_reg->Register.Index; 271 assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS); 272 /* dead -> live transition */ 273 if (ctx->first_write[dst_reg_index] != -1) 274 ctx->first_write[dst_reg_index] = pos; 275 } 276 } 277 278 static void liveness_mark_read(struct i915_optimize_context *ctx, 279 struct i915_full_src_register *src_reg, 280 int pos) 281 { 282 int src_reg_index; 283 if (src_reg->Register.File == TGSI_FILE_TEMPORARY) { 284 src_reg_index = src_reg->Register.Index; 285 assert(src_reg_index < TGSI_EXEC_NUM_TEMPS); 286 /* live -> dead transition */ 287 if (ctx->last_read[src_reg_index] != -1) 288 ctx->last_read[src_reg_index] = pos; 289 } 290 } 291 292 static void liveness_analysis(struct i915_optimize_context *ctx, 293 struct i915_token_list *tokens) 294 { 295 struct i915_full_dst_register *dst_reg; 296 struct i915_full_src_register *src_reg; 297 union i915_full_token *current; 298 unsigned opcode; 299 int num_dst, num_src; 300 int i = 0; 301 302 for(i = 0; i < TGSI_EXEC_NUM_TEMPS; i++) 303 { 304 ctx->first_write[i] = -1; 305 ctx->last_read[i] = -1; 306 } 307 308 for(i = 0; i < tokens->NumTokens; i++) 309 { 310 current = &tokens->Tokens[i]; 311 312 if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION) 313 continue; 314 315 opcode = current->FullInstruction.Instruction.Opcode; 316 num_dst = op_num_dst(opcode); 317 318 switch(num_dst) 319 { 320 case 1: 321 dst_reg = ¤t->FullInstruction.Dst[0]; 322 liveness_mark_written(ctx, dst_reg, i); 323 case 0: 324 break; 325 default: 326 debug_printf("Op %d has %d dst regs\n", opcode, num_dst); 327 break; 328 } 329 } 330 331 for(i = tokens->NumTokens - 1; i >= 0; i--) 332 { 333 current = &tokens->Tokens[i]; 334 335 if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION) 336 continue; 337 338 opcode = current->FullInstruction.Instruction.Opcode; 339 num_src = op_num_src(opcode); 340 341 switch(num_src) 342 { 343 case 3: 344 src_reg = ¤t->FullInstruction.Src[2]; 345 liveness_mark_read(ctx, src_reg, i); 346 case 2: 347 src_reg = ¤t->FullInstruction.Src[1]; 348 liveness_mark_read(ctx, src_reg, i); 349 case 1: 350 src_reg = ¤t->FullInstruction.Src[0]; 351 liveness_mark_read(ctx, src_reg, i); 352 case 0: 353 break; 354 default: 355 debug_printf("Op %d has %d src regs\n", opcode, num_src); 356 break; 357 } 358 } 359 } 360 361 static int unused_from(struct i915_optimize_context *ctx, struct i915_full_dst_register *dst_reg, int from) 362 { 363 int dst_reg_index = dst_reg->Register.Index; 364 assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS); 365 return (from >= ctx->last_read[dst_reg_index]); 366 } 367 368 /* Returns a mask with the components used for a texture access instruction */ 369 static unsigned i915_tex_mask(union i915_full_token *instr) 370 { 371 unsigned mask; 372 373 /* Get the number of coords */ 374 mask = mask_for_unswizzled(i915_num_coords(instr->FullInstruction.Texture.Texture)); 375 376 /* Add the W component if projective */ 377 if (instr->FullInstruction.Instruction.Opcode == TGSI_OPCODE_TXP) 378 mask |= TGSI_WRITEMASK_W; 379 380 return mask; 381 } 382 383 static boolean target_is_texture2d(uint tex) 384 { 385 switch (tex) { 386 case TGSI_TEXTURE_2D: 387 case TGSI_TEXTURE_RECT: 388 return true; 389 default: 390 return false; 391 } 392 } 393 394 395 /* 396 * Optimize away useless indirect texture reads: 397 * MOV TEMP[0].xy, IN[0].xyyy 398 * TEX TEMP[1], TEMP[0], SAMP[0], 2D 399 * into: 400 * TEX TEMP[1], IN[0], SAMP[0], 2D 401 * 402 * note: this only seems to work on 2D/RECT textures, but not SHAADOW2D/1D/.. 403 */ 404 static void i915_fpc_optimize_mov_before_tex(struct i915_optimize_context *ctx, 405 struct i915_token_list *tokens, 406 int index) 407 { 408 union i915_full_token *current = &tokens->Tokens[index - 1]; 409 union i915_full_token *next = &tokens->Tokens[index]; 410 411 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 412 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 413 current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && 414 op_is_texture(next->FullInstruction.Instruction.Opcode) && 415 target_is_texture2d(next->FullInstruction.Texture.Texture) && 416 same_src_dst_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) && 417 is_unswizzled(¤t->FullInstruction.Src[0], i915_tex_mask(next)) && 418 unused_from(ctx, ¤t->FullInstruction.Dst[0], index)) 419 { 420 memcpy(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[0], sizeof(struct i915_src_register)); 421 current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; 422 } 423 } 424 425 /* 426 * Optimize away things like: 427 * MOV TEMP[0].xy, TEMP[1].xyyy (first write for TEMP[0]) 428 * MOV TEMP[0].w, TEMP[1].wwww (last write for TEMP[0]) 429 * into: 430 * NOP 431 * MOV OUT[0].xyw, TEMP[1].xyww 432 */ 433 static void i915_fpc_optimize_mov_after_mov(union i915_full_token *current, union i915_full_token *next) 434 { 435 struct i915_full_src_register *src_reg1, *src_reg2; 436 struct i915_full_dst_register *dst_reg1, *dst_reg2; 437 unsigned swizzle_x, swizzle_y, swizzle_z, swizzle_w; 438 439 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 440 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 441 current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && 442 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && 443 current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate && 444 same_dst_reg(&next->FullInstruction.Dst[0], ¤t->FullInstruction.Dst[0]) && 445 same_src_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[0]) && 446 !same_src_dst_reg(¤t->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) ) 447 { 448 src_reg1 = ¤t->FullInstruction.Src[0]; 449 dst_reg1 = ¤t->FullInstruction.Dst[0]; 450 src_reg2 = &next->FullInstruction.Src[0]; 451 dst_reg2 = &next->FullInstruction.Dst[0]; 452 453 /* Start with swizzles from the first mov */ 454 swizzle_x = src_reg1->Register.SwizzleX; 455 swizzle_y = src_reg1->Register.SwizzleY; 456 swizzle_z = src_reg1->Register.SwizzleZ; 457 swizzle_w = src_reg1->Register.SwizzleW; 458 459 /* Pile the second mov on top */ 460 if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_X) 461 swizzle_x = src_reg2->Register.SwizzleX; 462 if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Y) 463 swizzle_y = src_reg2->Register.SwizzleY; 464 if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Z) 465 swizzle_z = src_reg2->Register.SwizzleZ; 466 if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_W) 467 swizzle_w = src_reg2->Register.SwizzleW; 468 469 dst_reg2->Register.WriteMask |= dst_reg1->Register.WriteMask; 470 src_reg2->Register.SwizzleX = swizzle_x; 471 src_reg2->Register.SwizzleY = swizzle_y; 472 src_reg2->Register.SwizzleZ = swizzle_z; 473 src_reg2->Register.SwizzleW = swizzle_w; 474 475 current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; 476 477 return; 478 } 479 } 480 481 /* 482 * Optimize away things like: 483 * MUL OUT[0].xyz, TEMP[1], TEMP[2] 484 * MOV OUT[0].w, TEMP[2] 485 * into: 486 * MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2] 487 * This is useful for optimizing texenv. 488 */ 489 static void i915_fpc_optimize_mov_after_alu(union i915_full_token *current, union i915_full_token *next) 490 { 491 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 492 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 493 op_commutes(current->FullInstruction.Instruction.Opcode) && 494 current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate && 495 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && 496 same_dst_reg(&next->FullInstruction.Dst[0], ¤t->FullInstruction.Dst[0]) && 497 same_src_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[1]) && 498 !same_src_dst_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) && 499 is_unswizzled(¤t->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) && 500 is_unswizzled(¤t->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) && 501 is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) ) 502 { 503 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; 504 505 set_neutral_element_swizzle(¤t->FullInstruction.Src[1], 0, 0); 506 set_neutral_element_swizzle(¤t->FullInstruction.Src[0], 507 next->FullInstruction.Dst[0].Register.WriteMask, 508 op_neutral_element(current->FullInstruction.Instruction.Opcode)); 509 510 current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask | 511 next->FullInstruction.Dst[0].Register.WriteMask; 512 return; 513 } 514 515 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 516 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 517 op_commutes(current->FullInstruction.Instruction.Opcode) && 518 current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate && 519 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && 520 same_dst_reg(&next->FullInstruction.Dst[0], ¤t->FullInstruction.Dst[0]) && 521 same_src_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[0]) && 522 !same_src_dst_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) && 523 is_unswizzled(¤t->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) && 524 is_unswizzled(¤t->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) && 525 is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) ) 526 { 527 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; 528 529 set_neutral_element_swizzle(¤t->FullInstruction.Src[0], 0, 0); 530 set_neutral_element_swizzle(¤t->FullInstruction.Src[1], 531 next->FullInstruction.Dst[0].Register.WriteMask, 532 op_neutral_element(current->FullInstruction.Instruction.Opcode)); 533 534 current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask | 535 next->FullInstruction.Dst[0].Register.WriteMask; 536 return; 537 } 538 } 539 540 /* 541 * Optimize away things like: 542 * MOV TEMP[0].xyz TEMP[0].xyzx 543 * into: 544 * NOP 545 */ 546 static boolean i915_fpc_useless_mov(union tgsi_full_token *tgsi_current) 547 { 548 union i915_full_token current; 549 copy_token(¤t , tgsi_current); 550 if ( current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 551 current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && 552 op_has_dst(current.FullInstruction.Instruction.Opcode) && 553 !current.FullInstruction.Instruction.Saturate && 554 current.FullInstruction.Src[0].Register.Absolute == 0 && 555 current.FullInstruction.Src[0].Register.Negate == 0 && 556 is_unswizzled(¤t.FullInstruction.Src[0], current.FullInstruction.Dst[0].Register.WriteMask) && 557 same_src_dst_reg(¤t.FullInstruction.Src[0], ¤t.FullInstruction.Dst[0]) ) 558 { 559 return TRUE; 560 } 561 return FALSE; 562 } 563 564 /* 565 * Optimize away things like: 566 * *** TEMP[0], TEMP[1], TEMP[2] 567 * MOV OUT[0] TEMP[0] 568 * into: 569 * *** OUT[0], TEMP[1], TEMP[2] 570 */ 571 static void i915_fpc_optimize_useless_mov_after_inst(struct i915_optimize_context *ctx, 572 struct i915_token_list *tokens, 573 int index) 574 { 575 union i915_full_token *current = &tokens->Tokens[index - 1]; 576 union i915_full_token *next = &tokens->Tokens[index]; 577 578 // &out_tokens->Tokens[i-1], &out_tokens->Tokens[i]); 579 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 580 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 581 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && 582 op_has_dst(current->FullInstruction.Instruction.Opcode) && 583 !next->FullInstruction.Instruction.Saturate && 584 next->FullInstruction.Src[0].Register.Absolute == 0 && 585 next->FullInstruction.Src[0].Register.Negate == 0 && 586 unused_from(ctx, ¤t->FullInstruction.Dst[0], index) && 587 current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZW && 588 is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) && 589 current->FullInstruction.Dst[0].Register.WriteMask == next->FullInstruction.Dst[0].Register.WriteMask && 590 same_src_dst_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) ) 591 { 592 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; 593 594 current->FullInstruction.Dst[0] = next->FullInstruction.Dst[0]; 595 return; 596 } 597 } 598 599 struct i915_token_list* i915_optimize(const struct tgsi_token *tokens) 600 { 601 struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list)); 602 struct tgsi_parse_context parse; 603 struct i915_optimize_context *ctx; 604 int i = 0; 605 606 ctx = malloc(sizeof(*ctx)); 607 608 out_tokens->NumTokens = 0; 609 610 /* Count the tokens */ 611 tgsi_parse_init( &parse, tokens ); 612 while( !tgsi_parse_end_of_tokens( &parse ) ) { 613 tgsi_parse_token( &parse ); 614 out_tokens->NumTokens++; 615 } 616 tgsi_parse_free (&parse); 617 618 /* Allocate our tokens */ 619 out_tokens->Tokens = MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens); 620 621 tgsi_parse_init( &parse, tokens ); 622 while( !tgsi_parse_end_of_tokens( &parse ) ) { 623 tgsi_parse_token( &parse ); 624 625 if (i915_fpc_useless_mov(&parse.FullToken)) { 626 out_tokens->NumTokens--; 627 continue; 628 } 629 630 copy_token(&out_tokens->Tokens[i] , &parse.FullToken); 631 632 i++; 633 } 634 tgsi_parse_free (&parse); 635 636 liveness_analysis(ctx, out_tokens); 637 638 i = 1; 639 while( i < out_tokens->NumTokens) { 640 i915_fpc_optimize_useless_mov_after_inst(ctx, out_tokens, i); 641 i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]); 642 i915_fpc_optimize_mov_after_mov(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]); 643 i915_fpc_optimize_mov_before_tex(ctx, out_tokens, i); 644 i++; 645 } 646 647 free(ctx); 648 649 return out_tokens; 650 } 651 652 void i915_optimize_free(struct i915_token_list *tokens) 653 { 654 free(tokens->Tokens); 655 free(tokens); 656 } 657 658 659