1 /************************************************************************** 2 * 3 * Copyright 2011 The Chromium OS authors. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 #include "i915_reg.h" 29 #include "i915_context.h" 30 #include "i915_fpc.h" 31 32 #include "pipe/p_shader_tokens.h" 33 #include "util/u_math.h" 34 #include "util/u_memory.h" 35 #include "util/u_string.h" 36 #include "tgsi/tgsi_parse.h" 37 #include "tgsi/tgsi_dump.h" 38 #include "tgsi/tgsi_exec.h" 39 40 struct i915_optimize_context 41 { 42 int first_write[TGSI_EXEC_NUM_TEMPS]; 43 int last_read[TGSI_EXEC_NUM_TEMPS]; 44 }; 45 46 static boolean same_src_dst_reg(struct i915_full_src_register *s1, struct i915_full_dst_register *d1) 47 { 48 return (s1->Register.File == d1->Register.File && 49 s1->Register.Indirect == d1->Register.Indirect && 50 s1->Register.Dimension == d1->Register.Dimension && 51 s1->Register.Index == d1->Register.Index); 52 } 53 54 static boolean same_dst_reg(struct i915_full_dst_register *d1, struct i915_full_dst_register *d2) 55 { 56 return (d1->Register.File == d2->Register.File && 57 d1->Register.Indirect == d2->Register.Indirect && 58 d1->Register.Dimension == d2->Register.Dimension && 59 d1->Register.Index == d2->Register.Index); 60 } 61 62 static boolean same_src_reg(struct i915_full_src_register *d1, struct i915_full_src_register *d2) 63 { 64 return (d1->Register.File == d2->Register.File && 65 d1->Register.Indirect == d2->Register.Indirect && 66 d1->Register.Dimension == d2->Register.Dimension && 67 d1->Register.Index == d2->Register.Index && 68 d1->Register.Absolute == d2->Register.Absolute && 69 d1->Register.Negate == d2->Register.Negate); 70 } 71 72 static const struct { 73 boolean is_texture; 74 boolean commutes; 75 unsigned neutral_element; 76 unsigned num_dst; 77 unsigned num_src; 78 } op_table [TGSI_OPCODE_LAST] = { 79 [ TGSI_OPCODE_ADD ] = { false, true, TGSI_SWIZZLE_ZERO, 1, 2 }, 80 [ TGSI_OPCODE_CEIL ] = { false, false, 0, 1, 1 }, 81 [ TGSI_OPCODE_CMP ] = { false, false, 0, 1, 2 }, 82 [ TGSI_OPCODE_COS ] = { false, false, 0, 1, 1 }, 83 [ TGSI_OPCODE_DDX ] = { false, false, 0, 1, 0 }, 84 [ TGSI_OPCODE_DDY ] = { false, false, 0, 1, 0 }, 85 [ TGSI_OPCODE_DP2 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 }, 86 [ TGSI_OPCODE_DP3 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 }, 87 [ TGSI_OPCODE_DP4 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 }, 88 [ TGSI_OPCODE_DST ] = { false, false, 0, 1, 2 }, 89 [ TGSI_OPCODE_END ] = { false, false, 0, 0, 0 }, 90 [ TGSI_OPCODE_EX2 ] = { false, false, 0, 1, 1 }, 91 [ TGSI_OPCODE_FLR ] = { false, false, 0, 1, 1 }, 92 [ TGSI_OPCODE_FRC ] = { false, false, 0, 1, 1 }, 93 [ TGSI_OPCODE_KILL_IF ] = { false, false, 0, 0, 1 }, 94 [ TGSI_OPCODE_KILL ] = { false, false, 0, 0, 0 }, 95 [ TGSI_OPCODE_LG2 ] = { false, false, 0, 1, 1 }, 96 [ TGSI_OPCODE_LIT ] = { false, false, 0, 1, 1 }, 97 [ TGSI_OPCODE_LRP ] = { false, false, 0, 1, 3 }, 98 [ TGSI_OPCODE_MAX ] = { false, false, 0, 1, 2 }, 99 [ TGSI_OPCODE_MAD ] = { false, false, 0, 1, 3 }, 100 [ TGSI_OPCODE_MIN ] = { false, false, 0, 1, 2 }, 101 [ TGSI_OPCODE_MOV ] = { false, false, 0, 1, 1 }, 102 [ TGSI_OPCODE_MUL ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 }, 103 [ TGSI_OPCODE_NOP ] = { false, false, 0, 0, 0 }, 104 [ TGSI_OPCODE_POW ] = { false, false, 0, 1, 2 }, 105 [ TGSI_OPCODE_RCP ] = { false, false, 0, 1, 1 }, 106 [ TGSI_OPCODE_RET ] = { false, false, 0, 0, 0 }, 107 [ TGSI_OPCODE_RSQ ] = { false, false, 0, 1, 1 }, 108 [ TGSI_OPCODE_SEQ ] = { false, false, 0, 1, 2 }, 109 [ TGSI_OPCODE_SGE ] = { false, false, 0, 1, 2 }, 110 [ TGSI_OPCODE_SGT ] = { false, false, 0, 1, 2 }, 111 [ TGSI_OPCODE_SIN ] = { false, false, 0, 1, 1 }, 112 [ TGSI_OPCODE_SLE ] = { false, false, 0, 1, 2 }, 113 [ TGSI_OPCODE_SLT ] = { false, false, 0, 1, 2 }, 114 [ TGSI_OPCODE_SNE ] = { false, false, 0, 1, 2 }, 115 [ TGSI_OPCODE_SSG ] = { false, false, 0, 1, 1 }, 116 [ TGSI_OPCODE_TEX ] = { true, false, 0, 1, 2 }, 117 [ TGSI_OPCODE_TRUNC ] = { false, false, 0, 1, 1 }, 118 [ TGSI_OPCODE_TXB ] = { true, false, 0, 1, 2 }, 119 [ TGSI_OPCODE_TXP ] = { true, false, 0, 1, 2 }, 120 }; 121 122 static boolean op_has_dst(unsigned opcode) 123 { 124 return (op_table[opcode].num_dst > 0); 125 } 126 127 static int op_num_dst(unsigned opcode) 128 { 129 return op_table[opcode].num_dst; 130 } 131 132 static int op_num_src(unsigned opcode) 133 { 134 return op_table[opcode].num_src; 135 } 136 137 static boolean op_commutes(unsigned opcode) 138 { 139 return op_table[opcode].commutes; 140 } 141 142 static unsigned mask_for_unswizzled(int num_components) 143 { 144 unsigned mask = 0; 145 switch(num_components) 146 { 147 case 4: 148 mask |= TGSI_WRITEMASK_W; 149 case 3: 150 mask |= TGSI_WRITEMASK_Z; 151 case 2: 152 mask |= TGSI_WRITEMASK_Y; 153 case 1: 154 mask |= TGSI_WRITEMASK_X; 155 } 156 return mask; 157 } 158 159 static boolean is_unswizzled(struct i915_full_src_register *r, 160 unsigned write_mask) 161 { 162 if ( write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X) 163 return FALSE; 164 if ( write_mask & TGSI_WRITEMASK_Y && r->Register.SwizzleY != TGSI_SWIZZLE_Y) 165 return FALSE; 166 if ( write_mask & TGSI_WRITEMASK_Z && r->Register.SwizzleZ != TGSI_SWIZZLE_Z) 167 return FALSE; 168 if ( write_mask & TGSI_WRITEMASK_W && r->Register.SwizzleW != TGSI_SWIZZLE_W) 169 return FALSE; 170 return TRUE; 171 } 172 173 static boolean op_is_texture(unsigned opcode) 174 { 175 return op_table[opcode].is_texture; 176 } 177 178 static unsigned op_neutral_element(unsigned opcode) 179 { 180 unsigned ne = op_table[opcode].neutral_element; 181 if (!ne) { 182 debug_printf("No neutral element for opcode %d\n",opcode); 183 ne = TGSI_SWIZZLE_ZERO; 184 } 185 return ne; 186 } 187 188 /* 189 * Sets the swizzle to the neutral element for the operation for the bits 190 * of writemask which are set, swizzle to identity otherwise. 191 */ 192 static void set_neutral_element_swizzle(struct i915_full_src_register *r, 193 unsigned write_mask, 194 unsigned neutral) 195 { 196 if ( write_mask & TGSI_WRITEMASK_X ) 197 r->Register.SwizzleX = neutral; 198 else 199 r->Register.SwizzleX = TGSI_SWIZZLE_X; 200 201 if ( write_mask & TGSI_WRITEMASK_Y ) 202 r->Register.SwizzleY = neutral; 203 else 204 r->Register.SwizzleY = TGSI_SWIZZLE_Y; 205 206 if ( write_mask & TGSI_WRITEMASK_Z ) 207 r->Register.SwizzleZ = neutral; 208 else 209 r->Register.SwizzleZ = TGSI_SWIZZLE_Z; 210 211 if ( write_mask & TGSI_WRITEMASK_W ) 212 r->Register.SwizzleW = neutral; 213 else 214 r->Register.SwizzleW = TGSI_SWIZZLE_W; 215 } 216 217 static void copy_src_reg(struct i915_src_register *o, const struct tgsi_src_register *i) 218 { 219 o->File = i->File; 220 o->Indirect = i->Indirect; 221 o->Dimension = i->Dimension; 222 o->Index = i->Index; 223 o->SwizzleX = i->SwizzleX; 224 o->SwizzleY = i->SwizzleY; 225 o->SwizzleZ = i->SwizzleZ; 226 o->SwizzleW = i->SwizzleW; 227 o->Absolute = i->Absolute; 228 o->Negate = i->Negate; 229 } 230 231 static void copy_dst_reg(struct i915_dst_register *o, const struct tgsi_dst_register *i) 232 { 233 o->File = i->File; 234 o->WriteMask = i->WriteMask; 235 o->Indirect = i->Indirect; 236 o->Dimension = i->Dimension; 237 o->Index = i->Index; 238 } 239 240 static void copy_instruction(struct i915_full_instruction *o, const struct tgsi_full_instruction *i) 241 { 242 memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction)); 243 memcpy(&o->Texture, &i->Texture, sizeof(o->Texture)); 244 245 copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register); 246 247 copy_src_reg(&o->Src[0].Register, &i->Src[0].Register); 248 copy_src_reg(&o->Src[1].Register, &i->Src[1].Register); 249 copy_src_reg(&o->Src[2].Register, &i->Src[2].Register); 250 } 251 252 static void copy_token(union i915_full_token *o, union tgsi_full_token *i) 253 { 254 if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION) 255 memcpy(o, i, sizeof(*o)); 256 else 257 copy_instruction(&o->FullInstruction, &i->FullInstruction); 258 259 } 260 261 static void liveness_mark_written(struct i915_optimize_context *ctx, 262 struct i915_full_dst_register *dst_reg, 263 int pos) 264 { 265 int dst_reg_index; 266 if (dst_reg->Register.File == TGSI_FILE_TEMPORARY) { 267 dst_reg_index = dst_reg->Register.Index; 268 assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS); 269 /* dead -> live transition */ 270 if (ctx->first_write[dst_reg_index] != -1) 271 ctx->first_write[dst_reg_index] = pos; 272 } 273 } 274 275 static void liveness_mark_read(struct i915_optimize_context *ctx, 276 struct i915_full_src_register *src_reg, 277 int pos) 278 { 279 int src_reg_index; 280 if (src_reg->Register.File == TGSI_FILE_TEMPORARY) { 281 src_reg_index = src_reg->Register.Index; 282 assert(src_reg_index < TGSI_EXEC_NUM_TEMPS); 283 /* live -> dead transition */ 284 if (ctx->last_read[src_reg_index] != -1) 285 ctx->last_read[src_reg_index] = pos; 286 } 287 } 288 289 static void liveness_analysis(struct i915_optimize_context *ctx, 290 struct i915_token_list *tokens) 291 { 292 struct i915_full_dst_register *dst_reg; 293 struct i915_full_src_register *src_reg; 294 union i915_full_token *current; 295 unsigned opcode; 296 int num_dst, num_src; 297 int i = 0; 298 299 for(i = 0; i < TGSI_EXEC_NUM_TEMPS; i++) 300 { 301 ctx->first_write[i] = -1; 302 ctx->last_read[i] = -1; 303 } 304 305 for(i = 0; i < tokens->NumTokens; i++) 306 { 307 current = &tokens->Tokens[i]; 308 309 if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION) 310 continue; 311 312 opcode = current->FullInstruction.Instruction.Opcode; 313 num_dst = op_num_dst(opcode); 314 315 switch(num_dst) 316 { 317 case 1: 318 dst_reg = ¤t->FullInstruction.Dst[0]; 319 liveness_mark_written(ctx, dst_reg, i); 320 case 0: 321 break; 322 default: 323 debug_printf("Op %d has %d dst regs\n", opcode, num_dst); 324 break; 325 } 326 } 327 328 for(i = tokens->NumTokens - 1; i >= 0; i--) 329 { 330 current = &tokens->Tokens[i]; 331 332 if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION) 333 continue; 334 335 opcode = current->FullInstruction.Instruction.Opcode; 336 num_src = op_num_src(opcode); 337 338 switch(num_src) 339 { 340 case 3: 341 src_reg = ¤t->FullInstruction.Src[2]; 342 liveness_mark_read(ctx, src_reg, i); 343 case 2: 344 src_reg = ¤t->FullInstruction.Src[1]; 345 liveness_mark_read(ctx, src_reg, i); 346 case 1: 347 src_reg = ¤t->FullInstruction.Src[0]; 348 liveness_mark_read(ctx, src_reg, i); 349 case 0: 350 break; 351 default: 352 debug_printf("Op %d has %d src regs\n", opcode, num_src); 353 break; 354 } 355 } 356 } 357 358 static int unused_from(struct i915_optimize_context *ctx, struct i915_full_dst_register *dst_reg, int from) 359 { 360 int dst_reg_index = dst_reg->Register.Index; 361 assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS); 362 return (from >= ctx->last_read[dst_reg_index]); 363 } 364 365 /* Returns a mask with the components used for a texture access instruction */ 366 static unsigned i915_tex_mask(union i915_full_token *instr) 367 { 368 unsigned mask; 369 370 /* Get the number of coords */ 371 mask = mask_for_unswizzled(i915_num_coords(instr->FullInstruction.Texture.Texture)); 372 373 /* Add the W component if projective */ 374 if (instr->FullInstruction.Instruction.Opcode == TGSI_OPCODE_TXP) 375 mask |= TGSI_WRITEMASK_W; 376 377 return mask; 378 } 379 380 static boolean target_is_texture2d(uint tex) 381 { 382 switch (tex) { 383 case TGSI_TEXTURE_2D: 384 case TGSI_TEXTURE_RECT: 385 return true; 386 default: 387 return false; 388 } 389 } 390 391 392 /* 393 * Optimize away useless indirect texture reads: 394 * MOV TEMP[0].xy, IN[0].xyyy 395 * TEX TEMP[1], TEMP[0], SAMP[0], 2D 396 * into: 397 * TEX TEMP[1], IN[0], SAMP[0], 2D 398 * 399 * note: this only seems to work on 2D/RECT textures, but not SHAADOW2D/1D/.. 400 */ 401 static void i915_fpc_optimize_mov_before_tex(struct i915_optimize_context *ctx, 402 struct i915_token_list *tokens, 403 int index) 404 { 405 union i915_full_token *current = &tokens->Tokens[index - 1]; 406 union i915_full_token *next = &tokens->Tokens[index]; 407 408 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 409 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 410 current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && 411 op_is_texture(next->FullInstruction.Instruction.Opcode) && 412 target_is_texture2d(next->FullInstruction.Texture.Texture) && 413 same_src_dst_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) && 414 is_unswizzled(¤t->FullInstruction.Src[0], i915_tex_mask(next)) && 415 unused_from(ctx, ¤t->FullInstruction.Dst[0], index)) 416 { 417 memcpy(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[0], sizeof(struct i915_src_register)); 418 current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; 419 } 420 } 421 422 /* 423 * Optimize away things like: 424 * MOV TEMP[0].xy, TEMP[1].xyyy (first write for TEMP[0]) 425 * MOV TEMP[0].w, TEMP[1].wwww (last write for TEMP[0]) 426 * into: 427 * NOP 428 * MOV OUT[0].xyw, TEMP[1].xyww 429 */ 430 static void i915_fpc_optimize_mov_after_mov(union i915_full_token *current, union i915_full_token *next) 431 { 432 struct i915_full_src_register *src_reg1, *src_reg2; 433 struct i915_full_dst_register *dst_reg1, *dst_reg2; 434 unsigned swizzle_x, swizzle_y, swizzle_z, swizzle_w; 435 436 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 437 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 438 current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && 439 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && 440 current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate && 441 same_dst_reg(&next->FullInstruction.Dst[0], ¤t->FullInstruction.Dst[0]) && 442 same_src_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[0]) && 443 !same_src_dst_reg(¤t->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) ) 444 { 445 src_reg1 = ¤t->FullInstruction.Src[0]; 446 dst_reg1 = ¤t->FullInstruction.Dst[0]; 447 src_reg2 = &next->FullInstruction.Src[0]; 448 dst_reg2 = &next->FullInstruction.Dst[0]; 449 450 /* Start with swizzles from the first mov */ 451 swizzle_x = src_reg1->Register.SwizzleX; 452 swizzle_y = src_reg1->Register.SwizzleY; 453 swizzle_z = src_reg1->Register.SwizzleZ; 454 swizzle_w = src_reg1->Register.SwizzleW; 455 456 /* Pile the second mov on top */ 457 if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_X) 458 swizzle_x = src_reg2->Register.SwizzleX; 459 if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Y) 460 swizzle_y = src_reg2->Register.SwizzleY; 461 if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Z) 462 swizzle_z = src_reg2->Register.SwizzleZ; 463 if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_W) 464 swizzle_w = src_reg2->Register.SwizzleW; 465 466 dst_reg2->Register.WriteMask |= dst_reg1->Register.WriteMask; 467 src_reg2->Register.SwizzleX = swizzle_x; 468 src_reg2->Register.SwizzleY = swizzle_y; 469 src_reg2->Register.SwizzleZ = swizzle_z; 470 src_reg2->Register.SwizzleW = swizzle_w; 471 472 current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; 473 474 return; 475 } 476 } 477 478 /* 479 * Optimize away things like: 480 * MUL OUT[0].xyz, TEMP[1], TEMP[2] 481 * MOV OUT[0].w, TEMP[2] 482 * into: 483 * MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2] 484 * This is useful for optimizing texenv. 485 */ 486 static void i915_fpc_optimize_mov_after_alu(union i915_full_token *current, union i915_full_token *next) 487 { 488 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 489 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 490 op_commutes(current->FullInstruction.Instruction.Opcode) && 491 current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate && 492 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && 493 same_dst_reg(&next->FullInstruction.Dst[0], ¤t->FullInstruction.Dst[0]) && 494 same_src_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[1]) && 495 !same_src_dst_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) && 496 is_unswizzled(¤t->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) && 497 is_unswizzled(¤t->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) && 498 is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) ) 499 { 500 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; 501 502 set_neutral_element_swizzle(¤t->FullInstruction.Src[1], 0, 0); 503 set_neutral_element_swizzle(¤t->FullInstruction.Src[0], 504 next->FullInstruction.Dst[0].Register.WriteMask, 505 op_neutral_element(current->FullInstruction.Instruction.Opcode)); 506 507 current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask | 508 next->FullInstruction.Dst[0].Register.WriteMask; 509 return; 510 } 511 512 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 513 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 514 op_commutes(current->FullInstruction.Instruction.Opcode) && 515 current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate && 516 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && 517 same_dst_reg(&next->FullInstruction.Dst[0], ¤t->FullInstruction.Dst[0]) && 518 same_src_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[0]) && 519 !same_src_dst_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) && 520 is_unswizzled(¤t->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) && 521 is_unswizzled(¤t->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) && 522 is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) ) 523 { 524 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; 525 526 set_neutral_element_swizzle(¤t->FullInstruction.Src[0], 0, 0); 527 set_neutral_element_swizzle(¤t->FullInstruction.Src[1], 528 next->FullInstruction.Dst[0].Register.WriteMask, 529 op_neutral_element(current->FullInstruction.Instruction.Opcode)); 530 531 current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask | 532 next->FullInstruction.Dst[0].Register.WriteMask; 533 return; 534 } 535 } 536 537 /* 538 * Optimize away things like: 539 * MOV TEMP[0].xyz TEMP[0].xyzx 540 * into: 541 * NOP 542 */ 543 static boolean i915_fpc_useless_mov(union tgsi_full_token *tgsi_current) 544 { 545 union i915_full_token current; 546 copy_token(¤t , tgsi_current); 547 if ( current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 548 current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && 549 op_has_dst(current.FullInstruction.Instruction.Opcode) && 550 !current.FullInstruction.Instruction.Saturate && 551 current.FullInstruction.Src[0].Register.Absolute == 0 && 552 current.FullInstruction.Src[0].Register.Negate == 0 && 553 is_unswizzled(¤t.FullInstruction.Src[0], current.FullInstruction.Dst[0].Register.WriteMask) && 554 same_src_dst_reg(¤t.FullInstruction.Src[0], ¤t.FullInstruction.Dst[0]) ) 555 { 556 return TRUE; 557 } 558 return FALSE; 559 } 560 561 /* 562 * Optimize away things like: 563 * *** TEMP[0], TEMP[1], TEMP[2] 564 * MOV OUT[0] TEMP[0] 565 * into: 566 * *** OUT[0], TEMP[1], TEMP[2] 567 */ 568 static void i915_fpc_optimize_useless_mov_after_inst(struct i915_optimize_context *ctx, 569 struct i915_token_list *tokens, 570 int index) 571 { 572 union i915_full_token *current = &tokens->Tokens[index - 1]; 573 union i915_full_token *next = &tokens->Tokens[index]; 574 575 // &out_tokens->Tokens[i-1], &out_tokens->Tokens[i]); 576 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 577 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 578 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && 579 op_has_dst(current->FullInstruction.Instruction.Opcode) && 580 !next->FullInstruction.Instruction.Saturate && 581 next->FullInstruction.Src[0].Register.Absolute == 0 && 582 next->FullInstruction.Src[0].Register.Negate == 0 && 583 unused_from(ctx, ¤t->FullInstruction.Dst[0], index) && 584 current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZW && 585 is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) && 586 current->FullInstruction.Dst[0].Register.WriteMask == next->FullInstruction.Dst[0].Register.WriteMask && 587 same_src_dst_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) ) 588 { 589 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; 590 591 current->FullInstruction.Dst[0] = next->FullInstruction.Dst[0]; 592 return; 593 } 594 } 595 596 struct i915_token_list* i915_optimize(const struct tgsi_token *tokens) 597 { 598 struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list)); 599 struct tgsi_parse_context parse; 600 struct i915_optimize_context *ctx; 601 int i = 0; 602 603 ctx = malloc(sizeof(*ctx)); 604 605 out_tokens->NumTokens = 0; 606 607 /* Count the tokens */ 608 tgsi_parse_init( &parse, tokens ); 609 while( !tgsi_parse_end_of_tokens( &parse ) ) { 610 tgsi_parse_token( &parse ); 611 out_tokens->NumTokens++; 612 } 613 tgsi_parse_free (&parse); 614 615 /* Allocate our tokens */ 616 out_tokens->Tokens = MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens); 617 618 tgsi_parse_init( &parse, tokens ); 619 while( !tgsi_parse_end_of_tokens( &parse ) ) { 620 tgsi_parse_token( &parse ); 621 622 if (i915_fpc_useless_mov(&parse.FullToken)) { 623 out_tokens->NumTokens--; 624 continue; 625 } 626 627 copy_token(&out_tokens->Tokens[i] , &parse.FullToken); 628 629 i++; 630 } 631 tgsi_parse_free (&parse); 632 633 liveness_analysis(ctx, out_tokens); 634 635 i = 1; 636 while( i < out_tokens->NumTokens) { 637 i915_fpc_optimize_useless_mov_after_inst(ctx, out_tokens, i); 638 i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]); 639 i915_fpc_optimize_mov_after_mov(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]); 640 i915_fpc_optimize_mov_before_tex(ctx, out_tokens, i); 641 i++; 642 } 643 644 free(ctx); 645 646 return out_tokens; 647 } 648 649 void i915_optimize_free(struct i915_token_list *tokens) 650 { 651 free(tokens->Tokens); 652 free(tokens); 653 } 654 655 656