1 /************************************************************************** 2 * 3 * Copyright 2009 Marek Olk <maraeo (at) gmail.com> 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sub license, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the 14 * next paragraph) shall be included in all copies or substantial portions 15 * of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 20 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 21 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 * 25 **************************************************************************/ 26 27 /* This file contains the vertex shader tranformations for SW TCL needed 28 * to overcome the limitations of the r300 rasterizer. 29 * 30 * Transformations: 31 * 1) If the secondary color output is present, the primary color must be 32 * present too. 33 * 2) If any back-face color output is present, there must be all 4 color 34 * outputs and missing ones must be inserted. 35 * 3) Insert a trailing texcoord output containing a copy of POS, for WPOS. 36 * 37 * I know this code is cumbersome, but I don't know of any nicer way 38 * of transforming TGSI shaders. ~ M. 39 */ 40 41 #include "r300_vs.h" 42 43 #include <stdio.h> 44 45 #include "tgsi/tgsi_transform.h" 46 #include "tgsi/tgsi_dump.h" 47 48 #include "draw/draw_context.h" 49 50 struct vs_transform_context { 51 struct tgsi_transform_context base; 52 53 boolean color_used[2]; 54 boolean bcolor_used[2]; 55 56 /* Index of the pos output, typically 0. */ 57 unsigned pos_output; 58 /* Index of the pos temp where all writes of pos are redirected to. */ 59 unsigned pos_temp; 60 /* The index of the last generic output, after which we insert a new 61 * output for WPOS. */ 62 int last_generic; 63 64 unsigned num_outputs; 65 /* Used to shift output decl. indices when inserting new ones. */ 66 unsigned decl_shift; 67 /* Used to remap writes to output decls if their indices changed. */ 68 unsigned out_remap[32]; 69 70 /* First instruction processed? */ 71 boolean first_instruction; 72 /* End instruction processed? */ 73 boolean end_instruction; 74 75 boolean temp_used[1024]; 76 }; 77 78 static void emit_temp(struct tgsi_transform_context *ctx, unsigned reg) 79 { 80 struct tgsi_full_declaration decl; 81 82 decl = tgsi_default_full_declaration(); 83 decl.Declaration.File = TGSI_FILE_TEMPORARY; 84 decl.Range.First = decl.Range.Last = reg; 85 ctx->emit_declaration(ctx, &decl); 86 } 87 88 static void emit_output(struct tgsi_transform_context *ctx, 89 unsigned name, unsigned index, unsigned interp, 90 unsigned reg) 91 { 92 struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx; 93 struct tgsi_full_declaration decl; 94 95 decl = tgsi_default_full_declaration(); 96 decl.Declaration.File = TGSI_FILE_OUTPUT; 97 decl.Declaration.Interpolate = 1; 98 decl.Declaration.Semantic = TRUE; 99 decl.Semantic.Name = name; 100 decl.Semantic.Index = index; 101 decl.Range.First = decl.Range.Last = reg; 102 decl.Interp.Interpolate = interp; 103 ctx->emit_declaration(ctx, &decl); 104 ++vsctx->num_outputs; 105 } 106 107 static void insert_output_before(struct tgsi_transform_context *ctx, 108 struct tgsi_full_declaration *before, 109 unsigned name, unsigned index, unsigned interp) 110 { 111 struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx; 112 unsigned i; 113 114 /* Make a place for the new output. */ 115 for (i = before->Range.First; i < Elements(vsctx->out_remap); i++) { 116 ++vsctx->out_remap[i]; 117 } 118 119 /* Insert the new output. */ 120 emit_output(ctx, name, index, interp, 121 before->Range.First + vsctx->decl_shift); 122 123 ++vsctx->decl_shift; 124 } 125 126 static void insert_output_after(struct tgsi_transform_context *ctx, 127 struct tgsi_full_declaration *after, 128 unsigned name, unsigned index, unsigned interp) 129 { 130 struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx; 131 unsigned i; 132 133 /* Make a place for the new output. */ 134 for (i = after->Range.First+1; i < Elements(vsctx->out_remap); i++) { 135 ++vsctx->out_remap[i]; 136 } 137 138 /* Insert the new output. */ 139 emit_output(ctx, name, index, interp, 140 after->Range.First + 1); 141 142 ++vsctx->decl_shift; 143 } 144 145 static void transform_decl(struct tgsi_transform_context *ctx, 146 struct tgsi_full_declaration *decl) 147 { 148 struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx; 149 unsigned i; 150 151 if (decl->Declaration.File == TGSI_FILE_OUTPUT) { 152 switch (decl->Semantic.Name) { 153 case TGSI_SEMANTIC_POSITION: 154 vsctx->pos_output = decl->Range.First; 155 break; 156 157 case TGSI_SEMANTIC_COLOR: 158 assert(decl->Semantic.Index < 2); 159 160 /* We must rasterize the first color if the second one is 161 * used, otherwise the rasterizer doesn't do the color 162 * selection correctly. Declare it, but don't write to it. */ 163 if (decl->Semantic.Index == 1 && !vsctx->color_used[0]) { 164 insert_output_before(ctx, decl, TGSI_SEMANTIC_COLOR, 0, 165 TGSI_INTERPOLATE_LINEAR); 166 vsctx->color_used[0] = TRUE; 167 } 168 break; 169 170 case TGSI_SEMANTIC_BCOLOR: 171 assert(decl->Semantic.Index < 2); 172 173 /* We must rasterize all 4 colors if back-face colors are 174 * used, otherwise the rasterizer doesn't do the color 175 * selection correctly. Declare it, but don't write to it. */ 176 if (!vsctx->color_used[0]) { 177 insert_output_before(ctx, decl, TGSI_SEMANTIC_COLOR, 0, 178 TGSI_INTERPOLATE_LINEAR); 179 vsctx->color_used[0] = TRUE; 180 } 181 if (!vsctx->color_used[1]) { 182 insert_output_before(ctx, decl, TGSI_SEMANTIC_COLOR, 1, 183 TGSI_INTERPOLATE_LINEAR); 184 vsctx->color_used[1] = TRUE; 185 } 186 if (decl->Semantic.Index == 1 && !vsctx->bcolor_used[0]) { 187 insert_output_before(ctx, decl, TGSI_SEMANTIC_BCOLOR, 0, 188 TGSI_INTERPOLATE_LINEAR); 189 vsctx->bcolor_used[0] = TRUE; 190 } 191 break; 192 193 case TGSI_SEMANTIC_GENERIC: 194 vsctx->last_generic = MAX2(vsctx->last_generic, decl->Semantic.Index); 195 break; 196 } 197 198 /* Since we're inserting new outputs in between, the following outputs 199 * should be moved to the right so that they don't overlap with 200 * the newly added ones. */ 201 decl->Range.First += vsctx->decl_shift; 202 decl->Range.Last += vsctx->decl_shift; 203 204 ++vsctx->num_outputs; 205 } else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { 206 for (i = decl->Range.First; i <= decl->Range.Last; i++) { 207 vsctx->temp_used[i] = TRUE; 208 } 209 } 210 211 ctx->emit_declaration(ctx, decl); 212 213 /* Insert BCOLOR1 if needed. */ 214 if (decl->Declaration.File == TGSI_FILE_OUTPUT && 215 decl->Semantic.Name == TGSI_SEMANTIC_BCOLOR && 216 !vsctx->bcolor_used[1]) { 217 insert_output_after(ctx, decl, TGSI_SEMANTIC_BCOLOR, 1, 218 TGSI_INTERPOLATE_LINEAR); 219 } 220 } 221 222 static void transform_inst(struct tgsi_transform_context *ctx, 223 struct tgsi_full_instruction *inst) 224 { 225 struct vs_transform_context *vsctx = (struct vs_transform_context *) ctx; 226 struct tgsi_full_instruction new_inst; 227 unsigned i; 228 229 if (!vsctx->first_instruction) { 230 vsctx->first_instruction = TRUE; 231 232 /* Insert the generic output for WPOS. */ 233 emit_output(ctx, TGSI_SEMANTIC_GENERIC, vsctx->last_generic + 1, 234 TGSI_INTERPOLATE_PERSPECTIVE, vsctx->num_outputs); 235 236 /* Find a free temp for POSITION. */ 237 for (i = 0; i < Elements(vsctx->temp_used); i++) { 238 if (!vsctx->temp_used[i]) { 239 emit_temp(ctx, i); 240 vsctx->pos_temp = i; 241 break; 242 } 243 } 244 } 245 246 if (inst->Instruction.Opcode == TGSI_OPCODE_END) { 247 /* MOV OUT[pos_output], TEMP[pos_temp]; */ 248 new_inst = tgsi_default_full_instruction(); 249 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 250 new_inst.Instruction.NumDstRegs = 1; 251 new_inst.Dst[0].Register.File = TGSI_FILE_OUTPUT; 252 new_inst.Dst[0].Register.Index = vsctx->pos_output; 253 new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; 254 new_inst.Instruction.NumSrcRegs = 1; 255 new_inst.Src[0].Register.File = TGSI_FILE_TEMPORARY; 256 new_inst.Src[0].Register.Index = vsctx->pos_temp; 257 ctx->emit_instruction(ctx, &new_inst); 258 259 /* MOV OUT[n-1], TEMP[pos_temp]; */ 260 new_inst = tgsi_default_full_instruction(); 261 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 262 new_inst.Instruction.NumDstRegs = 1; 263 new_inst.Dst[0].Register.File = TGSI_FILE_OUTPUT; 264 new_inst.Dst[0].Register.Index = vsctx->num_outputs - 1; 265 new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; 266 new_inst.Instruction.NumSrcRegs = 1; 267 new_inst.Src[0].Register.File = TGSI_FILE_TEMPORARY; 268 new_inst.Src[0].Register.Index = vsctx->pos_temp; 269 ctx->emit_instruction(ctx, &new_inst); 270 271 vsctx->end_instruction = TRUE; 272 } else { 273 /* Not an END instruction. */ 274 /* Fix writes to outputs. */ 275 for (i = 0; i < inst->Instruction.NumDstRegs; i++) { 276 struct tgsi_full_dst_register *dst = &inst->Dst[i]; 277 if (dst->Register.File == TGSI_FILE_OUTPUT) { 278 if (dst->Register.Index == vsctx->pos_output) { 279 /* Replace writes to OUT[pos_output] with TEMP[pos_temp]. */ 280 dst->Register.File = TGSI_FILE_TEMPORARY; 281 dst->Register.Index = vsctx->pos_temp; 282 } else { 283 /* Not a position, good... 284 * Since we were changing the indices of output decls, 285 * we must redirect writes into them too. */ 286 dst->Register.Index = vsctx->out_remap[dst->Register.Index]; 287 } 288 } 289 } 290 291 /* Inserting 2 instructions before the END opcode moves all following 292 * labels by 2. Subroutines are always after the END opcode so 293 * they're always moved. */ 294 if (inst->Instruction.Opcode == TGSI_OPCODE_CAL) { 295 inst->Label.Label += 2; 296 } 297 /* The labels of the following opcodes are moved only after 298 * the END opcode. */ 299 if (vsctx->end_instruction && 300 (inst->Instruction.Opcode == TGSI_OPCODE_IF || 301 inst->Instruction.Opcode == TGSI_OPCODE_ELSE || 302 inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP || 303 inst->Instruction.Opcode == TGSI_OPCODE_ENDLOOP)) { 304 inst->Label.Label += 2; 305 } 306 } 307 308 ctx->emit_instruction(ctx, inst); 309 } 310 311 void r300_draw_init_vertex_shader(struct r300_context *r300, 312 struct r300_vertex_shader *vs) 313 { 314 struct draw_context *draw = r300->draw; 315 struct pipe_shader_state new_vs; 316 struct tgsi_shader_info info; 317 struct vs_transform_context transform; 318 const uint newLen = tgsi_num_tokens(vs->state.tokens) + 100 /* XXX */; 319 unsigned i; 320 321 tgsi_scan_shader(vs->state.tokens, &info); 322 323 new_vs.tokens = tgsi_alloc_tokens(newLen); 324 if (new_vs.tokens == NULL) 325 return; 326 327 memset(&transform, 0, sizeof(transform)); 328 for (i = 0; i < Elements(transform.out_remap); i++) { 329 transform.out_remap[i] = i; 330 } 331 transform.last_generic = -1; 332 transform.base.transform_instruction = transform_inst; 333 transform.base.transform_declaration = transform_decl; 334 335 for (i = 0; i < info.num_outputs; i++) { 336 unsigned index = info.output_semantic_index[i]; 337 338 switch (info.output_semantic_name[i]) { 339 case TGSI_SEMANTIC_COLOR: 340 assert(index < 2); 341 transform.color_used[index] = TRUE; 342 break; 343 344 case TGSI_SEMANTIC_BCOLOR: 345 assert(index < 2); 346 transform.bcolor_used[index] = TRUE; 347 break; 348 } 349 } 350 351 tgsi_transform_shader(vs->state.tokens, 352 (struct tgsi_token*)new_vs.tokens, 353 newLen, &transform.base); 354 355 #if 0 356 printf("----------------------------------------------\norig shader:\n"); 357 tgsi_dump(vs->state.tokens, 0); 358 printf("----------------------------------------------\nnew shader:\n"); 359 tgsi_dump(new_vs.tokens, 0); 360 printf("----------------------------------------------\n"); 361 #endif 362 363 /* Free old tokens. */ 364 FREE((void*)vs->state.tokens); 365 366 vs->draw_vs = draw_create_vertex_shader(draw, &new_vs); 367 368 /* Instead of duplicating and freeing the tokens, copy the pointer directly. */ 369 vs->state.tokens = new_vs.tokens; 370 371 /* Init the VS output table for the rasterizer. */ 372 r300_init_vs_outputs(r300, vs); 373 374 /* Make the last generic be WPOS. */ 375 vs->outputs.wpos = vs->outputs.generic[transform.last_generic + 1]; 376 vs->outputs.generic[transform.last_generic + 1] = ATTR_UNUSED; 377 } 378