1 /************************************************************************** 2 * 3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 /** 29 * AA point stage: AA points are converted to quads and rendered with a 30 * special fragment shader. Another approach would be to use a texture 31 * map image of a point, but experiments indicate the quality isn't nearly 32 * as good as this approach. 33 * 34 * Note: this looks a lot like draw_aaline.c but there's actually little 35 * if any code that can be shared. 
36 * 37 * Authors: Brian Paul 38 */ 39 40 41 #include "pipe/p_context.h" 42 #include "pipe/p_defines.h" 43 #include "pipe/p_shader_tokens.h" 44 45 #include "tgsi/tgsi_transform.h" 46 #include "tgsi/tgsi_dump.h" 47 48 #include "util/u_math.h" 49 #include "util/u_memory.h" 50 51 #include "draw_context.h" 52 #include "draw_vs.h" 53 #include "draw_pipe.h" 54 55 56 /** Approx number of new tokens for instructions in aa_transform_inst() */ 57 #define NUM_NEW_TOKENS 200 58 59 60 /* 61 * Enabling NORMALIZE might give _slightly_ better results. 62 * Basically, it controls whether we compute distance as d=sqrt(x*x+y*y) or 63 * d=x*x+y*y. Since we're working with a unit circle, the latter seems 64 * close enough and saves some costly instructions. 65 */ 66 #define NORMALIZE 0 67 68 69 /** 70 * Subclass of pipe_shader_state to carry extra fragment shader info. 71 */ 72 struct aapoint_fragment_shader 73 { 74 struct pipe_shader_state state; 75 void *driver_fs; /**< the regular shader */ 76 void *aapoint_fs; /**< the aa point-augmented shader */ 77 int generic_attrib; /**< The generic input attrib/texcoord we'll use */ 78 }; 79 80 81 /** 82 * Subclass of draw_stage 83 */ 84 struct aapoint_stage 85 { 86 struct draw_stage stage; 87 88 /** half of pipe_rasterizer_state::point_size, but never less than 1.0 */ 89 float radius; 90 91 /** vertex attrib slot containing point size, or -1 if there is none */ 92 int psize_slot; 93 94 /** this is the vertex attrib slot for the new texcoords */ 95 uint tex_slot; 96 97 /** vertex attrib slot containing position */ 98 uint pos_slot; 99 100 /** Currently bound fragment shader */ 101 struct aapoint_fragment_shader *fs; 102 103 /* 104 * Driver interface/override functions 105 */ 106 void * (*driver_create_fs_state)(struct pipe_context *, 107 const struct pipe_shader_state *); 108 void (*driver_bind_fs_state)(struct pipe_context *, void *); 109 void (*driver_delete_fs_state)(struct pipe_context *, void *); 110 }; 111 112 113 114 /** 115 * Subclass of tgsi_transform_context, used for transforming 
the 116 * user's fragment shader to add the special AA instructions. 117 */ 118 struct aa_transform_context { 119 struct tgsi_transform_context base; 120 uint tempsUsed; /**< bitmask */ 121 int colorOutput; /**< which output is the primary color */ 122 int maxInput, maxGeneric; /**< max input index found */ 123 int tmp0, colorTemp; /**< temp registers */ 124 boolean firstInstruction; 125 }; 126 127 128 /** 129 * TGSI declaration transform callback. 130 * Look for two free temp regs and available input reg for new texcoords. 131 */ 132 static void 133 aa_transform_decl(struct tgsi_transform_context *ctx, 134 struct tgsi_full_declaration *decl) 135 { 136 struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; 137 138 if (decl->Declaration.File == TGSI_FILE_OUTPUT && 139 decl->Semantic.Name == TGSI_SEMANTIC_COLOR && 140 decl->Semantic.Index == 0) { 141 aactx->colorOutput = decl->Range.First; 142 } 143 else if (decl->Declaration.File == TGSI_FILE_INPUT) { 144 if ((int) decl->Range.Last > aactx->maxInput) 145 aactx->maxInput = decl->Range.Last; 146 if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC && 147 (int) decl->Semantic.Index > aactx->maxGeneric) { 148 aactx->maxGeneric = decl->Semantic.Index; 149 } 150 } 151 else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { 152 uint i; 153 for (i = decl->Range.First; 154 i <= decl->Range.Last; i++) { 155 aactx->tempsUsed |= (1 << i); 156 } 157 } 158 159 ctx->emit_declaration(ctx, decl); 160 } 161 162 163 /** 164 * TGSI instruction transform callback. 165 * Replace writes to result.color w/ a temp reg. 166 * Upon END instruction, insert texture sampling code for antialiasing. 
167 */ 168 static void 169 aa_transform_inst(struct tgsi_transform_context *ctx, 170 struct tgsi_full_instruction *inst) 171 { 172 struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; 173 struct tgsi_full_instruction newInst; 174 175 if (aactx->firstInstruction) { 176 /* emit our new declarations before the first instruction */ 177 178 struct tgsi_full_declaration decl; 179 const int texInput = aactx->maxInput + 1; 180 int tmp0; 181 uint i; 182 183 /* find two free temp regs */ 184 for (i = 0; i < 32; i++) { 185 if ((aactx->tempsUsed & (1 << i)) == 0) { 186 /* found a free temp */ 187 if (aactx->tmp0 < 0) 188 aactx->tmp0 = i; 189 else if (aactx->colorTemp < 0) 190 aactx->colorTemp = i; 191 else 192 break; 193 } 194 } 195 196 assert(aactx->colorTemp != aactx->tmp0); 197 198 tmp0 = aactx->tmp0; 199 200 /* declare new generic input/texcoord */ 201 decl = tgsi_default_full_declaration(); 202 decl.Declaration.File = TGSI_FILE_INPUT; 203 /* XXX this could be linear... */ 204 decl.Declaration.Interpolate = 1; 205 decl.Declaration.Semantic = 1; 206 decl.Semantic.Name = TGSI_SEMANTIC_GENERIC; 207 decl.Semantic.Index = aactx->maxGeneric + 1; 208 decl.Range.First = 209 decl.Range.Last = texInput; 210 decl.Interp.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; 211 ctx->emit_declaration(ctx, &decl); 212 213 /* declare new temp regs */ 214 decl = tgsi_default_full_declaration(); 215 decl.Declaration.File = TGSI_FILE_TEMPORARY; 216 decl.Range.First = 217 decl.Range.Last = tmp0; 218 ctx->emit_declaration(ctx, &decl); 219 220 decl = tgsi_default_full_declaration(); 221 decl.Declaration.File = TGSI_FILE_TEMPORARY; 222 decl.Range.First = 223 decl.Range.Last = aactx->colorTemp; 224 ctx->emit_declaration(ctx, &decl); 225 226 aactx->firstInstruction = FALSE; 227 228 229 /* 230 * Emit code to compute fragment coverage, kill if outside point radius 231 * 232 * Temp reg0 usage: 233 * t0.x = distance of fragment from center point 234 * t0.y = boolean, is t0.x > 1.0, also 
misc temp usage 235 * t0.z = temporary for computing 1/(1-k) value 236 * t0.w = final coverage value 237 */ 238 239 /* MUL t0.xy, tex, tex; # compute x^2, y^2 */ 240 newInst = tgsi_default_full_instruction(); 241 newInst.Instruction.Opcode = TGSI_OPCODE_MUL; 242 newInst.Instruction.NumDstRegs = 1; 243 newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; 244 newInst.Dst[0].Register.Index = tmp0; 245 newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XY; 246 newInst.Instruction.NumSrcRegs = 2; 247 newInst.Src[0].Register.File = TGSI_FILE_INPUT; 248 newInst.Src[0].Register.Index = texInput; 249 newInst.Src[1].Register.File = TGSI_FILE_INPUT; 250 newInst.Src[1].Register.Index = texInput; 251 ctx->emit_instruction(ctx, &newInst); 252 253 /* ADD t0.x, t0.x, t0.y; # x^2 + y^2 */ 254 newInst = tgsi_default_full_instruction(); 255 newInst.Instruction.Opcode = TGSI_OPCODE_ADD; 256 newInst.Instruction.NumDstRegs = 1; 257 newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; 258 newInst.Dst[0].Register.Index = tmp0; 259 newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X; 260 newInst.Instruction.NumSrcRegs = 2; 261 newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; 262 newInst.Src[0].Register.Index = tmp0; 263 newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X; 264 newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY; 265 newInst.Src[1].Register.Index = tmp0; 266 newInst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_Y; 267 ctx->emit_instruction(ctx, &newInst); 268 269 #if NORMALIZE /* OPTIONAL normalization of length */ 270 /* RSQ t0.x, t0.x; */ 271 newInst = tgsi_default_full_instruction(); 272 newInst.Instruction.Opcode = TGSI_OPCODE_RSQ; 273 newInst.Instruction.NumDstRegs = 1; 274 newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; 275 newInst.Dst[0].Register.Index = tmp0; 276 newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X; 277 newInst.Instruction.NumSrcRegs = 1; 278 newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; 279 newInst.Src[0].Register.Index = tmp0; 280 
ctx->emit_instruction(ctx, &newInst); 281 282 /* RCP t0.x, t0.x; */ 283 newInst = tgsi_default_full_instruction(); 284 newInst.Instruction.Opcode = TGSI_OPCODE_RCP; 285 newInst.Instruction.NumDstRegs = 1; 286 newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; 287 newInst.Dst[0].Register.Index = tmp0; 288 newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X; 289 newInst.Instruction.NumSrcRegs = 1; 290 newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; 291 newInst.Src[0].Register.Index = tmp0; 292 ctx->emit_instruction(ctx, &newInst); 293 #endif 294 295 /* SGT t0.y, t0.xxxx, tex.wwww; # bool b = d > 1 (NOTE tex.w == 1) */ 296 newInst = tgsi_default_full_instruction(); 297 newInst.Instruction.Opcode = TGSI_OPCODE_SGT; 298 newInst.Instruction.NumDstRegs = 1; 299 newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; 300 newInst.Dst[0].Register.Index = tmp0; 301 newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y; 302 newInst.Instruction.NumSrcRegs = 2; 303 newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; 304 newInst.Src[0].Register.Index = tmp0; 305 newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X; 306 newInst.Src[1].Register.File = TGSI_FILE_INPUT; 307 newInst.Src[1].Register.Index = texInput; 308 newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_W; 309 ctx->emit_instruction(ctx, &newInst); 310 311 /* KIL -tmp0.yyyy; # if -tmp0.y < 0, KILL */ 312 newInst = tgsi_default_full_instruction(); 313 newInst.Instruction.Opcode = TGSI_OPCODE_KIL; 314 newInst.Instruction.NumDstRegs = 0; 315 newInst.Instruction.NumSrcRegs = 1; 316 newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; 317 newInst.Src[0].Register.Index = tmp0; 318 newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Y; 319 newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_Y; 320 newInst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_Y; 321 newInst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Y; 322 newInst.Src[0].Register.Negate = 1; 323 ctx->emit_instruction(ctx, &newInst); 324 325 326 /* compute coverage factor = (1-d)/(1-k) */ 
327 328 /* SUB t0.z, tex.w, tex.z; # m = 1 - k */ 329 newInst = tgsi_default_full_instruction(); 330 newInst.Instruction.Opcode = TGSI_OPCODE_SUB; 331 newInst.Instruction.NumDstRegs = 1; 332 newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; 333 newInst.Dst[0].Register.Index = tmp0; 334 newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Z; 335 newInst.Instruction.NumSrcRegs = 2; 336 newInst.Src[0].Register.File = TGSI_FILE_INPUT; 337 newInst.Src[0].Register.Index = texInput; 338 newInst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_W; 339 newInst.Src[1].Register.File = TGSI_FILE_INPUT; 340 newInst.Src[1].Register.Index = texInput; 341 newInst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z; 342 ctx->emit_instruction(ctx, &newInst); 343 344 /* RCP t0.z, t0.z; # t0.z = 1 / m */ 345 newInst = tgsi_default_full_instruction(); 346 newInst.Instruction.Opcode = TGSI_OPCODE_RCP; 347 newInst.Instruction.NumDstRegs = 1; 348 newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; 349 newInst.Dst[0].Register.Index = tmp0; 350 newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Z; 351 newInst.Instruction.NumSrcRegs = 1; 352 newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; 353 newInst.Src[0].Register.Index = tmp0; 354 newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Z; 355 ctx->emit_instruction(ctx, &newInst); 356 357 /* SUB t0.y, 1, t0.x; # d = 1 - d */ 358 newInst = tgsi_default_full_instruction(); 359 newInst.Instruction.Opcode = TGSI_OPCODE_SUB; 360 newInst.Instruction.NumDstRegs = 1; 361 newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; 362 newInst.Dst[0].Register.Index = tmp0; 363 newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y; 364 newInst.Instruction.NumSrcRegs = 2; 365 newInst.Src[0].Register.File = TGSI_FILE_INPUT; 366 newInst.Src[0].Register.Index = texInput; 367 newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_W; 368 newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY; 369 newInst.Src[1].Register.Index = tmp0; 370 newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_X; 371 
ctx->emit_instruction(ctx, &newInst); 372 373 /* MUL t0.w, t0.y, t0.z; # coverage = d * m */ 374 newInst = tgsi_default_full_instruction(); 375 newInst.Instruction.Opcode = TGSI_OPCODE_MUL; 376 newInst.Instruction.NumDstRegs = 1; 377 newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; 378 newInst.Dst[0].Register.Index = tmp0; 379 newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W; 380 newInst.Instruction.NumSrcRegs = 2; 381 newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; 382 newInst.Src[0].Register.Index = tmp0; 383 newInst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Y; 384 newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY; 385 newInst.Src[1].Register.Index = tmp0; 386 newInst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_Z; 387 ctx->emit_instruction(ctx, &newInst); 388 389 /* SLE t0.y, t0.x, tex.z; # bool b = distance <= k */ 390 newInst = tgsi_default_full_instruction(); 391 newInst.Instruction.Opcode = TGSI_OPCODE_SLE; 392 newInst.Instruction.NumDstRegs = 1; 393 newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; 394 newInst.Dst[0].Register.Index = tmp0; 395 newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y; 396 newInst.Instruction.NumSrcRegs = 2; 397 newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; 398 newInst.Src[0].Register.Index = tmp0; 399 newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X; 400 newInst.Src[1].Register.File = TGSI_FILE_INPUT; 401 newInst.Src[1].Register.Index = texInput; 402 newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Z; 403 ctx->emit_instruction(ctx, &newInst); 404 405 /* CMP t0.w, -t0.y, tex.w, t0.w; 406 * # if -t0.y < 0 then 407 * t0.w = 1 408 * else 409 * t0.w = t0.w 410 */ 411 newInst = tgsi_default_full_instruction(); 412 newInst.Instruction.Opcode = TGSI_OPCODE_CMP; 413 newInst.Instruction.NumDstRegs = 1; 414 newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; 415 newInst.Dst[0].Register.Index = tmp0; 416 newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W; 417 newInst.Instruction.NumSrcRegs = 3; 418 
newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; 419 newInst.Src[0].Register.Index = tmp0; 420 newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Y; 421 newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_Y; 422 newInst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_Y; 423 newInst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Y; 424 newInst.Src[0].Register.Negate = 1; 425 newInst.Src[1].Register.File = TGSI_FILE_INPUT; 426 newInst.Src[1].Register.Index = texInput; 427 newInst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_W; 428 newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_W; 429 newInst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_W; 430 newInst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W; 431 newInst.Src[2].Register.File = TGSI_FILE_TEMPORARY; 432 newInst.Src[2].Register.Index = tmp0; 433 newInst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_W; 434 newInst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_W; 435 newInst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_W; 436 newInst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W; 437 ctx->emit_instruction(ctx, &newInst); 438 439 } 440 441 if (inst->Instruction.Opcode == TGSI_OPCODE_END) { 442 /* add alpha modulation code at tail of program */ 443 444 /* MOV result.color.xyz, colorTemp; */ 445 newInst = tgsi_default_full_instruction(); 446 newInst.Instruction.Opcode = TGSI_OPCODE_MOV; 447 newInst.Instruction.NumDstRegs = 1; 448 newInst.Dst[0].Register.File = TGSI_FILE_OUTPUT; 449 newInst.Dst[0].Register.Index = aactx->colorOutput; 450 newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZ; 451 newInst.Instruction.NumSrcRegs = 1; 452 newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; 453 newInst.Src[0].Register.Index = aactx->colorTemp; 454 ctx->emit_instruction(ctx, &newInst); 455 456 /* MUL result.color.w, colorTemp, tmp0.w; */ 457 newInst = tgsi_default_full_instruction(); 458 newInst.Instruction.Opcode = TGSI_OPCODE_MUL; 459 newInst.Instruction.NumDstRegs = 1; 460 newInst.Dst[0].Register.File = TGSI_FILE_OUTPUT; 461 newInst.Dst[0].Register.Index = aactx->colorOutput; 
462 newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W; 463 newInst.Instruction.NumSrcRegs = 2; 464 newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; 465 newInst.Src[0].Register.Index = aactx->colorTemp; 466 newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY; 467 newInst.Src[1].Register.Index = aactx->tmp0; 468 ctx->emit_instruction(ctx, &newInst); 469 } 470 else { 471 /* Not an END instruction. 472 * Look for writes to result.color and replace with colorTemp reg. 473 */ 474 uint i; 475 476 for (i = 0; i < inst->Instruction.NumDstRegs; i++) { 477 struct tgsi_full_dst_register *dst = &inst->Dst[i]; 478 if (dst->Register.File == TGSI_FILE_OUTPUT && 479 dst->Register.Index == aactx->colorOutput) { 480 dst->Register.File = TGSI_FILE_TEMPORARY; 481 dst->Register.Index = aactx->colorTemp; 482 } 483 } 484 } 485 486 ctx->emit_instruction(ctx, inst); 487 } 488 489 490 /** 491 * Generate the frag shader we'll use for drawing AA points. 492 * This will be the user's shader plus some texture/modulate instructions. 
493 */ 494 static boolean 495 generate_aapoint_fs(struct aapoint_stage *aapoint) 496 { 497 const struct pipe_shader_state *orig_fs = &aapoint->fs->state; 498 struct pipe_shader_state aapoint_fs; 499 struct aa_transform_context transform; 500 const uint newLen = tgsi_num_tokens(orig_fs->tokens) + NUM_NEW_TOKENS; 501 struct pipe_context *pipe = aapoint->stage.draw->pipe; 502 503 aapoint_fs = *orig_fs; /* copy to init */ 504 aapoint_fs.tokens = tgsi_alloc_tokens(newLen); 505 if (aapoint_fs.tokens == NULL) 506 return FALSE; 507 508 memset(&transform, 0, sizeof(transform)); 509 transform.colorOutput = -1; 510 transform.maxInput = -1; 511 transform.maxGeneric = -1; 512 transform.colorTemp = -1; 513 transform.tmp0 = -1; 514 transform.firstInstruction = TRUE; 515 transform.base.transform_instruction = aa_transform_inst; 516 transform.base.transform_declaration = aa_transform_decl; 517 518 tgsi_transform_shader(orig_fs->tokens, 519 (struct tgsi_token *) aapoint_fs.tokens, 520 newLen, &transform.base); 521 522 #if 0 /* DEBUG */ 523 debug_printf("draw_aapoint, orig shader:\n"); 524 tgsi_dump(orig_fs->tokens, 0); 525 debug_printf("draw_aapoint, new shader:\n"); 526 tgsi_dump(aapoint_fs.tokens, 0); 527 #endif 528 529 aapoint->fs->aapoint_fs 530 = aapoint->driver_create_fs_state(pipe, &aapoint_fs); 531 if (aapoint->fs->aapoint_fs == NULL) 532 goto fail; 533 534 aapoint->fs->generic_attrib = transform.maxGeneric + 1; 535 FREE((void *)aapoint_fs.tokens); 536 return TRUE; 537 538 fail: 539 FREE((void *)aapoint_fs.tokens); 540 return FALSE; 541 } 542 543 544 /** 545 * When we're about to draw our first AA point in a batch, this function is 546 * called to tell the driver to bind our modified fragment shader. 
547 */ 548 static boolean 549 bind_aapoint_fragment_shader(struct aapoint_stage *aapoint) 550 { 551 struct draw_context *draw = aapoint->stage.draw; 552 struct pipe_context *pipe = draw->pipe; 553 554 if (!aapoint->fs->aapoint_fs && 555 !generate_aapoint_fs(aapoint)) 556 return FALSE; 557 558 draw->suspend_flushing = TRUE; 559 aapoint->driver_bind_fs_state(pipe, aapoint->fs->aapoint_fs); 560 draw->suspend_flushing = FALSE; 561 562 return TRUE; 563 } 564 565 566 567 static INLINE struct aapoint_stage * 568 aapoint_stage( struct draw_stage *stage ) 569 { 570 return (struct aapoint_stage *) stage; 571 } 572 573 574 575 576 /** 577 * Draw an AA point by drawing a quad. 578 */ 579 static void 580 aapoint_point(struct draw_stage *stage, struct prim_header *header) 581 { 582 const struct aapoint_stage *aapoint = aapoint_stage(stage); 583 struct prim_header tri; 584 struct vertex_header *v[4]; 585 const uint tex_slot = aapoint->tex_slot; 586 const uint pos_slot = aapoint->pos_slot; 587 float radius, *pos, *tex; 588 uint i; 589 float k; 590 591 if (aapoint->psize_slot >= 0) { 592 radius = 0.5f * header->v[0]->data[aapoint->psize_slot][0]; 593 } 594 else { 595 radius = aapoint->radius; 596 } 597 598 /* 599 * Note: the texcoords (generic attrib, really) we use are special: 600 * The S and T components simply vary from -1 to +1. 601 * The R component is k, below. 602 * The Q component is 1.0 and will used as a handy constant in the 603 * fragment shader. 604 */ 605 606 /* 607 * k is the threshold distance from the point's center at which 608 * we begin alpha attenuation (the coverage value). 609 * Operating within a unit circle, we'll compute the fragment's 610 * distance 'd' from the center point using the texcoords. 611 * IF d > 1.0 THEN 612 * KILL fragment 613 * ELSE IF d > k THEN 614 * compute coverage in [0,1] proportional to d in [k, 1]. 
615 * ELSE 616 * coverage = 1.0; // full coverage 617 * ENDIF 618 * 619 * Note: the ELSEIF and ELSE clauses are actually implemented with CMP to 620 * avoid using IF/ELSE/ENDIF TGSI opcodes. 621 */ 622 623 #if !NORMALIZE 624 k = 1.0f / radius; 625 k = 1.0f - 2.0f * k + k * k; 626 #else 627 k = 1.0f - 1.0f / radius; 628 #endif 629 630 /* allocate/dup new verts */ 631 for (i = 0; i < 4; i++) { 632 v[i] = dup_vert(stage, header->v[0], i); 633 } 634 635 /* new verts */ 636 pos = v[0]->data[pos_slot]; 637 pos[0] -= radius; 638 pos[1] -= radius; 639 640 pos = v[1]->data[pos_slot]; 641 pos[0] += radius; 642 pos[1] -= radius; 643 644 pos = v[2]->data[pos_slot]; 645 pos[0] += radius; 646 pos[1] += radius; 647 648 pos = v[3]->data[pos_slot]; 649 pos[0] -= radius; 650 pos[1] += radius; 651 652 /* new texcoords */ 653 tex = v[0]->data[tex_slot]; 654 ASSIGN_4V(tex, -1, -1, k, 1); 655 656 tex = v[1]->data[tex_slot]; 657 ASSIGN_4V(tex, 1, -1, k, 1); 658 659 tex = v[2]->data[tex_slot]; 660 ASSIGN_4V(tex, 1, 1, k, 1); 661 662 tex = v[3]->data[tex_slot]; 663 ASSIGN_4V(tex, -1, 1, k, 1); 664 665 /* emit 2 tris for the quad strip */ 666 tri.v[0] = v[0]; 667 tri.v[1] = v[1]; 668 tri.v[2] = v[2]; 669 stage->next->tri( stage->next, &tri ); 670 671 tri.v[0] = v[0]; 672 tri.v[1] = v[2]; 673 tri.v[2] = v[3]; 674 stage->next->tri( stage->next, &tri ); 675 } 676 677 678 static void 679 aapoint_first_point(struct draw_stage *stage, struct prim_header *header) 680 { 681 auto struct aapoint_stage *aapoint = aapoint_stage(stage); 682 struct draw_context *draw = stage->draw; 683 struct pipe_context *pipe = draw->pipe; 684 const struct pipe_rasterizer_state *rast = draw->rasterizer; 685 void *r; 686 687 assert(draw->rasterizer->point_smooth); 688 689 if (draw->rasterizer->point_size <= 2.0) 690 aapoint->radius = 1.0; 691 else 692 aapoint->radius = 0.5f * draw->rasterizer->point_size; 693 694 /* 695 * Bind (generate) our fragprog. 
696 */ 697 bind_aapoint_fragment_shader(aapoint); 698 699 /* update vertex attrib info */ 700 aapoint->pos_slot = draw_current_shader_position_output(draw); 701 702 /* allocate the extra post-transformed vertex attribute */ 703 aapoint->tex_slot = draw_alloc_extra_vertex_attrib(draw, 704 TGSI_SEMANTIC_GENERIC, 705 aapoint->fs->generic_attrib); 706 assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */ 707 708 /* find psize slot in post-transform vertex */ 709 aapoint->psize_slot = -1; 710 if (draw->rasterizer->point_size_per_vertex) { 711 const struct tgsi_shader_info *info = draw_get_shader_info(draw); 712 uint i; 713 /* find PSIZ vertex output */ 714 for (i = 0; i < info->num_outputs; i++) { 715 if (info->output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { 716 aapoint->psize_slot = i; 717 break; 718 } 719 } 720 } 721 722 draw->suspend_flushing = TRUE; 723 724 /* Disable triangle culling, stippling, unfilled mode etc. */ 725 r = draw_get_rasterizer_no_cull(draw, rast->scissor, rast->flatshade); 726 pipe->bind_rasterizer_state(pipe, r); 727 728 draw->suspend_flushing = FALSE; 729 730 /* now really draw first point */ 731 stage->point = aapoint_point; 732 stage->point(stage, header); 733 } 734 735 736 static void 737 aapoint_flush(struct draw_stage *stage, unsigned flags) 738 { 739 struct draw_context *draw = stage->draw; 740 struct aapoint_stage *aapoint = aapoint_stage(stage); 741 struct pipe_context *pipe = draw->pipe; 742 743 stage->point = aapoint_first_point; 744 stage->next->flush( stage->next, flags ); 745 746 /* restore original frag shader */ 747 draw->suspend_flushing = TRUE; 748 aapoint->driver_bind_fs_state(pipe, aapoint->fs ? 
aapoint->fs->driver_fs : NULL); 749 750 /* restore original rasterizer state */ 751 if (draw->rast_handle) { 752 pipe->bind_rasterizer_state(pipe, draw->rast_handle); 753 } 754 755 draw->suspend_flushing = FALSE; 756 757 draw_remove_extra_vertex_attribs(draw); 758 } 759 760 761 static void 762 aapoint_reset_stipple_counter(struct draw_stage *stage) 763 { 764 stage->next->reset_stipple_counter( stage->next ); 765 } 766 767 768 static void 769 aapoint_destroy(struct draw_stage *stage) 770 { 771 struct aapoint_stage* aapoint = aapoint_stage(stage); 772 struct pipe_context *pipe = stage->draw->pipe; 773 774 draw_free_temp_verts( stage ); 775 776 /* restore the old entry points */ 777 pipe->create_fs_state = aapoint->driver_create_fs_state; 778 pipe->bind_fs_state = aapoint->driver_bind_fs_state; 779 pipe->delete_fs_state = aapoint->driver_delete_fs_state; 780 781 FREE( stage ); 782 } 783 784 785 static struct aapoint_stage * 786 draw_aapoint_stage(struct draw_context *draw) 787 { 788 struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage); 789 if (aapoint == NULL) 790 goto fail; 791 792 aapoint->stage.draw = draw; 793 aapoint->stage.name = "aapoint"; 794 aapoint->stage.next = NULL; 795 aapoint->stage.point = aapoint_first_point; 796 aapoint->stage.line = draw_pipe_passthrough_line; 797 aapoint->stage.tri = draw_pipe_passthrough_tri; 798 aapoint->stage.flush = aapoint_flush; 799 aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter; 800 aapoint->stage.destroy = aapoint_destroy; 801 802 if (!draw_alloc_temp_verts( &aapoint->stage, 4 )) 803 goto fail; 804 805 return aapoint; 806 807 fail: 808 if (aapoint) 809 aapoint->stage.destroy(&aapoint->stage); 810 811 return NULL; 812 813 } 814 815 816 static struct aapoint_stage * 817 aapoint_stage_from_pipe(struct pipe_context *pipe) 818 { 819 struct draw_context *draw = (struct draw_context *) pipe->draw; 820 return aapoint_stage(draw->pipeline.aapoint); 821 } 822 823 824 /** 825 * This function overrides the 
driver's create_fs_state() function and 826 * will typically be called by the state tracker. 827 */ 828 static void * 829 aapoint_create_fs_state(struct pipe_context *pipe, 830 const struct pipe_shader_state *fs) 831 { 832 struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe); 833 struct aapoint_fragment_shader *aafs = CALLOC_STRUCT(aapoint_fragment_shader); 834 if (aafs == NULL) 835 return NULL; 836 837 aafs->state.tokens = tgsi_dup_tokens(fs->tokens); 838 839 /* pass-through */ 840 aafs->driver_fs = aapoint->driver_create_fs_state(pipe, fs); 841 842 return aafs; 843 } 844 845 846 static void 847 aapoint_bind_fs_state(struct pipe_context *pipe, void *fs) 848 { 849 struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe); 850 struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs; 851 /* save current */ 852 aapoint->fs = aafs; 853 /* pass-through */ 854 aapoint->driver_bind_fs_state(pipe, 855 (aafs ? aafs->driver_fs : NULL)); 856 } 857 858 859 static void 860 aapoint_delete_fs_state(struct pipe_context *pipe, void *fs) 861 { 862 struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe); 863 struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs; 864 865 /* pass-through */ 866 aapoint->driver_delete_fs_state(pipe, aafs->driver_fs); 867 868 if (aafs->aapoint_fs) 869 aapoint->driver_delete_fs_state(pipe, aafs->aapoint_fs); 870 871 FREE((void*)aafs->state.tokens); 872 873 FREE(aafs); 874 } 875 876 877 /** 878 * Called by drivers that want to install this AA point prim stage 879 * into the draw module's pipeline. This will not be used if the 880 * hardware has native support for AA points. 
881 */ 882 boolean 883 draw_install_aapoint_stage(struct draw_context *draw, 884 struct pipe_context *pipe) 885 { 886 struct aapoint_stage *aapoint; 887 888 pipe->draw = (void *) draw; 889 890 /* 891 * Create / install AA point drawing / prim stage 892 */ 893 aapoint = draw_aapoint_stage( draw ); 894 if (aapoint == NULL) 895 return FALSE; 896 897 /* save original driver functions */ 898 aapoint->driver_create_fs_state = pipe->create_fs_state; 899 aapoint->driver_bind_fs_state = pipe->bind_fs_state; 900 aapoint->driver_delete_fs_state = pipe->delete_fs_state; 901 902 /* override the driver's functions */ 903 pipe->create_fs_state = aapoint_create_fs_state; 904 pipe->bind_fs_state = aapoint_bind_fs_state; 905 pipe->delete_fs_state = aapoint_delete_fs_state; 906 907 draw->pipeline.aapoint = &aapoint->stage; 908 909 return TRUE; 910 } 911