Home | History | Annotate | Download | only in draw
      1 /**************************************************************************
      2  *
      3  * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial portions
     16  * of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
     22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  **************************************************************************/
     27 
     28 /**
     29  * AA point stage:  AA points are converted to quads and rendered with a
     30  * special fragment shader.  Another approach would be to use a texture
     31  * map image of a point, but experiments indicate the quality isn't nearly
     32  * as good as this approach.
     33  *
     34  * Note: this looks a lot like draw_aaline.c but there's actually little
     35  * if any code that can be shared.
     36  *
     37  * Authors:  Brian Paul
     38  */
     39 
     40 
     41 #include "pipe/p_context.h"
     42 #include "pipe/p_defines.h"
     43 #include "pipe/p_shader_tokens.h"
     44 
     45 #include "tgsi/tgsi_transform.h"
     46 #include "tgsi/tgsi_dump.h"
     47 
     48 #include "util/u_math.h"
     49 #include "util/u_memory.h"
     50 
     51 #include "draw_context.h"
     52 #include "draw_vs.h"
     53 #include "draw_pipe.h"
     54 
     55 
     56 /** Approx number of new tokens for instructions in aa_transform_inst() */
     57 #define NUM_NEW_TOKENS 200
     58 
     59 
/*
 * Enabling NORMALIZE might give _slightly_ better results.
 * Basically, it controls whether we compute distance as d=sqrt(x*x+y*y) or
 * d=x*x+y*y.  Since we're working with a unit circle, the latter seems
 * close enough and saves some costly instructions.
 */
     66 #define NORMALIZE 0
     67 
     68 
     69 /**
     70  * Subclass of pipe_shader_state to carry extra fragment shader info.
     71  */
     72 struct aapoint_fragment_shader
     73 {
     74    struct pipe_shader_state state;
     75    void *driver_fs;   /**< the regular shader */
     76    void *aapoint_fs;  /**< the aa point-augmented shader */
     77    int generic_attrib; /**< The generic input attrib/texcoord we'll use */
     78 };
     79 
     80 
     81 /**
     82  * Subclass of draw_stage
     83  */
     84 struct aapoint_stage
     85 {
     86    struct draw_stage stage;
     87 
     88    /** half of pipe_rasterizer_state::point_size */
     89    float radius;
     90 
     91    /** vertex attrib slot containing point size */
     92    int psize_slot;
     93 
     94    /** this is the vertex attrib slot for the new texcoords */
     95    uint tex_slot;
     96 
     97    /** vertex attrib slot containing position */
     98    uint pos_slot;
     99 
    100    /** Currently bound fragment shader */
    101    struct aapoint_fragment_shader *fs;
    102 
    103    /*
    104     * Driver interface/override functions
    105     */
    106    void * (*driver_create_fs_state)(struct pipe_context *,
    107                                     const struct pipe_shader_state *);
    108    void (*driver_bind_fs_state)(struct pipe_context *, void *);
    109    void (*driver_delete_fs_state)(struct pipe_context *, void *);
    110 };
    111 
    112 
    113 
    114 /**
    115  * Subclass of tgsi_transform_context, used for transforming the
    116  * user's fragment shader to add the special AA instructions.
    117  */
    118 struct aa_transform_context {
    119    struct tgsi_transform_context base;
    120    uint tempsUsed;  /**< bitmask */
    121    int colorOutput; /**< which output is the primary color */
    122    int maxInput, maxGeneric;  /**< max input index found */
    123    int tmp0, colorTemp;  /**< temp registers */
    124    boolean firstInstruction;
    125 };
    126 
    127 
    128 /**
    129  * TGSI declaration transform callback.
    130  * Look for two free temp regs and available input reg for new texcoords.
    131  */
    132 static void
    133 aa_transform_decl(struct tgsi_transform_context *ctx,
    134                   struct tgsi_full_declaration *decl)
    135 {
    136    struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
    137 
    138    if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
    139        decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
    140        decl->Semantic.Index == 0) {
    141       aactx->colorOutput = decl->Range.First;
    142    }
    143    else if (decl->Declaration.File == TGSI_FILE_INPUT) {
    144       if ((int) decl->Range.Last > aactx->maxInput)
    145          aactx->maxInput = decl->Range.Last;
    146       if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC &&
    147            (int) decl->Semantic.Index > aactx->maxGeneric) {
    148          aactx->maxGeneric = decl->Semantic.Index;
    149       }
    150    }
    151    else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
    152       uint i;
    153       for (i = decl->Range.First;
    154            i <= decl->Range.Last; i++) {
    155          aactx->tempsUsed |= (1 << i);
    156       }
    157    }
    158 
    159    ctx->emit_declaration(ctx, decl);
    160 }
    161 
    162 
    163 /**
    164  * TGSI instruction transform callback.
    165  * Replace writes to result.color w/ a temp reg.
    166  * Upon END instruction, insert texture sampling code for antialiasing.
    167  */
    168 static void
    169 aa_transform_inst(struct tgsi_transform_context *ctx,
    170                   struct tgsi_full_instruction *inst)
    171 {
    172    struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
    173    struct tgsi_full_instruction newInst;
    174 
    175    if (aactx->firstInstruction) {
    176       /* emit our new declarations before the first instruction */
    177 
    178       struct tgsi_full_declaration decl;
    179       const int texInput = aactx->maxInput + 1;
    180       int tmp0;
    181       uint i;
    182 
    183       /* find two free temp regs */
    184       for (i = 0; i < 32; i++) {
    185          if ((aactx->tempsUsed & (1 << i)) == 0) {
    186             /* found a free temp */
    187             if (aactx->tmp0 < 0)
    188                aactx->tmp0 = i;
    189             else if (aactx->colorTemp < 0)
    190                aactx->colorTemp = i;
    191             else
    192                break;
    193          }
    194       }
    195 
    196       assert(aactx->colorTemp != aactx->tmp0);
    197 
    198       tmp0 = aactx->tmp0;
    199 
    200       /* declare new generic input/texcoord */
    201       decl = tgsi_default_full_declaration();
    202       decl.Declaration.File = TGSI_FILE_INPUT;
    203       /* XXX this could be linear... */
    204       decl.Declaration.Interpolate = 1;
    205       decl.Declaration.Semantic = 1;
    206       decl.Semantic.Name = TGSI_SEMANTIC_GENERIC;
    207       decl.Semantic.Index = aactx->maxGeneric + 1;
    208       decl.Range.First =
    209       decl.Range.Last = texInput;
    210       decl.Interp.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
    211       ctx->emit_declaration(ctx, &decl);
    212 
    213       /* declare new temp regs */
    214       decl = tgsi_default_full_declaration();
    215       decl.Declaration.File = TGSI_FILE_TEMPORARY;
    216       decl.Range.First =
    217       decl.Range.Last = tmp0;
    218       ctx->emit_declaration(ctx, &decl);
    219 
    220       decl = tgsi_default_full_declaration();
    221       decl.Declaration.File = TGSI_FILE_TEMPORARY;
    222       decl.Range.First =
    223       decl.Range.Last = aactx->colorTemp;
    224       ctx->emit_declaration(ctx, &decl);
    225 
    226       aactx->firstInstruction = FALSE;
    227 
    228 
    229       /*
    230        * Emit code to compute fragment coverage, kill if outside point radius
    231        *
    232        * Temp reg0 usage:
    233        *  t0.x = distance of fragment from center point
    234        *  t0.y = boolean, is t0.x > 1.0, also misc temp usage
    235        *  t0.z = temporary for computing 1/(1-k) value
    236        *  t0.w = final coverage value
    237        */
    238 
    239       /* MUL t0.xy, tex, tex;  # compute x^2, y^2 */
    240       newInst = tgsi_default_full_instruction();
    241       newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
    242       newInst.Instruction.NumDstRegs = 1;
    243       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
    244       newInst.Dst[0].Register.Index = tmp0;
    245       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XY;
    246       newInst.Instruction.NumSrcRegs = 2;
    247       newInst.Src[0].Register.File = TGSI_FILE_INPUT;
    248       newInst.Src[0].Register.Index = texInput;
    249       newInst.Src[1].Register.File = TGSI_FILE_INPUT;
    250       newInst.Src[1].Register.Index = texInput;
    251       ctx->emit_instruction(ctx, &newInst);
    252 
    253       /* ADD t0.x, t0.x, t0.y;  # x^2 + y^2 */
    254       newInst = tgsi_default_full_instruction();
    255       newInst.Instruction.Opcode = TGSI_OPCODE_ADD;
    256       newInst.Instruction.NumDstRegs = 1;
    257       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
    258       newInst.Dst[0].Register.Index = tmp0;
    259       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X;
    260       newInst.Instruction.NumSrcRegs = 2;
    261       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
    262       newInst.Src[0].Register.Index = tmp0;
    263       newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
    264       newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY;
    265       newInst.Src[1].Register.Index = tmp0;
    266       newInst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_Y;
    267       ctx->emit_instruction(ctx, &newInst);
    268 
    269 #if NORMALIZE  /* OPTIONAL normalization of length */
    270       /* RSQ t0.x, t0.x; */
    271       newInst = tgsi_default_full_instruction();
    272       newInst.Instruction.Opcode = TGSI_OPCODE_RSQ;
    273       newInst.Instruction.NumDstRegs = 1;
    274       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
    275       newInst.Dst[0].Register.Index = tmp0;
    276       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X;
    277       newInst.Instruction.NumSrcRegs = 1;
    278       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
    279       newInst.Src[0].Register.Index = tmp0;
    280       ctx->emit_instruction(ctx, &newInst);
    281 
    282       /* RCP t0.x, t0.x; */
    283       newInst = tgsi_default_full_instruction();
    284       newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
    285       newInst.Instruction.NumDstRegs = 1;
    286       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
    287       newInst.Dst[0].Register.Index = tmp0;
    288       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X;
    289       newInst.Instruction.NumSrcRegs = 1;
    290       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
    291       newInst.Src[0].Register.Index = tmp0;
    292       ctx->emit_instruction(ctx, &newInst);
    293 #endif
    294 
    295       /* SGT t0.y, t0.xxxx, tex.wwww;  # bool b = d > 1 (NOTE tex.w == 1) */
    296       newInst = tgsi_default_full_instruction();
    297       newInst.Instruction.Opcode = TGSI_OPCODE_SGT;
    298       newInst.Instruction.NumDstRegs = 1;
    299       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
    300       newInst.Dst[0].Register.Index = tmp0;
    301       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y;
    302       newInst.Instruction.NumSrcRegs = 2;
    303       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
    304       newInst.Src[0].Register.Index = tmp0;
    305       newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
    306       newInst.Src[1].Register.File = TGSI_FILE_INPUT;
    307       newInst.Src[1].Register.Index = texInput;
    308       newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_W;
    309       ctx->emit_instruction(ctx, &newInst);
    310 
    311       /* KIL -tmp0.yyyy;   # if -tmp0.y < 0, KILL */
    312       newInst = tgsi_default_full_instruction();
    313       newInst.Instruction.Opcode = TGSI_OPCODE_KIL;
    314       newInst.Instruction.NumDstRegs = 0;
    315       newInst.Instruction.NumSrcRegs = 1;
    316       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
    317       newInst.Src[0].Register.Index = tmp0;
    318       newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Y;
    319       newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_Y;
    320       newInst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_Y;
    321       newInst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Y;
    322       newInst.Src[0].Register.Negate = 1;
    323       ctx->emit_instruction(ctx, &newInst);
    324 
    325 
    326       /* compute coverage factor = (1-d)/(1-k) */
    327 
    328       /* SUB t0.z, tex.w, tex.z;  # m = 1 - k */
    329       newInst = tgsi_default_full_instruction();
    330       newInst.Instruction.Opcode = TGSI_OPCODE_SUB;
    331       newInst.Instruction.NumDstRegs = 1;
    332       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
    333       newInst.Dst[0].Register.Index = tmp0;
    334       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Z;
    335       newInst.Instruction.NumSrcRegs = 2;
    336       newInst.Src[0].Register.File = TGSI_FILE_INPUT;
    337       newInst.Src[0].Register.Index = texInput;
    338       newInst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_W;
    339       newInst.Src[1].Register.File = TGSI_FILE_INPUT;
    340       newInst.Src[1].Register.Index = texInput;
    341       newInst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z;
    342       ctx->emit_instruction(ctx, &newInst);
    343 
    344       /* RCP t0.z, t0.z;  # t0.z = 1 / m */
    345       newInst = tgsi_default_full_instruction();
    346       newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
    347       newInst.Instruction.NumDstRegs = 1;
    348       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
    349       newInst.Dst[0].Register.Index = tmp0;
    350       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Z;
    351       newInst.Instruction.NumSrcRegs = 1;
    352       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
    353       newInst.Src[0].Register.Index = tmp0;
    354       newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Z;
    355       ctx->emit_instruction(ctx, &newInst);
    356 
    357       /* SUB t0.y, 1, t0.x;  # d = 1 - d */
    358       newInst = tgsi_default_full_instruction();
    359       newInst.Instruction.Opcode = TGSI_OPCODE_SUB;
    360       newInst.Instruction.NumDstRegs = 1;
    361       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
    362       newInst.Dst[0].Register.Index = tmp0;
    363       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y;
    364       newInst.Instruction.NumSrcRegs = 2;
    365       newInst.Src[0].Register.File = TGSI_FILE_INPUT;
    366       newInst.Src[0].Register.Index = texInput;
    367       newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_W;
    368       newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY;
    369       newInst.Src[1].Register.Index = tmp0;
    370       newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_X;
    371       ctx->emit_instruction(ctx, &newInst);
    372 
    373       /* MUL t0.w, t0.y, t0.z;   # coverage = d * m */
    374       newInst = tgsi_default_full_instruction();
    375       newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
    376       newInst.Instruction.NumDstRegs = 1;
    377       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
    378       newInst.Dst[0].Register.Index = tmp0;
    379       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W;
    380       newInst.Instruction.NumSrcRegs = 2;
    381       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
    382       newInst.Src[0].Register.Index = tmp0;
    383       newInst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Y;
    384       newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY;
    385       newInst.Src[1].Register.Index = tmp0;
    386       newInst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_Z;
    387       ctx->emit_instruction(ctx, &newInst);
    388 
    389       /* SLE t0.y, t0.x, tex.z;  # bool b = distance <= k */
    390       newInst = tgsi_default_full_instruction();
    391       newInst.Instruction.Opcode = TGSI_OPCODE_SLE;
    392       newInst.Instruction.NumDstRegs = 1;
    393       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
    394       newInst.Dst[0].Register.Index = tmp0;
    395       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y;
    396       newInst.Instruction.NumSrcRegs = 2;
    397       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
    398       newInst.Src[0].Register.Index = tmp0;
    399       newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
    400       newInst.Src[1].Register.File = TGSI_FILE_INPUT;
    401       newInst.Src[1].Register.Index = texInput;
    402       newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Z;
    403       ctx->emit_instruction(ctx, &newInst);
    404 
    405       /* CMP t0.w, -t0.y, tex.w, t0.w;
    406        *  # if -t0.y < 0 then
    407        *       t0.w = 1
    408        *    else
    409        *       t0.w = t0.w
    410        */
    411       newInst = tgsi_default_full_instruction();
    412       newInst.Instruction.Opcode = TGSI_OPCODE_CMP;
    413       newInst.Instruction.NumDstRegs = 1;
    414       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
    415       newInst.Dst[0].Register.Index = tmp0;
    416       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W;
    417       newInst.Instruction.NumSrcRegs = 3;
    418       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
    419       newInst.Src[0].Register.Index = tmp0;
    420       newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Y;
    421       newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_Y;
    422       newInst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_Y;
    423       newInst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Y;
    424       newInst.Src[0].Register.Negate = 1;
    425       newInst.Src[1].Register.File = TGSI_FILE_INPUT;
    426       newInst.Src[1].Register.Index = texInput;
    427       newInst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_W;
    428       newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_W;
    429       newInst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_W;
    430       newInst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
    431       newInst.Src[2].Register.File = TGSI_FILE_TEMPORARY;
    432       newInst.Src[2].Register.Index = tmp0;
    433       newInst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_W;
    434       newInst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_W;
    435       newInst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_W;
    436       newInst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W;
    437       ctx->emit_instruction(ctx, &newInst);
    438 
    439    }
    440 
    441    if (inst->Instruction.Opcode == TGSI_OPCODE_END) {
    442       /* add alpha modulation code at tail of program */
    443 
    444       /* MOV result.color.xyz, colorTemp; */
    445       newInst = tgsi_default_full_instruction();
    446       newInst.Instruction.Opcode = TGSI_OPCODE_MOV;
    447       newInst.Instruction.NumDstRegs = 1;
    448       newInst.Dst[0].Register.File = TGSI_FILE_OUTPUT;
    449       newInst.Dst[0].Register.Index = aactx->colorOutput;
    450       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZ;
    451       newInst.Instruction.NumSrcRegs = 1;
    452       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
    453       newInst.Src[0].Register.Index = aactx->colorTemp;
    454       ctx->emit_instruction(ctx, &newInst);
    455 
    456       /* MUL result.color.w, colorTemp, tmp0.w; */
    457       newInst = tgsi_default_full_instruction();
    458       newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
    459       newInst.Instruction.NumDstRegs = 1;
    460       newInst.Dst[0].Register.File = TGSI_FILE_OUTPUT;
    461       newInst.Dst[0].Register.Index = aactx->colorOutput;
    462       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W;
    463       newInst.Instruction.NumSrcRegs = 2;
    464       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
    465       newInst.Src[0].Register.Index = aactx->colorTemp;
    466       newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY;
    467       newInst.Src[1].Register.Index = aactx->tmp0;
    468       ctx->emit_instruction(ctx, &newInst);
    469    }
    470    else {
    471       /* Not an END instruction.
    472        * Look for writes to result.color and replace with colorTemp reg.
    473        */
    474       uint i;
    475 
    476       for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
    477          struct tgsi_full_dst_register *dst = &inst->Dst[i];
    478          if (dst->Register.File == TGSI_FILE_OUTPUT &&
    479              dst->Register.Index == aactx->colorOutput) {
    480             dst->Register.File = TGSI_FILE_TEMPORARY;
    481             dst->Register.Index = aactx->colorTemp;
    482          }
    483       }
    484    }
    485 
    486    ctx->emit_instruction(ctx, inst);
    487 }
    488 
    489 
    490 /**
    491  * Generate the frag shader we'll use for drawing AA points.
    492  * This will be the user's shader plus some texture/modulate instructions.
    493  */
    494 static boolean
    495 generate_aapoint_fs(struct aapoint_stage *aapoint)
    496 {
    497    const struct pipe_shader_state *orig_fs = &aapoint->fs->state;
    498    struct pipe_shader_state aapoint_fs;
    499    struct aa_transform_context transform;
    500    const uint newLen = tgsi_num_tokens(orig_fs->tokens) + NUM_NEW_TOKENS;
    501    struct pipe_context *pipe = aapoint->stage.draw->pipe;
    502 
    503    aapoint_fs = *orig_fs; /* copy to init */
    504    aapoint_fs.tokens = tgsi_alloc_tokens(newLen);
    505    if (aapoint_fs.tokens == NULL)
    506       return FALSE;
    507 
    508    memset(&transform, 0, sizeof(transform));
    509    transform.colorOutput = -1;
    510    transform.maxInput = -1;
    511    transform.maxGeneric = -1;
    512    transform.colorTemp = -1;
    513    transform.tmp0 = -1;
    514    transform.firstInstruction = TRUE;
    515    transform.base.transform_instruction = aa_transform_inst;
    516    transform.base.transform_declaration = aa_transform_decl;
    517 
    518    tgsi_transform_shader(orig_fs->tokens,
    519                          (struct tgsi_token *) aapoint_fs.tokens,
    520                          newLen, &transform.base);
    521 
    522 #if 0 /* DEBUG */
    523    debug_printf("draw_aapoint, orig shader:\n");
    524    tgsi_dump(orig_fs->tokens, 0);
    525    debug_printf("draw_aapoint, new shader:\n");
    526    tgsi_dump(aapoint_fs.tokens, 0);
    527 #endif
    528 
    529    aapoint->fs->aapoint_fs
    530       = aapoint->driver_create_fs_state(pipe, &aapoint_fs);
    531    if (aapoint->fs->aapoint_fs == NULL)
    532       goto fail;
    533 
    534    aapoint->fs->generic_attrib = transform.maxGeneric + 1;
    535    FREE((void *)aapoint_fs.tokens);
    536    return TRUE;
    537 
    538 fail:
    539    FREE((void *)aapoint_fs.tokens);
    540    return FALSE;
    541 }
    542 
    543 
    544 /**
    545  * When we're about to draw our first AA point in a batch, this function is
    546  * called to tell the driver to bind our modified fragment shader.
    547  */
    548 static boolean
    549 bind_aapoint_fragment_shader(struct aapoint_stage *aapoint)
    550 {
    551    struct draw_context *draw = aapoint->stage.draw;
    552    struct pipe_context *pipe = draw->pipe;
    553 
    554    if (!aapoint->fs->aapoint_fs &&
    555        !generate_aapoint_fs(aapoint))
    556       return FALSE;
    557 
    558    draw->suspend_flushing = TRUE;
    559    aapoint->driver_bind_fs_state(pipe, aapoint->fs->aapoint_fs);
    560    draw->suspend_flushing = FALSE;
    561 
    562    return TRUE;
    563 }
    564 
    565 
    566 
    567 static INLINE struct aapoint_stage *
    568 aapoint_stage( struct draw_stage *stage )
    569 {
    570    return (struct aapoint_stage *) stage;
    571 }
    572 
    573 
    574 
    575 
    576 /**
    577  * Draw an AA point by drawing a quad.
    578  */
    579 static void
    580 aapoint_point(struct draw_stage *stage, struct prim_header *header)
    581 {
    582    const struct aapoint_stage *aapoint = aapoint_stage(stage);
    583    struct prim_header tri;
    584    struct vertex_header *v[4];
    585    const uint tex_slot = aapoint->tex_slot;
    586    const uint pos_slot = aapoint->pos_slot;
    587    float radius, *pos, *tex;
    588    uint i;
    589    float k;
    590 
    591    if (aapoint->psize_slot >= 0) {
    592       radius = 0.5f * header->v[0]->data[aapoint->psize_slot][0];
    593    }
    594    else {
    595       radius = aapoint->radius;
    596    }
    597 
    598    /*
    599     * Note: the texcoords (generic attrib, really) we use are special:
    600     * The S and T components simply vary from -1 to +1.
    601     * The R component is k, below.
    602     * The Q component is 1.0 and will used as a handy constant in the
    603     * fragment shader.
    604     */
    605 
    606    /*
    607     * k is the threshold distance from the point's center at which
    608     * we begin alpha attenuation (the coverage value).
    609     * Operating within a unit circle, we'll compute the fragment's
    610     * distance 'd' from the center point using the texcoords.
    611     * IF d > 1.0 THEN
    612     *    KILL fragment
    613     * ELSE IF d > k THEN
    614     *    compute coverage in [0,1] proportional to d in [k, 1].
    615     * ELSE
    616     *    coverage = 1.0;  // full coverage
    617     * ENDIF
    618     *
    619     * Note: the ELSEIF and ELSE clauses are actually implemented with CMP to
    620     * avoid using IF/ELSE/ENDIF TGSI opcodes.
    621     */
    622 
    623 #if !NORMALIZE
    624    k = 1.0f / radius;
    625    k = 1.0f - 2.0f * k + k * k;
    626 #else
    627    k = 1.0f - 1.0f / radius;
    628 #endif
    629 
    630    /* allocate/dup new verts */
    631    for (i = 0; i < 4; i++) {
    632       v[i] = dup_vert(stage, header->v[0], i);
    633    }
    634 
    635    /* new verts */
    636    pos = v[0]->data[pos_slot];
    637    pos[0] -= radius;
    638    pos[1] -= radius;
    639 
    640    pos = v[1]->data[pos_slot];
    641    pos[0] += radius;
    642    pos[1] -= radius;
    643 
    644    pos = v[2]->data[pos_slot];
    645    pos[0] += radius;
    646    pos[1] += radius;
    647 
    648    pos = v[3]->data[pos_slot];
    649    pos[0] -= radius;
    650    pos[1] += radius;
    651 
    652    /* new texcoords */
    653    tex = v[0]->data[tex_slot];
    654    ASSIGN_4V(tex, -1, -1, k, 1);
    655 
    656    tex = v[1]->data[tex_slot];
    657    ASSIGN_4V(tex,  1, -1, k, 1);
    658 
    659    tex = v[2]->data[tex_slot];
    660    ASSIGN_4V(tex,  1,  1, k, 1);
    661 
    662    tex = v[3]->data[tex_slot];
    663    ASSIGN_4V(tex, -1,  1, k, 1);
    664 
    665    /* emit 2 tris for the quad strip */
    666    tri.v[0] = v[0];
    667    tri.v[1] = v[1];
    668    tri.v[2] = v[2];
    669    stage->next->tri( stage->next, &tri );
    670 
    671    tri.v[0] = v[0];
    672    tri.v[1] = v[2];
    673    tri.v[2] = v[3];
    674    stage->next->tri( stage->next, &tri );
    675 }
    676 
    677 
    678 static void
    679 aapoint_first_point(struct draw_stage *stage, struct prim_header *header)
    680 {
    681    auto struct aapoint_stage *aapoint = aapoint_stage(stage);
    682    struct draw_context *draw = stage->draw;
    683    struct pipe_context *pipe = draw->pipe;
    684    const struct pipe_rasterizer_state *rast = draw->rasterizer;
    685    void *r;
    686 
    687    assert(draw->rasterizer->point_smooth);
    688 
    689    if (draw->rasterizer->point_size <= 2.0)
    690       aapoint->radius = 1.0;
    691    else
    692       aapoint->radius = 0.5f * draw->rasterizer->point_size;
    693 
    694    /*
    695     * Bind (generate) our fragprog.
    696     */
    697    bind_aapoint_fragment_shader(aapoint);
    698 
    699    /* update vertex attrib info */
    700    aapoint->pos_slot = draw_current_shader_position_output(draw);
    701 
    702    /* allocate the extra post-transformed vertex attribute */
    703    aapoint->tex_slot = draw_alloc_extra_vertex_attrib(draw,
    704                                                       TGSI_SEMANTIC_GENERIC,
    705                                                       aapoint->fs->generic_attrib);
    706    assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */
    707 
    708    /* find psize slot in post-transform vertex */
    709    aapoint->psize_slot = -1;
    710    if (draw->rasterizer->point_size_per_vertex) {
    711       const struct tgsi_shader_info *info = draw_get_shader_info(draw);
    712       uint i;
    713       /* find PSIZ vertex output */
    714       for (i = 0; i < info->num_outputs; i++) {
    715          if (info->output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) {
    716             aapoint->psize_slot = i;
    717             break;
    718          }
    719       }
    720    }
    721 
    722    draw->suspend_flushing = TRUE;
    723 
    724    /* Disable triangle culling, stippling, unfilled mode etc. */
    725    r = draw_get_rasterizer_no_cull(draw, rast->scissor, rast->flatshade);
    726    pipe->bind_rasterizer_state(pipe, r);
    727 
    728    draw->suspend_flushing = FALSE;
    729 
    730    /* now really draw first point */
    731    stage->point = aapoint_point;
    732    stage->point(stage, header);
    733 }
    734 
    735 
    736 static void
    737 aapoint_flush(struct draw_stage *stage, unsigned flags)
    738 {
    739    struct draw_context *draw = stage->draw;
    740    struct aapoint_stage *aapoint = aapoint_stage(stage);
    741    struct pipe_context *pipe = draw->pipe;
    742 
    743    stage->point = aapoint_first_point;
    744    stage->next->flush( stage->next, flags );
    745 
    746    /* restore original frag shader */
    747    draw->suspend_flushing = TRUE;
    748    aapoint->driver_bind_fs_state(pipe, aapoint->fs ? aapoint->fs->driver_fs : NULL);
    749 
    750    /* restore original rasterizer state */
    751    if (draw->rast_handle) {
    752       pipe->bind_rasterizer_state(pipe, draw->rast_handle);
    753    }
    754 
    755    draw->suspend_flushing = FALSE;
    756 
    757    draw_remove_extra_vertex_attribs(draw);
    758 }
    759 
    760 
    761 static void
    762 aapoint_reset_stipple_counter(struct draw_stage *stage)
    763 {
    764    stage->next->reset_stipple_counter( stage->next );
    765 }
    766 
    767 
    768 static void
    769 aapoint_destroy(struct draw_stage *stage)
    770 {
    771    struct aapoint_stage* aapoint = aapoint_stage(stage);
    772    struct pipe_context *pipe = stage->draw->pipe;
    773 
    774    draw_free_temp_verts( stage );
    775 
    776    /* restore the old entry points */
    777    pipe->create_fs_state = aapoint->driver_create_fs_state;
    778    pipe->bind_fs_state = aapoint->driver_bind_fs_state;
    779    pipe->delete_fs_state = aapoint->driver_delete_fs_state;
    780 
    781    FREE( stage );
    782 }
    783 
    784 
    785 static struct aapoint_stage *
    786 draw_aapoint_stage(struct draw_context *draw)
    787 {
    788    struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage);
    789    if (aapoint == NULL)
    790       goto fail;
    791 
    792    aapoint->stage.draw = draw;
    793    aapoint->stage.name = "aapoint";
    794    aapoint->stage.next = NULL;
    795    aapoint->stage.point = aapoint_first_point;
    796    aapoint->stage.line = draw_pipe_passthrough_line;
    797    aapoint->stage.tri = draw_pipe_passthrough_tri;
    798    aapoint->stage.flush = aapoint_flush;
    799    aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter;
    800    aapoint->stage.destroy = aapoint_destroy;
    801 
    802    if (!draw_alloc_temp_verts( &aapoint->stage, 4 ))
    803       goto fail;
    804 
    805    return aapoint;
    806 
    807  fail:
    808    if (aapoint)
    809       aapoint->stage.destroy(&aapoint->stage);
    810 
    811    return NULL;
    812 
    813 }
    814 
    815 
    816 static struct aapoint_stage *
    817 aapoint_stage_from_pipe(struct pipe_context *pipe)
    818 {
    819    struct draw_context *draw = (struct draw_context *) pipe->draw;
    820    return aapoint_stage(draw->pipeline.aapoint);
    821 }
    822 
    823 
    824 /**
    825  * This function overrides the driver's create_fs_state() function and
    826  * will typically be called by the state tracker.
    827  */
    828 static void *
    829 aapoint_create_fs_state(struct pipe_context *pipe,
    830                        const struct pipe_shader_state *fs)
    831 {
    832    struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
    833    struct aapoint_fragment_shader *aafs = CALLOC_STRUCT(aapoint_fragment_shader);
    834    if (aafs == NULL)
    835       return NULL;
    836 
    837    aafs->state.tokens = tgsi_dup_tokens(fs->tokens);
    838 
    839    /* pass-through */
    840    aafs->driver_fs = aapoint->driver_create_fs_state(pipe, fs);
    841 
    842    return aafs;
    843 }
    844 
    845 
    846 static void
    847 aapoint_bind_fs_state(struct pipe_context *pipe, void *fs)
    848 {
    849    struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
    850    struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
    851    /* save current */
    852    aapoint->fs = aafs;
    853    /* pass-through */
    854    aapoint->driver_bind_fs_state(pipe,
    855                                  (aafs ? aafs->driver_fs : NULL));
    856 }
    857 
    858 
    859 static void
    860 aapoint_delete_fs_state(struct pipe_context *pipe, void *fs)
    861 {
    862    struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
    863    struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
    864 
    865    /* pass-through */
    866    aapoint->driver_delete_fs_state(pipe, aafs->driver_fs);
    867 
    868    if (aafs->aapoint_fs)
    869       aapoint->driver_delete_fs_state(pipe, aafs->aapoint_fs);
    870 
    871    FREE((void*)aafs->state.tokens);
    872 
    873    FREE(aafs);
    874 }
    875 
    876 
    877 /**
    878  * Called by drivers that want to install this AA point prim stage
    879  * into the draw module's pipeline.  This will not be used if the
    880  * hardware has native support for AA points.
    881  */
    882 boolean
    883 draw_install_aapoint_stage(struct draw_context *draw,
    884                            struct pipe_context *pipe)
    885 {
    886    struct aapoint_stage *aapoint;
    887 
    888    pipe->draw = (void *) draw;
    889 
    890    /*
    891     * Create / install AA point drawing / prim stage
    892     */
    893    aapoint = draw_aapoint_stage( draw );
    894    if (aapoint == NULL)
    895       return FALSE;
    896 
    897    /* save original driver functions */
    898    aapoint->driver_create_fs_state = pipe->create_fs_state;
    899    aapoint->driver_bind_fs_state = pipe->bind_fs_state;
    900    aapoint->driver_delete_fs_state = pipe->delete_fs_state;
    901 
    902    /* override the driver's functions */
    903    pipe->create_fs_state = aapoint_create_fs_state;
    904    pipe->bind_fs_state = aapoint_bind_fs_state;
    905    pipe->delete_fs_state = aapoint_delete_fs_state;
    906 
    907    draw->pipeline.aapoint = &aapoint->stage;
    908 
    909    return TRUE;
    910 }
    911