Home | History | Annotate | Download | only in draw
      1 /**************************************************************************
      2  *
      3  * Copyright 2008 VMware, Inc.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial portions
     16  * of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
     22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  **************************************************************************/
     27 
     28 /**
     29  * AA point stage:  AA points are converted to quads and rendered with a
     30  * special fragment shader.  Another approach would be to use a texture
     31  * map image of a point, but experiments indicate the quality isn't nearly
     32  * as good as this approach.
     33  *
     34  * Note: this looks a lot like draw_aaline.c but there's actually little
     35  * if any code that can be shared.
     36  *
     37  * Authors:  Brian Paul
     38  */
     39 
     40 
     41 #include "pipe/p_context.h"
     42 #include "pipe/p_defines.h"
     43 #include "pipe/p_shader_tokens.h"
     44 
     45 #include "tgsi/tgsi_transform.h"
     46 #include "tgsi/tgsi_dump.h"
     47 
     48 #include "util/u_math.h"
     49 #include "util/u_memory.h"
     50 
     51 #include "draw_context.h"
     52 #include "draw_vs.h"
     53 #include "draw_pipe.h"
     54 
     55 
/**
 * Approximate number of extra tokens needed for the declarations and
 * instructions that the AA transform adds to the user's shader (they are
 * emitted by aa_transform_prolog() and aa_transform_epilog()).
 * generate_aapoint_fs() adds this to the original token count when sizing
 * the buffer for the transformed shader.
 */
#define NUM_NEW_TOKENS 200
     58 
     59 
/*
 * Enabling NORMALIZE might give _slightly_ better results.
 * Basically, it controls whether we compute distance as d=sqrt(x*x+y*y) or
 * d=x*x+y*y.  Since we're working with a unit circle, the latter seems
 * close enough and saves some costly instructions.
 *
 * The flag is tested in two places: the shader code emitted by
 * aa_transform_prolog() and the threshold 'k' computed in aapoint_point().
 */
#define NORMALIZE 0
     67 
     68 
/**
 * Subclass of pipe_shader_state to carry extra fragment shader info.
 *
 * aapoint_create_fs_state() allocates one of these for every fragment
 * shader and returns it as the shader handle; aapoint_delete_fs_state()
 * releases it.
 */
struct aapoint_fragment_shader
{
   /* state.tokens is a private duplicate of the user's shader tokens */
   struct pipe_shader_state state;
   void *driver_fs;   /**< the regular shader */
   /* created lazily by generate_aapoint_fs(); NULL until first needed */
   void *aapoint_fs;  /**< the aa point-augmented shader */
   int generic_attrib; /**< The generic input attrib/texcoord we'll use */
};
     79 
     80 
/**
 * Subclass of draw_stage
 *
 * Holds the per-batch point parameters, the vertex attribute slots used
 * when building the quad, and the driver's original fragment shader
 * entry points that this stage overrides.
 */
struct aapoint_stage
{
   /* must stay first: draw_stage pointers are cast to aapoint_stage */
   struct draw_stage stage;

   /**
    * half of pipe_rasterizer_state::point_size
    * (aapoint_first_point() uses 1.0 when point_size <= 2.0)
    */
   float radius;

   /** vertex attrib slot containing point size, or -1 if not per-vertex */
   int psize_slot;

   /** this is the vertex attrib slot for the new texcoords */
   uint tex_slot;

   /** vertex attrib slot containing position */
   uint pos_slot;

   /** Currently bound fragment shader (NULL if none is bound) */
   struct aapoint_fragment_shader *fs;

   /*
    * Driver interface/override functions
    *
    * Saved by draw_install_aapoint_stage() before the pipe's entry points
    * are replaced, and put back by aapoint_destroy().
    */
   void * (*driver_create_fs_state)(struct pipe_context *,
                                    const struct pipe_shader_state *);
   void (*driver_bind_fs_state)(struct pipe_context *, void *);
   void (*driver_delete_fs_state)(struct pipe_context *, void *);
};
    111 
    112 
    113 
    114 /**
    115  * Subclass of tgsi_transform_context, used for transforming the
    116  * user's fragment shader to add the special AA instructions.
    117  */
    118 struct aa_transform_context {
    119    struct tgsi_transform_context base;
    120    uint tempsUsed;  /**< bitmask */
    121    int colorOutput; /**< which output is the primary color */
    122    int maxInput, maxGeneric;  /**< max input index found */
    123    int tmp0, colorTemp;  /**< temp registers */
    124 };
    125 
    126 
    127 /**
    128  * TGSI declaration transform callback.
    129  * Look for two free temp regs and available input reg for new texcoords.
    130  */
    131 static void
    132 aa_transform_decl(struct tgsi_transform_context *ctx,
    133                   struct tgsi_full_declaration *decl)
    134 {
    135    struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
    136 
    137    if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
    138        decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
    139        decl->Semantic.Index == 0) {
    140       aactx->colorOutput = decl->Range.First;
    141    }
    142    else if (decl->Declaration.File == TGSI_FILE_INPUT) {
    143       if ((int) decl->Range.Last > aactx->maxInput)
    144          aactx->maxInput = decl->Range.Last;
    145       if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC &&
    146            (int) decl->Semantic.Index > aactx->maxGeneric) {
    147          aactx->maxGeneric = decl->Semantic.Index;
    148       }
    149    }
    150    else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
    151       uint i;
    152       for (i = decl->Range.First;
    153            i <= decl->Range.Last; i++) {
    154          aactx->tempsUsed |= (1 << i);
    155       }
    156    }
    157 
    158    ctx->emit_declaration(ctx, decl);
    159 }
    160 
    161 
    162 /**
    163  * TGSI transform callback.
    164  * Insert new declarations and instructions before first instruction.
    165  */
    166 static void
    167 aa_transform_prolog(struct tgsi_transform_context *ctx)
    168 {
    169    /* emit our new declarations before the first instruction */
    170    struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
    171    struct tgsi_full_instruction newInst;
    172    const int texInput = aactx->maxInput + 1;
    173    int tmp0;
    174    uint i;
    175 
    176    /* find two free temp regs */
    177    for (i = 0; i < 32; i++) {
    178       if ((aactx->tempsUsed & (1 << i)) == 0) {
    179          /* found a free temp */
    180          if (aactx->tmp0 < 0)
    181             aactx->tmp0 = i;
    182          else if (aactx->colorTemp < 0)
    183             aactx->colorTemp = i;
    184          else
    185             break;
    186       }
    187    }
    188 
    189    assert(aactx->colorTemp != aactx->tmp0);
    190 
    191    tmp0 = aactx->tmp0;
    192 
    193    /* declare new generic input/texcoord */
    194    tgsi_transform_input_decl(ctx, texInput,
    195                              TGSI_SEMANTIC_GENERIC, aactx->maxGeneric + 1,
    196                              TGSI_INTERPOLATE_LINEAR);
    197 
    198    /* declare new temp regs */
    199    tgsi_transform_temp_decl(ctx, tmp0);
    200    tgsi_transform_temp_decl(ctx, aactx->colorTemp);
    201 
    202    /*
    203     * Emit code to compute fragment coverage, kill if outside point radius
    204     *
    205     * Temp reg0 usage:
    206     *  t0.x = distance of fragment from center point
    207     *  t0.y = boolean, is t0.x > 1.0, also misc temp usage
    208     *  t0.z = temporary for computing 1/(1-k) value
    209     *  t0.w = final coverage value
    210     */
    211 
    212    /* MUL t0.xy, tex, tex;  # compute x^2, y^2 */
    213    tgsi_transform_op2_inst(ctx, TGSI_OPCODE_MUL,
    214                            TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_XY,
    215                            TGSI_FILE_INPUT, texInput,
    216                            TGSI_FILE_INPUT, texInput, false);
    217 
    218    /* ADD t0.x, t0.x, t0.y;  # x^2 + y^2 */
    219    tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD,
    220                                TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X,
    221                                TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X,
    222                                TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y, false);
    223 
    224 #if NORMALIZE  /* OPTIONAL normalization of length */
    225    /* RSQ t0.x, t0.x; */
    226    tgsi_transform_op1_inst(ctx, TGSI_OPCODE_RSQ,
    227                            TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X,
    228                            TGSI_FILE_TEMPORARY, tmp0);
    229 
    230    /* RCP t0.x, t0.x; */
    231    tgsi_transform_op1_inst(ctx, TGSI_OPCODE_RCP,
    232                            TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X,
    233                            TGSI_FILE_TEMPORARY, tmp0);
    234 #endif
    235 
    236    /* SGT t0.y, t0.xxxx, tex.wwww;  # bool b = d > 1 (NOTE tex.w == 1) */
    237    tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SGT,
    238                                TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y,
    239                                TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X,
    240                                TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W, false);
    241 
    242    /* KILL_IF -tmp0.yyyy;   # if -tmp0.y < 0, KILL */
    243    tgsi_transform_kill_inst(ctx, TGSI_FILE_TEMPORARY, tmp0,
    244                             TGSI_SWIZZLE_Y, TRUE);
    245 
    246    /* compute coverage factor = (1-d)/(1-k) */
    247 
    248    /* SUB t0.z, tex.w, tex.z;  # m = 1 - k */
    249    tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD,
    250                                TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Z,
    251                                TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W,
    252                                TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_Z, true);
    253 
    254    /* RCP t0.z, t0.z;  # t0.z = 1 / m */
    255    newInst = tgsi_default_full_instruction();
    256    newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
    257    newInst.Instruction.NumDstRegs = 1;
    258    newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
    259    newInst.Dst[0].Register.Index = tmp0;
    260    newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Z;
    261    newInst.Instruction.NumSrcRegs = 1;
    262    newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
    263    newInst.Src[0].Register.Index = tmp0;
    264    newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Z;
    265    ctx->emit_instruction(ctx, &newInst);
    266 
    267    /* SUB t0.y, 1, t0.x;  # d = 1 - d */
    268    tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD,
    269                                TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y,
    270                                TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W,
    271                                TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X, true);
    272 
    273    /* MUL t0.w, t0.y, t0.z;   # coverage = d * m */
    274    tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MUL,
    275                                TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W,
    276                                TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y,
    277                                TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Z, false);
    278 
    279    /* SLE t0.y, t0.x, tex.z;  # bool b = distance <= k */
    280    tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SLE,
    281                                TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y,
    282                                TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X,
    283                                TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_Z, false);
    284 
    285    /* CMP t0.w, -t0.y, tex.w, t0.w;
    286     *  # if -t0.y < 0 then
    287     *       t0.w = 1
    288     *    else
    289     *       t0.w = t0.w
    290     */
    291    tgsi_transform_op3_swz_inst(ctx, TGSI_OPCODE_CMP,
    292                                TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W,
    293                                TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y, 1,
    294                                TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W,
    295                                TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_W);
    296 }
    297 
    298 
    299 /**
    300  * TGSI transform callback.
    301  * Insert new instructions before the END instruction.
    302  */
    303 static void
    304 aa_transform_epilog(struct tgsi_transform_context *ctx)
    305 {
    306    struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
    307 
    308    /* add alpha modulation code at tail of program */
    309 
    310    /* MOV result.color.xyz, colorTemp; */
    311    tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
    312                            TGSI_FILE_OUTPUT, aactx->colorOutput,
    313                            TGSI_WRITEMASK_XYZ,
    314                            TGSI_FILE_TEMPORARY, aactx->colorTemp);
    315 
    316    /* MUL result.color.w, colorTemp, tmp0.w; */
    317    tgsi_transform_op2_inst(ctx, TGSI_OPCODE_MUL,
    318                            TGSI_FILE_OUTPUT, aactx->colorOutput,
    319                            TGSI_WRITEMASK_W,
    320                            TGSI_FILE_TEMPORARY, aactx->colorTemp,
    321                            TGSI_FILE_TEMPORARY, aactx->tmp0, false);
    322 }
    323 
    324 
    325 /**
    326  * TGSI transform callback.
    327  * Called per instruction.
    328  * Replace writes to result.color w/ a temp reg.
    329  */
    330 static void
    331 aa_transform_inst(struct tgsi_transform_context *ctx,
    332                   struct tgsi_full_instruction *inst)
    333 {
    334    struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
    335    unsigned i;
    336 
    337    /* Not an END instruction.
    338     * Look for writes to result.color and replace with colorTemp reg.
    339     */
    340    for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
    341       struct tgsi_full_dst_register *dst = &inst->Dst[i];
    342       if (dst->Register.File == TGSI_FILE_OUTPUT &&
    343           dst->Register.Index == aactx->colorOutput) {
    344          dst->Register.File = TGSI_FILE_TEMPORARY;
    345          dst->Register.Index = aactx->colorTemp;
    346       }
    347    }
    348 
    349    ctx->emit_instruction(ctx, inst);
    350 }
    351 
    352 
    353 /**
    354  * Generate the frag shader we'll use for drawing AA points.
    355  * This will be the user's shader plus some texture/modulate instructions.
    356  */
    357 static boolean
    358 generate_aapoint_fs(struct aapoint_stage *aapoint)
    359 {
    360    const struct pipe_shader_state *orig_fs = &aapoint->fs->state;
    361    struct pipe_shader_state aapoint_fs;
    362    struct aa_transform_context transform;
    363    const uint newLen = tgsi_num_tokens(orig_fs->tokens) + NUM_NEW_TOKENS;
    364    struct pipe_context *pipe = aapoint->stage.draw->pipe;
    365 
    366    aapoint_fs = *orig_fs; /* copy to init */
    367    aapoint_fs.tokens = tgsi_alloc_tokens(newLen);
    368    if (aapoint_fs.tokens == NULL)
    369       return FALSE;
    370 
    371    memset(&transform, 0, sizeof(transform));
    372    transform.colorOutput = -1;
    373    transform.maxInput = -1;
    374    transform.maxGeneric = -1;
    375    transform.colorTemp = -1;
    376    transform.tmp0 = -1;
    377    transform.base.prolog = aa_transform_prolog;
    378    transform.base.epilog = aa_transform_epilog;
    379    transform.base.transform_instruction = aa_transform_inst;
    380    transform.base.transform_declaration = aa_transform_decl;
    381 
    382    tgsi_transform_shader(orig_fs->tokens,
    383                          (struct tgsi_token *) aapoint_fs.tokens,
    384                          newLen, &transform.base);
    385 
    386 #if 0 /* DEBUG */
    387    debug_printf("draw_aapoint, orig shader:\n");
    388    tgsi_dump(orig_fs->tokens, 0);
    389    debug_printf("draw_aapoint, new shader:\n");
    390    tgsi_dump(aapoint_fs.tokens, 0);
    391 #endif
    392 
    393    aapoint->fs->aapoint_fs
    394       = aapoint->driver_create_fs_state(pipe, &aapoint_fs);
    395    if (aapoint->fs->aapoint_fs == NULL)
    396       goto fail;
    397 
    398    aapoint->fs->generic_attrib = transform.maxGeneric + 1;
    399    FREE((void *)aapoint_fs.tokens);
    400    return TRUE;
    401 
    402 fail:
    403    FREE((void *)aapoint_fs.tokens);
    404    return FALSE;
    405 }
    406 
    407 
    408 /**
    409  * When we're about to draw our first AA point in a batch, this function is
    410  * called to tell the driver to bind our modified fragment shader.
    411  */
    412 static boolean
    413 bind_aapoint_fragment_shader(struct aapoint_stage *aapoint)
    414 {
    415    struct draw_context *draw = aapoint->stage.draw;
    416    struct pipe_context *pipe = draw->pipe;
    417 
    418    if (!aapoint->fs->aapoint_fs &&
    419        !generate_aapoint_fs(aapoint))
    420       return FALSE;
    421 
    422    draw->suspend_flushing = TRUE;
    423    aapoint->driver_bind_fs_state(pipe, aapoint->fs->aapoint_fs);
    424    draw->suspend_flushing = FALSE;
    425 
    426    return TRUE;
    427 }
    428 
    429 
    430 
    431 static inline struct aapoint_stage *
    432 aapoint_stage( struct draw_stage *stage )
    433 {
    434    return (struct aapoint_stage *) stage;
    435 }
    436 
    437 
    438 
    439 
    440 /**
    441  * Draw an AA point by drawing a quad.
    442  */
    443 static void
    444 aapoint_point(struct draw_stage *stage, struct prim_header *header)
    445 {
    446    const struct aapoint_stage *aapoint = aapoint_stage(stage);
    447    struct prim_header tri;
    448    struct vertex_header *v[4];
    449    const uint tex_slot = aapoint->tex_slot;
    450    const uint pos_slot = aapoint->pos_slot;
    451    float radius, *pos, *tex;
    452    uint i;
    453    float k;
    454 
    455    if (aapoint->psize_slot >= 0) {
    456       radius = 0.5f * header->v[0]->data[aapoint->psize_slot][0];
    457    }
    458    else {
    459       radius = aapoint->radius;
    460    }
    461 
    462    /*
    463     * Note: the texcoords (generic attrib, really) we use are special:
    464     * The S and T components simply vary from -1 to +1.
    465     * The R component is k, below.
    466     * The Q component is 1.0 and will used as a handy constant in the
    467     * fragment shader.
    468     */
    469 
    470    /*
    471     * k is the threshold distance from the point's center at which
    472     * we begin alpha attenuation (the coverage value).
    473     * Operating within a unit circle, we'll compute the fragment's
    474     * distance 'd' from the center point using the texcoords.
    475     * IF d > 1.0 THEN
    476     *    KILL fragment
    477     * ELSE IF d > k THEN
    478     *    compute coverage in [0,1] proportional to d in [k, 1].
    479     * ELSE
    480     *    coverage = 1.0;  // full coverage
    481     * ENDIF
    482     *
    483     * Note: the ELSEIF and ELSE clauses are actually implemented with CMP to
    484     * avoid using IF/ELSE/ENDIF TGSI opcodes.
    485     */
    486 
    487 #if !NORMALIZE
    488    k = 1.0f / radius;
    489    k = 1.0f - 2.0f * k + k * k;
    490 #else
    491    k = 1.0f - 1.0f / radius;
    492 #endif
    493 
    494    /* allocate/dup new verts */
    495    for (i = 0; i < 4; i++) {
    496       v[i] = dup_vert(stage, header->v[0], i);
    497    }
    498 
    499    /* new verts */
    500    pos = v[0]->data[pos_slot];
    501    pos[0] -= radius;
    502    pos[1] -= radius;
    503 
    504    pos = v[1]->data[pos_slot];
    505    pos[0] += radius;
    506    pos[1] -= radius;
    507 
    508    pos = v[2]->data[pos_slot];
    509    pos[0] += radius;
    510    pos[1] += radius;
    511 
    512    pos = v[3]->data[pos_slot];
    513    pos[0] -= radius;
    514    pos[1] += radius;
    515 
    516    /* new texcoords */
    517    tex = v[0]->data[tex_slot];
    518    ASSIGN_4V(tex, -1, -1, k, 1);
    519 
    520    tex = v[1]->data[tex_slot];
    521    ASSIGN_4V(tex,  1, -1, k, 1);
    522 
    523    tex = v[2]->data[tex_slot];
    524    ASSIGN_4V(tex,  1,  1, k, 1);
    525 
    526    tex = v[3]->data[tex_slot];
    527    ASSIGN_4V(tex, -1,  1, k, 1);
    528 
    529    /* emit 2 tris for the quad strip */
    530    tri.v[0] = v[0];
    531    tri.v[1] = v[1];
    532    tri.v[2] = v[2];
    533    stage->next->tri( stage->next, &tri );
    534 
    535    tri.v[0] = v[0];
    536    tri.v[1] = v[2];
    537    tri.v[2] = v[3];
    538    stage->next->tri( stage->next, &tri );
    539 }
    540 
    541 
    542 static void
    543 aapoint_first_point(struct draw_stage *stage, struct prim_header *header)
    544 {
    545    auto struct aapoint_stage *aapoint = aapoint_stage(stage);
    546    struct draw_context *draw = stage->draw;
    547    struct pipe_context *pipe = draw->pipe;
    548    const struct pipe_rasterizer_state *rast = draw->rasterizer;
    549    void *r;
    550 
    551    assert(draw->rasterizer->point_smooth);
    552 
    553    if (draw->rasterizer->point_size <= 2.0)
    554       aapoint->radius = 1.0;
    555    else
    556       aapoint->radius = 0.5f * draw->rasterizer->point_size;
    557 
    558    /*
    559     * Bind (generate) our fragprog.
    560     */
    561    bind_aapoint_fragment_shader(aapoint);
    562 
    563    draw_aapoint_prepare_outputs(draw, draw->pipeline.aapoint);
    564 
    565    draw->suspend_flushing = TRUE;
    566 
    567    /* Disable triangle culling, stippling, unfilled mode etc. */
    568    r = draw_get_rasterizer_no_cull(draw, rast->scissor, rast->flatshade);
    569    pipe->bind_rasterizer_state(pipe, r);
    570 
    571    draw->suspend_flushing = FALSE;
    572 
    573    /* now really draw first point */
    574    stage->point = aapoint_point;
    575    stage->point(stage, header);
    576 }
    577 
    578 
    579 static void
    580 aapoint_flush(struct draw_stage *stage, unsigned flags)
    581 {
    582    struct draw_context *draw = stage->draw;
    583    struct aapoint_stage *aapoint = aapoint_stage(stage);
    584    struct pipe_context *pipe = draw->pipe;
    585 
    586    stage->point = aapoint_first_point;
    587    stage->next->flush( stage->next, flags );
    588 
    589    /* restore original frag shader */
    590    draw->suspend_flushing = TRUE;
    591    aapoint->driver_bind_fs_state(pipe, aapoint->fs ? aapoint->fs->driver_fs : NULL);
    592 
    593    /* restore original rasterizer state */
    594    if (draw->rast_handle) {
    595       pipe->bind_rasterizer_state(pipe, draw->rast_handle);
    596    }
    597 
    598    draw->suspend_flushing = FALSE;
    599 
    600    draw_remove_extra_vertex_attribs(draw);
    601 }
    602 
    603 
    604 static void
    605 aapoint_reset_stipple_counter(struct draw_stage *stage)
    606 {
    607    stage->next->reset_stipple_counter( stage->next );
    608 }
    609 
    610 
    611 static void
    612 aapoint_destroy(struct draw_stage *stage)
    613 {
    614    struct aapoint_stage* aapoint = aapoint_stage(stage);
    615    struct pipe_context *pipe = stage->draw->pipe;
    616 
    617    draw_free_temp_verts( stage );
    618 
    619    /* restore the old entry points */
    620    pipe->create_fs_state = aapoint->driver_create_fs_state;
    621    pipe->bind_fs_state = aapoint->driver_bind_fs_state;
    622    pipe->delete_fs_state = aapoint->driver_delete_fs_state;
    623 
    624    FREE( stage );
    625 }
    626 
    627 void
    628 draw_aapoint_prepare_outputs(struct draw_context *draw,
    629                              struct draw_stage *stage)
    630 {
    631    struct aapoint_stage *aapoint = aapoint_stage(stage);
    632    const struct pipe_rasterizer_state *rast = draw->rasterizer;
    633 
    634    /* update vertex attrib info */
    635    aapoint->pos_slot = draw_current_shader_position_output(draw);
    636 
    637    if (!rast->point_smooth)
    638       return;
    639 
    640    /* allocate the extra post-transformed vertex attribute */
    641    aapoint->tex_slot = draw_alloc_extra_vertex_attrib(draw,
    642                                                       TGSI_SEMANTIC_GENERIC,
    643                                                       aapoint->fs->generic_attrib);
    644    assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */
    645 
    646    /* find psize slot in post-transform vertex */
    647    aapoint->psize_slot = -1;
    648    if (draw->rasterizer->point_size_per_vertex) {
    649       const struct tgsi_shader_info *info = draw_get_shader_info(draw);
    650       uint i;
    651       /* find PSIZ vertex output */
    652       for (i = 0; i < info->num_outputs; i++) {
    653          if (info->output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) {
    654             aapoint->psize_slot = i;
    655             break;
    656          }
    657       }
    658    }
    659 }
    660 
    661 static struct aapoint_stage *
    662 draw_aapoint_stage(struct draw_context *draw)
    663 {
    664    struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage);
    665    if (!aapoint)
    666       goto fail;
    667 
    668    aapoint->stage.draw = draw;
    669    aapoint->stage.name = "aapoint";
    670    aapoint->stage.next = NULL;
    671    aapoint->stage.point = aapoint_first_point;
    672    aapoint->stage.line = draw_pipe_passthrough_line;
    673    aapoint->stage.tri = draw_pipe_passthrough_tri;
    674    aapoint->stage.flush = aapoint_flush;
    675    aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter;
    676    aapoint->stage.destroy = aapoint_destroy;
    677 
    678    if (!draw_alloc_temp_verts( &aapoint->stage, 4 ))
    679       goto fail;
    680 
    681    return aapoint;
    682 
    683  fail:
    684    if (aapoint)
    685       aapoint->stage.destroy(&aapoint->stage);
    686 
    687    return NULL;
    688 
    689 }
    690 
    691 
    692 static struct aapoint_stage *
    693 aapoint_stage_from_pipe(struct pipe_context *pipe)
    694 {
    695    struct draw_context *draw = (struct draw_context *) pipe->draw;
    696    return aapoint_stage(draw->pipeline.aapoint);
    697 }
    698 
    699 
    700 /**
    701  * This function overrides the driver's create_fs_state() function and
    702  * will typically be called by the state tracker.
    703  */
    704 static void *
    705 aapoint_create_fs_state(struct pipe_context *pipe,
    706                        const struct pipe_shader_state *fs)
    707 {
    708    struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
    709    struct aapoint_fragment_shader *aafs = CALLOC_STRUCT(aapoint_fragment_shader);
    710    if (!aafs)
    711       return NULL;
    712 
    713    aafs->state.tokens = tgsi_dup_tokens(fs->tokens);
    714 
    715    /* pass-through */
    716    aafs->driver_fs = aapoint->driver_create_fs_state(pipe, fs);
    717 
    718    return aafs;
    719 }
    720 
    721 
    722 static void
    723 aapoint_bind_fs_state(struct pipe_context *pipe, void *fs)
    724 {
    725    struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
    726    struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
    727    /* save current */
    728    aapoint->fs = aafs;
    729    /* pass-through */
    730    aapoint->driver_bind_fs_state(pipe,
    731                                  (aafs ? aafs->driver_fs : NULL));
    732 }
    733 
    734 
    735 static void
    736 aapoint_delete_fs_state(struct pipe_context *pipe, void *fs)
    737 {
    738    struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
    739    struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
    740 
    741    /* pass-through */
    742    aapoint->driver_delete_fs_state(pipe, aafs->driver_fs);
    743 
    744    if (aafs->aapoint_fs)
    745       aapoint->driver_delete_fs_state(pipe, aafs->aapoint_fs);
    746 
    747    FREE((void*)aafs->state.tokens);
    748 
    749    FREE(aafs);
    750 }
    751 
    752 
    753 /**
    754  * Called by drivers that want to install this AA point prim stage
    755  * into the draw module's pipeline.  This will not be used if the
    756  * hardware has native support for AA points.
    757  */
    758 boolean
    759 draw_install_aapoint_stage(struct draw_context *draw,
    760                            struct pipe_context *pipe)
    761 {
    762    struct aapoint_stage *aapoint;
    763 
    764    pipe->draw = (void *) draw;
    765 
    766    /*
    767     * Create / install AA point drawing / prim stage
    768     */
    769    aapoint = draw_aapoint_stage( draw );
    770    if (!aapoint)
    771       return FALSE;
    772 
    773    /* save original driver functions */
    774    aapoint->driver_create_fs_state = pipe->create_fs_state;
    775    aapoint->driver_bind_fs_state = pipe->bind_fs_state;
    776    aapoint->driver_delete_fs_state = pipe->delete_fs_state;
    777 
    778    /* override the driver's functions */
    779    pipe->create_fs_state = aapoint_create_fs_state;
    780    pipe->bind_fs_state = aapoint_bind_fs_state;
    781    pipe->delete_fs_state = aapoint_delete_fs_state;
    782 
    783    draw->pipeline.aapoint = &aapoint->stage;
    784 
    785    return TRUE;
    786 }
    787