Home | History | Annotate | Download | only in r300
      1 /*
      2  * Copyright 2008 Corbin Simpson <MostAwesomeDude (at) gmail.com>
 * Copyright 2009 Marek Olšák <maraeo (at) gmail.com>
      4  *
      5  * Permission is hereby granted, free of charge, to any person obtaining a
      6  * copy of this software and associated documentation files (the "Software"),
      7  * to deal in the Software without restriction, including without limitation
      8  * on the rights to use, copy, modify, merge, publish, distribute, sub
      9  * license, and/or sell copies of the Software, and to permit persons to whom
     10  * the Software is furnished to do so, subject to the following conditions:
     11  *
     12  * The above copyright notice and this permission notice (including the next
     13  * paragraph) shall be included in all copies or substantial portions of the
     14  * Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     19  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     20  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     21  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     22  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
     23 
     24 #include "draw/draw_context.h"
     25 
     26 #include "util/u_framebuffer.h"
     27 #include "util/u_half.h"
     28 #include "util/u_math.h"
     29 #include "util/u_mm.h"
     30 #include "util/u_memory.h"
     31 #include "util/u_pack_color.h"
     32 #include "util/u_transfer.h"
     33 
     34 #include "tgsi/tgsi_parse.h"
     35 
     36 #include "pipe/p_config.h"
     37 
     38 #include "r300_cb.h"
     39 #include "r300_context.h"
     40 #include "r300_emit.h"
     41 #include "r300_reg.h"
     42 #include "r300_screen.h"
     43 #include "r300_screen_buffer.h"
     44 #include "r300_state_inlines.h"
     45 #include "r300_fs.h"
     46 #include "r300_texture.h"
     47 #include "r300_vs.h"
     48 
/* r300_state: Functions used to initialize state context by translating
 * Gallium state objects into semi-native r300 state objects. */
     51 
     52 #define UPDATE_STATE(cso, atom) \
     53     if (cso != atom.state) { \
     54         atom.state = cso;    \
     55         r300_mark_atom_dirty(r300, &(atom));   \
     56     }
     57 
     58 static boolean blend_discard_if_src_alpha_0(unsigned srcRGB, unsigned srcA,
     59                                             unsigned dstRGB, unsigned dstA)
     60 {
     61     /* If the blend equation is ADD or REVERSE_SUBTRACT,
     62      * SRC_ALPHA == 0, and the following state is set, the colorbuffer
     63      * will not be changed.
     64      * Notice that the dst factors are the src factors inverted. */
     65     return (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
     66             srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
     67             srcRGB == PIPE_BLENDFACTOR_ZERO) &&
     68            (srcA == PIPE_BLENDFACTOR_SRC_COLOR ||
     69             srcA == PIPE_BLENDFACTOR_SRC_ALPHA ||
     70             srcA == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
     71             srcA == PIPE_BLENDFACTOR_ZERO) &&
     72            (dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
     73             dstRGB == PIPE_BLENDFACTOR_ONE) &&
     74            (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
     75             dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
     76             dstA == PIPE_BLENDFACTOR_ONE);
     77 }
     78 
     79 static boolean blend_discard_if_src_alpha_1(unsigned srcRGB, unsigned srcA,
     80                                             unsigned dstRGB, unsigned dstA)
     81 {
     82     /* If the blend equation is ADD or REVERSE_SUBTRACT,
     83      * SRC_ALPHA == 1, and the following state is set, the colorbuffer
     84      * will not be changed.
     85      * Notice that the dst factors are the src factors inverted. */
     86     return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
     87             srcRGB == PIPE_BLENDFACTOR_ZERO) &&
     88            (srcA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
     89             srcA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
     90             srcA == PIPE_BLENDFACTOR_ZERO) &&
     91            (dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
     92             dstRGB == PIPE_BLENDFACTOR_ONE) &&
     93            (dstA == PIPE_BLENDFACTOR_SRC_COLOR ||
     94             dstA == PIPE_BLENDFACTOR_SRC_ALPHA ||
     95             dstA == PIPE_BLENDFACTOR_ONE);
     96 }
     97 
     98 static boolean blend_discard_if_src_color_0(unsigned srcRGB, unsigned srcA,
     99                                             unsigned dstRGB, unsigned dstA)
    100 {
    101     /* If the blend equation is ADD or REVERSE_SUBTRACT,
    102      * SRC_COLOR == (0,0,0), and the following state is set, the colorbuffer
    103      * will not be changed.
    104      * Notice that the dst factors are the src factors inverted. */
    105     return (srcRGB == PIPE_BLENDFACTOR_SRC_COLOR ||
    106             srcRGB == PIPE_BLENDFACTOR_ZERO) &&
    107            (srcA == PIPE_BLENDFACTOR_ZERO) &&
    108            (dstRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
    109             dstRGB == PIPE_BLENDFACTOR_ONE) &&
    110            (dstA == PIPE_BLENDFACTOR_ONE);
    111 }
    112 
    113 static boolean blend_discard_if_src_color_1(unsigned srcRGB, unsigned srcA,
    114                                             unsigned dstRGB, unsigned dstA)
    115 {
    116     /* If the blend equation is ADD or REVERSE_SUBTRACT,
    117      * SRC_COLOR == (1,1,1), and the following state is set, the colorbuffer
    118      * will not be changed.
    119      * Notice that the dst factors are the src factors inverted. */
    120     return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
    121             srcRGB == PIPE_BLENDFACTOR_ZERO) &&
    122            (srcA == PIPE_BLENDFACTOR_ZERO) &&
    123            (dstRGB == PIPE_BLENDFACTOR_SRC_COLOR ||
    124             dstRGB == PIPE_BLENDFACTOR_ONE) &&
    125            (dstA == PIPE_BLENDFACTOR_ONE);
    126 }
    127 
    128 static boolean blend_discard_if_src_alpha_color_0(unsigned srcRGB, unsigned srcA,
    129                                                   unsigned dstRGB, unsigned dstA)
    130 {
    131     /* If the blend equation is ADD or REVERSE_SUBTRACT,
    132      * SRC_ALPHA_COLOR == (0,0,0,0), and the following state is set,
    133      * the colorbuffer will not be changed.
    134      * Notice that the dst factors are the src factors inverted. */
    135     return (srcRGB == PIPE_BLENDFACTOR_SRC_COLOR ||
    136             srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
    137             srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
    138             srcRGB == PIPE_BLENDFACTOR_ZERO) &&
    139            (srcA == PIPE_BLENDFACTOR_SRC_COLOR ||
    140             srcA == PIPE_BLENDFACTOR_SRC_ALPHA ||
    141             srcA == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
    142             srcA == PIPE_BLENDFACTOR_ZERO) &&
    143            (dstRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
    144             dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
    145             dstRGB == PIPE_BLENDFACTOR_ONE) &&
    146            (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
    147             dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
    148             dstA == PIPE_BLENDFACTOR_ONE);
    149 }
    150 
    151 static boolean blend_discard_if_src_alpha_color_1(unsigned srcRGB, unsigned srcA,
    152                                                   unsigned dstRGB, unsigned dstA)
    153 {
    154     /* If the blend equation is ADD or REVERSE_SUBTRACT,
    155      * SRC_ALPHA_COLOR == (1,1,1,1), and the following state is set,
    156      * the colorbuffer will not be changed.
    157      * Notice that the dst factors are the src factors inverted. */
    158     return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
    159             srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
    160             srcRGB == PIPE_BLENDFACTOR_ZERO) &&
    161            (srcA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
    162             srcA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
    163             srcA == PIPE_BLENDFACTOR_ZERO) &&
    164            (dstRGB == PIPE_BLENDFACTOR_SRC_COLOR ||
    165             dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
    166             dstRGB == PIPE_BLENDFACTOR_ONE) &&
    167            (dstA == PIPE_BLENDFACTOR_SRC_COLOR ||
    168             dstA == PIPE_BLENDFACTOR_SRC_ALPHA ||
    169             dstA == PIPE_BLENDFACTOR_ONE);
    170 }
    171 
    172 /* The hardware colormask is clunky a must be swizzled depending on the format.
    173  * This was figured out by trial-and-error. */
    174 static unsigned bgra_cmask(unsigned mask)
    175 {
    176     return ((mask & PIPE_MASK_R) << 2) |
    177            ((mask & PIPE_MASK_B) >> 2) |
    178            (mask & (PIPE_MASK_G | PIPE_MASK_A));
    179 }
    180 
    181 static unsigned rgba_cmask(unsigned mask)
    182 {
    183     return mask & PIPE_MASK_RGBA;
    184 }
    185 
    186 static unsigned rrrr_cmask(unsigned mask)
    187 {
    188     return (mask & PIPE_MASK_R) |
    189            ((mask & PIPE_MASK_R) << 1) |
    190            ((mask & PIPE_MASK_R) << 2) |
    191            ((mask & PIPE_MASK_R) << 3);
    192 }
    193 
    194 static unsigned aaaa_cmask(unsigned mask)
    195 {
    196     return ((mask & PIPE_MASK_A) >> 3) |
    197            ((mask & PIPE_MASK_A) >> 2) |
    198            ((mask & PIPE_MASK_A) >> 1) |
    199            (mask & PIPE_MASK_A);
    200 }
    201 
    202 static unsigned grrg_cmask(unsigned mask)
    203 {
    204     return ((mask & PIPE_MASK_R) << 1) |
    205            ((mask & PIPE_MASK_R) << 2) |
    206            ((mask & PIPE_MASK_G) >> 1) |
    207            ((mask & PIPE_MASK_G) << 2);
    208 }
    209 
    210 static unsigned arra_cmask(unsigned mask)
    211 {
    212     return ((mask & PIPE_MASK_R) << 1) |
    213            ((mask & PIPE_MASK_R) << 2) |
    214            ((mask & PIPE_MASK_A) >> 3) |
    215            (mask & PIPE_MASK_A);
    216 }
    217 
    218 /* Create a new blend state based on the CSO blend state.
    219  *
    220  * This encompasses alpha blending, logic/raster ops, and blend dithering. */
    221 static void* r300_create_blend_state(struct pipe_context* pipe,
    222                                      const struct pipe_blend_state* state)
    223 {
    224     struct r300_screen* r300screen = r300_screen(pipe->screen);
    225     struct r300_blend_state* blend = CALLOC_STRUCT(r300_blend_state);
    226     uint32_t blend_control = 0;       /* R300_RB3D_CBLEND: 0x4e04 */
    227     uint32_t blend_control_noclamp = 0;    /* R300_RB3D_CBLEND: 0x4e04 */
    228     uint32_t alpha_blend_control = 0; /* R300_RB3D_ABLEND: 0x4e08 */
    229     uint32_t alpha_blend_control_noclamp = 0; /* R300_RB3D_ABLEND: 0x4e08 */
    230     uint32_t rop = 0;                 /* R300_RB3D_ROPCNTL: 0x4e18 */
    231     uint32_t dither = 0;              /* R300_RB3D_DITHER_CTL: 0x4e50 */
    232     int i;
    233     CB_LOCALS;
    234 
    235     blend->state = *state;
    236 
    237     if (state->rt[0].blend_enable)
    238     {
    239         unsigned eqRGB = state->rt[0].rgb_func;
    240         unsigned srcRGB = state->rt[0].rgb_src_factor;
    241         unsigned dstRGB = state->rt[0].rgb_dst_factor;
    242 
    243         unsigned eqA = state->rt[0].alpha_func;
    244         unsigned srcA = state->rt[0].alpha_src_factor;
    245         unsigned dstA = state->rt[0].alpha_dst_factor;
    246 
    247         /* despite the name, ALPHA_BLEND_ENABLE has nothing to do with alpha,
    248          * this is just the crappy D3D naming */
    249         blend_control = blend_control_noclamp =
    250             R300_ALPHA_BLEND_ENABLE |
    251             ( r300_translate_blend_factor(srcRGB) << R300_SRC_BLEND_SHIFT) |
    252             ( r300_translate_blend_factor(dstRGB) << R300_DST_BLEND_SHIFT);
    253         blend_control |=
    254             r300_translate_blend_function(eqRGB, TRUE);
    255         blend_control_noclamp |=
    256             r300_translate_blend_function(eqRGB, FALSE);
    257 
    258         /* Optimization: some operations do not require the destination color.
    259          *
    260          * When SRC_ALPHA_SATURATE is used, colorbuffer reads must be enabled,
    261          * otherwise blending gives incorrect results. It seems to be
    262          * a hardware bug. */
    263         if (eqRGB == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MIN ||
    264             eqRGB == PIPE_BLEND_MAX || eqA == PIPE_BLEND_MAX ||
    265             dstRGB != PIPE_BLENDFACTOR_ZERO ||
    266             dstA != PIPE_BLENDFACTOR_ZERO ||
    267             srcRGB == PIPE_BLENDFACTOR_DST_COLOR ||
    268             srcRGB == PIPE_BLENDFACTOR_DST_ALPHA ||
    269             srcRGB == PIPE_BLENDFACTOR_INV_DST_COLOR ||
    270             srcRGB == PIPE_BLENDFACTOR_INV_DST_ALPHA ||
    271             srcA == PIPE_BLENDFACTOR_DST_COLOR ||
    272             srcA == PIPE_BLENDFACTOR_DST_ALPHA ||
    273             srcA == PIPE_BLENDFACTOR_INV_DST_COLOR ||
    274             srcA == PIPE_BLENDFACTOR_INV_DST_ALPHA ||
    275             srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) {
    276             /* Enable reading from the colorbuffer. */
    277             blend_control |= R300_READ_ENABLE;
    278             blend_control_noclamp |= R300_READ_ENABLE;
    279 
    280             if (r300screen->caps.is_r500) {
    281                 /* Optimization: Depending on incoming pixels, we can
    282                  * conditionally disable the reading in hardware... */
    283                 if (eqRGB != PIPE_BLEND_MIN && eqA != PIPE_BLEND_MIN &&
    284                     eqRGB != PIPE_BLEND_MAX && eqA != PIPE_BLEND_MAX) {
    285                     /* Disable reading if SRC_ALPHA == 0. */
    286                     if ((dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
    287                          dstRGB == PIPE_BLENDFACTOR_ZERO) &&
    288                         (dstA == PIPE_BLENDFACTOR_SRC_COLOR ||
    289                          dstA == PIPE_BLENDFACTOR_SRC_ALPHA ||
    290                          dstA == PIPE_BLENDFACTOR_ZERO) &&
    291                         (srcRGB != PIPE_BLENDFACTOR_DST_COLOR &&
    292                          srcRGB != PIPE_BLENDFACTOR_DST_ALPHA &&
    293                          srcRGB != PIPE_BLENDFACTOR_INV_DST_COLOR &&
    294                          srcRGB != PIPE_BLENDFACTOR_INV_DST_ALPHA)) {
    295                          blend_control |= R500_SRC_ALPHA_0_NO_READ;
    296                     }
    297 
    298                     /* Disable reading if SRC_ALPHA == 1. */
    299                     if ((dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
    300                          dstRGB == PIPE_BLENDFACTOR_ZERO) &&
    301                         (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
    302                          dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
    303                          dstA == PIPE_BLENDFACTOR_ZERO) &&
    304                         (srcRGB != PIPE_BLENDFACTOR_DST_COLOR &&
    305                          srcRGB != PIPE_BLENDFACTOR_DST_ALPHA &&
    306                          srcRGB != PIPE_BLENDFACTOR_INV_DST_COLOR &&
    307                          srcRGB != PIPE_BLENDFACTOR_INV_DST_ALPHA)) {
    308                          blend_control |= R500_SRC_ALPHA_1_NO_READ;
    309                     }
    310                 }
    311             }
    312         }
    313 
    314         /* Optimization: discard pixels which don't change the colorbuffer.
    315          *
    316          * The code below is non-trivial and some math is involved.
    317          *
    318          * Discarding pixels must be disabled when FP16 AA is enabled.
    319          * This is a hardware bug. Also, this implementation wouldn't work
    320          * with FP blending enabled and equation clamping disabled.
    321          *
    322          * Equations other than ADD are rarely used and therefore won't be
    323          * optimized. */
    324         if ((eqRGB == PIPE_BLEND_ADD || eqRGB == PIPE_BLEND_REVERSE_SUBTRACT) &&
    325             (eqA == PIPE_BLEND_ADD || eqA == PIPE_BLEND_REVERSE_SUBTRACT)) {
    326             /* ADD: X+Y
    327              * REVERSE_SUBTRACT: Y-X
    328              *
    329              * The idea is:
    330              * If X = src*srcFactor = 0 and Y = dst*dstFactor = 1,
    331              * then CB will not be changed.
    332              *
    333              * Given the srcFactor and dstFactor variables, we can derive
    334              * what src and dst should be equal to and discard appropriate
    335              * pixels.
    336              */
    337             if (blend_discard_if_src_alpha_0(srcRGB, srcA, dstRGB, dstA)) {
    338                 blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0;
    339             } else if (blend_discard_if_src_alpha_1(srcRGB, srcA,
    340                                                     dstRGB, dstA)) {
    341                 blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1;
    342             } else if (blend_discard_if_src_color_0(srcRGB, srcA,
    343                                                     dstRGB, dstA)) {
    344                 blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_0;
    345             } else if (blend_discard_if_src_color_1(srcRGB, srcA,
    346                                                     dstRGB, dstA)) {
    347                 blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_1;
    348             } else if (blend_discard_if_src_alpha_color_0(srcRGB, srcA,
    349                                                           dstRGB, dstA)) {
    350                 blend_control |=
    351                     R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_0;
    352             } else if (blend_discard_if_src_alpha_color_1(srcRGB, srcA,
    353                                                           dstRGB, dstA)) {
    354                 blend_control |=
    355                     R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1;
    356             }
    357         }
    358 
    359         /* separate alpha */
    360         if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
    361             blend_control |= R300_SEPARATE_ALPHA_ENABLE;
    362             blend_control_noclamp |= R300_SEPARATE_ALPHA_ENABLE;
    363             alpha_blend_control = alpha_blend_control_noclamp =
    364                 (r300_translate_blend_factor(srcA) << R300_SRC_BLEND_SHIFT) |
    365                 (r300_translate_blend_factor(dstA) << R300_DST_BLEND_SHIFT);
    366             alpha_blend_control |=
    367                 r300_translate_blend_function(eqA, TRUE);
    368             alpha_blend_control_noclamp |=
    369                 r300_translate_blend_function(eqA, FALSE);
    370         }
    371     }
    372 
    373     /* PIPE_LOGICOP_* don't need to be translated, fortunately. */
    374     if (state->logicop_enable) {
    375         rop = R300_RB3D_ROPCNTL_ROP_ENABLE |
    376                 (state->logicop_func) << R300_RB3D_ROPCNTL_ROP_SHIFT;
    377     }
    378 
    379     /* Neither fglrx nor classic r300 ever set this, regardless of dithering
    380      * state. Since it's an optional implementation detail, we can leave it
    381      * out and never dither.
    382      *
    383      * This could be revisited if we ever get quality or conformance hints.
    384      *
    385     if (state->dither) {
    386         dither = R300_RB3D_DITHER_CTL_DITHER_MODE_LUT |
    387                         R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_LUT;
    388     }
    389     */
    390 
    391     /* Build a command buffer. */
    392     {
    393         unsigned (*func[COLORMASK_NUM_SWIZZLES])(unsigned) = {
    394             bgra_cmask,
    395             rgba_cmask,
    396             rrrr_cmask,
    397             aaaa_cmask,
    398             grrg_cmask,
    399             arra_cmask
    400         };
    401 
    402         for (i = 0; i < COLORMASK_NUM_SWIZZLES; i++) {
    403             BEGIN_CB(blend->cb_clamp[i], 8);
    404             OUT_CB_REG(R300_RB3D_ROPCNTL, rop);
    405             OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3);
    406             OUT_CB(blend_control);
    407             OUT_CB(alpha_blend_control);
    408             OUT_CB(func[i](state->rt[0].colormask));
    409             OUT_CB_REG(R300_RB3D_DITHER_CTL, dither);
    410             END_CB;
    411         }
    412     }
    413 
    414     /* Build a command buffer. */
    415     BEGIN_CB(blend->cb_noclamp, 8);
    416     OUT_CB_REG(R300_RB3D_ROPCNTL, rop);
    417     OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3);
    418     OUT_CB(blend_control_noclamp);
    419     OUT_CB(alpha_blend_control_noclamp);
    420     OUT_CB(rgba_cmask(state->rt[0].colormask));
    421     OUT_CB_REG(R300_RB3D_DITHER_CTL, dither);
    422     END_CB;
    423 
    424     /* The same as above, but with no colorbuffer reads and writes. */
    425     BEGIN_CB(blend->cb_no_readwrite, 8);
    426     OUT_CB_REG(R300_RB3D_ROPCNTL, rop);
    427     OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3);
    428     OUT_CB(0);
    429     OUT_CB(0);
    430     OUT_CB(0);
    431     OUT_CB_REG(R300_RB3D_DITHER_CTL, dither);
    432     END_CB;
    433 
    434     return (void*)blend;
    435 }
    436 
    437 /* Bind blend state. */
    438 static void r300_bind_blend_state(struct pipe_context* pipe,
    439                                   void* state)
    440 {
    441     struct r300_context* r300 = r300_context(pipe);
    442 
    443     UPDATE_STATE(state, r300->blend_state);
    444 }
    445 
    446 /* Free blend state. */
    447 static void r300_delete_blend_state(struct pipe_context* pipe,
    448                                     void* state)
    449 {
    450     FREE(state);
    451 }
    452 
    453 /* Convert float to 10bit integer */
    454 static unsigned float_to_fixed10(float f)
    455 {
    456     return CLAMP((unsigned)(f * 1023.9f), 0, 1023);
    457 }
    458 
    459 /* Set blend color.
    460  * Setup both R300 and R500 registers, figure out later which one to write. */
    461 static void r300_set_blend_color(struct pipe_context* pipe,
    462                                  const struct pipe_blend_color* color)
    463 {
    464     struct r300_context* r300 = r300_context(pipe);
    465     struct pipe_framebuffer_state *fb = r300->fb_state.state;
    466     struct r300_blend_color_state *state =
    467         (struct r300_blend_color_state*)r300->blend_color_state.state;
    468     struct pipe_blend_color c;
    469     enum pipe_format format = fb->nr_cbufs ? fb->cbufs[0]->format : 0;
    470     float tmp;
    471     CB_LOCALS;
    472 
    473     state->state = *color; /* Save it, so that we can reuse it in set_fb_state */
    474     c = *color;
    475 
    476     /* The blend color is dependent on the colorbuffer format. */
    477     if (fb->nr_cbufs) {
    478         switch (format) {
    479         case PIPE_FORMAT_R8_UNORM:
    480         case PIPE_FORMAT_L8_UNORM:
    481         case PIPE_FORMAT_I8_UNORM:
    482             c.color[1] = c.color[0];
    483             break;
    484 
    485         case PIPE_FORMAT_A8_UNORM:
    486             c.color[1] = c.color[3];
    487             break;
    488 
    489         case PIPE_FORMAT_R8G8_UNORM:
    490             c.color[2] = c.color[1];
    491             break;
    492 
    493         case PIPE_FORMAT_L8A8_UNORM:
    494             c.color[2] = c.color[3];
    495             break;
    496 
    497         case PIPE_FORMAT_R8G8B8A8_UNORM:
    498         case PIPE_FORMAT_R8G8B8X8_UNORM:
    499             tmp = c.color[0];
    500             c.color[0] = c.color[2];
    501             c.color[2] = tmp;
    502             break;
    503 
    504         default:;
    505         }
    506     }
    507 
    508     if (r300->screen->caps.is_r500) {
    509         BEGIN_CB(state->cb, 3);
    510         OUT_CB_REG_SEQ(R500_RB3D_CONSTANT_COLOR_AR, 2);
    511 
    512         switch (format) {
    513         case PIPE_FORMAT_R16G16B16A16_FLOAT:
    514             OUT_CB(util_float_to_half(c.color[2]) |
    515                    (util_float_to_half(c.color[3]) << 16));
    516             OUT_CB(util_float_to_half(c.color[0]) |
    517                    (util_float_to_half(c.color[1]) << 16));
    518             break;
    519 
    520         default:
    521             OUT_CB(float_to_fixed10(c.color[0]) |
    522                    (float_to_fixed10(c.color[3]) << 16));
    523             OUT_CB(float_to_fixed10(c.color[2]) |
    524                    (float_to_fixed10(c.color[1]) << 16));
    525         }
    526 
    527         END_CB;
    528     } else {
    529         union util_color uc;
    530         util_pack_color(c.color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
    531 
    532         BEGIN_CB(state->cb, 2);
    533         OUT_CB_REG(R300_RB3D_BLEND_COLOR, uc.ui);
    534         END_CB;
    535     }
    536 
    537     r300_mark_atom_dirty(r300, &r300->blend_color_state);
    538 }
    539 
    540 static void r300_set_clip_state(struct pipe_context* pipe,
    541                                 const struct pipe_clip_state* state)
    542 {
    543     struct r300_context* r300 = r300_context(pipe);
    544     struct r300_clip_state *clip =
    545             (struct r300_clip_state*)r300->clip_state.state;
    546     CB_LOCALS;
    547 
    548     if (r300->screen->caps.has_tcl) {
    549         BEGIN_CB(clip->cb, r300->clip_state.size);
    550         OUT_CB_REG(R300_VAP_PVS_VECTOR_INDX_REG,
    551                    (r300->screen->caps.is_r500 ?
    552                     R500_PVS_UCP_START : R300_PVS_UCP_START));
    553         OUT_CB_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, 6 * 4);
    554         OUT_CB_TABLE(state->ucp, 6 * 4);
    555         END_CB;
    556 
    557         r300_mark_atom_dirty(r300, &r300->clip_state);
    558     } else {
    559         draw_set_clip_state(r300->draw, state);
    560     }
    561 }
    562 
    563 static void
    564 r300_set_sample_mask(struct pipe_context *pipe,
    565                      unsigned sample_mask)
    566 {
    567 }
    568 
    569 
    570 /* Create a new depth, stencil, and alpha state based on the CSO dsa state.
    571  *
    572  * This contains the depth buffer, stencil buffer, alpha test, and such.
    573  * On the Radeon, depth and stencil buffer setup are intertwined, which is
    574  * the reason for some of the strange-looking assignments across registers. */
    575 static void*
    576         r300_create_dsa_state(struct pipe_context* pipe,
    577                               const struct pipe_depth_stencil_alpha_state* state)
    578 {
    579     struct r300_capabilities *caps = &r300_screen(pipe->screen)->caps;
    580     struct r300_dsa_state* dsa = CALLOC_STRUCT(r300_dsa_state);
    581     CB_LOCALS;
    582 
    583     dsa->dsa = *state;
    584 
    585     /* Depth test setup. - separate write mask depth for decomp flush */
    586     if (state->depth.writemask) {
    587         dsa->z_buffer_control |= R300_Z_WRITE_ENABLE;
    588     }
    589 
    590     if (state->depth.enabled) {
    591         dsa->z_buffer_control |= R300_Z_ENABLE;
    592 
    593         dsa->z_stencil_control |=
    594             (r300_translate_depth_stencil_function(state->depth.func) <<
    595                 R300_Z_FUNC_SHIFT);
    596     } else {
    597         /* We must enable depth test, otherwise occlusion queries won't work. */
    598         dsa->z_buffer_control |= R300_Z_ENABLE;
    599         dsa->z_stencil_control |= R300_ZS_ALWAYS;
    600     }
    601 
    602     /* Stencil buffer setup. */
    603     if (state->stencil[0].enabled) {
    604         dsa->z_buffer_control |= R300_STENCIL_ENABLE;
    605         dsa->z_stencil_control |=
    606             (r300_translate_depth_stencil_function(state->stencil[0].func) <<
    607                 R300_S_FRONT_FUNC_SHIFT) |
    608             (r300_translate_stencil_op(state->stencil[0].fail_op) <<
    609                 R300_S_FRONT_SFAIL_OP_SHIFT) |
    610             (r300_translate_stencil_op(state->stencil[0].zpass_op) <<
    611                 R300_S_FRONT_ZPASS_OP_SHIFT) |
    612             (r300_translate_stencil_op(state->stencil[0].zfail_op) <<
    613                 R300_S_FRONT_ZFAIL_OP_SHIFT);
    614 
    615         dsa->stencil_ref_mask =
    616                 (state->stencil[0].valuemask << R300_STENCILMASK_SHIFT) |
    617                 (state->stencil[0].writemask << R300_STENCILWRITEMASK_SHIFT);
    618 
    619         if (state->stencil[1].enabled) {
    620             dsa->two_sided = TRUE;
    621 
    622             dsa->z_buffer_control |= R300_STENCIL_FRONT_BACK;
    623             dsa->z_stencil_control |=
    624             (r300_translate_depth_stencil_function(state->stencil[1].func) <<
    625                 R300_S_BACK_FUNC_SHIFT) |
    626             (r300_translate_stencil_op(state->stencil[1].fail_op) <<
    627                 R300_S_BACK_SFAIL_OP_SHIFT) |
    628             (r300_translate_stencil_op(state->stencil[1].zpass_op) <<
    629                 R300_S_BACK_ZPASS_OP_SHIFT) |
    630             (r300_translate_stencil_op(state->stencil[1].zfail_op) <<
    631                 R300_S_BACK_ZFAIL_OP_SHIFT);
    632 
    633             dsa->stencil_ref_bf =
    634                 (state->stencil[1].valuemask << R300_STENCILMASK_SHIFT) |
    635                 (state->stencil[1].writemask << R300_STENCILWRITEMASK_SHIFT);
    636 
    637             if (caps->is_r500) {
    638                 dsa->z_buffer_control |= R500_STENCIL_REFMASK_FRONT_BACK;
    639             } else {
    640                 dsa->two_sided_stencil_ref =
    641                   (state->stencil[0].valuemask != state->stencil[1].valuemask ||
    642                    state->stencil[0].writemask != state->stencil[1].writemask);
    643             }
    644         }
    645     }
    646 
    647     /* Alpha test setup. */
    648     if (state->alpha.enabled) {
    649         dsa->alpha_function =
    650             r300_translate_alpha_function(state->alpha.func) |
    651             R300_FG_ALPHA_FUNC_ENABLE;
    652 
    653         dsa->alpha_function |= float_to_ubyte(state->alpha.ref_value);
    654         dsa->alpha_value = util_float_to_half(state->alpha.ref_value);
    655 
    656         if (caps->is_r500) {
    657             dsa->alpha_function_fp16 = dsa->alpha_function |
    658                                        R500_FG_ALPHA_FUNC_FP16_ENABLE;
    659             dsa->alpha_function |= R500_FG_ALPHA_FUNC_8BIT;
    660         }
    661     }
    662 
    663     BEGIN_CB(&dsa->cb_begin, 10);
    664     OUT_CB_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function);
    665     OUT_CB_REG_SEQ(R300_ZB_CNTL, 3);
    666     OUT_CB(dsa->z_buffer_control);
    667     OUT_CB(dsa->z_stencil_control);
    668     OUT_CB(dsa->stencil_ref_mask);
    669     OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf);
    670     OUT_CB_REG(R500_FG_ALPHA_VALUE, dsa->alpha_value);
    671     END_CB;
    672 
    673     BEGIN_CB(&dsa->cb_begin_fp16, 10);
    674     OUT_CB_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function_fp16);
    675     OUT_CB_REG_SEQ(R300_ZB_CNTL, 3);
    676     OUT_CB(dsa->z_buffer_control);
    677     OUT_CB(dsa->z_stencil_control);
    678     OUT_CB(dsa->stencil_ref_mask);
    679     OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf);
    680     OUT_CB_REG(R500_FG_ALPHA_VALUE, dsa->alpha_value);
    681     END_CB;
    682 
    683     /* We must enable depth test, otherwise occlusion queries won't work.
    684      * We setup a dummy zbuffer to silent the CS checker, see emit_fb_state. */
    685     BEGIN_CB(dsa->cb_zb_no_readwrite, 10);
    686     OUT_CB_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function);
    687     OUT_CB_REG_SEQ(R300_ZB_CNTL, 3);
    688     OUT_CB(R300_Z_ENABLE);
    689     OUT_CB(R300_ZS_ALWAYS);
    690     OUT_CB(0);
    691     OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, 0);
    692     OUT_CB_REG(R500_FG_ALPHA_VALUE, dsa->alpha_value);
    693     END_CB;
    694 
    695     BEGIN_CB(dsa->cb_fp16_zb_no_readwrite, 10);
    696     OUT_CB_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function_fp16);
    697     OUT_CB_REG_SEQ(R300_ZB_CNTL, 3);
    698     OUT_CB(R300_Z_ENABLE);
    699     OUT_CB(R300_ZS_ALWAYS);
    700     OUT_CB(0);
    701     OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, 0);
    702     OUT_CB_REG(R500_FG_ALPHA_VALUE, dsa->alpha_value);
    703     END_CB;
    704 
    705     return (void*)dsa;
    706 }
    707 
    708 static void r300_dsa_inject_stencilref(struct r300_context *r300)
    709 {
    710     struct r300_dsa_state *dsa =
    711             (struct r300_dsa_state*)r300->dsa_state.state;
    712 
    713     if (!dsa)
    714         return;
    715 
    716     dsa->stencil_ref_mask =
    717         (dsa->stencil_ref_mask & ~R300_STENCILREF_MASK) |
    718         r300->stencil_ref.ref_value[0];
    719     dsa->stencil_ref_bf =
    720         (dsa->stencil_ref_bf & ~R300_STENCILREF_MASK) |
    721         r300->stencil_ref.ref_value[1];
    722 }
    723 
    724 /* Bind DSA state. */
    725 static void r300_bind_dsa_state(struct pipe_context* pipe,
    726                                 void* state)
    727 {
    728     struct r300_context* r300 = r300_context(pipe);
    729 
    730     if (!state) {
    731         return;
    732     }
    733 
    734     UPDATE_STATE(state, r300->dsa_state);
    735 
    736     r300_mark_atom_dirty(r300, &r300->hyperz_state); /* Will be updated before the emission. */
    737     r300_dsa_inject_stencilref(r300);
    738 }
    739 
    /* Free DSA state: hands the state object to FREE(). The context argument
     * is not used. */
    static void r300_delete_dsa_state(struct pipe_context* pipe,
                                      void* state)
    {
        (void)pipe;

        FREE(state);
    }
    746 
    747 static void r300_set_stencil_ref(struct pipe_context* pipe,
    748                                  const struct pipe_stencil_ref* sr)
    749 {
    750     struct r300_context* r300 = r300_context(pipe);
    751 
    752     r300->stencil_ref = *sr;
    753 
    754     r300_dsa_inject_stencilref(r300);
    755     r300_mark_atom_dirty(r300, &r300->dsa_state);
    756 }
    757 
    758 static void r300_tex_set_tiling_flags(struct r300_context *r300,
    759                                       struct r300_resource *tex,
    760                                       unsigned level)
    761 {
    762     /* Check if the macrotile flag needs to be changed.
    763      * Skip changing the flags otherwise. */
    764     if (tex->tex.macrotile[tex->surface_level] !=
    765         tex->tex.macrotile[level]) {
    766         r300->rws->buffer_set_tiling(tex->buf, r300->cs,
    767                 tex->tex.microtile, tex->tex.macrotile[level],
    768                 0, 0, 0, 0, 0,
    769                 tex->tex.stride_in_bytes[0]);
    770 
    771         tex->surface_level = level;
    772     }
    773 }
    774 
    775 /* This switcheroo is needed just because of goddamned MACRO_SWITCH. */
    776 static void r300_fb_set_tiling_flags(struct r300_context *r300,
    777                                const struct pipe_framebuffer_state *state)
    778 {
    779     unsigned i;
    780 
    781     /* Set tiling flags for new surfaces. */
    782     for (i = 0; i < state->nr_cbufs; i++) {
    783         r300_tex_set_tiling_flags(r300,
    784                                   r300_resource(state->cbufs[i]->texture),
    785                                   state->cbufs[i]->u.tex.level);
    786     }
    787     if (state->zsbuf) {
    788         r300_tex_set_tiling_flags(r300,
    789                                   r300_resource(state->zsbuf->texture),
    790                                   state->zsbuf->u.tex.level);
    791     }
    792 }
    793 
    794 static void r300_print_fb_surf_info(struct pipe_surface *surf, unsigned index,
    795                                     const char *binding)
    796 {
    797     struct pipe_resource *tex = surf->texture;
    798     struct r300_resource *rtex = r300_resource(tex);
    799 
    800     fprintf(stderr,
    801             "r300:   %s[%i] Dim: %ix%i, Firstlayer: %i, "
    802             "Lastlayer: %i, Level: %i, Format: %s\n"
    803 
    804             "r300:     TEX: Macro: %s, Micro: %s, "
    805             "Dim: %ix%ix%i, LastLevel: %i, Format: %s\n",
    806 
    807             binding, index, surf->width, surf->height,
    808             surf->u.tex.first_layer, surf->u.tex.last_layer, surf->u.tex.level,
    809             util_format_short_name(surf->format),
    810 
    811             rtex->tex.macrotile[0] ? "YES" : " NO",
    812             rtex->tex.microtile ? "YES" : " NO",
    813             tex->width0, tex->height0, tex->depth0,
    814             tex->last_level, util_format_short_name(surf->format));
    815 }
    816 
    817 void r300_mark_fb_state_dirty(struct r300_context *r300,
    818                               enum r300_fb_state_change change)
    819 {
    820     struct pipe_framebuffer_state *state = r300->fb_state.state;
    821 
    822     r300_mark_atom_dirty(r300, &r300->gpu_flush);
    823     r300_mark_atom_dirty(r300, &r300->fb_state);
    824 
    825     /* What is marked as dirty depends on the enum r300_fb_state_change. */
    826     if (change == R300_CHANGED_FB_STATE) {
    827         r300_mark_atom_dirty(r300, &r300->aa_state);
    828         r300_mark_atom_dirty(r300, &r300->dsa_state); /* for AlphaRef */
    829         r300_set_blend_color(&r300->context, r300->blend_color_state.state);
    830     }
    831 
    832     if (change == R300_CHANGED_FB_STATE ||
    833         change == R300_CHANGED_HYPERZ_FLAG) {
    834         r300_mark_atom_dirty(r300, &r300->hyperz_state);
    835     }
    836 
    837     if (change == R300_CHANGED_FB_STATE ||
    838         change == R300_CHANGED_MULTIWRITE) {
    839         r300_mark_atom_dirty(r300, &r300->fb_state_pipelined);
    840     }
    841 
    842     /* Now compute the fb_state atom size. */
    843     r300->fb_state.size = 2 + (8 * state->nr_cbufs);
    844 
    845     if (r300->cbzb_clear) {
    846         r300->fb_state.size += 10;
    847     } else if (state->zsbuf) {
    848         r300->fb_state.size += 10;
    849         if (r300->hyperz_enabled)
    850             r300->fb_state.size += 8;
    851     } else if (state->nr_cbufs) {
    852         r300->fb_state.size += 10;
    853     }
    854 
    855     /* The size of the rest of atoms stays the same. */
    856 }
    857 
    858 static void
    859 r300_set_framebuffer_state(struct pipe_context* pipe,
    860                            const struct pipe_framebuffer_state* state)
    861 {
    862     struct r300_context* r300 = r300_context(pipe);
    863     struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state;
    864     struct pipe_framebuffer_state *old_state = r300->fb_state.state;
    865     unsigned max_width, max_height, i;
    866     uint32_t zbuffer_bpp = 0;
    867     boolean unlock_zbuffer = FALSE;
    868 
    869     if (r300->screen->caps.is_r500) {
    870         max_width = max_height = 4096;
    871     } else if (r300->screen->caps.is_r400) {
    872         max_width = max_height = 4021;
    873     } else {
    874         max_width = max_height = 2560;
    875     }
    876 
    877     if (state->width > max_width || state->height > max_height) {
    878         fprintf(stderr, "r300: Implementation error: Render targets are too "
    879         "big in %s, refusing to bind framebuffer state!\n", __FUNCTION__);
    880         return;
    881     }
    882 
    883     if (old_state->zsbuf && r300->zmask_in_use && !r300->locked_zbuffer) {
    884         /* There is a zmask in use, what are we gonna do? */
    885         if (state->zsbuf) {
    886             if (!pipe_surface_equal(old_state->zsbuf, state->zsbuf)) {
    887                 /* Decompress the currently bound zbuffer before we bind another one. */
    888                 r300_decompress_zmask(r300);
    889                 r300->hiz_in_use = FALSE;
    890             }
    891         } else {
    892             /* We don't bind another zbuffer, so lock the current one. */
    893             pipe_surface_reference(&r300->locked_zbuffer, old_state->zsbuf);
    894         }
    895     } else if (r300->locked_zbuffer) {
    896         /* We have a locked zbuffer now, what are we gonna do? */
    897         if (state->zsbuf) {
    898             if (!pipe_surface_equal(r300->locked_zbuffer, state->zsbuf)) {
    899                 /* We are binding some other zbuffer, so decompress the locked one,
    900                  * it gets unlocked automatically. */
    901                 r300_decompress_zmask_locked_unsafe(r300);
    902                 r300->hiz_in_use = FALSE;
    903             } else {
    904                 /* We are binding the locked zbuffer again, so unlock it. */
    905                 unlock_zbuffer = TRUE;
    906             }
    907         }
    908     }
    909     assert(state->zsbuf || (r300->locked_zbuffer && !unlock_zbuffer) || !r300->zmask_in_use);
    910 
    911     /* Need to reset clamping or colormask. */
    912     r300_mark_atom_dirty(r300, &r300->blend_state);
    913 
    914     /* Re-swizzle the blend color. */
    915     r300_set_blend_color(pipe, &((struct r300_blend_color_state*)r300->blend_color_state.state)->state);
    916 
    917     /* If zsbuf is set from NULL to non-NULL or vice versa.. */
    918     if (!!old_state->zsbuf != !!state->zsbuf) {
    919         r300_mark_atom_dirty(r300, &r300->dsa_state);
    920     }
    921 
    922     if (r300->screen->info.drm_minor < 12) {
    923        /* The tiling flags are dependent on the surface miplevel, unfortunately.
    924         * This workarounds a bad design decision in old kernels which were
    925         * rewriting tile fields in registers. */
    926         r300_fb_set_tiling_flags(r300, state);
    927     }
    928 
    929     util_copy_framebuffer_state(r300->fb_state.state, state);
    930 
    931     if (unlock_zbuffer) {
    932         pipe_surface_reference(&r300->locked_zbuffer, NULL);
    933     }
    934 
    935     r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE);
    936 
    937     if (state->zsbuf) {
    938         switch (util_format_get_blocksize(state->zsbuf->format)) {
    939         case 2:
    940             zbuffer_bpp = 16;
    941             break;
    942         case 4:
    943             zbuffer_bpp = 24;
    944             break;
    945         }
    946 
    947         /* Polygon offset depends on the zbuffer bit depth. */
    948         if (r300->zbuffer_bpp != zbuffer_bpp) {
    949             r300->zbuffer_bpp = zbuffer_bpp;
    950 
    951             if (r300->polygon_offset_enabled)
    952                 r300_mark_atom_dirty(r300, &r300->rs_state);
    953         }
    954     }
    955 
    956     /* Set up AA config. */
    957     if (state->nr_cbufs && state->cbufs[0]->texture->nr_samples > 1) {
    958         aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE;
    959 
    960         switch (state->cbufs[0]->texture->nr_samples) {
    961         case 2:
    962             aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2;
    963             break;
    964         case 3:
    965             aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3;
    966             break;
    967         case 4:
    968             aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4;
    969             break;
    970         case 6:
    971             aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6;
    972             break;
    973         }
    974     } else {
    975         aa->aa_config = 0;
    976     }
    977 
    978     if (DBG_ON(r300, DBG_FB)) {
    979         fprintf(stderr, "r300: set_framebuffer_state:\n");
    980         for (i = 0; i < state->nr_cbufs; i++) {
    981             r300_print_fb_surf_info(state->cbufs[i], i, "CB");
    982         }
    983         if (state->zsbuf) {
    984             r300_print_fb_surf_info(state->zsbuf, 0, "ZB");
    985         }
    986     }
    987 }
    988 
    989 /* Create fragment shader state. */
    990 static void* r300_create_fs_state(struct pipe_context* pipe,
    991                                   const struct pipe_shader_state* shader)
    992 {
    993     struct r300_fragment_shader* fs = NULL;
    994 
    995     fs = (struct r300_fragment_shader*)CALLOC_STRUCT(r300_fragment_shader);
    996 
    997     /* Copy state directly into shader. */
    998     fs->state = *shader;
    999     fs->state.tokens = tgsi_dup_tokens(shader->tokens);
   1000 
   1001     return (void*)fs;
   1002 }
   1003 
   1004 void r300_mark_fs_code_dirty(struct r300_context *r300)
   1005 {
   1006     struct r300_fragment_shader* fs = r300_fs(r300);
   1007 
   1008     r300_mark_atom_dirty(r300, &r300->fs);
   1009     r300_mark_atom_dirty(r300, &r300->fs_rc_constant_state);
   1010     r300_mark_atom_dirty(r300, &r300->fs_constants);
   1011     r300->fs.size = fs->shader->cb_code_size;
   1012 
   1013     if (r300->screen->caps.is_r500) {
   1014         r300->fs_rc_constant_state.size = fs->shader->rc_state_count * 7;
   1015         r300->fs_constants.size = fs->shader->externals_count * 4 + 3;
   1016     } else {
   1017         r300->fs_rc_constant_state.size = fs->shader->rc_state_count * 5;
   1018         r300->fs_constants.size = fs->shader->externals_count * 4 + 1;
   1019     }
   1020 
   1021     ((struct r300_constant_buffer*)r300->fs_constants.state)->remap_table =
   1022             fs->shader->code.constants_remap_table;
   1023 }
   1024 
   1025 /* Bind fragment shader state. */
   1026 static void r300_bind_fs_state(struct pipe_context* pipe, void* shader)
   1027 {
   1028     struct r300_context* r300 = r300_context(pipe);
   1029     struct r300_fragment_shader* fs = (struct r300_fragment_shader*)shader;
   1030 
   1031     if (fs == NULL) {
   1032         r300->fs.state = NULL;
   1033         return;
   1034     }
   1035 
   1036     r300->fs.state = fs;
   1037     r300->fs_status = FRAGMENT_SHADER_DIRTY;
   1038 
   1039     r300_mark_atom_dirty(r300, &r300->rs_block_state); /* Will be updated before the emission. */
   1040 }
   1041 
   1042 /* Delete fragment shader state. */
   1043 static void r300_delete_fs_state(struct pipe_context* pipe, void* shader)
   1044 {
   1045     struct r300_fragment_shader* fs = (struct r300_fragment_shader*)shader;
   1046     struct r300_fragment_shader_code *tmp, *ptr = fs->first;
   1047 
   1048     while (ptr) {
   1049         tmp = ptr;
   1050         ptr = ptr->next;
   1051         rc_constants_destroy(&tmp->code.constants);
   1052         FREE(tmp->cb_code);
   1053         FREE(tmp);
   1054     }
   1055     FREE((void*)fs->state.tokens);
   1056     FREE(shader);
   1057 }
   1058 
   /* Polygon stipple hook: deliberately a no-op, both arguments are ignored.
    * XXX no idea how to set this up, but not terribly important. */
   static void r300_set_polygon_stipple(struct pipe_context* pipe,
                                        const struct pipe_poly_stipple* state)
   {
       (void)pipe;
       (void)state;
   }
   1064 
   1065 /* Create a new rasterizer state based on the CSO rasterizer state.
   1066  *
   1067  * This is a very large chunk of state, and covers most of the graphics
   1068  * backend (GB), geometry assembly (GA), and setup unit (SU) blocks.
   1069  *
   1070  * In a not entirely unironic sidenote, this state has nearly nothing to do
   1071  * with the actual block on the Radeon called the rasterizer (RS). */
   1072 static void* r300_create_rs_state(struct pipe_context* pipe,
   1073                                   const struct pipe_rasterizer_state* state)
   1074 {
   1075     struct r300_rs_state* rs = CALLOC_STRUCT(r300_rs_state);
   1076     uint32_t vap_control_status;    /* R300_VAP_CNTL_STATUS: 0x2140 */
   1077     uint32_t vap_clip_cntl;         /* R300_VAP_CLIP_CNTL: 0x221C */
   1078     uint32_t point_size;            /* R300_GA_POINT_SIZE: 0x421c */
   1079     uint32_t point_minmax;          /* R300_GA_POINT_MINMAX: 0x4230 */
   1080     uint32_t line_control;          /* R300_GA_LINE_CNTL: 0x4234 */
   1081     uint32_t polygon_offset_enable; /* R300_SU_POLY_OFFSET_ENABLE: 0x42b4 */
   1082     uint32_t cull_mode;             /* R300_SU_CULL_MODE: 0x42b8 */
   1083     uint32_t line_stipple_config;   /* R300_GA_LINE_STIPPLE_CONFIG: 0x4328 */
   1084     uint32_t line_stipple_value;    /* R300_GA_LINE_STIPPLE_VALUE: 0x4260 */
   1085     uint32_t polygon_mode;          /* R300_GA_POLY_MODE: 0x4288 */
   1086     uint32_t clip_rule;             /* R300_SC_CLIP_RULE: 0x43D0 */
   1087     uint32_t round_mode;            /* R300_GA_ROUND_MODE: 0x428c */
   1088 
   1089     /* Point sprites texture coordinates, 0: lower left, 1: upper right */
   1090     float point_texcoord_left = 0;  /* R300_GA_POINT_S0: 0x4200 */
   1091     float point_texcoord_bottom = 0;/* R300_GA_POINT_T0: 0x4204 */
   1092     float point_texcoord_right = 1; /* R300_GA_POINT_S1: 0x4208 */
   1093     float point_texcoord_top = 0;   /* R300_GA_POINT_T1: 0x420c */
   1094     boolean vclamp = state->clamp_vertex_color ||
   1095                      !r300_context(pipe)->screen->caps.is_r500;
   1096     CB_LOCALS;
   1097 
   1098     /* Copy rasterizer state. */
   1099     rs->rs = *state;
   1100     rs->rs_draw = *state;
   1101 
   1102     rs->rs.sprite_coord_enable = state->point_quad_rasterization *
   1103                                  state->sprite_coord_enable;
   1104 
   1105     /* Override some states for Draw. */
   1106     rs->rs_draw.sprite_coord_enable = 0; /* We can do this in HW. */
   1107     rs->rs_draw.offset_point = 0;
   1108     rs->rs_draw.offset_line = 0;
   1109     rs->rs_draw.offset_tri = 0;
   1110     rs->rs_draw.offset_clamp = 0;
   1111 
   1112 #ifdef PIPE_ARCH_LITTLE_ENDIAN
   1113     vap_control_status = R300_VC_NO_SWAP;
   1114 #else
   1115     vap_control_status = R300_VC_32BIT_SWAP;
   1116 #endif
   1117 
   1118     /* If no TCL engine is present, turn off the HW TCL. */
   1119     if (!r300_screen(pipe->screen)->caps.has_tcl) {
   1120         vap_control_status |= R300_VAP_TCL_BYPASS;
   1121     }
   1122 
   1123     /* Point size width and height. */
   1124     point_size =
   1125         pack_float_16_6x(state->point_size) |
   1126         (pack_float_16_6x(state->point_size) << R300_POINTSIZE_X_SHIFT);
   1127 
   1128     /* Point size clamping. */
   1129     if (state->point_size_per_vertex) {
   1130         /* Per-vertex point size.
   1131          * Clamp to [0, max FB size] */
   1132         float min_psiz = util_get_min_point_size(state);
   1133         float max_psiz = pipe->screen->get_paramf(pipe->screen,
   1134                                         PIPE_CAPF_MAX_POINT_WIDTH);
   1135         point_minmax =
   1136             (pack_float_16_6x(min_psiz) << R300_GA_POINT_MINMAX_MIN_SHIFT) |
   1137             (pack_float_16_6x(max_psiz) << R300_GA_POINT_MINMAX_MAX_SHIFT);
   1138     } else {
   1139         /* We cannot disable the point-size vertex output,
   1140          * so clamp it. */
   1141         float psiz = state->point_size;
   1142         point_minmax =
   1143             (pack_float_16_6x(psiz) << R300_GA_POINT_MINMAX_MIN_SHIFT) |
   1144             (pack_float_16_6x(psiz) << R300_GA_POINT_MINMAX_MAX_SHIFT);
   1145     }
   1146 
   1147     /* Line control. */
   1148     line_control = pack_float_16_6x(state->line_width) |
   1149         R300_GA_LINE_CNTL_END_TYPE_COMP;
   1150 
   1151     /* Enable polygon mode */
   1152     polygon_mode = 0;
   1153     if (state->fill_front != PIPE_POLYGON_MODE_FILL ||
   1154         state->fill_back != PIPE_POLYGON_MODE_FILL) {
   1155         polygon_mode = R300_GA_POLY_MODE_DUAL;
   1156     }
   1157 
   1158     /* Front face */
   1159     if (state->front_ccw)
   1160         cull_mode = R300_FRONT_FACE_CCW;
   1161     else
   1162         cull_mode = R300_FRONT_FACE_CW;
   1163 
   1164     /* Polygon offset */
   1165     polygon_offset_enable = 0;
   1166     if (util_get_offset(state, state->fill_front)) {
   1167        polygon_offset_enable |= R300_FRONT_ENABLE;
   1168     }
   1169     if (util_get_offset(state, state->fill_back)) {
   1170        polygon_offset_enable |= R300_BACK_ENABLE;
   1171     }
   1172 
   1173     rs->polygon_offset_enable = polygon_offset_enable != 0;
   1174 
   1175     /* Polygon mode */
   1176     if (polygon_mode) {
   1177        polygon_mode |=
   1178           r300_translate_polygon_mode_front(state->fill_front);
   1179        polygon_mode |=
   1180           r300_translate_polygon_mode_back(state->fill_back);
   1181     }
   1182 
   1183     if (state->cull_face & PIPE_FACE_FRONT) {
   1184         cull_mode |= R300_CULL_FRONT;
   1185     }
   1186     if (state->cull_face & PIPE_FACE_BACK) {
   1187         cull_mode |= R300_CULL_BACK;
   1188     }
   1189 
   1190     if (state->line_stipple_enable) {
   1191         line_stipple_config =
   1192             R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_LINE |
   1193             (fui((float)state->line_stipple_factor) &
   1194                 R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_MASK);
   1195         /* XXX this might need to be scaled up */
   1196         line_stipple_value = state->line_stipple_pattern;
   1197     } else {
   1198         line_stipple_config = 0;
   1199         line_stipple_value = 0;
   1200     }
   1201 
   1202     if (state->flatshade) {
   1203         rs->color_control = R300_SHADE_MODEL_FLAT;
   1204     } else {
   1205         rs->color_control = R300_SHADE_MODEL_SMOOTH;
   1206     }
   1207 
   1208     clip_rule = state->scissor ? 0xAAAA : 0xFFFF;
   1209 
   1210     /* Point sprites coord mode */
   1211     if (rs->rs.sprite_coord_enable) {
   1212         switch (state->sprite_coord_mode) {
   1213             case PIPE_SPRITE_COORD_UPPER_LEFT:
   1214                 point_texcoord_top = 0.0f;
   1215                 point_texcoord_bottom = 1.0f;
   1216                 break;
   1217             case PIPE_SPRITE_COORD_LOWER_LEFT:
   1218                 point_texcoord_top = 1.0f;
   1219                 point_texcoord_bottom = 0.0f;
   1220                 break;
   1221         }
   1222     }
   1223 
   1224     if (r300_screen(pipe->screen)->caps.has_tcl) {
   1225        vap_clip_cntl = (state->clip_plane_enable & 63) |
   1226                        R300_PS_UCP_MODE_CLIP_AS_TRIFAN;
   1227     } else {
   1228        vap_clip_cntl = R300_CLIP_DISABLE;
   1229     }
   1230 
   1231     /* Vertex color clamping. FP20 means no clamping. */
   1232     round_mode =
   1233       R300_GA_ROUND_MODE_GEOMETRY_ROUND_NEAREST |
   1234       (!vclamp ? (R300_GA_ROUND_MODE_RGB_CLAMP_FP20 |
   1235                   R300_GA_ROUND_MODE_ALPHA_CLAMP_FP20) : 0);
   1236 
   1237     /* Build the main command buffer. */
   1238     BEGIN_CB(rs->cb_main, RS_STATE_MAIN_SIZE);
   1239     OUT_CB_REG(R300_VAP_CNTL_STATUS, vap_control_status);
   1240     OUT_CB_REG(R300_VAP_CLIP_CNTL, vap_clip_cntl);
   1241     OUT_CB_REG(R300_GA_POINT_SIZE, point_size);
   1242     OUT_CB_REG_SEQ(R300_GA_POINT_MINMAX, 2);
   1243     OUT_CB(point_minmax);
   1244     OUT_CB(line_control);
   1245     OUT_CB_REG_SEQ(R300_SU_POLY_OFFSET_ENABLE, 2);
   1246     OUT_CB(polygon_offset_enable);
   1247     rs->cull_mode_index = 11;
   1248     OUT_CB(cull_mode);
   1249     OUT_CB_REG(R300_GA_LINE_STIPPLE_CONFIG, line_stipple_config);
   1250     OUT_CB_REG(R300_GA_LINE_STIPPLE_VALUE, line_stipple_value);
   1251     OUT_CB_REG(R300_GA_POLY_MODE, polygon_mode);
   1252     OUT_CB_REG(R300_GA_ROUND_MODE, round_mode);
   1253     OUT_CB_REG(R300_SC_CLIP_RULE, clip_rule);
   1254     OUT_CB_REG_SEQ(R300_GA_POINT_S0, 4);
   1255     OUT_CB_32F(point_texcoord_left);
   1256     OUT_CB_32F(point_texcoord_bottom);
   1257     OUT_CB_32F(point_texcoord_right);
   1258     OUT_CB_32F(point_texcoord_top);
   1259     END_CB;
   1260 
   1261     /* Build the two command buffers for polygon offset setup. */
   1262     if (polygon_offset_enable) {
   1263         float scale = state->offset_scale * 12;
   1264         float offset = state->offset_units * 4;
   1265 
   1266         BEGIN_CB(rs->cb_poly_offset_zb16, 5);
   1267         OUT_CB_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 4);
   1268         OUT_CB_32F(scale);
   1269         OUT_CB_32F(offset);
   1270         OUT_CB_32F(scale);
   1271         OUT_CB_32F(offset);
   1272         END_CB;
   1273 
   1274         offset = state->offset_units * 2;
   1275 
   1276         BEGIN_CB(rs->cb_poly_offset_zb24, 5);
   1277         OUT_CB_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 4);
   1278         OUT_CB_32F(scale);
   1279         OUT_CB_32F(offset);
   1280         OUT_CB_32F(scale);
   1281         OUT_CB_32F(offset);
   1282         END_CB;
   1283     }
   1284 
   1285     return (void*)rs;
   1286 }
   1287 
   1288 /* Bind rasterizer state. */
   1289 static void r300_bind_rs_state(struct pipe_context* pipe, void* state)
   1290 {
   1291     struct r300_context* r300 = r300_context(pipe);
   1292     struct r300_rs_state* rs = (struct r300_rs_state*)state;
   1293     int last_sprite_coord_enable = r300->sprite_coord_enable;
   1294     boolean last_two_sided_color = r300->two_sided_color;
   1295 
   1296     if (r300->draw && rs) {
   1297         draw_set_rasterizer_state(r300->draw, &rs->rs_draw, state);
   1298     }
   1299 
   1300     if (rs) {
   1301         r300->polygon_offset_enabled = rs->polygon_offset_enable;
   1302         r300->sprite_coord_enable = rs->rs.sprite_coord_enable;
   1303         r300->two_sided_color = rs->rs.light_twoside;
   1304     } else {
   1305         r300->polygon_offset_enabled = FALSE;
   1306         r300->sprite_coord_enable = 0;
   1307         r300->two_sided_color = FALSE;
   1308     }
   1309 
   1310     UPDATE_STATE(state, r300->rs_state);
   1311     r300->rs_state.size = RS_STATE_MAIN_SIZE + (r300->polygon_offset_enabled ? 5 : 0);
   1312 
   1313     if (last_sprite_coord_enable != r300->sprite_coord_enable ||
   1314         last_two_sided_color != r300->two_sided_color) {
   1315         r300_mark_atom_dirty(r300, &r300->rs_block_state);
   1316     }
   1317 }
   1318 
   /* Free rasterizer state: hands the state object to FREE(). The context
    * argument is not used. */
   static void r300_delete_rs_state(struct pipe_context* pipe, void* state)
   {
       (void)pipe;

       FREE(state);
   }
   1324 
   1325 static void*
   1326         r300_create_sampler_state(struct pipe_context* pipe,
   1327                                   const struct pipe_sampler_state* state)
   1328 {
   1329     struct r300_context* r300 = r300_context(pipe);
   1330     struct r300_sampler_state* sampler = CALLOC_STRUCT(r300_sampler_state);
   1331     boolean is_r500 = r300->screen->caps.is_r500;
   1332     int lod_bias;
   1333 
   1334     sampler->state = *state;
   1335 
   1336     /* r300 doesn't handle CLAMP and MIRROR_CLAMP correctly when either MAG
   1337      * or MIN filter is NEAREST. Since texwrap produces same results
   1338      * for CLAMP and CLAMP_TO_EDGE, we use them instead. */
   1339     if (sampler->state.min_img_filter == PIPE_TEX_FILTER_NEAREST ||
   1340         sampler->state.mag_img_filter == PIPE_TEX_FILTER_NEAREST) {
   1341         /* Wrap S. */
   1342         if (sampler->state.wrap_s == PIPE_TEX_WRAP_CLAMP)
   1343             sampler->state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
   1344         else if (sampler->state.wrap_s == PIPE_TEX_WRAP_MIRROR_CLAMP)
   1345             sampler->state.wrap_s = PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE;
   1346 
   1347         /* Wrap T. */
   1348         if (sampler->state.wrap_t == PIPE_TEX_WRAP_CLAMP)
   1349             sampler->state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
   1350         else if (sampler->state.wrap_t == PIPE_TEX_WRAP_MIRROR_CLAMP)
   1351             sampler->state.wrap_t = PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE;
   1352 
   1353         /* Wrap R. */
   1354         if (sampler->state.wrap_r == PIPE_TEX_WRAP_CLAMP)
   1355             sampler->state.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
   1356         else if (sampler->state.wrap_r == PIPE_TEX_WRAP_MIRROR_CLAMP)
   1357             sampler->state.wrap_r = PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE;
   1358     }
   1359 
   1360     sampler->filter0 |=
   1361         (r300_translate_wrap(sampler->state.wrap_s) << R300_TX_WRAP_S_SHIFT) |
   1362         (r300_translate_wrap(sampler->state.wrap_t) << R300_TX_WRAP_T_SHIFT) |
   1363         (r300_translate_wrap(sampler->state.wrap_r) << R300_TX_WRAP_R_SHIFT);
   1364 
   1365     sampler->filter0 |= r300_translate_tex_filters(state->min_img_filter,
   1366                                                    state->mag_img_filter,
   1367                                                    state->min_mip_filter,
   1368                                                    state->max_anisotropy > 1);
   1369 
   1370     sampler->filter0 |= r300_anisotropy(state->max_anisotropy);
   1371 
   1372     /* Unfortunately, r300-r500 don't support floating-point mipmap lods. */
   1373     /* We must pass these to the merge function to clamp them properly. */
   1374     sampler->min_lod = (unsigned)MAX2(state->min_lod, 0);
   1375     sampler->max_lod = (unsigned)MAX2(ceilf(state->max_lod), 0);
   1376 
   1377     lod_bias = CLAMP((int)(state->lod_bias * 32 + 1), -(1 << 9), (1 << 9) - 1);
   1378 
   1379     sampler->filter1 |= (lod_bias << R300_LOD_BIAS_SHIFT) & R300_LOD_BIAS_MASK;
   1380 
   1381     /* This is very high quality anisotropic filtering for R5xx.
   1382      * It's good for benchmarking the performance of texturing but
   1383      * in practice we don't want to slow down the driver because it's
   1384      * a pretty good performance killer. Feel free to play with it. */
   1385     if (DBG_ON(r300, DBG_ANISOHQ) && is_r500) {
   1386         sampler->filter1 |= r500_anisotropy(state->max_anisotropy);
   1387     }
   1388 
   1389     /* R500-specific fixups and optimizations */
   1390     if (r300->screen->caps.is_r500) {
   1391         sampler->filter1 |= R500_BORDER_FIX;
   1392     }
   1393 
   1394     return (void*)sampler;
   1395 }
   1396 
   1397 static void r300_bind_sampler_states(struct pipe_context* pipe,
   1398                                      unsigned count,
   1399                                      void** states)
   1400 {
   1401     struct r300_context* r300 = r300_context(pipe);
   1402     struct r300_textures_state* state =
   1403         (struct r300_textures_state*)r300->textures_state.state;
   1404     unsigned tex_units = r300->screen->caps.num_tex_units;
   1405 
   1406     if (count > tex_units) {
   1407         return;
   1408     }
   1409 
   1410     memcpy(state->sampler_states, states, sizeof(void*) * count);
   1411     state->sampler_state_count = count;
   1412 
   1413     r300_mark_atom_dirty(r300, &r300->textures_state);
   1414 }
   1415 
   1416 static void r300_lacks_vertex_textures(struct pipe_context* pipe,
   1417                                        unsigned count,
   1418                                        void** states)
   1419 {
   1420 }
   1421 
   1422 static void r300_delete_sampler_state(struct pipe_context* pipe, void* state)
   1423 {
   1424     FREE(state);
   1425 }
   1426 
   1427 static uint32_t r300_assign_texture_cache_region(unsigned index, unsigned num)
   1428 {
   1429     /* This looks like a hack, but I believe it's suppose to work like
   1430      * that. To illustrate how this works, let's assume you have 5 textures.
   1431      * From docs, 5 and the successive numbers are:
   1432      *
   1433      * FOURTH_1     = 5
   1434      * FOURTH_2     = 6
   1435      * FOURTH_3     = 7
   1436      * EIGHTH_0     = 8
   1437      * EIGHTH_1     = 9
   1438      *
   1439      * First 3 textures will get 3/4 of size of the cache, divived evenly
   1440      * between them. The last 1/4 of the cache must be divided between
   1441      * the last 2 textures, each will therefore get 1/8 of the cache.
   1442      * Why not just to use "5 + texture_index" ?
   1443      *
   1444      * This simple trick works for all "num" <= 16.
   1445      */
   1446     if (num <= 1)
   1447         return R300_TX_CACHE(R300_TX_CACHE_WHOLE);
   1448     else
   1449         return R300_TX_CACHE(num + index);
   1450 }
   1451 
   1452 static void r300_set_fragment_sampler_views(struct pipe_context* pipe,
   1453                                             unsigned count,
   1454                                             struct pipe_sampler_view** views)
   1455 {
   1456     struct r300_context* r300 = r300_context(pipe);
   1457     struct r300_textures_state* state =
   1458         (struct r300_textures_state*)r300->textures_state.state;
   1459     struct r300_resource *texture;
   1460     unsigned i, real_num_views = 0, view_index = 0;
   1461     unsigned tex_units = r300->screen->caps.num_tex_units;
   1462     boolean dirty_tex = FALSE;
   1463 
   1464     if (count > tex_units) {
   1465         return;
   1466     }
   1467 
   1468     /* Calculate the real number of views. */
   1469     for (i = 0; i < count; i++) {
   1470         if (views[i])
   1471             real_num_views++;
   1472     }
   1473 
   1474     for (i = 0; i < count; i++) {
   1475         pipe_sampler_view_reference(
   1476                 (struct pipe_sampler_view**)&state->sampler_views[i],
   1477                 views[i]);
   1478 
   1479         if (!views[i]) {
   1480             continue;
   1481         }
   1482 
   1483         /* A new sampler view (= texture)... */
   1484         dirty_tex = TRUE;
   1485 
   1486         /* Set the texrect factor in the fragment shader.
   1487              * Needed for RECT and NPOT fallback. */
   1488         texture = r300_resource(views[i]->texture);
   1489         if (texture->tex.is_npot) {
   1490             r300_mark_atom_dirty(r300, &r300->fs_rc_constant_state);
   1491         }
   1492 
   1493         state->sampler_views[i]->texcache_region =
   1494                 r300_assign_texture_cache_region(view_index, real_num_views);
   1495         view_index++;
   1496     }
   1497 
   1498     for (i = count; i < tex_units; i++) {
   1499         if (state->sampler_views[i]) {
   1500             pipe_sampler_view_reference(
   1501                     (struct pipe_sampler_view**)&state->sampler_views[i],
   1502                     NULL);
   1503         }
   1504     }
   1505 
   1506     state->sampler_view_count = count;
   1507 
   1508     r300_mark_atom_dirty(r300, &r300->textures_state);
   1509 
   1510     if (dirty_tex) {
   1511         r300_mark_atom_dirty(r300, &r300->texture_cache_inval);
   1512     }
   1513 }
   1514 
   1515 struct pipe_sampler_view *
   1516 r300_create_sampler_view_custom(struct pipe_context *pipe,
   1517                          struct pipe_resource *texture,
   1518                          const struct pipe_sampler_view *templ,
   1519                          unsigned width0_override,
   1520                          unsigned height0_override)
   1521 {
   1522     struct r300_sampler_view *view = CALLOC_STRUCT(r300_sampler_view);
   1523     struct r300_resource *tex = r300_resource(texture);
   1524     boolean is_r500 = r300_screen(pipe->screen)->caps.is_r500;
   1525     boolean dxtc_swizzle = r300_screen(pipe->screen)->caps.dxtc_swizzle;
   1526 
   1527     if (view) {
   1528         unsigned hwformat;
   1529 
   1530         view->base = *templ;
   1531         view->base.reference.count = 1;
   1532         view->base.context = pipe;
   1533         view->base.texture = NULL;
   1534         pipe_resource_reference(&view->base.texture, texture);
   1535 
   1536 	view->width0_override = width0_override;
   1537 	view->height0_override = height0_override;
   1538         view->swizzle[0] = templ->swizzle_r;
   1539         view->swizzle[1] = templ->swizzle_g;
   1540         view->swizzle[2] = templ->swizzle_b;
   1541         view->swizzle[3] = templ->swizzle_a;
   1542 
   1543         hwformat = r300_translate_texformat(templ->format,
   1544                                             view->swizzle,
   1545                                             is_r500,
   1546                                             dxtc_swizzle);
   1547 
   1548         if (hwformat == ~0) {
   1549             fprintf(stderr, "r300: Ooops. Got unsupported format %s in %s.\n",
   1550                     util_format_short_name(templ->format), __func__);
   1551         }
   1552         assert(hwformat != ~0);
   1553 
   1554 	r300_texture_setup_format_state(r300_screen(pipe->screen), tex,
   1555 					templ->format, 0,
   1556 	                                width0_override, height0_override,
   1557 					&view->format);
   1558         view->format.format1 |= hwformat;
   1559         if (is_r500) {
   1560             view->format.format2 |= r500_tx_format_msb_bit(templ->format);
   1561         }
   1562     }
   1563 
   1564     return (struct pipe_sampler_view*)view;
   1565 }
   1566 
   1567 static struct pipe_sampler_view *
   1568 r300_create_sampler_view(struct pipe_context *pipe,
   1569                          struct pipe_resource *texture,
   1570                          const struct pipe_sampler_view *templ)
   1571 {
   1572     return r300_create_sampler_view_custom(pipe, texture, templ,
   1573                                            r300_resource(texture)->tex.width0,
   1574                                            r300_resource(texture)->tex.height0);
   1575 }
   1576 
   1577 
   1578 static void
   1579 r300_sampler_view_destroy(struct pipe_context *pipe,
   1580                           struct pipe_sampler_view *view)
   1581 {
   1582    pipe_resource_reference(&view->texture, NULL);
   1583    FREE(view);
   1584 }
   1585 
   1586 static void r300_set_scissor_state(struct pipe_context* pipe,
   1587                                    const struct pipe_scissor_state* state)
   1588 {
   1589     struct r300_context* r300 = r300_context(pipe);
   1590 
   1591     memcpy(r300->scissor_state.state, state,
   1592         sizeof(struct pipe_scissor_state));
   1593 
   1594     r300_mark_atom_dirty(r300, &r300->scissor_state);
   1595 }
   1596 
   1597 static void r300_set_viewport_state(struct pipe_context* pipe,
   1598                                     const struct pipe_viewport_state* state)
   1599 {
   1600     struct r300_context* r300 = r300_context(pipe);
   1601     struct r300_viewport_state* viewport =
   1602         (struct r300_viewport_state*)r300->viewport_state.state;
   1603 
   1604     r300->viewport = *state;
   1605 
   1606     if (r300->draw) {
   1607         draw_set_viewport_state(r300->draw, state);
   1608         viewport->vte_control = R300_VTX_XY_FMT | R300_VTX_Z_FMT;
   1609         return;
   1610     }
   1611 
   1612     /* Do the transform in HW. */
   1613     viewport->vte_control = R300_VTX_W0_FMT;
   1614 
   1615     if (state->scale[0] != 1.0f) {
   1616         viewport->xscale = state->scale[0];
   1617         viewport->vte_control |= R300_VPORT_X_SCALE_ENA;
   1618     }
   1619     if (state->scale[1] != 1.0f) {
   1620         viewport->yscale = state->scale[1];
   1621         viewport->vte_control |= R300_VPORT_Y_SCALE_ENA;
   1622     }
   1623     if (state->scale[2] != 1.0f) {
   1624         viewport->zscale = state->scale[2];
   1625         viewport->vte_control |= R300_VPORT_Z_SCALE_ENA;
   1626     }
   1627     if (state->translate[0] != 0.0f) {
   1628         viewport->xoffset = state->translate[0];
   1629         viewport->vte_control |= R300_VPORT_X_OFFSET_ENA;
   1630     }
   1631     if (state->translate[1] != 0.0f) {
   1632         viewport->yoffset = state->translate[1];
   1633         viewport->vte_control |= R300_VPORT_Y_OFFSET_ENA;
   1634     }
   1635     if (state->translate[2] != 0.0f) {
   1636         viewport->zoffset = state->translate[2];
   1637         viewport->vte_control |= R300_VPORT_Z_OFFSET_ENA;
   1638     }
   1639 
   1640     r300_mark_atom_dirty(r300, &r300->viewport_state);
   1641     if (r300->fs.state && r300_fs(r300)->shader &&
   1642         r300_fs(r300)->shader->inputs.wpos != ATTR_UNUSED) {
   1643         r300_mark_atom_dirty(r300, &r300->fs_rc_constant_state);
   1644     }
   1645 }
   1646 
   1647 static void r300_set_vertex_buffers_hwtcl(struct pipe_context* pipe,
   1648                                     unsigned count,
   1649                                     const struct pipe_vertex_buffer* buffers)
   1650 {
   1651     struct r300_context* r300 = r300_context(pipe);
   1652 
   1653     /* There must be at least one vertex buffer set, otherwise it locks up. */
   1654     if (!count) {
   1655         buffers = &r300->dummy_vb;
   1656         count = 1;
   1657     }
   1658 
   1659     util_copy_vertex_buffers(r300->vertex_buffer,
   1660                              &r300->nr_vertex_buffers,
   1661                              buffers, count);
   1662 
   1663     r300->vertex_arrays_dirty = TRUE;
   1664 }
   1665 
   1666 static void r300_set_vertex_buffers_swtcl(struct pipe_context* pipe,
   1667                                     unsigned count,
   1668                                     const struct pipe_vertex_buffer* buffers)
   1669 {
   1670     struct r300_context* r300 = r300_context(pipe);
   1671     unsigned i;
   1672 
   1673     util_copy_vertex_buffers(r300->vertex_buffer,
   1674                              &r300->nr_vertex_buffers,
   1675                              buffers, count);
   1676     draw_set_vertex_buffers(r300->draw, count, buffers);
   1677 
   1678     for (i = 0; i < count; i++) {
   1679         if (buffers[i].user_buffer) {
   1680             draw_set_mapped_vertex_buffer(r300->draw, i,
   1681                                           buffers[i].user_buffer);
   1682         } else if (buffers[i].buffer) {
   1683             draw_set_mapped_vertex_buffer(r300->draw, i,
   1684                 r300_resource(buffers[i].buffer)->malloced_buffer);
   1685         }
   1686     }
   1687 }
   1688 
   1689 static void r300_set_index_buffer_hwtcl(struct pipe_context* pipe,
   1690                                         const struct pipe_index_buffer *ib)
   1691 {
   1692     struct r300_context* r300 = r300_context(pipe);
   1693 
   1694     if (ib) {
   1695         pipe_resource_reference(&r300->index_buffer.buffer, ib->buffer);
   1696         memcpy(&r300->index_buffer, ib, sizeof(*ib));
   1697     } else {
   1698         pipe_resource_reference(&r300->index_buffer.buffer, NULL);
   1699     }
   1700 }
   1701 
   1702 static void r300_set_index_buffer_swtcl(struct pipe_context* pipe,
   1703                                         const struct pipe_index_buffer *ib)
   1704 {
   1705     struct r300_context* r300 = r300_context(pipe);
   1706 
   1707     if (ib) {
   1708         const void *buf = NULL;
   1709         if (ib->user_buffer) {
   1710             buf = ib->user_buffer;
   1711         } else if (ib->buffer) {
   1712             buf = r300_resource(ib->buffer)->malloced_buffer;
   1713         }
   1714         draw_set_indexes(r300->draw,
   1715                          (const ubyte *) buf + ib->offset,
   1716                          ib->index_size);
   1717     }
   1718 }
   1719 
   1720 /* Initialize the PSC tables. */
   1721 static void r300_vertex_psc(struct r300_vertex_element_state *velems)
   1722 {
   1723     struct r300_vertex_stream_state *vstream = &velems->vertex_stream;
   1724     uint16_t type, swizzle;
   1725     enum pipe_format format;
   1726     unsigned i;
   1727 
   1728     /* Vertex shaders have no semantics on their inputs,
   1729      * so PSC should just route stuff based on the vertex elements,
   1730      * and not on attrib information. */
   1731     for (i = 0; i < velems->count; i++) {
   1732         format = velems->velem[i].src_format;
   1733 
   1734         type = r300_translate_vertex_data_type(format);
   1735         if (type == R300_INVALID_FORMAT) {
   1736             fprintf(stderr, "r300: Bad vertex format %s.\n",
   1737                     util_format_short_name(format));
   1738             assert(0);
   1739             abort();
   1740         }
   1741 
   1742         type |= i << R300_DST_VEC_LOC_SHIFT;
   1743         swizzle = r300_translate_vertex_data_swizzle(format);
   1744 
   1745         if (i & 1) {
   1746             vstream->vap_prog_stream_cntl[i >> 1] |= type << 16;
   1747             vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
   1748         } else {
   1749             vstream->vap_prog_stream_cntl[i >> 1] |= type;
   1750             vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
   1751         }
   1752     }
   1753 
   1754     /* Set the last vector in the PSC. */
   1755     if (i) {
   1756         i -= 1;
   1757     }
   1758     vstream->vap_prog_stream_cntl[i >> 1] |=
   1759         (R300_LAST_VEC << (i & 1 ? 16 : 0));
   1760 
   1761     vstream->count = (i >> 1) + 1;
   1762 }
   1763 
   1764 static void* r300_create_vertex_elements_state(struct pipe_context* pipe,
   1765                                                unsigned count,
   1766                                                const struct pipe_vertex_element* attribs)
   1767 {
   1768     struct r300_vertex_element_state *velems;
   1769     unsigned i;
   1770     struct pipe_vertex_element dummy_attrib = {0};
   1771 
   1772     /* R300 Programmable Stream Control (PSC) doesn't support 0 vertex elements. */
   1773     if (!count) {
   1774         dummy_attrib.src_format = PIPE_FORMAT_R8G8B8A8_UNORM;
   1775         attribs = &dummy_attrib;
   1776         count = 1;
   1777     } else if (count > 16) {
   1778         fprintf(stderr, "r300: More than 16 vertex elements are not supported,"
   1779                 " requested %i, using 16.\n", count);
   1780         count = 16;
   1781     }
   1782 
   1783     velems = CALLOC_STRUCT(r300_vertex_element_state);
   1784     if (!velems)
   1785         return NULL;
   1786 
   1787     velems->count = count;
   1788     memcpy(velems->velem, attribs, sizeof(struct pipe_vertex_element) * count);
   1789 
   1790     if (r300_screen(pipe->screen)->caps.has_tcl) {
   1791         /* Setup PSC.
   1792          * The unused components will be replaced by (..., 0, 1). */
   1793         r300_vertex_psc(velems);
   1794 
   1795         for (i = 0; i < count; i++) {
   1796             velems->format_size[i] =
   1797                 align(util_format_get_blocksize(velems->velem[i].src_format), 4);
   1798             velems->vertex_size_dwords += velems->format_size[i] / 4;
   1799         }
   1800     }
   1801 
   1802     return velems;
   1803 }
   1804 
   1805 static void r300_bind_vertex_elements_state(struct pipe_context *pipe,
   1806                                             void *state)
   1807 {
   1808     struct r300_context *r300 = r300_context(pipe);
   1809     struct r300_vertex_element_state *velems = state;
   1810 
   1811     if (velems == NULL) {
   1812         return;
   1813     }
   1814 
   1815     r300->velems = velems;
   1816 
   1817     if (r300->draw) {
   1818         draw_set_vertex_elements(r300->draw, velems->count, velems->velem);
   1819         return;
   1820     }
   1821 
   1822     UPDATE_STATE(&velems->vertex_stream, r300->vertex_stream_state);
   1823     r300->vertex_stream_state.size = (1 + velems->vertex_stream.count) * 2;
   1824     r300->vertex_arrays_dirty = TRUE;
   1825 }
   1826 
   1827 static void r300_delete_vertex_elements_state(struct pipe_context *pipe, void *state)
   1828 {
   1829     FREE(state);
   1830 }
   1831 
   1832 static void* r300_create_vs_state(struct pipe_context* pipe,
   1833                                   const struct pipe_shader_state* shader)
   1834 {
   1835     struct r300_context* r300 = r300_context(pipe);
   1836     struct r300_vertex_shader* vs = CALLOC_STRUCT(r300_vertex_shader);
   1837 
   1838     /* Copy state directly into shader. */
   1839     vs->state = *shader;
   1840     vs->state.tokens = tgsi_dup_tokens(shader->tokens);
   1841 
   1842     if (r300->screen->caps.has_tcl) {
   1843         r300_init_vs_outputs(r300, vs);
   1844         r300_translate_vertex_shader(r300, vs);
   1845     } else {
   1846         r300_draw_init_vertex_shader(r300, vs);
   1847     }
   1848 
   1849     return vs;
   1850 }
   1851 
   1852 static void r300_bind_vs_state(struct pipe_context* pipe, void* shader)
   1853 {
   1854     struct r300_context* r300 = r300_context(pipe);
   1855     struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader;
   1856 
   1857     if (vs == NULL) {
   1858         r300->vs_state.state = NULL;
   1859         return;
   1860     }
   1861     if (vs == r300->vs_state.state) {
   1862         return;
   1863     }
   1864     r300->vs_state.state = vs;
   1865 
   1866     /* The majority of the RS block bits is dependent on the vertex shader. */
   1867     r300_mark_atom_dirty(r300, &r300->rs_block_state); /* Will be updated before the emission. */
   1868 
   1869     if (r300->screen->caps.has_tcl) {
   1870         unsigned fc_op_dwords = r300->screen->caps.is_r500 ? 3 : 2;
   1871         r300_mark_atom_dirty(r300, &r300->vs_state);
   1872         r300->vs_state.size = vs->code.length + 9 +
   1873 			(R300_VS_MAX_FC_OPS * fc_op_dwords + 4);
   1874 
   1875         r300_mark_atom_dirty(r300, &r300->vs_constants);
   1876         r300->vs_constants.size =
   1877                 2 +
   1878                 (vs->externals_count ? vs->externals_count * 4 + 3 : 0) +
   1879                 (vs->immediates_count ? vs->immediates_count * 4 + 3 : 0);
   1880 
   1881         ((struct r300_constant_buffer*)r300->vs_constants.state)->remap_table =
   1882                 vs->code.constants_remap_table;
   1883 
   1884         r300_mark_atom_dirty(r300, &r300->pvs_flush);
   1885     } else {
   1886         draw_bind_vertex_shader(r300->draw,
   1887                 (struct draw_vertex_shader*)vs->draw_vs);
   1888     }
   1889 }
   1890 
   1891 static void r300_delete_vs_state(struct pipe_context* pipe, void* shader)
   1892 {
   1893     struct r300_context* r300 = r300_context(pipe);
   1894     struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader;
   1895 
   1896     if (r300->screen->caps.has_tcl) {
   1897         rc_constants_destroy(&vs->code.constants);
   1898         if (vs->code.constants_remap_table)
   1899             FREE(vs->code.constants_remap_table);
   1900     } else {
   1901         draw_delete_vertex_shader(r300->draw,
   1902                 (struct draw_vertex_shader*)vs->draw_vs);
   1903     }
   1904 
   1905     FREE((void*)vs->state.tokens);
   1906     FREE(shader);
   1907 }
   1908 
   1909 static void r300_set_constant_buffer(struct pipe_context *pipe,
   1910                                      uint shader, uint index,
   1911                                      struct pipe_constant_buffer *cb)
   1912 {
   1913     struct r300_context* r300 = r300_context(pipe);
   1914     struct r300_constant_buffer *cbuf;
   1915     uint32_t *mapped;
   1916 
   1917     if (!cb)
   1918         return;
   1919 
   1920     switch (shader) {
   1921         case PIPE_SHADER_VERTEX:
   1922             cbuf = (struct r300_constant_buffer*)r300->vs_constants.state;
   1923             break;
   1924         case PIPE_SHADER_FRAGMENT:
   1925             cbuf = (struct r300_constant_buffer*)r300->fs_constants.state;
   1926             break;
   1927         default:
   1928             return;
   1929     }
   1930 
   1931 
   1932     if (cb->user_buffer)
   1933         mapped = (uint32_t*)cb->user_buffer;
   1934     else {
   1935         struct r300_resource *rbuf = r300_resource(cb->buffer);
   1936 
   1937         if (rbuf && rbuf->malloced_buffer)
   1938             mapped = (uint32_t*)rbuf->malloced_buffer;
   1939         else
   1940             return;
   1941     }
   1942 
   1943     if (shader == PIPE_SHADER_FRAGMENT ||
   1944         (shader == PIPE_SHADER_VERTEX && r300->screen->caps.has_tcl)) {
   1945         cbuf->ptr = mapped;
   1946     }
   1947 
   1948     if (shader == PIPE_SHADER_VERTEX) {
   1949         if (r300->screen->caps.has_tcl) {
   1950             struct r300_vertex_shader *vs =
   1951                     (struct r300_vertex_shader*)r300->vs_state.state;
   1952 
   1953             if (!vs) {
   1954                 cbuf->buffer_base = 0;
   1955                 return;
   1956             }
   1957 
   1958             cbuf->buffer_base = r300->vs_const_base;
   1959             r300->vs_const_base += vs->code.constants.Count;
   1960             if (r300->vs_const_base > R500_MAX_PVS_CONST_VECS) {
   1961                 r300->vs_const_base = vs->code.constants.Count;
   1962                 cbuf->buffer_base = 0;
   1963                 r300_mark_atom_dirty(r300, &r300->pvs_flush);
   1964             }
   1965             r300_mark_atom_dirty(r300, &r300->vs_constants);
   1966         } else if (r300->draw) {
   1967             draw_set_mapped_constant_buffer(r300->draw, PIPE_SHADER_VERTEX,
   1968                 0, mapped, cb->buffer_size);
   1969         }
   1970     } else if (shader == PIPE_SHADER_FRAGMENT) {
   1971         r300_mark_atom_dirty(r300, &r300->fs_constants);
   1972     }
   1973 }
   1974 
   1975 static void r300_texture_barrier(struct pipe_context *pipe)
   1976 {
   1977     struct r300_context *r300 = r300_context(pipe);
   1978 
   1979     r300_mark_atom_dirty(r300, &r300->gpu_flush);
   1980     r300_mark_atom_dirty(r300, &r300->texture_cache_inval);
   1981 }
   1982 
   1983 void r300_init_state_functions(struct r300_context* r300)
   1984 {
   1985     r300->context.create_blend_state = r300_create_blend_state;
   1986     r300->context.bind_blend_state = r300_bind_blend_state;
   1987     r300->context.delete_blend_state = r300_delete_blend_state;
   1988 
   1989     r300->context.set_blend_color = r300_set_blend_color;
   1990 
   1991     r300->context.set_clip_state = r300_set_clip_state;
   1992     r300->context.set_sample_mask = r300_set_sample_mask;
   1993 
   1994     r300->context.set_constant_buffer = r300_set_constant_buffer;
   1995 
   1996     r300->context.create_depth_stencil_alpha_state = r300_create_dsa_state;
   1997     r300->context.bind_depth_stencil_alpha_state = r300_bind_dsa_state;
   1998     r300->context.delete_depth_stencil_alpha_state = r300_delete_dsa_state;
   1999 
   2000     r300->context.set_stencil_ref = r300_set_stencil_ref;
   2001 
   2002     r300->context.set_framebuffer_state = r300_set_framebuffer_state;
   2003 
   2004     r300->context.create_fs_state = r300_create_fs_state;
   2005     r300->context.bind_fs_state = r300_bind_fs_state;
   2006     r300->context.delete_fs_state = r300_delete_fs_state;
   2007 
   2008     r300->context.set_polygon_stipple = r300_set_polygon_stipple;
   2009 
   2010     r300->context.create_rasterizer_state = r300_create_rs_state;
   2011     r300->context.bind_rasterizer_state = r300_bind_rs_state;
   2012     r300->context.delete_rasterizer_state = r300_delete_rs_state;
   2013 
   2014     r300->context.create_sampler_state = r300_create_sampler_state;
   2015     r300->context.bind_fragment_sampler_states = r300_bind_sampler_states;
   2016     r300->context.bind_vertex_sampler_states = r300_lacks_vertex_textures;
   2017     r300->context.delete_sampler_state = r300_delete_sampler_state;
   2018 
   2019     r300->context.set_fragment_sampler_views = r300_set_fragment_sampler_views;
   2020     r300->context.create_sampler_view = r300_create_sampler_view;
   2021     r300->context.sampler_view_destroy = r300_sampler_view_destroy;
   2022 
   2023     r300->context.set_scissor_state = r300_set_scissor_state;
   2024 
   2025     r300->context.set_viewport_state = r300_set_viewport_state;
   2026 
   2027     if (r300->screen->caps.has_tcl) {
   2028         r300->context.set_vertex_buffers = r300_set_vertex_buffers_hwtcl;
   2029         r300->context.set_index_buffer = r300_set_index_buffer_hwtcl;
   2030     } else {
   2031         r300->context.set_vertex_buffers = r300_set_vertex_buffers_swtcl;
   2032         r300->context.set_index_buffer = r300_set_index_buffer_swtcl;
   2033     }
   2034 
   2035     r300->context.create_vertex_elements_state = r300_create_vertex_elements_state;
   2036     r300->context.bind_vertex_elements_state = r300_bind_vertex_elements_state;
   2037     r300->context.delete_vertex_elements_state = r300_delete_vertex_elements_state;
   2038 
   2039     r300->context.create_vs_state = r300_create_vs_state;
   2040     r300->context.bind_vs_state = r300_bind_vs_state;
   2041     r300->context.delete_vs_state = r300_delete_vs_state;
   2042 
   2043     r300->context.texture_barrier = r300_texture_barrier;
   2044 }
   2045