Home | History | Annotate | Download | only in nine
      1 /*
      2  * Copyright 2011 Joakim Sindholt <opensource (at) zhasha.com>
      3  * Copyright 2013 Christoph Bumiller
      4  *
      5  * Permission is hereby granted, free of charge, to any person obtaining a
      6  * copy of this software and associated documentation files (the "Software"),
      7  * to deal in the Software without restriction, including without limitation
      8  * on the rights to use, copy, modify, merge, publish, distribute, sub
      9  * license, and/or sell copies of the Software, and to permit persons to whom
     10  * the Software is furnished to do so, subject to the following conditions:
     11  *
     12  * The above copyright notice and this permission notice (including the next
     13  * paragraph) shall be included in all copies or substantial portions of the
     14  * Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     19  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     20  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     21  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     22  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
     23 
     24 #define NINE_STATE
     25 
     26 #include "device9.h"
     27 #include "swapchain9.h"
     28 #include "basetexture9.h"
     29 #include "buffer9.h"
     30 #include "indexbuffer9.h"
     31 #include "surface9.h"
     32 #include "vertexbuffer9.h"
     33 #include "vertexdeclaration9.h"
     34 #include "vertexshader9.h"
     35 #include "pixelshader9.h"
     36 #include "nine_pipe.h"
     37 #include "nine_ff.h"
     38 #include "nine_limits.h"
     39 #include "pipe/p_context.h"
     40 #include "pipe/p_state.h"
     41 #include "cso_cache/cso_context.h"
     42 #include "util/u_atomic.h"
     43 #include "util/u_upload_mgr.h"
     44 #include "util/u_math.h"
     45 #include "util/u_box.h"
     46 #include "util/u_simple_shaders.h"
     47 #include "util/u_gen_mipmap.h"
     48 
     49 /* CSMT headers */
     50 #include "nine_queue.h"
     51 #include "nine_csmt_helper.h"
     52 #include "os/os_thread.h"
     53 
     54 #define DBG_CHANNEL DBG_DEVICE
     55 
     56 /* Nine CSMT */
     57 
/* Instruction stored in the CSMT queue and executed by the worker thread. */
struct csmt_instruction {
    /* Handler invoked by the worker with the owning device and a pointer to
     * this instruction. A non-zero return value makes the worker set the
     * context's "processed" flag and signal the waiting thread. */
    int (* func)(struct NineDevice9 *This, struct csmt_instruction *instr);
};
     61 
/* State shared between the thread(s) submitting work and the CSMT worker. */
struct csmt_context {
    /* Worker thread created by nine_csmt_create(). */
    pipe_thread worker;
    /* Instruction queue the worker consumes. */
    struct nine_queue_pool* pool;
    /* Set by nine_csmt_destroy() to ask the worker to exit. */
    BOOL terminate;
    /* Signalled by the worker whenever it sets "processed". */
    pipe_condvar event_processed;
    /* Held while "processed" is updated and while waiting on the condvar. */
    pipe_mutex mutex_processed;
    /* Device handed to every instruction handler. */
    struct NineDevice9 *device;
    /* Cleared before a flush by the waiting side; set by the worker once an
     * instruction returned non-zero or once it is about to terminate. */
    BOOL processed;
    /* Pause request raised by nine_csmt_pause(), polled by the worker after
     * each instruction. */
    BOOL toPause;
    /* TRUE while nine_csmt_pause() holds the two locks below, so that
     * nine_csmt_resume() knows whether it has anything to release. */
    BOOL hasPaused;
    /* Held by the worker while it executes instructions; taken by
     * nine_csmt_pause() to keep the worker from running. */
    pipe_mutex thread_running;
    /* Held by the pausing thread from pause to resume; the worker blocks on
     * it after giving up thread_running. */
    pipe_mutex thread_resume;
};
     75 
     76 /* Wait for instruction to be processed.
     77  * Caller has to ensure that only one thread waits at time.
     78  */
     79 static void
     80 nine_csmt_wait_processed(struct csmt_context *ctx)
     81 {
     82     pipe_mutex_lock(ctx->mutex_processed);
     83     while (!p_atomic_read(&ctx->processed)) {
     84         pipe_condvar_wait(ctx->event_processed, ctx->mutex_processed);
     85     }
     86     pipe_mutex_unlock(ctx->mutex_processed);
     87 }
     88 
/* CSMT worker thread.
 *
 * Thread routine started by nine_csmt_create(); arg is the csmt_context it
 * serves. It repeatedly waits on the queue, executes the instructions it
 * obtains, and exits once ctx->terminate is set. The return value is
 * always 0.
 */
static
PIPE_THREAD_ROUTINE(nine_csmt_worker, arg)
{
    struct csmt_context *ctx = arg;
    struct csmt_instruction *instr;
    DBG("CSMT worker spawned\n");

    pipe_thread_setname("CSMT-Worker");

    while (1) {
        /* NOTE(review): presumably blocks until the producer has flushed
         * work into the queue - confirm against nine_queue.c. */
        nine_queue_wait_flush(ctx->pool);
        /* thread_running is held for as long as instructions are executed,
         * which is what lets nine_csmt_pause() wait for the worker. */
        pipe_mutex_lock(ctx->thread_running);

        /* Get instruction. NULL on empty cmdbuf.
         * terminate is tested first, so no further instruction is fetched
         * once termination has been requested. */
        while (!p_atomic_read(&ctx->terminate) &&
               (instr = (struct csmt_instruction *)nine_queue_get(ctx->pool))) {

            /* decode: a non-zero result means a thread may be waiting in
             * nine_csmt_wait_processed(), so flag completion and wake it. */
            if (instr->func(ctx->device, instr)) {
                pipe_mutex_lock(ctx->mutex_processed);
                p_atomic_set(&ctx->processed, TRUE);
                pipe_condvar_signal(ctx->event_processed);
                pipe_mutex_unlock(ctx->mutex_processed);
            }
            if (p_atomic_read(&ctx->toPause)) {
                /* Hand thread_running over to nine_csmt_pause(). */
                pipe_mutex_unlock(ctx->thread_running);
                /* will wait here until the thread can be resumed: the
                 * pausing thread holds thread_resume until
                 * nine_csmt_resume() releases it. */
                pipe_mutex_lock(ctx->thread_resume);
                pipe_mutex_lock(ctx->thread_running);
                pipe_mutex_unlock(ctx->thread_resume);
            }
        }

        pipe_mutex_unlock(ctx->thread_running);
        if (p_atomic_read(&ctx->terminate)) {
            /* Wake nine_csmt_destroy(), which waits for "processed". */
            pipe_mutex_lock(ctx->mutex_processed);
            p_atomic_set(&ctx->processed, TRUE);
            pipe_condvar_signal(ctx->event_processed);
            pipe_mutex_unlock(ctx->mutex_processed);
            break;
        }
    }

    DBG("CSMT worker destroyed\n");
    return 0;
}
    136 
    137 /* Create a CSMT context.
    138  * Spawns a worker thread.
    139  */
    140 struct csmt_context *
    141 nine_csmt_create( struct NineDevice9 *This )
    142 {
    143     struct csmt_context *ctx;
    144 
    145     ctx = CALLOC_STRUCT(csmt_context);
    146     if (!ctx)
    147         return NULL;
    148 
    149     ctx->pool = nine_queue_create();
    150     if (!ctx->pool) {
    151         FREE(ctx);
    152         return NULL;
    153     }
    154     pipe_condvar_init(ctx->event_processed);
    155     pipe_mutex_init(ctx->mutex_processed);
    156     pipe_mutex_init(ctx->thread_running);
    157     pipe_mutex_init(ctx->thread_resume);
    158 
    159 #if DEBUG
    160     pipe_thread_setname("Main thread");
    161 #endif
    162 
    163     ctx->device = This;
    164 
    165     ctx->worker = pipe_thread_create(nine_csmt_worker, ctx);
    166     if (!ctx->worker) {
    167         nine_queue_delete(ctx->pool);
    168         FREE(ctx);
    169         return NULL;
    170     }
    171 
    172     DBG("Returning context %p\n", ctx);
    173 
    174     return ctx;
    175 }
    176 
    177 static int
    178 nop_func( struct NineDevice9 *This, struct csmt_instruction *instr )
    179 {
    180     (void) This;
    181     (void) instr;
    182 
    183     return 1;
    184 }
    185 
    186 /* Push nop instruction and flush the queue.
    187  * Waits for the worker to complete. */
    188 void
    189 nine_csmt_process( struct NineDevice9 *device )
    190 {
    191     struct csmt_instruction* instr;
    192     struct csmt_context *ctx = device->csmt_ctx;
    193 
    194     if (!device->csmt_active)
    195         return;
    196 
    197     if (nine_queue_isempty(ctx->pool))
    198         return;
    199 
    200     DBG("device=%p\n", device);
    201 
    202     /* NOP */
    203     instr = nine_queue_alloc(ctx->pool, sizeof(struct csmt_instruction));
    204     assert(instr);
    205     instr->func = nop_func;
    206 
    207     p_atomic_set(&ctx->processed, FALSE);
    208     nine_queue_flush(ctx->pool);
    209 
    210     nine_csmt_wait_processed(ctx);
    211 }
    212 
    213 /* Destroys a CSMT context.
    214  * Waits for the worker thread to terminate.
    215  */
    216 void
    217 nine_csmt_destroy( struct NineDevice9 *device, struct csmt_context *ctx )
    218 {
    219     struct csmt_instruction* instr;
    220     pipe_thread render_thread = ctx->worker;
    221 
    222     DBG("device=%p ctx=%p\n", device, ctx);
    223 
    224     /* Push nop and flush the queue. */
    225     instr = nine_queue_alloc(ctx->pool, sizeof(struct csmt_instruction));
    226     assert(instr);
    227     instr->func = nop_func;
    228 
    229     p_atomic_set(&ctx->processed, FALSE);
    230     /* Signal worker to terminate. */
    231     p_atomic_set(&ctx->terminate, TRUE);
    232     nine_queue_flush(ctx->pool);
    233 
    234     nine_csmt_wait_processed(ctx);
    235     nine_queue_delete(ctx->pool);
    236     pipe_mutex_destroy(ctx->mutex_processed);
    237 
    238     FREE(ctx);
    239 
    240     pipe_thread_wait(render_thread);
    241 }
    242 
    243 static void
    244 nine_csmt_pause( struct NineDevice9 *device )
    245 {
    246     struct csmt_context *ctx = device->csmt_ctx;
    247 
    248     if (!device->csmt_active)
    249         return;
    250 
    251     /* No need to pause the thread */
    252     if (nine_queue_no_flushed_work(ctx->pool))
    253         return;
    254 
    255     pipe_mutex_lock(ctx->thread_resume);
    256     p_atomic_set(&ctx->toPause, TRUE);
    257 
    258     /* Wait the thread is paused */
    259     pipe_mutex_lock(ctx->thread_running);
    260     ctx->hasPaused = TRUE;
    261     p_atomic_set(&ctx->toPause, FALSE);
    262 }
    263 
    264 static void
    265 nine_csmt_resume( struct NineDevice9 *device )
    266 {
    267     struct csmt_context *ctx = device->csmt_ctx;
    268 
    269     if (!device->csmt_active)
    270         return;
    271 
    272     if (!ctx->hasPaused)
    273         return;
    274 
    275     ctx->hasPaused = FALSE;
    276     pipe_mutex_unlock(ctx->thread_running);
    277     pipe_mutex_unlock(ctx->thread_resume);
    278 }
    279 
    280 struct pipe_context *
    281 nine_context_get_pipe( struct NineDevice9 *device )
    282 {
    283     nine_csmt_process(device);
    284     return device->context.pipe;
    285 }
    286 
    287 struct pipe_context *
    288 nine_context_get_pipe_multithread( struct NineDevice9 *device )
    289 {
    290     struct csmt_context *ctx = device->csmt_ctx;
    291 
    292     if (!device->csmt_active)
    293         return device->context.pipe;
    294 
    295     if (!pipe_thread_is_self(ctx->worker))
    296         nine_csmt_process(device);
    297 
    298     return device->context.pipe;
    299 }
    300 
    301 struct pipe_context *
    302 nine_context_get_pipe_acquire( struct NineDevice9 *device )
    303 {
    304     nine_csmt_pause(device);
    305     return device->context.pipe;
    306 }
    307 
    308 void
    309 nine_context_get_pipe_release( struct NineDevice9 *device )
    310 {
    311     nine_csmt_resume(device);
    312 }
    313 
    314 /* Nine state functions */
    315 
    316 /* Check if some states need to be set dirty */
    317 
    318 static inline DWORD
    319 check_multisample(struct NineDevice9 *device)
    320 {
    321     DWORD *rs = device->context.rs;
    322     DWORD new_value = (rs[D3DRS_ZENABLE] || rs[D3DRS_STENCILENABLE]) &&
    323                       device->context.rt[0]->desc.MultiSampleType >= 1 &&
    324                       rs[D3DRS_MULTISAMPLEANTIALIAS];
    325     if (rs[NINED3DRS_MULTISAMPLE] != new_value) {
    326         rs[NINED3DRS_MULTISAMPLE] = new_value;
    327         return NINE_STATE_RASTERIZER;
    328     }
    329     return 0;
    330 }
    331 
    332 /* State preparation only */
    333 
    334 static inline void
    335 prepare_blend(struct NineDevice9 *device)
    336 {
    337     nine_convert_blend_state(&device->context.pipe_data.blend, device->context.rs);
    338     device->context.commit |= NINE_STATE_COMMIT_BLEND;
    339 }
    340 
    341 static inline void
    342 prepare_dsa(struct NineDevice9 *device)
    343 {
    344     nine_convert_dsa_state(&device->context.pipe_data.dsa, device->context.rs);
    345     device->context.commit |= NINE_STATE_COMMIT_DSA;
    346 }
    347 
    348 static inline void
    349 prepare_rasterizer(struct NineDevice9 *device)
    350 {
    351     nine_convert_rasterizer_state(device, &device->context.pipe_data.rast, device->context.rs);
    352     device->context.commit |= NINE_STATE_COMMIT_RASTERIZER;
    353 }
    354 
    355 static void
    356 prepare_vs_constants_userbuf_swvp(struct NineDevice9 *device)
    357 {
    358     struct nine_context *context = &device->context;
    359 
    360     if (context->changed.vs_const_f || context->changed.group & NINE_STATE_SWVP) {
    361         struct pipe_constant_buffer cb;
    362 
    363         cb.buffer_offset = 0;
    364         cb.buffer_size = 4096 * sizeof(float[4]);
    365         cb.user_buffer = context->vs_const_f_swvp;
    366 
    367         if (context->vs->lconstf.ranges) {
    368             const struct nine_lconstf *lconstf = &(context->vs->lconstf);
    369             const struct nine_range *r = lconstf->ranges;
    370             unsigned n = 0;
    371             float *dst = context->vs_lconstf_temp;
    372             float *src = (float *)cb.user_buffer;
    373             memcpy(dst, src, cb.buffer_size);
    374             while (r) {
    375                 unsigned p = r->bgn;
    376                 unsigned c = r->end - r->bgn;
    377                 memcpy(&dst[p * 4], &lconstf->data[n * 4], c * 4 * sizeof(float));
    378                 n += c;
    379                 r = r->next;
    380             }
    381             cb.user_buffer = dst;
    382         }
    383 
    384         /* Do not erase the buffer field.
    385          * It is either NULL (user_cbufs), or a resource.
    386          * u_upload_data will do the proper refcount */
    387         context->pipe_data.cb0_swvp.buffer_offset = cb.buffer_offset;
    388         context->pipe_data.cb0_swvp.buffer_size = cb.buffer_size;
    389         context->pipe_data.cb0_swvp.user_buffer = cb.user_buffer;
    390 
    391         cb.user_buffer = (char *)cb.user_buffer + 4096 * sizeof(float[4]);
    392         context->pipe_data.cb1_swvp.buffer_offset = cb.buffer_offset;
    393         context->pipe_data.cb1_swvp.buffer_size = cb.buffer_size;
    394         context->pipe_data.cb1_swvp.user_buffer = cb.user_buffer;
    395 
    396         context->changed.vs_const_f = 0;
    397     }
    398 
    399     if (context->changed.vs_const_i || context->changed.group & NINE_STATE_SWVP) {
    400         struct pipe_constant_buffer cb;
    401 
    402         cb.buffer_offset = 0;
    403         cb.buffer_size = 2048 * sizeof(float[4]);
    404         cb.user_buffer = context->vs_const_i;
    405 
    406         context->pipe_data.cb2_swvp.buffer_offset = cb.buffer_offset;
    407         context->pipe_data.cb2_swvp.buffer_size = cb.buffer_size;
    408         context->pipe_data.cb2_swvp.user_buffer = cb.user_buffer;
    409         context->changed.vs_const_i = 0;
    410     }
    411 
    412     if (context->changed.vs_const_b || context->changed.group & NINE_STATE_SWVP) {
    413         struct pipe_constant_buffer cb;
    414 
    415         cb.buffer_offset = 0;
    416         cb.buffer_size = 512 * sizeof(float[4]);
    417         cb.user_buffer = context->vs_const_b;
    418 
    419         context->pipe_data.cb3_swvp.buffer_offset = cb.buffer_offset;
    420         context->pipe_data.cb3_swvp.buffer_size = cb.buffer_size;
    421         context->pipe_data.cb3_swvp.user_buffer = cb.user_buffer;
    422         context->changed.vs_const_b = 0;
    423     }
    424 
    425     if (!device->driver_caps.user_cbufs) {
    426         struct pipe_constant_buffer *cb = &(context->pipe_data.cb0_swvp);
    427         u_upload_data(device->constbuf_uploader,
    428                       0,
    429                       cb->buffer_size,
    430                       device->constbuf_alignment,
    431                       cb->user_buffer,
    432                       &(cb->buffer_offset),
    433                       &(cb->buffer));
    434         u_upload_unmap(device->constbuf_uploader);
    435         cb->user_buffer = NULL;
    436 
    437         cb = &(context->pipe_data.cb1_swvp);
    438         u_upload_data(device->constbuf_uploader,
    439                       0,
    440                       cb->buffer_size,
    441                       device->constbuf_alignment,
    442                       cb->user_buffer,
    443                       &(cb->buffer_offset),
    444                       &(cb->buffer));
    445         u_upload_unmap(device->constbuf_uploader);
    446         cb->user_buffer = NULL;
    447 
    448         cb = &(context->pipe_data.cb2_swvp);
    449         u_upload_data(device->constbuf_uploader,
    450                       0,
    451                       cb->buffer_size,
    452                       device->constbuf_alignment,
    453                       cb->user_buffer,
    454                       &(cb->buffer_offset),
    455                       &(cb->buffer));
    456         u_upload_unmap(device->constbuf_uploader);
    457         cb->user_buffer = NULL;
    458 
    459         cb = &(context->pipe_data.cb3_swvp);
    460         u_upload_data(device->constbuf_uploader,
    461                       0,
    462                       cb->buffer_size,
    463                       device->constbuf_alignment,
    464                       cb->user_buffer,
    465                       &(cb->buffer_offset),
    466                       &(cb->buffer));
    467         u_upload_unmap(device->constbuf_uploader);
    468         cb->user_buffer = NULL;
    469     }
    470 
    471     context->changed.group &= ~NINE_STATE_VS_CONST;
    472     context->commit |= NINE_STATE_COMMIT_CONST_VS;
    473 }
    474 
    475 static void
    476 prepare_vs_constants_userbuf(struct NineDevice9 *device)
    477 {
    478     struct nine_context *context = &device->context;
    479     struct pipe_constant_buffer cb;
    480     cb.buffer = NULL;
    481     cb.buffer_offset = 0;
    482     cb.buffer_size = context->vs->const_used_size;
    483     cb.user_buffer = context->vs_const_f;
    484 
    485     if (context->swvp) {
    486         prepare_vs_constants_userbuf_swvp(device);
    487         return;
    488     }
    489 
    490     if (context->changed.vs_const_i || context->changed.group & NINE_STATE_SWVP) {
    491         int *idst = (int *)&context->vs_const_f[4 * device->max_vs_const_f];
    492         memcpy(idst, context->vs_const_i, NINE_MAX_CONST_I * sizeof(int[4]));
    493         context->changed.vs_const_i = 0;
    494     }
    495 
    496     if (context->changed.vs_const_b || context->changed.group & NINE_STATE_SWVP) {
    497         int *idst = (int *)&context->vs_const_f[4 * device->max_vs_const_f];
    498         uint32_t *bdst = (uint32_t *)&idst[4 * NINE_MAX_CONST_I];
    499         memcpy(bdst, context->vs_const_b, NINE_MAX_CONST_B * sizeof(BOOL));
    500         context->changed.vs_const_b = 0;
    501     }
    502 
    503     if (!cb.buffer_size)
    504         return;
    505 
    506     if (context->vs->lconstf.ranges) {
    507         /* TODO: Can we make it so that we don't have to copy everything ? */
    508         const struct nine_lconstf *lconstf =  &(context->vs->lconstf);
    509         const struct nine_range *r = lconstf->ranges;
    510         unsigned n = 0;
    511         float *dst = context->vs_lconstf_temp;
    512         float *src = (float *)cb.user_buffer;
    513         memcpy(dst, src, cb.buffer_size);
    514         while (r) {
    515             unsigned p = r->bgn;
    516             unsigned c = r->end - r->bgn;
    517             memcpy(&dst[p * 4], &lconstf->data[n * 4], c * 4 * sizeof(float));
    518             n += c;
    519             r = r->next;
    520         }
    521         cb.user_buffer = dst;
    522     }
    523 
    524     if (!device->driver_caps.user_cbufs) {
    525         context->pipe_data.cb_vs.buffer_size = cb.buffer_size;
    526         u_upload_data(device->constbuf_uploader,
    527                       0,
    528                       cb.buffer_size,
    529                       device->constbuf_alignment,
    530                       cb.user_buffer,
    531                       &context->pipe_data.cb_vs.buffer_offset,
    532                       &context->pipe_data.cb_vs.buffer);
    533         u_upload_unmap(device->constbuf_uploader);
    534         context->pipe_data.cb_vs.user_buffer = NULL;
    535     } else
    536         context->pipe_data.cb_vs = cb;
    537 
    538     context->changed.vs_const_f = 0;
    539 
    540     context->changed.group &= ~NINE_STATE_VS_CONST;
    541     context->commit |= NINE_STATE_COMMIT_CONST_VS;
    542 }
    543 
    544 static void
    545 prepare_ps_constants_userbuf(struct NineDevice9 *device)
    546 {
    547     struct nine_context *context = &device->context;
    548     struct pipe_constant_buffer cb;
    549     cb.buffer = NULL;
    550     cb.buffer_offset = 0;
    551     cb.buffer_size = context->ps->const_used_size;
    552     cb.user_buffer = context->ps_const_f;
    553 
    554     if (context->changed.ps_const_i) {
    555         int *idst = (int *)&context->ps_const_f[4 * device->max_ps_const_f];
    556         memcpy(idst, context->ps_const_i, sizeof(context->ps_const_i));
    557         context->changed.ps_const_i = 0;
    558     }
    559     if (context->changed.ps_const_b) {
    560         int *idst = (int *)&context->ps_const_f[4 * device->max_ps_const_f];
    561         uint32_t *bdst = (uint32_t *)&idst[4 * NINE_MAX_CONST_I];
    562         memcpy(bdst, context->ps_const_b, sizeof(context->ps_const_b));
    563         context->changed.ps_const_b = 0;
    564     }
    565 
    566     /* Upload special constants needed to implement PS1.x instructions like TEXBEM,TEXBEML and BEM */
    567     if (context->ps->bumpenvmat_needed) {
    568         memcpy(context->ps_lconstf_temp, cb.user_buffer, cb.buffer_size);
    569         memcpy(&context->ps_lconstf_temp[4 * 8], &device->context.bumpmap_vars, sizeof(device->context.bumpmap_vars));
    570 
    571         cb.user_buffer = context->ps_lconstf_temp;
    572     }
    573 
    574     if (context->ps->byte_code.version < 0x30 &&
    575         context->rs[D3DRS_FOGENABLE]) {
    576         float *dst = &context->ps_lconstf_temp[4 * 32];
    577         if (cb.user_buffer != context->ps_lconstf_temp) {
    578             memcpy(context->ps_lconstf_temp, cb.user_buffer, cb.buffer_size);
    579             cb.user_buffer = context->ps_lconstf_temp;
    580         }
    581 
    582         d3dcolor_to_rgba(dst, context->rs[D3DRS_FOGCOLOR]);
    583         if (context->rs[D3DRS_FOGTABLEMODE] == D3DFOG_LINEAR) {
    584             dst[4] = asfloat(context->rs[D3DRS_FOGEND]);
    585             dst[5] = 1.0f / (asfloat(context->rs[D3DRS_FOGEND]) - asfloat(context->rs[D3DRS_FOGSTART]));
    586         } else if (context->rs[D3DRS_FOGTABLEMODE] != D3DFOG_NONE) {
    587             dst[4] = asfloat(context->rs[D3DRS_FOGDENSITY]);
    588         }
    589         cb.buffer_size = 4 * 4 * 34;
    590     }
    591 
    592     if (!cb.buffer_size)
    593         return;
    594 
    595     if (!device->driver_caps.user_cbufs) {
    596         context->pipe_data.cb_ps.buffer_size = cb.buffer_size;
    597         u_upload_data(device->constbuf_uploader,
    598                       0,
    599                       cb.buffer_size,
    600                       device->constbuf_alignment,
    601                       cb.user_buffer,
    602                       &context->pipe_data.cb_ps.buffer_offset,
    603                       &context->pipe_data.cb_ps.buffer);
    604         u_upload_unmap(device->constbuf_uploader);
    605         context->pipe_data.cb_ps.user_buffer = NULL;
    606     } else
    607         context->pipe_data.cb_ps = cb;
    608 
    609     context->changed.ps_const_f = 0;
    610 
    611     context->changed.group &= ~NINE_STATE_PS_CONST;
    612     context->commit |= NINE_STATE_COMMIT_CONST_PS;
    613 }
    614 
    615 static inline uint32_t
    616 prepare_vs(struct NineDevice9 *device, uint8_t shader_changed)
    617 {
    618     struct nine_context *context = &device->context;
    619     struct NineVertexShader9 *vs = context->vs;
    620     uint32_t changed_group = 0;
    621     int has_key_changed = 0;
    622 
    623     if (likely(context->programmable_vs))
    624         has_key_changed = NineVertexShader9_UpdateKey(vs, device);
    625 
    626     if (!shader_changed && !has_key_changed)
    627         return 0;
    628 
    629     /* likely because we dislike FF */
    630     if (likely(context->programmable_vs)) {
    631         context->cso_shader.vs = NineVertexShader9_GetVariant(vs);
    632     } else {
    633         vs = device->ff.vs;
    634         context->cso_shader.vs = vs->ff_cso;
    635     }
    636 
    637     if (context->rs[NINED3DRS_VSPOINTSIZE] != vs->point_size) {
    638         context->rs[NINED3DRS_VSPOINTSIZE] = vs->point_size;
    639         changed_group |= NINE_STATE_RASTERIZER;
    640     }
    641 
    642     if ((context->bound_samplers_mask_vs & vs->sampler_mask) != vs->sampler_mask)
    643         /* Bound dummy sampler. */
    644         changed_group |= NINE_STATE_SAMPLER;
    645 
    646     context->commit |= NINE_STATE_COMMIT_VS;
    647     return changed_group;
    648 }
    649 
    650 static inline uint32_t
    651 prepare_ps(struct NineDevice9 *device, uint8_t shader_changed)
    652 {
    653     struct nine_context *context = &device->context;
    654     struct NinePixelShader9 *ps = context->ps;
    655     uint32_t changed_group = 0;
    656     int has_key_changed = 0;
    657 
    658     if (likely(ps))
    659         has_key_changed = NinePixelShader9_UpdateKey(ps, context);
    660 
    661     if (!shader_changed && !has_key_changed)
    662         return 0;
    663 
    664     if (likely(ps)) {
    665         context->cso_shader.ps = NinePixelShader9_GetVariant(ps);
    666     } else {
    667         ps = device->ff.ps;
    668         context->cso_shader.ps = ps->ff_cso;
    669     }
    670 
    671     if ((context->bound_samplers_mask_ps & ps->sampler_mask) != ps->sampler_mask)
    672         /* Bound dummy sampler. */
    673         changed_group |= NINE_STATE_SAMPLER;
    674 
    675     context->commit |= NINE_STATE_COMMIT_PS;
    676     return changed_group;
    677 }
    678 
    679 /* State preparation incremental */
    680 
    681 /* State preparation + State commit */
    682 
    683 static void
    684 update_framebuffer(struct NineDevice9 *device, bool is_clear)
    685 {
    686     struct nine_context *context = &device->context;
    687     struct pipe_context *pipe = context->pipe;
    688     struct pipe_framebuffer_state *fb = &context->pipe_data.fb;
    689     unsigned i;
    690     struct NineSurface9 *rt0 = context->rt[0];
    691     unsigned w = rt0->desc.Width;
    692     unsigned h = rt0->desc.Height;
    693     unsigned nr_samples = rt0->base.info.nr_samples;
    694     unsigned ps_mask = context->ps ? context->ps->rt_mask : 1;
    695     unsigned mask = is_clear ? 0xf : ps_mask;
    696     const int sRGB = context->rs[D3DRS_SRGBWRITEENABLE] ? 1 : 0;
    697 
    698     DBG("\n");
    699 
    700     context->rt_mask = 0x0;
    701     fb->nr_cbufs = 0;
    702 
    703     /* all render targets must have the same size and the depth buffer must be
    704      * bigger. Multisample has to match, according to spec. But some apps do
    705      * things wrong there, and no error is returned. The behaviour they get
    706      * apparently is that depth buffer is disabled if it doesn't match.
    707      * Surely the same for render targets. */
    708 
    709     /* Special case: D3DFMT_NULL is used to bound no real render target,
    710      * but render to depth buffer. We have to not take into account the render
    711      * target info. TODO: know what should happen when there are several render targers
    712      * and the first one is D3DFMT_NULL */
    713     if (rt0->desc.Format == D3DFMT_NULL && context->ds) {
    714         w = context->ds->desc.Width;
    715         h = context->ds->desc.Height;
    716         nr_samples = context->ds->base.info.nr_samples;
    717     }
    718 
    719     for (i = 0; i < device->caps.NumSimultaneousRTs; ++i) {
    720         struct NineSurface9 *rt = context->rt[i];
    721 
    722         if (rt && rt->desc.Format != D3DFMT_NULL && (mask & (1 << i)) &&
    723             rt->desc.Width == w && rt->desc.Height == h &&
    724             rt->base.info.nr_samples == nr_samples) {
    725             fb->cbufs[i] = NineSurface9_GetSurface(rt, sRGB);
    726             context->rt_mask |= 1 << i;
    727             fb->nr_cbufs = i + 1;
    728         } else {
    729             /* Color outputs must match RT slot,
    730              * drivers will have to handle NULL entries for GL, too.
    731              */
    732             fb->cbufs[i] = NULL;
    733         }
    734     }
    735 
    736     if (context->ds && context->ds->desc.Width >= w &&
    737         context->ds->desc.Height >= h &&
    738         context->ds->base.info.nr_samples == nr_samples) {
    739         fb->zsbuf = NineSurface9_GetSurface(context->ds, 0);
    740     } else {
    741         fb->zsbuf = NULL;
    742     }
    743 
    744     fb->width = w;
    745     fb->height = h;
    746 
    747     pipe->set_framebuffer_state(pipe, fb); /* XXX: cso ? */
    748 
    749     if (is_clear && context->rt_mask == ps_mask)
    750         context->changed.group &= ~NINE_STATE_FB;
    751 }
    752 
    753 static void
    754 update_viewport(struct NineDevice9 *device)
    755 {
    756     struct nine_context *context = &device->context;
    757     const D3DVIEWPORT9 *vport = &context->viewport;
    758     struct pipe_viewport_state pvport;
    759 
    760     /* D3D coordinates are:
    761      * -1 .. +1 for X,Y and
    762      *  0 .. +1 for Z (we use pipe_rasterizer_state.clip_halfz)
    763      */
    764     pvport.scale[0] = (float)vport->Width * 0.5f;
    765     pvport.scale[1] = (float)vport->Height * -0.5f;
    766     pvport.scale[2] = vport->MaxZ - vport->MinZ;
    767     pvport.translate[0] = (float)vport->Width * 0.5f + (float)vport->X;
    768     pvport.translate[1] = (float)vport->Height * 0.5f + (float)vport->Y;
    769     pvport.translate[2] = vport->MinZ;
    770 
    771     /* We found R600 and SI cards have some imprecision
    772      * on the barycentric coordinates used for interpolation.
    773      * Some shaders rely on having something precise.
    774      * We found that the proprietary driver has the imprecision issue,
    775      * except when the render target width and height are powers of two.
    776      * It is using some sort of workaround for these cases
    777      * which covers likely all the cases the applications rely
    778      * on something precise.
    779      * We haven't found the workaround, but it seems like it's better
    780      * for applications if the imprecision is biased towards infinity
    781      * instead of -infinity (which is what measured). So shift slightly
    782      * the viewport: not enough to change rasterization result (in particular
    783      * for multisampling), but enough to make the imprecision biased
    784      * towards infinity. We do this shift only if render target width and
    785      * height are powers of two.
    786      * Solves 'red shadows' bug on UE3 games.
    787      */
    788     if (device->driver_bugs.buggy_barycentrics &&
    789         ((vport->Width & (vport->Width-1)) == 0) &&
    790         ((vport->Height & (vport->Height-1)) == 0)) {
    791         pvport.translate[0] -= 1.0f / 128.0f;
    792         pvport.translate[1] -= 1.0f / 128.0f;
    793     }
    794 
    795     cso_set_viewport(context->cso, &pvport);
    796 }
    797 
    798 /* Loop through VS inputs and pick the vertex elements with the declared
    799  * usage from the vertex declaration, then insert the instance divisor from
    800  * the stream source frequency setting.
    801  */
    802 static void
    803 update_vertex_elements(struct NineDevice9 *device)
    804 {
    805     struct nine_context *context = &device->context;
    806     const struct NineVertexDeclaration9 *vdecl = device->context.vdecl;
    807     const struct NineVertexShader9 *vs;
    808     unsigned n, b, i;
    809     int index;
    810     char vdecl_index_map[16]; /* vs->num_inputs <= 16 */
    811     char used_streams[device->caps.MaxStreams];
    812     int dummy_vbo_stream = -1;
    813     BOOL need_dummy_vbo = FALSE;
    814     struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];
    815 
    816     context->stream_usage_mask = 0;
    817     memset(vdecl_index_map, -1, 16);
    818     memset(used_streams, 0, device->caps.MaxStreams);
    819     vs = context->programmable_vs ? context->vs : device->ff.vs;
    820 
    821     if (vdecl) {
    822         for (n = 0; n < vs->num_inputs; ++n) {
    823             DBG("looking up input %u (usage %u) from vdecl(%p)\n",
    824                 n, vs->input_map[n].ndecl, vdecl);
    825 
    826             for (i = 0; i < vdecl->nelems; i++) {
    827                 if (vdecl->usage_map[i] == vs->input_map[n].ndecl) {
    828                     vdecl_index_map[n] = i;
    829                     used_streams[vdecl->elems[i].vertex_buffer_index] = 1;
    830                     break;
    831                 }
    832             }
    833             if (vdecl_index_map[n] < 0)
    834                 need_dummy_vbo = TRUE;
    835         }
    836     } else {
    837         /* No vertex declaration. Likely will never happen in practice,
    838          * but we need not crash on this */
    839         need_dummy_vbo = TRUE;
    840     }
    841 
    842     if (need_dummy_vbo) {
    843         for (i = 0; i < device->caps.MaxStreams; i++ ) {
    844             if (!used_streams[i]) {
    845                 dummy_vbo_stream = i;
    846                 break;
    847             }
    848         }
    849     }
    850     /* there are less vertex shader inputs than stream slots,
    851      * so if we need a slot for the dummy vbo, we should have found one */
    852     assert (!need_dummy_vbo || dummy_vbo_stream != -1);
    853 
    854     for (n = 0; n < vs->num_inputs; ++n) {
    855         index = vdecl_index_map[n];
    856         if (index >= 0) {
    857             ve[n] = vdecl->elems[index];
    858             b = ve[n].vertex_buffer_index;
    859             context->stream_usage_mask |= 1 << b;
    860             /* XXX wine just uses 1 here: */
    861             if (context->stream_freq[b] & D3DSTREAMSOURCE_INSTANCEDATA)
    862                 ve[n].instance_divisor = context->stream_freq[b] & 0x7FFFFF;
    863         } else {
    864             /* if the vertex declaration is incomplete compared to what the
    865              * vertex shader needs, we bind a dummy vbo with 0 0 0 0.
    866              * This is not precised by the spec, but is the behaviour
    867              * tested on win */
    868             ve[n].vertex_buffer_index = dummy_vbo_stream;
    869             ve[n].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
    870             ve[n].src_offset = 0;
    871             ve[n].instance_divisor = 0;
    872         }
    873     }
    874 
    875     if (context->dummy_vbo_bound_at != dummy_vbo_stream) {
    876         if (context->dummy_vbo_bound_at >= 0)
    877             context->changed.vtxbuf |= 1 << context->dummy_vbo_bound_at;
    878         if (dummy_vbo_stream >= 0) {
    879             context->changed.vtxbuf |= 1 << dummy_vbo_stream;
    880             context->vbo_bound_done = FALSE;
    881         }
    882         context->dummy_vbo_bound_at = dummy_vbo_stream;
    883     }
    884 
    885     cso_set_vertex_elements(context->cso, vs->num_inputs, ve);
    886 }
    887 
    888 static void
    889 update_vertex_buffers(struct NineDevice9 *device)
    890 {
    891     struct nine_context *context = &device->context;
    892     struct pipe_context *pipe = context->pipe;
    893     struct pipe_vertex_buffer dummy_vtxbuf;
    894     uint32_t mask = context->changed.vtxbuf;
    895     unsigned i;
    896 
    897     DBG("mask=%x\n", mask);
    898 
    899     if (context->dummy_vbo_bound_at >= 0) {
    900         if (!context->vbo_bound_done) {
    901             dummy_vtxbuf.buffer = device->dummy_vbo;
    902             dummy_vtxbuf.stride = 0;
    903             dummy_vtxbuf.user_buffer = NULL;
    904             dummy_vtxbuf.buffer_offset = 0;
    905             pipe->set_vertex_buffers(pipe, context->dummy_vbo_bound_at,
    906                                      1, &dummy_vtxbuf);
    907             context->vbo_bound_done = TRUE;
    908         }
    909         mask &= ~(1 << context->dummy_vbo_bound_at);
    910     }
    911 
    912     for (i = 0; mask; mask >>= 1, ++i) {
    913         if (mask & 1) {
    914             if (context->vtxbuf[i].buffer)
    915                 pipe->set_vertex_buffers(pipe, i, 1, &context->vtxbuf[i]);
    916             else
    917                 pipe->set_vertex_buffers(pipe, i, 1, NULL);
    918         }
    919     }
    920 
    921     context->changed.vtxbuf = 0;
    922 }
    923 
    924 static inline boolean
    925 update_sampler_derived(struct nine_context *context, unsigned s)
    926 {
    927     boolean changed = FALSE;
    928 
    929     if (context->samp[s][NINED3DSAMP_SHADOW] != context->texture[s].shadow) {
    930         changed = TRUE;
    931         context->samp[s][NINED3DSAMP_SHADOW] = context->texture[s].shadow;
    932     }
    933 
    934     if (context->samp[s][NINED3DSAMP_CUBETEX] !=
    935         (context->texture[s].type == D3DRTYPE_CUBETEXTURE)) {
    936         changed = TRUE;
    937         context->samp[s][NINED3DSAMP_CUBETEX] =
    938                 context->texture[s].type == D3DRTYPE_CUBETEXTURE;
    939     }
    940 
    941     if (context->samp[s][D3DSAMP_MIPFILTER] != D3DTEXF_NONE) {
    942         int lod = context->samp[s][D3DSAMP_MAXMIPLEVEL] - context->texture[s].lod;
    943         if (lod < 0)
    944             lod = 0;
    945         if (context->samp[s][NINED3DSAMP_MINLOD] != lod) {
    946             changed = TRUE;
    947             context->samp[s][NINED3DSAMP_MINLOD] = lod;
    948         }
    949     } else {
    950         context->changed.sampler[s] &= ~0x300; /* lod changes irrelevant */
    951     }
    952 
    953     return changed;
    954 }
    955 
    956 /* TODO: add sRGB override to pipe_sampler_state ? */
    957 static void
    958 update_textures_and_samplers(struct NineDevice9 *device)
    959 {
    960     struct nine_context *context = &device->context;
    961     struct pipe_sampler_view *view[NINE_MAX_SAMPLERS];
    962     unsigned num_textures;
    963     unsigned i;
    964     boolean commit_samplers;
    965     uint16_t sampler_mask = context->ps ? context->ps->sampler_mask :
    966                             device->ff.ps->sampler_mask;
    967 
    968     /* TODO: Can we reduce iterations here ? */
    969 
    970     commit_samplers = FALSE;
    971     context->bound_samplers_mask_ps = 0;
    972     for (num_textures = 0, i = 0; i < NINE_MAX_SAMPLERS_PS; ++i) {
    973         const unsigned s = NINE_SAMPLER_PS(i);
    974         int sRGB;
    975 
    976         if (!context->texture[s].enabled && !(sampler_mask & (1 << i))) {
    977             view[i] = NULL;
    978             continue;
    979         }
    980 
    981         if (context->texture[s].enabled) {
    982             sRGB = context->samp[s][D3DSAMP_SRGBTEXTURE] ? 1 : 0;
    983 
    984             view[i] = context->texture[s].view[sRGB];
    985             num_textures = i + 1;
    986 
    987             if (update_sampler_derived(context, s) || (context->changed.sampler[s] & 0x05fe)) {
    988                 context->changed.sampler[s] = 0;
    989                 commit_samplers = TRUE;
    990                 nine_convert_sampler_state(context->cso, s, context->samp[s]);
    991             }
    992         } else {
    993             /* Bind dummy sampler. We do not bind dummy sampler when
    994              * it is not needed because it could add overhead. The
    995              * dummy sampler should have r=g=b=0 and a=1. We do not
    996              * unbind dummy sampler directly when they are not needed
    997              * anymore, but they're going to be removed as long as texture
    998              * or sampler states are changed. */
    999             view[i] = device->dummy_sampler_view;
   1000             num_textures = i + 1;
   1001 
   1002             cso_single_sampler(context->cso, PIPE_SHADER_FRAGMENT,
   1003                                s - NINE_SAMPLER_PS(0), &device->dummy_sampler_state);
   1004 
   1005             commit_samplers = TRUE;
   1006             context->changed.sampler[s] = ~0;
   1007         }
   1008 
   1009         context->bound_samplers_mask_ps |= (1 << s);
   1010     }
   1011 
   1012     cso_set_sampler_views(context->cso, PIPE_SHADER_FRAGMENT, num_textures, view);
   1013 
   1014     if (commit_samplers)
   1015         cso_single_sampler_done(context->cso, PIPE_SHADER_FRAGMENT);
   1016 
   1017     commit_samplers = FALSE;
   1018     sampler_mask = context->programmable_vs ? context->vs->sampler_mask : 0;
   1019     context->bound_samplers_mask_vs = 0;
   1020     for (num_textures = 0, i = 0; i < NINE_MAX_SAMPLERS_VS; ++i) {
   1021         const unsigned s = NINE_SAMPLER_VS(i);
   1022         int sRGB;
   1023 
   1024         if (!context->texture[s].enabled && !(sampler_mask & (1 << i))) {
   1025             view[i] = NULL;
   1026             continue;
   1027         }
   1028 
   1029         if (context->texture[s].enabled) {
   1030             sRGB = context->samp[s][D3DSAMP_SRGBTEXTURE] ? 1 : 0;
   1031 
   1032             view[i] = context->texture[s].view[sRGB];
   1033             num_textures = i + 1;
   1034 
   1035             if (update_sampler_derived(context, s) || (context->changed.sampler[s] & 0x05fe)) {
   1036                 context->changed.sampler[s] = 0;
   1037                 commit_samplers = TRUE;
   1038                 nine_convert_sampler_state(context->cso, s, context->samp[s]);
   1039             }
   1040         } else {
   1041             /* Bind dummy sampler. We do not bind dummy sampler when
   1042              * it is not needed because it could add overhead. The
   1043              * dummy sampler should have r=g=b=0 and a=1. We do not
   1044              * unbind dummy sampler directly when they are not needed
   1045              * anymore, but they're going to be removed as long as texture
   1046              * or sampler states are changed. */
   1047             view[i] = device->dummy_sampler_view;
   1048             num_textures = i + 1;
   1049 
   1050             cso_single_sampler(context->cso, PIPE_SHADER_VERTEX,
   1051                                s - NINE_SAMPLER_VS(0), &device->dummy_sampler_state);
   1052 
   1053             commit_samplers = TRUE;
   1054             context->changed.sampler[s] = ~0;
   1055         }
   1056 
   1057         context->bound_samplers_mask_vs |= (1 << s);
   1058     }
   1059 
   1060     cso_set_sampler_views(context->cso, PIPE_SHADER_VERTEX, num_textures, view);
   1061 
   1062     if (commit_samplers)
   1063         cso_single_sampler_done(context->cso, PIPE_SHADER_VERTEX);
   1064 }
   1065 
   1066 /* State commit only */
   1067 
   1068 static inline void
   1069 commit_blend(struct NineDevice9 *device)
   1070 {
   1071     struct nine_context *context = &device->context;
   1072 
   1073     cso_set_blend(context->cso, &context->pipe_data.blend);
   1074 }
   1075 
   1076 static inline void
   1077 commit_dsa(struct NineDevice9 *device)
   1078 {
   1079     struct nine_context *context = &device->context;
   1080 
   1081     cso_set_depth_stencil_alpha(context->cso, &context->pipe_data.dsa);
   1082 }
   1083 
   1084 static inline void
   1085 commit_scissor(struct NineDevice9 *device)
   1086 {
   1087     struct nine_context *context = &device->context;
   1088     struct pipe_context *pipe = context->pipe;
   1089 
   1090     pipe->set_scissor_states(pipe, 0, 1, &context->scissor);
   1091 }
   1092 
   1093 static inline void
   1094 commit_rasterizer(struct NineDevice9 *device)
   1095 {
   1096     struct nine_context *context = &device->context;
   1097 
   1098     cso_set_rasterizer(context->cso, &context->pipe_data.rast);
   1099 }
   1100 
   1101 static inline void
   1102 commit_index_buffer(struct NineDevice9 *device)
   1103 {
   1104     struct nine_context *context = &device->context;
   1105     struct pipe_context *pipe = context->pipe;
   1106     if (context->idxbuf.buffer)
   1107         pipe->set_index_buffer(pipe, &context->idxbuf);
   1108     else
   1109         pipe->set_index_buffer(pipe, NULL);
   1110 }
   1111 
   1112 static inline void
   1113 commit_vs_constants(struct NineDevice9 *device)
   1114 {
   1115     struct nine_context *context = &device->context;
   1116     struct pipe_context *pipe = context->pipe;
   1117 
   1118     if (unlikely(!context->programmable_vs))
   1119         pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &context->pipe_data.cb_vs_ff);
   1120     else {
   1121         if (context->swvp) {
   1122             pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &context->pipe_data.cb0_swvp);
   1123             pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 1, &context->pipe_data.cb1_swvp);
   1124             pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 2, &context->pipe_data.cb2_swvp);
   1125             pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 3, &context->pipe_data.cb3_swvp);
   1126         } else {
   1127             pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &context->pipe_data.cb_vs);
   1128         }
   1129     }
   1130 }
   1131 
   1132 static inline void
   1133 commit_ps_constants(struct NineDevice9 *device)
   1134 {
   1135     struct nine_context *context = &device->context;
   1136     struct pipe_context *pipe = context->pipe;
   1137 
   1138     if (unlikely(!context->ps))
   1139         pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &context->pipe_data.cb_ps_ff);
   1140     else
   1141         pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &context->pipe_data.cb_ps);
   1142 }
   1143 
   1144 static inline void
   1145 commit_vs(struct NineDevice9 *device)
   1146 {
   1147     struct nine_context *context = &device->context;
   1148 
   1149     context->pipe->bind_vs_state(context->pipe, context->cso_shader.vs);
   1150 }
   1151 
   1152 
   1153 static inline void
   1154 commit_ps(struct NineDevice9 *device)
   1155 {
   1156     struct nine_context *context = &device->context;
   1157 
   1158     context->pipe->bind_fs_state(context->pipe, context->cso_shader.ps);
   1159 }
   1160 /* State Update */
   1161 
   1162 #define NINE_STATE_SHADER_CHANGE_VS \
   1163    (NINE_STATE_VS |         \
   1164     NINE_STATE_TEXTURE |    \
   1165     NINE_STATE_FOG_SHADER | \
   1166     NINE_STATE_POINTSIZE_SHADER | \
   1167     NINE_STATE_SWVP)
   1168 
   1169 #define NINE_STATE_SHADER_CHANGE_PS \
   1170    (NINE_STATE_PS |         \
   1171     NINE_STATE_TEXTURE |    \
   1172     NINE_STATE_FOG_SHADER | \
   1173     NINE_STATE_PS1X_SHADER)
   1174 
   1175 #define NINE_STATE_FREQUENT \
   1176    (NINE_STATE_RASTERIZER | \
   1177     NINE_STATE_TEXTURE |    \
   1178     NINE_STATE_SAMPLER |    \
   1179     NINE_STATE_VS_CONST |   \
   1180     NINE_STATE_PS_CONST |   \
   1181     NINE_STATE_MULTISAMPLE)
   1182 
   1183 #define NINE_STATE_COMMON \
   1184    (NINE_STATE_FB |       \
   1185     NINE_STATE_BLEND |    \
   1186     NINE_STATE_DSA |      \
   1187     NINE_STATE_VIEWPORT | \
   1188     NINE_STATE_VDECL |    \
   1189     NINE_STATE_IDXBUF |   \
   1190     NINE_STATE_STREAMFREQ)
   1191 
   1192 #define NINE_STATE_RARE      \
   1193    (NINE_STATE_SCISSOR |     \
   1194     NINE_STATE_BLEND_COLOR | \
   1195     NINE_STATE_STENCIL_REF | \
   1196     NINE_STATE_SAMPLE_MASK)
   1197 
   1198 static void
   1199 nine_update_state(struct NineDevice9 *device)
   1200 {
   1201     struct nine_context *context = &device->context;
   1202     struct pipe_context *pipe = context->pipe;
   1203     uint32_t group;
   1204 
   1205     DBG("changed state groups: %x\n", context->changed.group);
   1206 
   1207     /* NOTE: We may want to use the cso cache for everything, or let
   1208      * NineDevice9.RestoreNonCSOState actually set the states, then we wouldn't
   1209      * have to care about state being clobbered here and could merge this back
   1210      * into update_textures. Except, we also need to re-validate textures that
   1211      * may be dirty anyway, even if no texture bindings changed.
   1212      */
   1213 
   1214     /* ff_update may change VS/PS dirty bits */
   1215     if (unlikely(!context->programmable_vs || !context->ps))
   1216         nine_ff_update(device);
   1217     group = context->changed.group;
   1218 
   1219     if (group & (NINE_STATE_SHADER_CHANGE_VS | NINE_STATE_SHADER_CHANGE_PS)) {
   1220         if (group & NINE_STATE_SHADER_CHANGE_VS)
   1221             group |= prepare_vs(device, (group & NINE_STATE_VS) != 0); /* may set NINE_STATE_RASTERIZER and NINE_STATE_SAMPLER*/
   1222         if (group & NINE_STATE_SHADER_CHANGE_PS)
   1223             group |= prepare_ps(device, (group & NINE_STATE_PS) != 0);
   1224     }
   1225 
   1226     if (group & (NINE_STATE_COMMON | NINE_STATE_VS)) {
   1227         if (group & NINE_STATE_FB)
   1228             update_framebuffer(device, FALSE);
   1229         if (group & NINE_STATE_BLEND)
   1230             prepare_blend(device);
   1231         if (group & NINE_STATE_DSA)
   1232             prepare_dsa(device);
   1233         if (group & NINE_STATE_VIEWPORT)
   1234             update_viewport(device);
   1235         if (group & (NINE_STATE_VDECL | NINE_STATE_VS | NINE_STATE_STREAMFREQ))
   1236             update_vertex_elements(device);
   1237         if (group & NINE_STATE_IDXBUF)
   1238             commit_index_buffer(device);
   1239     }
   1240 
   1241     if (likely(group & (NINE_STATE_FREQUENT | NINE_STATE_VS | NINE_STATE_PS | NINE_STATE_SWVP))) {
   1242         if (group & NINE_STATE_MULTISAMPLE)
   1243             group |= check_multisample(device);
   1244         if (group & NINE_STATE_RASTERIZER)
   1245             prepare_rasterizer(device);
   1246         if (group & (NINE_STATE_TEXTURE | NINE_STATE_SAMPLER))
   1247             update_textures_and_samplers(device);
   1248         if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS | NINE_STATE_SWVP)) && context->programmable_vs)
   1249             prepare_vs_constants_userbuf(device);
   1250         if ((group & (NINE_STATE_PS_CONST | NINE_STATE_PS)) && context->ps)
   1251             prepare_ps_constants_userbuf(device);
   1252     }
   1253 
   1254     if (context->changed.vtxbuf)
   1255         update_vertex_buffers(device);
   1256 
   1257     if (context->commit & NINE_STATE_COMMIT_BLEND)
   1258         commit_blend(device);
   1259     if (context->commit & NINE_STATE_COMMIT_DSA)
   1260         commit_dsa(device);
   1261     if (context->commit & NINE_STATE_COMMIT_RASTERIZER)
   1262         commit_rasterizer(device);
   1263     if (context->commit & NINE_STATE_COMMIT_CONST_VS)
   1264         commit_vs_constants(device);
   1265     if (context->commit & NINE_STATE_COMMIT_CONST_PS)
   1266         commit_ps_constants(device);
   1267     if (context->commit & NINE_STATE_COMMIT_VS)
   1268         commit_vs(device);
   1269     if (context->commit & NINE_STATE_COMMIT_PS)
   1270         commit_ps(device);
   1271 
   1272     context->commit = 0;
   1273 
   1274     if (unlikely(context->changed.ucp)) {
   1275         pipe->set_clip_state(pipe, &context->clip);
   1276         context->changed.ucp = FALSE;
   1277     }
   1278 
   1279     if (unlikely(group & NINE_STATE_RARE)) {
   1280         if (group & NINE_STATE_SCISSOR)
   1281             commit_scissor(device);
   1282         if (group & NINE_STATE_BLEND_COLOR) {
   1283             struct pipe_blend_color color;
   1284             d3dcolor_to_rgba(&color.color[0], context->rs[D3DRS_BLENDFACTOR]);
   1285             pipe->set_blend_color(pipe, &color);
   1286         }
   1287         if (group & NINE_STATE_SAMPLE_MASK) {
   1288             if (context->rt[0]->desc.MultiSampleType <= D3DMULTISAMPLE_NONMASKABLE) {
   1289                 pipe->set_sample_mask(pipe, ~0);
   1290             } else {
   1291                 pipe->set_sample_mask(pipe, context->rs[D3DRS_MULTISAMPLEMASK]);
   1292             }
   1293         }
   1294         if (group & NINE_STATE_STENCIL_REF) {
   1295             struct pipe_stencil_ref ref;
   1296             ref.ref_value[0] = context->rs[D3DRS_STENCILREF];
   1297             ref.ref_value[1] = ref.ref_value[0];
   1298             pipe->set_stencil_ref(pipe, &ref);
   1299         }
   1300     }
   1301 
   1302     context->changed.group &=
   1303         (NINE_STATE_FF | NINE_STATE_VS_CONST | NINE_STATE_PS_CONST);
   1304 
   1305     DBG("finished\n");
   1306 }
   1307 
   1308 #define RESZ_CODE 0x7fa05000
   1309 
   1310 static void
   1311 NineDevice9_ResolveZ( struct NineDevice9 *device )
   1312 {
   1313     struct nine_context *context = &device->context;
   1314     const struct util_format_description *desc;
   1315     struct NineSurface9 *source = context->ds;
   1316     struct pipe_resource *src, *dst;
   1317     struct pipe_blit_info blit;
   1318 
   1319     DBG("RESZ resolve\n");
   1320 
   1321     if (!source || !context->texture[0].enabled ||
   1322         context->texture[0].type != D3DRTYPE_TEXTURE)
   1323         return;
   1324 
   1325     src = source->base.resource;
   1326     dst = context->texture[0].resource;
   1327 
   1328     if (!src || !dst)
   1329         return;
   1330 
   1331     /* check dst is depth format. we know already for src */
   1332     desc = util_format_description(dst->format);
   1333     if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
   1334         return;
   1335 
   1336     memset(&blit, 0, sizeof(blit));
   1337     blit.src.resource = src;
   1338     blit.src.level = 0;
   1339     blit.src.format = src->format;
   1340     blit.src.box.z = 0;
   1341     blit.src.box.depth = 1;
   1342     blit.src.box.x = 0;
   1343     blit.src.box.y = 0;
   1344     blit.src.box.width = src->width0;
   1345     blit.src.box.height = src->height0;
   1346 
   1347     blit.dst.resource = dst;
   1348     blit.dst.level = 0;
   1349     blit.dst.format = dst->format;
   1350     blit.dst.box.z = 0;
   1351     blit.dst.box.depth = 1;
   1352     blit.dst.box.x = 0;
   1353     blit.dst.box.y = 0;
   1354     blit.dst.box.width = dst->width0;
   1355     blit.dst.box.height = dst->height0;
   1356 
   1357     blit.mask = PIPE_MASK_ZS;
   1358     blit.filter = PIPE_TEX_FILTER_NEAREST;
   1359     blit.scissor_enable = FALSE;
   1360 
   1361     context->pipe->blit(context->pipe, &blit);
   1362 }
   1363 
   1364 #define ALPHA_TO_COVERAGE_ENABLE   MAKEFOURCC('A', '2', 'M', '1')
   1365 #define ALPHA_TO_COVERAGE_DISABLE  MAKEFOURCC('A', '2', 'M', '0')
   1366 
   1367 /* Nine_context functions.
   1368  * Serialized through CSMT macros.
   1369  */
   1370 
   1371 static void
   1372 nine_context_set_texture_apply(struct NineDevice9 *device,
   1373                                DWORD stage,
   1374                                BOOL enabled,
   1375                                BOOL shadow,
   1376                                DWORD lod,
   1377                                D3DRESOURCETYPE type,
   1378                                uint8_t pstype,
   1379                                struct pipe_resource *res,
   1380                                struct pipe_sampler_view *view0,
   1381                                struct pipe_sampler_view *view1);
   1382 static void
   1383 nine_context_set_stream_source_apply(struct NineDevice9 *device,
   1384                                     UINT StreamNumber,
   1385                                     struct pipe_resource *res,
   1386                                     UINT OffsetInBytes,
   1387                                     UINT Stride);
   1388 
   1389 static void
   1390 nine_context_set_indices_apply(struct NineDevice9 *device,
   1391                                struct pipe_resource *res,
   1392                                UINT IndexSize,
   1393                                UINT OffsetInBytes);
   1394 
   1395 static void
   1396 nine_context_set_pixel_shader_constant_i_transformed(struct NineDevice9 *device,
   1397                                                      UINT StartRegister,
   1398                                                      const int *pConstantData,
   1399                                                      unsigned pConstantData_size,
   1400                                                      UINT Vector4iCount);
   1401 
   1402 CSMT_ITEM_NO_WAIT(nine_context_set_render_state,
   1403                   ARG_VAL(D3DRENDERSTATETYPE, State),
   1404                   ARG_VAL(DWORD, Value))
   1405 {
   1406     struct nine_context *context = &device->context;
   1407 
   1408     /* Amd hacks (equivalent to GL extensions) */
   1409     if (unlikely(State == D3DRS_POINTSIZE)) {
   1410         if (Value == RESZ_CODE) {
   1411             NineDevice9_ResolveZ(device);
   1412             return;
   1413         }
   1414 
   1415         if (Value == ALPHA_TO_COVERAGE_ENABLE ||
   1416             Value == ALPHA_TO_COVERAGE_DISABLE) {
   1417             context->rs[NINED3DRS_ALPHACOVERAGE] = (Value == ALPHA_TO_COVERAGE_ENABLE);
   1418             context->changed.group |= NINE_STATE_BLEND;
   1419             return;
   1420         }
   1421     }
   1422 
   1423     /* NV hack */
   1424     if (unlikely(State == D3DRS_ADAPTIVETESS_Y)) {
   1425         if (Value == D3DFMT_ATOC || (Value == D3DFMT_UNKNOWN && context->rs[NINED3DRS_ALPHACOVERAGE])) {
   1426             context->rs[NINED3DRS_ALPHACOVERAGE] = (Value == D3DFMT_ATOC) ? 3 : 0;
   1427             context->rs[NINED3DRS_ALPHACOVERAGE] &= context->rs[D3DRS_ALPHATESTENABLE] ? 3 : 2;
   1428             context->changed.group |= NINE_STATE_BLEND;
   1429             return;
   1430         }
   1431     }
   1432     if (unlikely(State == D3DRS_ALPHATESTENABLE && (context->rs[NINED3DRS_ALPHACOVERAGE] & 2))) {
   1433         DWORD alphacoverage_prev = context->rs[NINED3DRS_ALPHACOVERAGE];
   1434         context->rs[NINED3DRS_ALPHACOVERAGE] = (Value ? 3 : 2);
   1435         if (context->rs[NINED3DRS_ALPHACOVERAGE] != alphacoverage_prev)
   1436             context->changed.group |= NINE_STATE_BLEND;
   1437     }
   1438 
   1439     context->rs[State] = nine_fix_render_state_value(State, Value);
   1440     context->changed.group |= nine_render_state_group[State];
   1441 }
   1442 
   1443 CSMT_ITEM_NO_WAIT(nine_context_set_texture_apply,
   1444                   ARG_VAL(DWORD, stage),
   1445                   ARG_VAL(BOOL, enabled),
   1446                   ARG_VAL(BOOL, shadow),
   1447                   ARG_VAL(DWORD, lod),
   1448                   ARG_VAL(D3DRESOURCETYPE, type),
   1449                   ARG_VAL(uint8_t, pstype),
   1450                   ARG_BIND_RES(struct pipe_resource, res),
   1451                   ARG_BIND_VIEW(struct pipe_sampler_view, view0),
   1452                   ARG_BIND_VIEW(struct pipe_sampler_view, view1))
   1453 {
   1454     struct nine_context *context = &device->context;
   1455 
   1456     context->texture[stage].enabled = enabled;
   1457     context->samplers_shadow &= ~(1 << stage);
   1458     context->samplers_shadow |= shadow << stage;
   1459     context->texture[stage].shadow = shadow;
   1460     context->texture[stage].lod = lod;
   1461     context->texture[stage].type = type;
   1462     context->texture[stage].pstype = pstype;
   1463     pipe_resource_reference(&context->texture[stage].resource, res);
   1464     pipe_sampler_view_reference(&context->texture[stage].view[0], view0);
   1465     pipe_sampler_view_reference(&context->texture[stage].view[1], view1);
   1466 
   1467     context->changed.group |= NINE_STATE_TEXTURE;
   1468 }
   1469 
   1470 void
   1471 nine_context_set_texture(struct NineDevice9 *device,
   1472                          DWORD Stage,
   1473                          struct NineBaseTexture9 *tex)
   1474 {
   1475     BOOL enabled = FALSE;
   1476     BOOL shadow = FALSE;
   1477     DWORD lod = 0;
   1478     D3DRESOURCETYPE type = D3DRTYPE_TEXTURE;
   1479     uint8_t pstype = 0;
   1480     struct pipe_resource *res = NULL;
   1481     struct pipe_sampler_view *view0 = NULL, *view1 = NULL;
   1482 
   1483     /* For managed pool, the data can be initially incomplete.
   1484      * In that case, the texture is rebound later
   1485      * (in NineBaseTexture9_Validate/NineBaseTexture9_UploadSelf). */
   1486     if (tex && tex->base.resource) {
   1487         enabled = TRUE;
   1488         shadow = tex->shadow;
   1489         lod = tex->managed.lod;
   1490         type = tex->base.type;
   1491         pstype = tex->pstype;
   1492         res = tex->base.resource;
   1493         view0 = NineBaseTexture9_GetSamplerView(tex, 0);
   1494         view1 = NineBaseTexture9_GetSamplerView(tex, 1);
   1495     }
   1496 
   1497     nine_context_set_texture_apply(device, Stage, enabled,
   1498                                    shadow, lod, type, pstype,
   1499                                    res, view0, view1);
   1500 }
   1501 
   1502 CSMT_ITEM_NO_WAIT(nine_context_set_sampler_state,
   1503                   ARG_VAL(DWORD, Sampler),
   1504                   ARG_VAL(D3DSAMPLERSTATETYPE, Type),
   1505                   ARG_VAL(DWORD, Value))
   1506 {
   1507     struct nine_context *context = &device->context;
   1508 
   1509     if (unlikely(!nine_check_sampler_state_value(Type, Value)))
   1510         return;
   1511 
   1512     context->samp[Sampler][Type] = Value;
   1513     context->changed.group |= NINE_STATE_SAMPLER;
   1514     context->changed.sampler[Sampler] |= 1 << Type;
   1515 }
   1516 
   1517 CSMT_ITEM_NO_WAIT(nine_context_set_stream_source_apply,
   1518                   ARG_VAL(UINT, StreamNumber),
   1519                   ARG_BIND_RES(struct pipe_resource, res),
   1520                   ARG_VAL(UINT, OffsetInBytes),
   1521                   ARG_VAL(UINT, Stride))
   1522 {
   1523     struct nine_context *context = &device->context;
   1524     const unsigned i = StreamNumber;
   1525 
   1526     context->vtxbuf[i].stride = Stride;
   1527     context->vtxbuf[i].buffer_offset = OffsetInBytes;
   1528     pipe_resource_reference(&context->vtxbuf[i].buffer, res);
   1529 
   1530     context->changed.vtxbuf |= 1 << StreamNumber;
   1531 }
   1532 
   1533 void
   1534 nine_context_set_stream_source(struct NineDevice9 *device,
   1535                                UINT StreamNumber,
   1536                                struct NineVertexBuffer9 *pVBuf9,
   1537                                UINT OffsetInBytes,
   1538                                UINT Stride)
   1539 {
   1540     struct pipe_resource *res = NULL;
   1541     unsigned offset = 0;
   1542 
   1543     if (pVBuf9)
   1544         res = NineVertexBuffer9_GetResource(pVBuf9, &offset);
   1545     /* in the future when there is internal offset, add it
   1546      * to OffsetInBytes */
   1547 
   1548     nine_context_set_stream_source_apply(device, StreamNumber,
   1549                                          res, offset + OffsetInBytes,
   1550                                          Stride);
   1551 }
   1552 
   1553 CSMT_ITEM_NO_WAIT(nine_context_set_stream_source_freq,
   1554                   ARG_VAL(UINT, StreamNumber),
   1555                   ARG_VAL(UINT, Setting))
   1556 {
   1557     struct nine_context *context = &device->context;
   1558 
   1559     context->stream_freq[StreamNumber] = Setting;
   1560 
   1561     if (Setting & D3DSTREAMSOURCE_INSTANCEDATA)
   1562         context->stream_instancedata_mask |= 1 << StreamNumber;
   1563     else
   1564         context->stream_instancedata_mask &= ~(1 << StreamNumber);
   1565 
   1566     if (StreamNumber != 0)
   1567         context->changed.group |= NINE_STATE_STREAMFREQ;
   1568 }
   1569 
   1570 CSMT_ITEM_NO_WAIT(nine_context_set_indices_apply,
   1571                   ARG_BIND_RES(struct pipe_resource, res),
   1572                   ARG_VAL(UINT, IndexSize),
   1573                   ARG_VAL(UINT, OffsetInBytes))
   1574 {
   1575     struct nine_context *context = &device->context;
   1576 
   1577     context->idxbuf.index_size = IndexSize;
   1578     context->idxbuf.offset = OffsetInBytes;
   1579     pipe_resource_reference(&context->idxbuf.buffer, res);
   1580     context->idxbuf.user_buffer = NULL;
   1581 
   1582     context->changed.group |= NINE_STATE_IDXBUF;
   1583 }
   1584 
   1585 void
   1586 nine_context_set_indices(struct NineDevice9 *device,
   1587                          struct NineIndexBuffer9 *idxbuf)
   1588 {
   1589     const struct pipe_index_buffer *pipe_idxbuf;
   1590     struct pipe_resource *res = NULL;
   1591     UINT IndexSize = 0;
   1592     UINT OffsetInBytes = 0;
   1593 
   1594     if (idxbuf) {
   1595         pipe_idxbuf = NineIndexBuffer9_GetBuffer(idxbuf);
   1596         IndexSize = pipe_idxbuf->index_size;
   1597         res = pipe_idxbuf->buffer;
   1598         OffsetInBytes = pipe_idxbuf->offset;
   1599     }
   1600 
   1601     nine_context_set_indices_apply(device, res, IndexSize, OffsetInBytes);
   1602 }
   1603 
   1604 CSMT_ITEM_NO_WAIT(nine_context_set_vertex_declaration,
   1605                   ARG_BIND_REF(struct NineVertexDeclaration9, vdecl))
   1606 {
   1607     struct nine_context *context = &device->context;
   1608     BOOL was_programmable_vs = context->programmable_vs;
   1609 
   1610     nine_bind(&context->vdecl, vdecl);
   1611 
   1612     context->programmable_vs = context->vs && !(context->vdecl && context->vdecl->position_t);
   1613     if (was_programmable_vs != context->programmable_vs) {
   1614         context->commit |= NINE_STATE_COMMIT_CONST_VS;
   1615         context->changed.group |= NINE_STATE_VS;
   1616     }
   1617 
   1618     context->changed.group |= NINE_STATE_VDECL;
   1619 }
   1620 
   1621 CSMT_ITEM_NO_WAIT(nine_context_set_vertex_shader,
   1622                   ARG_BIND_REF(struct NineVertexShader9, pShader))
   1623 {
   1624     struct nine_context *context = &device->context;
   1625     BOOL was_programmable_vs = context->programmable_vs;
   1626 
   1627     nine_bind(&context->vs, pShader);
   1628 
   1629     context->programmable_vs = context->vs && !(context->vdecl && context->vdecl->position_t);
   1630 
   1631     /* ff -> non-ff: commit back non-ff constants */
   1632     if (!was_programmable_vs && context->programmable_vs)
   1633         context->commit |= NINE_STATE_COMMIT_CONST_VS;
   1634 
   1635     context->changed.group |= NINE_STATE_VS;
   1636 }
   1637 
   1638 CSMT_ITEM_NO_WAIT(nine_context_set_vertex_shader_constant_f,
   1639                   ARG_VAL(UINT, StartRegister),
   1640                   ARG_MEM(float, pConstantData),
   1641                   ARG_MEM_SIZE(unsigned, pConstantData_size),
   1642                   ARG_VAL(UINT, Vector4fCount))
   1643 {
   1644     struct nine_context *context = &device->context;
   1645     float *vs_const_f = device->may_swvp ? context->vs_const_f_swvp : context->vs_const_f;
   1646 
   1647     memcpy(&vs_const_f[StartRegister * 4],
   1648            pConstantData,
   1649            pConstantData_size);
   1650 
   1651     if (device->may_swvp) {
   1652         Vector4fCount = MIN2(StartRegister + Vector4fCount, NINE_MAX_CONST_F) - StartRegister;
   1653         if (StartRegister < NINE_MAX_CONST_F)
   1654             memcpy(&context->vs_const_f[StartRegister * 4],
   1655                    pConstantData,
   1656                    Vector4fCount * 4 * sizeof(context->vs_const_f[0]));
   1657     }
   1658 
   1659     context->changed.vs_const_f = TRUE;
   1660     context->changed.group |= NINE_STATE_VS_CONST;
   1661 }
   1662 
   1663 CSMT_ITEM_NO_WAIT(nine_context_set_vertex_shader_constant_i,
   1664                   ARG_VAL(UINT, StartRegister),
   1665                   ARG_MEM(int, pConstantData),
   1666                   ARG_MEM_SIZE(unsigned, pConstantData_size),
   1667                   ARG_VAL(UINT, Vector4iCount))
   1668 {
   1669     struct nine_context *context = &device->context;
   1670     int i;
   1671 
   1672     if (device->driver_caps.vs_integer) {
   1673         memcpy(&context->vs_const_i[4 * StartRegister],
   1674                pConstantData,
   1675                pConstantData_size);
   1676     } else {
   1677         for (i = 0; i < Vector4iCount; i++) {
   1678             context->vs_const_i[4 * (StartRegister + i)] = fui((float)(pConstantData[4 * i]));
   1679             context->vs_const_i[4 * (StartRegister + i) + 1] = fui((float)(pConstantData[4 * i + 1]));
   1680             context->vs_const_i[4 * (StartRegister + i) + 2] = fui((float)(pConstantData[4 * i + 2]));
   1681             context->vs_const_i[4 * (StartRegister + i) + 3] = fui((float)(pConstantData[4 * i + 3]));
   1682         }
   1683     }
   1684 
   1685     context->changed.vs_const_i = TRUE;
   1686     context->changed.group |= NINE_STATE_VS_CONST;
   1687 }
   1688 
   1689 CSMT_ITEM_NO_WAIT(nine_context_set_vertex_shader_constant_b,
   1690                   ARG_VAL(UINT, StartRegister),
   1691                   ARG_MEM(BOOL, pConstantData),
   1692                   ARG_MEM_SIZE(unsigned, pConstantData_size),
   1693                   ARG_VAL(UINT, BoolCount))
   1694 {
   1695     struct nine_context *context = &device->context;
   1696     int i;
   1697     uint32_t bool_true = device->driver_caps.vs_integer ? 0xFFFFFFFF : fui(1.0f);
   1698 
   1699     (void) pConstantData_size;
   1700 
   1701     for (i = 0; i < BoolCount; i++)
   1702         context->vs_const_b[StartRegister + i] = pConstantData[i] ? bool_true : 0;
   1703 
   1704     context->changed.vs_const_b = TRUE;
   1705     context->changed.group |= NINE_STATE_VS_CONST;
   1706 }
   1707 
   1708 CSMT_ITEM_NO_WAIT(nine_context_set_pixel_shader,
   1709                   ARG_BIND_REF(struct NinePixelShader9, ps))
   1710 {
   1711     struct nine_context *context = &device->context;
   1712     unsigned old_mask = context->ps ? context->ps->rt_mask : 1;
   1713     unsigned mask;
   1714 
   1715     /* ff -> non-ff: commit back non-ff constants */
   1716     if (!context->ps && ps)
   1717         context->commit |= NINE_STATE_COMMIT_CONST_PS;
   1718 
   1719     nine_bind(&context->ps, ps);
   1720 
   1721     context->changed.group |= NINE_STATE_PS;
   1722 
   1723     mask = context->ps ? context->ps->rt_mask : 1;
   1724     /* We need to update cbufs if the pixel shader would
   1725      * write to different render targets */
   1726     if (mask != old_mask)
   1727         context->changed.group |= NINE_STATE_FB;
   1728 }
   1729 
   1730 CSMT_ITEM_NO_WAIT(nine_context_set_pixel_shader_constant_f,
   1731                   ARG_VAL(UINT, StartRegister),
   1732                   ARG_MEM(float, pConstantData),
   1733                   ARG_MEM_SIZE(unsigned, pConstantData_size),
   1734                   ARG_VAL(UINT, Vector4fCount))
   1735 {
   1736     struct nine_context *context = &device->context;
   1737 
   1738     memcpy(&context->ps_const_f[StartRegister * 4],
   1739            pConstantData,
   1740            pConstantData_size);
   1741 
   1742     context->changed.ps_const_f = TRUE;
   1743     context->changed.group |= NINE_STATE_PS_CONST;
   1744 }
   1745 
   1746 /* For stateblocks */
   1747 CSMT_ITEM_NO_WAIT(nine_context_set_pixel_shader_constant_i_transformed,
   1748                   ARG_VAL(UINT, StartRegister),
   1749                   ARG_MEM(int, pConstantData),
   1750                   ARG_MEM_SIZE(unsigned, pConstantData_size),
   1751                   ARG_VAL(UINT, Vector4iCount))
   1752 {
   1753     struct nine_context *context = &device->context;
   1754 
   1755     memcpy(&context->ps_const_i[StartRegister][0],
   1756            pConstantData,
   1757            Vector4iCount * sizeof(context->ps_const_i[0]));
   1758 
   1759     context->changed.ps_const_i = TRUE;
   1760     context->changed.group |= NINE_STATE_PS_CONST;
   1761 }
   1762 
   1763 CSMT_ITEM_NO_WAIT(nine_context_set_pixel_shader_constant_i,
   1764                   ARG_VAL(UINT, StartRegister),
   1765                   ARG_MEM(int, pConstantData),
   1766                   ARG_MEM_SIZE(unsigned, pConstantData_size),
   1767                   ARG_VAL(UINT, Vector4iCount))
   1768 {
   1769     struct nine_context *context = &device->context;
   1770     int i;
   1771 
   1772     if (device->driver_caps.ps_integer) {
   1773         memcpy(&context->ps_const_i[StartRegister][0],
   1774                pConstantData,
   1775                pConstantData_size);
   1776     } else {
   1777         for (i = 0; i < Vector4iCount; i++) {
   1778             context->ps_const_i[StartRegister+i][0] = fui((float)(pConstantData[4*i]));
   1779             context->ps_const_i[StartRegister+i][1] = fui((float)(pConstantData[4*i+1]));
   1780             context->ps_const_i[StartRegister+i][2] = fui((float)(pConstantData[4*i+2]));
   1781             context->ps_const_i[StartRegister+i][3] = fui((float)(pConstantData[4*i+3]));
   1782         }
   1783     }
   1784     context->changed.ps_const_i = TRUE;
   1785     context->changed.group |= NINE_STATE_PS_CONST;
   1786 }
   1787 
   1788 CSMT_ITEM_NO_WAIT(nine_context_set_pixel_shader_constant_b,
   1789                   ARG_VAL(UINT, StartRegister),
   1790                   ARG_MEM(BOOL, pConstantData),
   1791                   ARG_MEM_SIZE(unsigned, pConstantData_size),
   1792                   ARG_VAL(UINT, BoolCount))
   1793 {
   1794     struct nine_context *context = &device->context;
   1795     int i;
   1796     uint32_t bool_true = device->driver_caps.ps_integer ? 0xFFFFFFFF : fui(1.0f);
   1797 
   1798     (void) pConstantData_size;
   1799 
   1800     for (i = 0; i < BoolCount; i++)
   1801         context->ps_const_b[StartRegister + i] = pConstantData[i] ? bool_true : 0;
   1802 
   1803     context->changed.ps_const_b = TRUE;
   1804     context->changed.group |= NINE_STATE_PS_CONST;
   1805 }
   1806 
   1807 /* XXX: use resource, as resource might change */
   1808 CSMT_ITEM_NO_WAIT(nine_context_set_render_target,
   1809                   ARG_VAL(DWORD, RenderTargetIndex),
   1810                   ARG_BIND_REF(struct NineSurface9, rt))
   1811 {
   1812     struct nine_context *context = &device->context;
   1813     const unsigned i = RenderTargetIndex;
   1814 
   1815     if (i == 0) {
   1816         context->viewport.X = 0;
   1817         context->viewport.Y = 0;
   1818         context->viewport.Width = rt->desc.Width;
   1819         context->viewport.Height = rt->desc.Height;
   1820         context->viewport.MinZ = 0.0f;
   1821         context->viewport.MaxZ = 1.0f;
   1822 
   1823         context->scissor.minx = 0;
   1824         context->scissor.miny = 0;
   1825         context->scissor.maxx = rt->desc.Width;
   1826         context->scissor.maxy = rt->desc.Height;
   1827 
   1828         context->changed.group |= NINE_STATE_VIEWPORT | NINE_STATE_SCISSOR | NINE_STATE_MULTISAMPLE;
   1829 
   1830         if (context->rt[0] &&
   1831             (context->rt[0]->desc.MultiSampleType <= D3DMULTISAMPLE_NONMASKABLE) !=
   1832             (rt->desc.MultiSampleType <= D3DMULTISAMPLE_NONMASKABLE))
   1833             context->changed.group |= NINE_STATE_SAMPLE_MASK;
   1834     }
   1835 
   1836     if (context->rt[i] != rt) {
   1837        nine_bind(&context->rt[i], rt);
   1838        context->changed.group |= NINE_STATE_FB;
   1839     }
   1840 }
   1841 
   1842 /* XXX: use resource instead of ds, as resource might change */
   1843 CSMT_ITEM_NO_WAIT(nine_context_set_depth_stencil,
   1844                   ARG_BIND_REF(struct NineSurface9, ds))
   1845 {
   1846     struct nine_context *context = &device->context;
   1847 
   1848     nine_bind(&context->ds, ds);
   1849     context->changed.group |= NINE_STATE_FB;
   1850 }
   1851 
   1852 CSMT_ITEM_NO_WAIT(nine_context_set_viewport,
   1853                   ARG_COPY_REF(D3DVIEWPORT9, viewport))
   1854 {
   1855     struct nine_context *context = &device->context;
   1856 
   1857     context->viewport = *viewport;
   1858     context->changed.group |= NINE_STATE_VIEWPORT;
   1859 }
   1860 
   1861 CSMT_ITEM_NO_WAIT(nine_context_set_scissor,
   1862                   ARG_COPY_REF(struct pipe_scissor_state, scissor))
   1863 {
   1864     struct nine_context *context = &device->context;
   1865 
   1866     context->scissor = *scissor;
   1867     context->changed.group |= NINE_STATE_SCISSOR;
   1868 }
   1869 
   1870 CSMT_ITEM_NO_WAIT(nine_context_set_transform,
   1871                   ARG_VAL(D3DTRANSFORMSTATETYPE, State),
   1872                   ARG_COPY_REF(D3DMATRIX, pMatrix))
   1873 {
   1874     struct nine_context *context = &device->context;
   1875     D3DMATRIX *M = nine_state_access_transform(&context->ff, State, TRUE);
   1876 
   1877     *M = *pMatrix;
   1878     context->ff.changed.transform[State / 32] |= 1 << (State % 32);
   1879     context->changed.group |= NINE_STATE_FF;
   1880 }
   1881 
   1882 CSMT_ITEM_NO_WAIT(nine_context_set_material,
   1883                   ARG_COPY_REF(D3DMATERIAL9, pMaterial))
   1884 {
   1885     struct nine_context *context = &device->context;
   1886 
   1887     context->ff.material = *pMaterial;
   1888     context->changed.group |= NINE_STATE_FF_MATERIAL;
   1889 }
   1890 
   1891 CSMT_ITEM_NO_WAIT(nine_context_set_light,
   1892                   ARG_VAL(DWORD, Index),
   1893                   ARG_COPY_REF(D3DLIGHT9, pLight))
   1894 {
   1895     struct nine_context *context = &device->context;
   1896 
   1897     (void)nine_state_set_light(&context->ff, Index, pLight);
   1898     context->changed.group |= NINE_STATE_FF_LIGHTING;
   1899 }
   1900 
   1901 
   1902 /* For stateblocks */
   1903 static void
   1904 nine_context_light_enable_stateblock(struct NineDevice9 *device,
   1905                                      const uint16_t active_light[NINE_MAX_LIGHTS_ACTIVE], /* TODO: use pointer that convey size for csmt */
   1906                                      unsigned int num_lights_active)
   1907 {
   1908     struct nine_context *context = &device->context;
   1909 
   1910     /* TODO: Use CSMT_* to avoid calling nine_csmt_process */
   1911     nine_csmt_process(device);
   1912     memcpy(context->ff.active_light, active_light, NINE_MAX_LIGHTS_ACTIVE * sizeof(context->ff.active_light[0]));
   1913     context->ff.num_lights_active = num_lights_active;
   1914     context->changed.group |= NINE_STATE_FF_LIGHTING;
   1915 }
   1916 
   1917 CSMT_ITEM_NO_WAIT(nine_context_light_enable,
   1918                   ARG_VAL(DWORD, Index),
   1919                   ARG_VAL(BOOL, Enable))
   1920 {
   1921     struct nine_context *context = &device->context;
   1922 
   1923     nine_state_light_enable(&context->ff, &context->changed.group, Index, Enable);
   1924 }
   1925 
   1926 CSMT_ITEM_NO_WAIT(nine_context_set_texture_stage_state,
   1927                   ARG_VAL(DWORD, Stage),
   1928                   ARG_VAL(D3DTEXTURESTAGESTATETYPE, Type),
   1929                   ARG_VAL(DWORD, Value))
   1930 {
   1931     struct nine_context *context = &device->context;
   1932     int bumpmap_index = -1;
   1933 
   1934     context->ff.tex_stage[Stage][Type] = Value;
   1935     switch (Type) {
   1936     case D3DTSS_BUMPENVMAT00:
   1937         bumpmap_index = 4 * Stage;
   1938         break;
   1939     case D3DTSS_BUMPENVMAT01:
   1940         bumpmap_index = 4 * Stage + 1;
   1941         break;
   1942     case D3DTSS_BUMPENVMAT10:
   1943         bumpmap_index = 4 * Stage + 2;
   1944         break;
   1945     case D3DTSS_BUMPENVMAT11:
   1946         bumpmap_index = 4 * Stage + 3;
   1947         break;
   1948     case D3DTSS_BUMPENVLSCALE:
   1949         bumpmap_index = 4 * 8 + 2 * Stage;
   1950         break;
   1951     case D3DTSS_BUMPENVLOFFSET:
   1952         bumpmap_index = 4 * 8 + 2 * Stage + 1;
   1953         break;
   1954     case D3DTSS_TEXTURETRANSFORMFLAGS:
   1955         context->changed.group |= NINE_STATE_PS1X_SHADER;
   1956         break;
   1957     default:
   1958         break;
   1959     }
   1960 
   1961     if (bumpmap_index >= 0) {
   1962         context->bumpmap_vars[bumpmap_index] = Value;
   1963         context->changed.group |= NINE_STATE_PS_CONST;
   1964     }
   1965 
   1966     context->changed.group |= NINE_STATE_FF_PSSTAGES;
   1967     context->ff.changed.tex_stage[Stage][Type / 32] |= 1 << (Type % 32);
   1968 }
   1969 
   1970 CSMT_ITEM_NO_WAIT(nine_context_set_clip_plane,
   1971                   ARG_VAL(DWORD, Index),
   1972                   ARG_COPY_REF(struct nine_clipplane, pPlane))
   1973 {
   1974     struct nine_context *context = &device->context;
   1975 
   1976     memcpy(&context->clip.ucp[Index][0], pPlane, sizeof(context->clip.ucp[0]));
   1977     context->changed.ucp = TRUE;
   1978 }
   1979 
   1980 CSMT_ITEM_NO_WAIT(nine_context_set_swvp,
   1981                   ARG_VAL(boolean, swvp))
   1982 {
   1983     struct nine_context *context = &device->context;
   1984 
   1985     context->swvp = swvp;
   1986     context->changed.group |= NINE_STATE_SWVP;
   1987 }
   1988 
   1989 #if 0
   1990 
/* Disabled variant of nine_context_apply_stateblock (the surrounding
 * "#if 0" compiles it out).
 *
 * Applies every piece of state flagged as changed in src to the device
 * context. Most state is written straight into device->context; textures,
 * the index buffer and the vertex declaration go through the
 * nine_context_set_* calls instead.
 *
 * device: device whose context is updated.
 * src:    recorded state; its changed.* masks select what is applied.
 */
void
nine_context_apply_stateblock(struct NineDevice9 *device,
                              const struct nine_state *src)
{
    struct nine_context *context = &device->context;
    int i;

    context->changed.group |= src->changed.group;

    /* Render states: walk the set bits of each 32-bit word of the mask. */
    for (i = 0; i < ARRAY_SIZE(src->changed.rs); ++i) {
        uint32_t m = src->changed.rs[i];
        while (m) {
            const int r = ffs(m) - 1;
            m &= ~(1 << r);
            context->rs[i * 32 + r] = nine_fix_render_state_value(i * 32 + r, src->rs_advertised[i * 32 + r]);
        }
    }

    /* Textures */
    if (src->changed.texture) {
        uint32_t m = src->changed.texture;
        unsigned s;

        for (s = 0; m; ++s, m >>= 1) {
            struct NineBaseTexture9 *tex = src->texture[s];
            if (!(m & 1))
                continue;
            nine_context_set_texture(device, s, tex);
        }
    }

    /* Sampler state */
    if (src->changed.group & NINE_STATE_SAMPLER) {
        unsigned s;

        for (s = 0; s < NINE_MAX_SAMPLERS; ++s) {
            uint32_t m = src->changed.sampler[s];
            while (m) {
                const int i = ffs(m) - 1;
                m &= ~(1 << i);
                /* Values that fail validation are skipped, but the dirty
                 * bit is still propagated below. */
                if (nine_check_sampler_state_value(i, src->samp_advertised[s][i]))
                    context->samp[s][i] = src->samp_advertised[s][i];
            }
            context->changed.sampler[s] |= src->changed.sampler[s];
        }
    }

    /* Vertex buffers */
    if (src->changed.vtxbuf | src->changed.stream_freq) {
        uint32_t m = src->changed.vtxbuf | src->changed.stream_freq;
        for (i = 0; m; ++i, m >>= 1) {
            if (src->changed.vtxbuf & (1 << i)) {
                /* NOTE(review): because of this guard a NULL stream in src
                 * is never applied, and the "? :" below always takes its
                 * first branch. */
                if (src->stream[i]) {
                    unsigned offset = 0;
                    pipe_resource_reference(&context->vtxbuf[i].buffer,
                        src->stream[i] ? NineVertexBuffer9_GetResource(src->stream[i], &offset) : NULL);
                    context->vtxbuf[i].buffer_offset = src->vtxbuf[i].buffer_offset + offset;
                    context->vtxbuf[i].stride = src->vtxbuf[i].stride;
                }
            }
            if (src->changed.stream_freq & (1 << i)) {
                context->stream_freq[i] = src->stream_freq[i];
                if (src->stream_freq[i] & D3DSTREAMSOURCE_INSTANCEDATA)
                    context->stream_instancedata_mask |= 1 << i;
                else
                    context->stream_instancedata_mask &= ~(1 << i);
            }
        }
        context->changed.vtxbuf |= src->changed.vtxbuf;
    }

    /* Index buffer */
    if (src->changed.group & NINE_STATE_IDXBUF)
        nine_context_set_indices(device, src->idxbuf);

    /* Vertex declaration */
    if ((src->changed.group & NINE_STATE_VDECL) && src->vdecl)
        nine_context_set_vertex_declaration(device, src->vdecl);

    /* Vertex shader */
    if (src->changed.group & NINE_STATE_VS)
        nine_bind(&context->vs, src->vs);

    context->programmable_vs = context->vs && !(context->vdecl && context->vdecl->position_t);

    /* Pixel shader */
    if (src->changed.group & NINE_STATE_PS)
        nine_bind(&context->ps, src->ps);

    /* Vertex constants */
    if (src->changed.group & NINE_STATE_VS_CONST) {
        struct nine_range *r;
        if (device->may_swvp) {
            for (r = src->changed.vs_const_f; r; r = r->next) {
                int bgn = r->bgn;
                int end = r->end;
                memcpy(&context->vs_const_f_swvp[bgn * 4],
                       &src->vs_const_f[bgn * 4],
                       (end - bgn) * 4 * sizeof(float));
                /* Mirror the part below max_vs_const_f into the regular array. */
                if (bgn < device->max_vs_const_f) {
                    end = MIN2(end, device->max_vs_const_f);
                    memcpy(&context->vs_const_f[bgn * 4],
                           &src->vs_const_f[bgn * 4],
                           (end - bgn) * 4 * sizeof(float));
                }
            }
        } else {
            for (r = src->changed.vs_const_f; r; r = r->next) {
                memcpy(&context->vs_const_f[r->bgn * 4],
                       &src->vs_const_f[r->bgn * 4],
                       (r->end - r->bgn) * 4 * sizeof(float));
            }
        }
        for (r = src->changed.vs_const_i; r; r = r->next) {
            memcpy(&context->vs_const_i[r->bgn * 4],
                   &src->vs_const_i[r->bgn * 4],
                   (r->end - r->bgn) * 4 * sizeof(int));
        }
        for (r = src->changed.vs_const_b; r; r = r->next) {
            memcpy(&context->vs_const_b[r->bgn],
                   &src->vs_const_b[r->bgn],
                   (r->end - r->bgn) * sizeof(int));
        }
        /* NOTE(review): these are plain assignments, so a dirty flag already
         * set in the context is cleared when src has no matching range. */
        context->changed.vs_const_f = !!src->changed.vs_const_f;
        context->changed.vs_const_i = !!src->changed.vs_const_i;
        context->changed.vs_const_b = !!src->changed.vs_const_b;
    }

    /* Pixel constants */
    if (src->changed.group & NINE_STATE_PS_CONST) {
        struct nine_range *r;
        for (r = src->changed.ps_const_f; r; r = r->next) {
            memcpy(&context->ps_const_f[r->bgn * 4],
                   &src->ps_const_f[r->bgn * 4],
                   (r->end - r->bgn) * 4 * sizeof(float));
        }
        if (src->changed.ps_const_i) {
            uint16_t m = src->changed.ps_const_i;
            /* Start at the lowest set bit, then scan upwards. */
            for (i = ffs(m) - 1, m >>= i; m; ++i, m >>= 1)
                if (m & 1)
                    memcpy(context->ps_const_i[i], src->ps_const_i[i], 4 * sizeof(int));
        }
        if (src->changed.ps_const_b) {
            uint16_t m = src->changed.ps_const_b;
            for (i = ffs(m) - 1, m >>= i; m; ++i, m >>= 1)
                if (m & 1)
                    context->ps_const_b[i] = src->ps_const_b[i];
        }
        context->changed.ps_const_f = !!src->changed.ps_const_f;
        context->changed.ps_const_i = !!src->changed.ps_const_i;
        context->changed.ps_const_b = !!src->changed.ps_const_b;
    }

    /* Viewport */
    if (src->changed.group & NINE_STATE_VIEWPORT)
        context->viewport = src->viewport;

    /* Scissor */
    if (src->changed.group & NINE_STATE_SCISSOR)
        context->scissor = src->scissor;

    /* User Clip Planes */
    if (src->changed.ucp) {
        for (i = 0; i < PIPE_MAX_CLIP_PLANES; ++i)
            if (src->changed.ucp & (1 << i))
                memcpy(context->clip.ucp[i],
                       src->clip.ucp[i], sizeof(src->clip.ucp[0]));
        context->changed.ucp = TRUE;
    }

    /* Nothing below applies unless some fixed-function bit is set. */
    if (!(src->changed.group & NINE_STATE_FF))
        return;

    /* Fixed function state. */

    if (src->changed.group & NINE_STATE_FF_MATERIAL)
        context->ff.material = src->ff.material;

    if (src->changed.group & NINE_STATE_FF_PSSTAGES) {
        unsigned s;
        for (s = 0; s < NINE_MAX_TEXTURE_STAGES; ++s) {
            for (i = 0; i < NINED3DTSS_COUNT; ++i)
                if (src->ff.changed.tex_stage[s][i / 32] & (1 << (i % 32)))
                    context->ff.tex_stage[s][i] = src->ff.tex_stage[s][i];
        }
    }
    if (src->changed.group & NINE_STATE_FF_LIGHTING) {
        unsigned num_lights = MAX2(context->ff.num_lights, src->ff.num_lights);
        /* Can happen if the stateblock had recorded the creation of
         * new lights. */
        if (context->ff.num_lights < num_lights) {
            /* NOTE(review): the REALLOC result is used without a NULL check. */
            context->ff.light = REALLOC(context->ff.light,
                                    context->ff.num_lights * sizeof(D3DLIGHT9),
                                    num_lights * sizeof(D3DLIGHT9));
            memset(&context->ff.light[context->ff.num_lights], 0, (num_lights - context->ff.num_lights) * sizeof(D3DLIGHT9));
            for (i = context->ff.num_lights; i < num_lights; ++i)
                context->ff.light[i].Type = (D3DLIGHTTYPE)NINED3DLIGHT_INVALID;
            context->ff.num_lights = num_lights;
        }
        /* src->ff.num_lights < num_lights has been handled before */
        assert (src->ff.num_lights == num_lights);

        /* Only lights that are valid in src overwrite the context's lights. */
        for (i = 0; i < num_lights; ++i)
            if (src->ff.light[i].Type != NINED3DLIGHT_INVALID)
                context->ff.light[i] = src->ff.light[i];

        memcpy(context->ff.active_light, src->ff.active_light, sizeof(src->ff.active_light) );
        context->ff.num_lights_active = src->ff.num_lights_active;
    }
    if (src->changed.group & NINE_STATE_FF_VSTRANSF) {
        for (i = 0; i < ARRAY_SIZE(src->ff.changed.transform); ++i) {
            unsigned s;
            if (!src->ff.changed.transform[i])
                continue;
            /* Copy every transform whose bit is set in this 32-bit word. */
            for (s = i * 32; s < (i * 32 + 32); ++s) {
                if (!(src->ff.changed.transform[i] & (1 << (s % 32))))
                    continue;
                *nine_state_access_transform(&context->ff, s, TRUE) =
                    *nine_state_access_transform( /* const because !alloc */
                        (struct nine_ff_state *)&src->ff, s, FALSE);
            }
            context->ff.changed.transform[i] |= src->ff.changed.transform[i];
        }
    }
}
   2216 
   2217 #endif
   2218 
/* Do not write to nine_context directly. Slower,
 * but works with csmt. TODO: write a special csmt version that
 * would record the list of commands as much as possible,
 * and use the version above otherwise.
 */
   2224 void
   2225 nine_context_apply_stateblock(struct NineDevice9 *device,
   2226                               const struct nine_state *src)
   2227 {
   2228     int i;
   2229 
   2230     /* No need to apply src->changed.group, since all calls do
   2231     * set context->changed.group */
   2232 
   2233     for (i = 0; i < ARRAY_SIZE(src->changed.rs); ++i) {
   2234         uint32_t m = src->changed.rs[i];
   2235         while (m) {
   2236             const int r = ffs(m) - 1;
   2237             m &= ~(1 << r);
   2238             nine_context_set_render_state(device, i * 32 + r, src->rs_advertised[i * 32 + r]);
   2239         }
   2240     }
   2241 
   2242     /* Textures */
   2243     if (src->changed.texture) {
   2244         uint32_t m = src->changed.texture;
   2245         unsigned s;
   2246 
   2247         for (s = 0; m; ++s, m >>= 1) {
   2248             struct NineBaseTexture9 *tex = src->texture[s];
   2249             if (!(m & 1))
   2250                 continue;
   2251             nine_context_set_texture(device, s, tex);
   2252         }
   2253     }
   2254 
   2255     /* Sampler state */
   2256     if (src->changed.group & NINE_STATE_SAMPLER) {
   2257         unsigned s;
   2258 
   2259         for (s = 0; s < NINE_MAX_SAMPLERS; ++s) {
   2260             uint32_t m = src->changed.sampler[s];
   2261             while (m) {
   2262                 const int i = ffs(m) - 1;
   2263                 m &= ~(1 << i);
   2264                 nine_context_set_sampler_state(device, s, i, src->samp_advertised[s][i]);
   2265             }
   2266         }
   2267     }
   2268 
   2269     /* Vertex buffers */
   2270     if (src->changed.vtxbuf | src->changed.stream_freq) {
   2271         uint32_t m = src->changed.vtxbuf | src->changed.stream_freq;
   2272         for (i = 0; m; ++i, m >>= 1) {
   2273             if (src->changed.vtxbuf & (1 << i))
   2274                 nine_context_set_stream_source(device, i, src->stream[i], src->vtxbuf[i].buffer_offset, src->vtxbuf[i].stride);
   2275             if (src->changed.stream_freq & (1 << i))
   2276                 nine_context_set_stream_source_freq(device, i, src->stream_freq[i]);
   2277         }
   2278     }
   2279 
   2280     /* Index buffer */
   2281     if (src->changed.group & NINE_STATE_IDXBUF)
   2282         nine_context_set_indices(device, src->idxbuf);
   2283 
   2284     /* Vertex declaration */
   2285     if ((src->changed.group & NINE_STATE_VDECL) && src->vdecl)
   2286         nine_context_set_vertex_declaration(device, src->vdecl);
   2287 
   2288     /* Vertex shader */
   2289     if (src->changed.group & NINE_STATE_VS)
   2290         nine_context_set_vertex_shader(device, src->vs);
   2291 
   2292     /* Pixel shader */
   2293     if (src->changed.group & NINE_STATE_PS)
   2294         nine_context_set_pixel_shader(device, src->ps);
   2295 
   2296     /* Vertex constants */
   2297     if (src->changed.group & NINE_STATE_VS_CONST) {
   2298         struct nine_range *r;
   2299         for (r = src->changed.vs_const_f; r; r = r->next)
   2300             nine_context_set_vertex_shader_constant_f(device, r->bgn,
   2301                                                       &src->vs_const_f[r->bgn * 4],
   2302                                                       sizeof(float[4]) * (r->end - r->bgn),
   2303                                                       r->end - r->bgn);
   2304         for (r = src->changed.vs_const_i; r; r = r->next)
   2305             nine_context_set_vertex_shader_constant_i(device, r->bgn,
   2306                                                       &src->vs_const_i[r->bgn * 4],
   2307                                                       sizeof(int[4]) * (r->end - r->bgn),
   2308                                                       r->end - r->bgn);
   2309         for (r = src->changed.vs_const_b; r; r = r->next)
   2310             nine_context_set_vertex_shader_constant_b(device, r->bgn,
   2311                                                       &src->vs_const_b[r->bgn * 4],
   2312                                                       sizeof(BOOL) * (r->end - r->bgn),
   2313                                                       r->end - r->bgn);
   2314     }
   2315 
   2316     /* Pixel constants */
   2317     if (src->changed.group & NINE_STATE_PS_CONST) {
   2318         struct nine_range *r;
   2319         for (r = src->changed.ps_const_f; r; r = r->next)
   2320             nine_context_set_pixel_shader_constant_f(device, r->bgn,
   2321                                                      &src->ps_const_f[r->bgn * 4],
   2322                                                      sizeof(float[4]) * (r->end - r->bgn),
   2323                                                      r->end - r->bgn);
   2324         if (src->changed.ps_const_i) {
   2325             uint16_t m = src->changed.ps_const_i;
   2326             for (i = ffs(m) - 1, m >>= i; m; ++i, m >>= 1)
   2327                 if (m & 1)
   2328                     nine_context_set_pixel_shader_constant_i_transformed(device, i,
   2329                                                                          src->ps_const_i[i], sizeof(int[4]), 1);
   2330         }
   2331         if (src->changed.ps_const_b) {
   2332             uint16_t m = src->changed.ps_const_b;
   2333             for (i = ffs(m) - 1, m >>= i; m; ++i, m >>= 1)
   2334                 if (m & 1)
   2335                     nine_context_set_pixel_shader_constant_b(device, i,
   2336                                                              &src->ps_const_b[i], sizeof(BOOL), 1);
   2337         }
   2338     }
   2339 
   2340     /* Viewport */
   2341     if (src->changed.group & NINE_STATE_VIEWPORT)
   2342         nine_context_set_viewport(device, &src->viewport);
   2343 
   2344     /* Scissor */
   2345     if (src->changed.group & NINE_STATE_SCISSOR)
   2346         nine_context_set_scissor(device, &src->scissor);
   2347 
   2348     /* User Clip Planes */
   2349     if (src->changed.ucp)
   2350         for (i = 0; i < PIPE_MAX_CLIP_PLANES; ++i)
   2351             if (src->changed.ucp & (1 << i))
   2352                 nine_context_set_clip_plane(device, i, (struct nine_clipplane*)&src->clip.ucp[i][0]);
   2353 
   2354     if (!(src->changed.group & NINE_STATE_FF))
   2355         return;
   2356 
   2357     /* Fixed function state. */
   2358 
   2359     if (src->changed.group & NINE_STATE_FF_MATERIAL)
   2360         nine_context_set_material(device, &src->ff.material);
   2361 
   2362     if (src->changed.group & NINE_STATE_FF_PSSTAGES) {
   2363         unsigned s;
   2364         for (s = 0; s < NINE_MAX_TEXTURE_STAGES; ++s) {
   2365             for (i = 0; i < NINED3DTSS_COUNT; ++i)
   2366                 if (src->ff.changed.tex_stage[s][i / 32] & (1 << (i % 32)))
   2367                    nine_context_set_texture_stage_state(device, s, i, src->ff.tex_stage[s][i]);
   2368         }
   2369     }
   2370     if (src->changed.group & NINE_STATE_FF_LIGHTING) {
   2371         for (i = 0; i < src->ff.num_lights; ++i)
   2372             if (src->ff.light[i].Type != NINED3DLIGHT_INVALID)
   2373                 nine_context_set_light(device, i, &src->ff.light[i]);
   2374 
   2375         nine_context_light_enable_stateblock(device, src->ff.active_light, src->ff.num_lights_active);
   2376     }
   2377     if (src->changed.group & NINE_STATE_FF_VSTRANSF) {
   2378         for (i = 0; i < ARRAY_SIZE(src->ff.changed.transform); ++i) {
   2379             unsigned s;
   2380             if (!src->ff.changed.transform[i])
   2381                 continue;
   2382             for (s = i * 32; s < (i * 32 + 32); ++s) {
   2383                 if (!(src->ff.changed.transform[i] & (1 << (s % 32))))
   2384                     continue;
   2385                 nine_context_set_transform(device, s,
   2386                                            nine_state_access_transform(
   2387                                                (struct nine_ff_state *)&src->ff,
   2388                                                                        s, FALSE));
   2389             }
   2390         }
   2391     }
   2392 }
   2393 
   2394 static void
   2395 nine_update_state_framebuffer_clear(struct NineDevice9 *device)
   2396 {
   2397     struct nine_context *context = &device->context;
   2398 
   2399     if (context->changed.group & NINE_STATE_FB)
   2400         update_framebuffer(device, TRUE);
   2401 }
   2402 
   2403 CSMT_ITEM_NO_WAIT(nine_context_clear_fb,
   2404                   ARG_VAL(DWORD, Count),
   2405                   ARG_COPY_REF(D3DRECT, pRects),
   2406                   ARG_VAL(DWORD, Flags),
   2407                   ARG_VAL(D3DCOLOR, Color),
   2408                   ARG_VAL(float, Z),
   2409                   ARG_VAL(DWORD, Stencil))
   2410 {
   2411     struct nine_context *context = &device->context;
   2412     const int sRGB = context->rs[D3DRS_SRGBWRITEENABLE] ? 1 : 0;
   2413     struct pipe_surface *cbuf, *zsbuf;
   2414     struct pipe_context *pipe = context->pipe;
   2415     struct NineSurface9 *zsbuf_surf = context->ds;
   2416     struct NineSurface9 *rt;
   2417     unsigned bufs = 0;
   2418     unsigned r, i;
   2419     union pipe_color_union rgba;
   2420     unsigned rt_mask = 0;
   2421     D3DRECT rect;
   2422 
   2423     nine_update_state_framebuffer_clear(device);
   2424 
   2425     if (Flags & D3DCLEAR_TARGET) bufs |= PIPE_CLEAR_COLOR;
   2426     /* Ignore Z buffer if not bound */
   2427     if (context->pipe_data.fb.zsbuf != NULL) {
   2428         if (Flags & D3DCLEAR_ZBUFFER) bufs |= PIPE_CLEAR_DEPTH;
   2429         if (Flags & D3DCLEAR_STENCIL) bufs |= PIPE_CLEAR_STENCIL;
   2430     }
   2431     if (!bufs)
   2432         return;
   2433     d3dcolor_to_pipe_color_union(&rgba, Color);
   2434 
   2435     rect.x1 = context->viewport.X;
   2436     rect.y1 = context->viewport.Y;
   2437     rect.x2 = context->viewport.Width + rect.x1;
   2438     rect.y2 = context->viewport.Height + rect.y1;
   2439 
   2440     /* Both rectangles apply, which is weird, but that's D3D9. */
   2441     if (context->rs[D3DRS_SCISSORTESTENABLE]) {
   2442         rect.x1 = MAX2(rect.x1, context->scissor.minx);
   2443         rect.y1 = MAX2(rect.y1, context->scissor.miny);
   2444         rect.x2 = MIN2(rect.x2, context->scissor.maxx);
   2445         rect.y2 = MIN2(rect.y2, context->scissor.maxy);
   2446     }
   2447 
   2448     if (Count) {
   2449         /* Maybe apps like to specify a large rect ? */
   2450         if (pRects[0].x1 <= rect.x1 && pRects[0].x2 >= rect.x2 &&
   2451             pRects[0].y1 <= rect.y1 && pRects[0].y2 >= rect.y2) {
   2452             DBG("First rect covers viewport.\n");
   2453             Count = 0;
   2454             pRects = NULL;
   2455         }
   2456     }
   2457 
   2458     if (rect.x1 >= context->pipe_data.fb.width || rect.y1 >= context->pipe_data.fb.height)
   2459         return;
   2460 
   2461     for (i = 0; i < device->caps.NumSimultaneousRTs; ++i) {
   2462         if (context->rt[i] && context->rt[i]->desc.Format != D3DFMT_NULL)
   2463             rt_mask |= 1 << i;
   2464     }
   2465 
   2466     /* fast path, clears everything at once */
   2467     if (!Count &&
   2468         (!(bufs & PIPE_CLEAR_COLOR) || (rt_mask == context->rt_mask)) &&
   2469         rect.x1 == 0 && rect.y1 == 0 &&
   2470         /* Case we clear only render target. Check clear region vs rt. */
   2471         ((!(bufs & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) &&
   2472          rect.x2 >= context->pipe_data.fb.width &&
   2473          rect.y2 >= context->pipe_data.fb.height) ||
   2474         /* Case we clear depth buffer (and eventually rt too).
   2475          * depth buffer size is always >= rt size. Compare to clear region */
   2476         ((bufs & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) &&
   2477          rect.x2 >= zsbuf_surf->desc.Width &&
   2478          rect.y2 >= zsbuf_surf->desc.Height))) {
   2479         DBG("Clear fast path\n");
   2480         pipe->clear(pipe, bufs, &rgba, Z, Stencil);
   2481         return;
   2482     }
   2483 
   2484     if (!Count) {
   2485         Count = 1;
   2486         pRects = &rect;
   2487     }
   2488 
   2489     for (i = 0; i < device->caps.NumSimultaneousRTs; ++i) {
   2490         rt = context->rt[i];
   2491         if (!rt || rt->desc.Format == D3DFMT_NULL ||
   2492             !(bufs & PIPE_CLEAR_COLOR))
   2493             continue; /* save space, compiler should hoist this */
   2494         cbuf = NineSurface9_GetSurface(rt, sRGB);
   2495         for (r = 0; r < Count; ++r) {
   2496             /* Don't trust users to pass these in the right order. */
   2497             unsigned x1 = MIN2(pRects[r].x1, pRects[r].x2);
   2498             unsigned y1 = MIN2(pRects[r].y1, pRects[r].y2);
   2499             unsigned x2 = MAX2(pRects[r].x1, pRects[r].x2);
   2500             unsigned y2 = MAX2(pRects[r].y1, pRects[r].y2);
   2501 #ifndef NINE_LAX
   2502             /* Drop negative rectangles (like wine expects). */
   2503             if (pRects[r].x1 > pRects[r].x2) continue;
   2504             if (pRects[r].y1 > pRects[r].y2) continue;
   2505 #endif
   2506 
   2507             x1 = MAX2(x1, rect.x1);
   2508             y1 = MAX2(y1, rect.y1);
   2509             x2 = MIN3(x2, rect.x2, rt->desc.Width);
   2510             y2 = MIN3(y2, rect.y2, rt->desc.Height);
   2511 
   2512             DBG("Clearing (%u..%u)x(%u..%u)\n", x1, x2, y1, y2);
   2513             pipe->clear_render_target(pipe, cbuf, &rgba,
   2514                                       x1, y1, x2 - x1, y2 - y1, false);
   2515         }
   2516     }
   2517     if (!(bufs & PIPE_CLEAR_DEPTHSTENCIL))
   2518         return;
   2519 
   2520     bufs &= PIPE_CLEAR_DEPTHSTENCIL;
   2521 
   2522     for (r = 0; r < Count; ++r) {
   2523         unsigned x1 = MIN2(pRects[r].x1, pRects[r].x2);
   2524         unsigned y1 = MIN2(pRects[r].y1, pRects[r].y2);
   2525         unsigned x2 = MAX2(pRects[r].x1, pRects[r].x2);
   2526         unsigned y2 = MAX2(pRects[r].y1, pRects[r].y2);
   2527 #ifndef NINE_LAX
   2528         /* Drop negative rectangles. */
   2529         if (pRects[r].x1 > pRects[r].x2) continue;
   2530         if (pRects[r].y1 > pRects[r].y2) continue;
   2531 #endif
   2532 
   2533         x1 = MIN2(x1, rect.x1);
   2534         y1 = MIN2(y1, rect.y1);
   2535         x2 = MIN3(x2, rect.x2, zsbuf_surf->desc.Width);
   2536         y2 = MIN3(y2, rect.y2, zsbuf_surf->desc.Height);
   2537 
   2538         zsbuf = NineSurface9_GetSurface(zsbuf_surf, 0);
   2539         assert(zsbuf);
   2540         pipe->clear_depth_stencil(pipe, zsbuf, bufs, Z, Stencil,
   2541                                   x1, y1, x2 - x1, y2 - y1, false);
   2542     }
   2543     return;
   2544 }
   2545 
   2546 
   2547 static inline void
   2548 init_draw_info(struct pipe_draw_info *info,
   2549                struct NineDevice9 *dev, D3DPRIMITIVETYPE type, UINT count)
   2550 {
   2551     info->mode = d3dprimitivetype_to_pipe_prim(type);
   2552     info->count = prim_count_to_vertex_count(type, count);
   2553     info->start_instance = 0;
   2554     info->instance_count = 1;
   2555     if (dev->context.stream_instancedata_mask & dev->context.stream_usage_mask)
   2556         info->instance_count = MAX2(dev->context.stream_freq[0] & 0x7FFFFF, 1);
   2557     info->primitive_restart = FALSE;
   2558     info->restart_index = 0;
   2559     info->count_from_stream_output = NULL;
   2560     info->indirect = NULL;
   2561     info->indirect_params = NULL;
   2562 }
   2563 
   2564 CSMT_ITEM_NO_WAIT(nine_context_draw_primitive,
   2565                   ARG_VAL(D3DPRIMITIVETYPE, PrimitiveType),
   2566                   ARG_VAL(UINT, StartVertex),
   2567                   ARG_VAL(UINT, PrimitiveCount))
   2568 {
   2569     struct nine_context *context = &device->context;
   2570     struct pipe_draw_info info;
   2571 
   2572     nine_update_state(device);
   2573 
   2574     init_draw_info(&info, device, PrimitiveType, PrimitiveCount);
   2575     info.indexed = FALSE;
   2576     info.start = StartVertex;
   2577     info.index_bias = 0;
   2578     info.min_index = info.start;
   2579     info.max_index = info.count - 1;
   2580 
   2581     context->pipe->draw_vbo(context->pipe, &info);
   2582 }
   2583 
   2584 CSMT_ITEM_NO_WAIT(nine_context_draw_indexed_primitive,
   2585                   ARG_VAL(D3DPRIMITIVETYPE, PrimitiveType),
   2586                    ARG_VAL(INT, BaseVertexIndex),
   2587                    ARG_VAL(UINT, MinVertexIndex),
   2588                    ARG_VAL(UINT, NumVertices),
   2589                    ARG_VAL(UINT, StartIndex),
   2590                    ARG_VAL(UINT, PrimitiveCount))
   2591 {
   2592     struct nine_context *context = &device->context;
   2593     struct pipe_draw_info info;
   2594 
   2595     nine_update_state(device);
   2596 
   2597     init_draw_info(&info, device, PrimitiveType, PrimitiveCount);
   2598     info.indexed = TRUE;
   2599     info.start = StartIndex;
   2600     info.index_bias = BaseVertexIndex;
   2601     /* These don't include index bias: */
   2602     info.min_index = MinVertexIndex;
   2603     info.max_index = MinVertexIndex + NumVertices - 1;
   2604 
   2605     context->pipe->draw_vbo(context->pipe, &info);
   2606 }
   2607 
   2608 CSMT_ITEM_NO_WAIT(nine_context_draw_primitive_from_vtxbuf,
   2609                   ARG_VAL(D3DPRIMITIVETYPE, PrimitiveType),
   2610                   ARG_VAL(UINT, PrimitiveCount),
   2611                   ARG_BIND_BUF(struct pipe_vertex_buffer, vtxbuf))
   2612 {
   2613     struct nine_context *context = &device->context;
   2614     struct pipe_draw_info info;
   2615 
   2616     nine_update_state(device);
   2617 
   2618     init_draw_info(&info, device, PrimitiveType, PrimitiveCount);
   2619     info.indexed = FALSE;
   2620     info.start = 0;
   2621     info.index_bias = 0;
   2622     info.min_index = 0;
   2623     info.max_index = info.count - 1;
   2624 
   2625     context->pipe->set_vertex_buffers(context->pipe, 0, 1, vtxbuf);
   2626 
   2627     context->pipe->draw_vbo(context->pipe, &info);
   2628 }
   2629 
   2630 CSMT_ITEM_NO_WAIT(nine_context_draw_indexed_primitive_from_vtxbuf_idxbuf,
   2631                   ARG_VAL(D3DPRIMITIVETYPE, PrimitiveType),
   2632                   ARG_VAL(UINT, MinVertexIndex),
   2633                   ARG_VAL(UINT, NumVertices),
   2634                   ARG_VAL(UINT, PrimitiveCount),
   2635                   ARG_BIND_BUF(struct pipe_vertex_buffer, vbuf),
   2636                   ARG_BIND_BUF(struct pipe_index_buffer, ibuf))
   2637 {
   2638     struct nine_context *context = &device->context;
   2639     struct pipe_draw_info info;
   2640 
   2641     nine_update_state(device);
   2642 
   2643     init_draw_info(&info, device, PrimitiveType, PrimitiveCount);
   2644     info.indexed = TRUE;
   2645     info.start = 0;
   2646     info.index_bias = 0;
   2647     info.min_index = MinVertexIndex;
   2648     info.max_index = MinVertexIndex + NumVertices - 1;
   2649     context->pipe->set_vertex_buffers(context->pipe, 0, 1, vbuf);
   2650     context->pipe->set_index_buffer(context->pipe, ibuf);
   2651 
   2652     context->pipe->draw_vbo(context->pipe, &info);
   2653 }
   2654 
   2655 CSMT_ITEM_NO_WAIT(nine_context_resource_copy_region,
   2656                   ARG_BIND_REF(struct NineUnknown, dst),
   2657                   ARG_BIND_REF(struct NineUnknown, src),
   2658                   ARG_BIND_RES(struct pipe_resource, dst_res),
   2659                   ARG_VAL(unsigned, dst_level),
   2660                   ARG_COPY_REF(struct pipe_box, dst_box),
   2661                   ARG_BIND_RES(struct pipe_resource, src_res),
   2662                   ARG_VAL(unsigned, src_level),
   2663                   ARG_COPY_REF(struct pipe_box, src_box))
   2664 {
   2665     struct nine_context *context = &device->context;
   2666 
   2667     (void) dst;
   2668     (void) src;
   2669 
   2670     context->pipe->resource_copy_region(context->pipe,
   2671             dst_res, dst_level,
   2672             dst_box->x, dst_box->y, dst_box->z,
   2673             src_res, src_level,
   2674             src_box);
   2675 }
   2676 
   2677 CSMT_ITEM_NO_WAIT(nine_context_blit,
   2678                   ARG_BIND_REF(struct NineUnknown, dst),
   2679                   ARG_BIND_REF(struct NineUnknown, src),
   2680                   ARG_BIND_BLIT(struct pipe_blit_info, blit))
   2681 {
   2682     struct nine_context *context = &device->context;
   2683 
   2684     (void) dst;
   2685     (void) src;
   2686 
   2687     context->pipe->blit(context->pipe, blit);
   2688 }
   2689 
   2690 CSMT_ITEM_NO_WAIT(nine_context_clear_render_target,
   2691                   ARG_BIND_REF(struct NineSurface9, surface),
   2692                   ARG_VAL(D3DCOLOR, color),
   2693                   ARG_VAL(UINT, x),
   2694                   ARG_VAL(UINT, y),
   2695                   ARG_VAL(UINT, width),
   2696                   ARG_VAL(UINT, height))
   2697 {
   2698     struct nine_context *context = &device->context;
   2699     struct pipe_surface *surf;
   2700     union pipe_color_union rgba;
   2701 
   2702     d3dcolor_to_pipe_color_union(&rgba, color);
   2703     surf = NineSurface9_GetSurface(surface, 0);
   2704     context->pipe->clear_render_target(context->pipe, surf, &rgba, x, y, width, height, false);
   2705 }
   2706 
   2707 CSMT_ITEM_NO_WAIT(nine_context_gen_mipmap,
   2708                   ARG_BIND_REF(struct NineUnknown, dst),
   2709                   ARG_BIND_RES(struct pipe_resource, res),
   2710                   ARG_VAL(UINT, base_level),
   2711                   ARG_VAL(UINT, last_level),
   2712                   ARG_VAL(UINT, first_layer),
   2713                   ARG_VAL(UINT, last_layer),
   2714                   ARG_VAL(UINT, filter))
   2715 {
   2716     struct nine_context *context = &device->context;
   2717 
   2718     /* We just bind dst for the bind count */
   2719     (void)dst;
   2720 
   2721     util_gen_mipmap(context->pipe, res, res->format, base_level,
   2722                     last_level, first_layer, last_layer, filter);
   2723 }
   2724 
   2725 CSMT_ITEM_NO_WAIT_WITH_COUNTER(nine_context_range_upload,
   2726                                ARG_BIND_RES(struct pipe_resource, res),
   2727                                ARG_VAL(unsigned, offset),
   2728                                ARG_VAL(unsigned, size),
   2729                                ARG_VAL(const void *, data))
   2730 {
   2731     struct nine_context *context = &device->context;
   2732 
   2733     context->pipe->buffer_subdata(context->pipe, res, 0, offset, size, data);
   2734 }
   2735 
   2736 CSMT_ITEM_NO_WAIT_WITH_COUNTER(nine_context_box_upload,
   2737                                ARG_BIND_REF(struct NineUnknown, dst),
   2738                                ARG_BIND_RES(struct pipe_resource, res),
   2739                                ARG_VAL(unsigned, level),
   2740                                ARG_COPY_REF(struct pipe_box, dst_box),
   2741                                ARG_VAL(enum pipe_format, src_format),
   2742                                ARG_VAL(const void *, src),
   2743                                ARG_VAL(unsigned, src_stride),
   2744                                ARG_VAL(unsigned, src_layer_stride),
   2745                                ARG_COPY_REF(struct pipe_box, src_box))
   2746 {
   2747     struct nine_context *context = &device->context;
   2748     struct pipe_context *pipe = context->pipe;
   2749     struct pipe_transfer *transfer = NULL;
   2750     uint8_t *map;
   2751 
   2752     /* We just bind dst for the bind count */
   2753     (void)dst;
   2754 
   2755     map = pipe->transfer_map(pipe,
   2756                              res,
   2757                              level,
   2758                              PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE,
   2759                              dst_box, &transfer);
   2760     if (!map)
   2761         return;
   2762 
   2763     /* Note: if formats are the sames, it will revert
   2764      * to normal memcpy */
   2765     (void) util_format_translate_3d(res->format,
   2766                                     map, transfer->stride,
   2767                                     transfer->layer_stride,
   2768                                     0, 0, 0,
   2769                                     src_format,
   2770                                     src, src_stride,
   2771                                     src_layer_stride,
   2772                                     src_box->x, src_box->y, src_box->z,
   2773                                     dst_box->width, dst_box->height,
   2774                                     dst_box->depth);
   2775 
   2776     pipe_transfer_unmap(pipe, transfer);
   2777 }
   2778 
   2779 struct pipe_query *
   2780 nine_context_create_query(struct NineDevice9 *device, unsigned query_type)
   2781 {
   2782     struct pipe_context *pipe;
   2783     struct pipe_query *res;
   2784 
   2785     pipe = nine_context_get_pipe_acquire(device);
   2786     res = pipe->create_query(pipe, query_type, 0);
   2787     nine_context_get_pipe_release(device);
   2788     return res;
   2789 }
   2790 
   2791 CSMT_ITEM_DO_WAIT(nine_context_destroy_query,
   2792                   ARG_REF(struct pipe_query, query))
   2793 {
   2794     struct nine_context *context = &device->context;
   2795 
   2796     context->pipe->destroy_query(context->pipe, query);
   2797 }
   2798 
   2799 CSMT_ITEM_NO_WAIT_WITH_COUNTER(nine_context_begin_query,
   2800                                ARG_REF(struct pipe_query, query))
   2801 {
   2802     struct nine_context *context = &device->context;
   2803 
   2804     (void) context->pipe->begin_query(context->pipe, query);
   2805 }
   2806 
   2807 CSMT_ITEM_NO_WAIT_WITH_COUNTER(nine_context_end_query,
   2808                                ARG_REF(struct pipe_query, query))
   2809 {
   2810     struct nine_context *context = &device->context;
   2811 
   2812     (void) context->pipe->end_query(context->pipe, query);
   2813 }
   2814 
   2815 boolean
   2816 nine_context_get_query_result(struct NineDevice9 *device, struct pipe_query *query,
   2817                               unsigned *counter, boolean flush, boolean wait,
   2818                               union pipe_query_result *result)
   2819 {
   2820     struct pipe_context *pipe;
   2821     boolean ret;
   2822 
   2823     if (wait)
   2824         nine_csmt_process(device);
   2825     else if (p_atomic_read(counter) > 0) {
   2826         if (flush && device->csmt_active)
   2827             nine_queue_flush(device->csmt_ctx->pool);
   2828         DBG("Pending begin/end. Returning\n");
   2829         return false;
   2830     }
   2831 
   2832     pipe = nine_context_get_pipe_acquire(device);
   2833     ret = pipe->get_query_result(pipe, query, wait, result);
   2834     nine_context_get_pipe_release(device);
   2835 
   2836     DBG("Query result %s\n", ret ? "found" : "not yet available");
   2837     return ret;
   2838 }
   2839 
   2840 /* State defaults */
   2841 
/* Default value of every render state, indexed by D3DRS_* / NINED3DRS_*.
 *
 * nine_state_set_defaults copies this table into context->rs.
 * States without an initializer (including the commented-out entries) are
 * zero, as for any partially initialized array.
 * 0x3F800000 is the IEEE-754 single-precision bit pattern of 1.0f;
 * presumably float-valued states are stored as raw bits in the DWORD
 * table - confirm against the render state setters. */
static const DWORD nine_render_state_defaults[NINED3DRS_LAST + 1] =
{
 /* [D3DRS_ZENABLE] = D3DZB_TRUE; wine: auto_depth_stencil */
    [D3DRS_ZENABLE] = D3DZB_FALSE,
    [D3DRS_FILLMODE] = D3DFILL_SOLID,
    [D3DRS_SHADEMODE] = D3DSHADE_GOURAUD,
/*  [D3DRS_LINEPATTERN] = 0x00000000, */
    [D3DRS_ZWRITEENABLE] = TRUE,
    [D3DRS_ALPHATESTENABLE] = FALSE,
    [D3DRS_LASTPIXEL] = TRUE,
    [D3DRS_SRCBLEND] = D3DBLEND_ONE,
    [D3DRS_DESTBLEND] = D3DBLEND_ZERO,
    [D3DRS_CULLMODE] = D3DCULL_CCW,
    [D3DRS_ZFUNC] = D3DCMP_LESSEQUAL,
    [D3DRS_ALPHAFUNC] = D3DCMP_ALWAYS,
    [D3DRS_ALPHAREF] = 0,
    [D3DRS_DITHERENABLE] = FALSE,
    [D3DRS_ALPHABLENDENABLE] = FALSE,
    [D3DRS_FOGENABLE] = FALSE,
    [D3DRS_SPECULARENABLE] = FALSE,
/*  [D3DRS_ZVISIBLE] = 0, */
    [D3DRS_FOGCOLOR] = 0,
    [D3DRS_FOGTABLEMODE] = D3DFOG_NONE,
    [D3DRS_FOGSTART] = 0x00000000,
    [D3DRS_FOGEND] = 0x3F800000,
    [D3DRS_FOGDENSITY] = 0x3F800000,
/*  [D3DRS_EDGEANTIALIAS] = FALSE, */
    [D3DRS_RANGEFOGENABLE] = FALSE,
    [D3DRS_STENCILENABLE] = FALSE,
    [D3DRS_STENCILFAIL] = D3DSTENCILOP_KEEP,
    [D3DRS_STENCILZFAIL] = D3DSTENCILOP_KEEP,
    [D3DRS_STENCILPASS] = D3DSTENCILOP_KEEP,
    [D3DRS_STENCILREF] = 0,
    [D3DRS_STENCILMASK] = 0xFFFFFFFF,
    [D3DRS_STENCILFUNC] = D3DCMP_ALWAYS,
    [D3DRS_STENCILWRITEMASK] = 0xFFFFFFFF,
    [D3DRS_TEXTUREFACTOR] = 0xFFFFFFFF,
    [D3DRS_WRAP0] = 0,
    [D3DRS_WRAP1] = 0,
    [D3DRS_WRAP2] = 0,
    [D3DRS_WRAP3] = 0,
    [D3DRS_WRAP4] = 0,
    [D3DRS_WRAP5] = 0,
    [D3DRS_WRAP6] = 0,
    [D3DRS_WRAP7] = 0,
    [D3DRS_CLIPPING] = TRUE,
    [D3DRS_LIGHTING] = TRUE,
    [D3DRS_AMBIENT] = 0,
    [D3DRS_FOGVERTEXMODE] = D3DFOG_NONE,
    [D3DRS_COLORVERTEX] = TRUE,
    [D3DRS_LOCALVIEWER] = TRUE,
    [D3DRS_NORMALIZENORMALS] = FALSE,
    [D3DRS_DIFFUSEMATERIALSOURCE] = D3DMCS_COLOR1,
    [D3DRS_SPECULARMATERIALSOURCE] = D3DMCS_COLOR2,
    [D3DRS_AMBIENTMATERIALSOURCE] = D3DMCS_MATERIAL,
    [D3DRS_EMISSIVEMATERIALSOURCE] = D3DMCS_MATERIAL,
    [D3DRS_VERTEXBLEND] = D3DVBF_DISABLE,
    [D3DRS_CLIPPLANEENABLE] = 0,
/*  [D3DRS_SOFTWAREVERTEXPROCESSING] = FALSE, */
    [D3DRS_POINTSIZE] = 0x3F800000,
    [D3DRS_POINTSIZE_MIN] = 0x3F800000,
    [D3DRS_POINTSPRITEENABLE] = FALSE,
    [D3DRS_POINTSCALEENABLE] = FALSE,
    [D3DRS_POINTSCALE_A] = 0x3F800000,
    [D3DRS_POINTSCALE_B] = 0x00000000,
    [D3DRS_POINTSCALE_C] = 0x00000000,
    [D3DRS_MULTISAMPLEANTIALIAS] = TRUE,
    [D3DRS_MULTISAMPLEMASK] = 0xFFFFFFFF,
    [D3DRS_PATCHEDGESTYLE] = D3DPATCHEDGE_DISCRETE,
/*  [D3DRS_PATCHSEGMENTS] = 0x3F800000, */
    [D3DRS_DEBUGMONITORTOKEN] = 0xDEADCAFE,
    [D3DRS_POINTSIZE_MAX] = 0x3F800000, /* depends on cap */
    [D3DRS_INDEXEDVERTEXBLENDENABLE] = FALSE,
    [D3DRS_COLORWRITEENABLE] = 0x0000000f,
    [D3DRS_TWEENFACTOR] = 0x00000000,
    [D3DRS_BLENDOP] = D3DBLENDOP_ADD,
    [D3DRS_POSITIONDEGREE] = D3DDEGREE_CUBIC,
    [D3DRS_NORMALDEGREE] = D3DDEGREE_LINEAR,
    [D3DRS_SCISSORTESTENABLE] = FALSE,
    [D3DRS_SLOPESCALEDEPTHBIAS] = 0,
    [D3DRS_MINTESSELLATIONLEVEL] = 0x3F800000,
    [D3DRS_MAXTESSELLATIONLEVEL] = 0x3F800000,
    [D3DRS_ANTIALIASEDLINEENABLE] = FALSE,
    [D3DRS_ADAPTIVETESS_X] = 0x00000000,
    [D3DRS_ADAPTIVETESS_Y] = 0x00000000,
    [D3DRS_ADAPTIVETESS_Z] = 0x3F800000,
    [D3DRS_ADAPTIVETESS_W] = 0x00000000,
    [D3DRS_ENABLEADAPTIVETESSELLATION] = FALSE,
    [D3DRS_TWOSIDEDSTENCILMODE] = FALSE,
    [D3DRS_CCW_STENCILFAIL] = D3DSTENCILOP_KEEP,
    [D3DRS_CCW_STENCILZFAIL] = D3DSTENCILOP_KEEP,
    [D3DRS_CCW_STENCILPASS] = D3DSTENCILOP_KEEP,
    [D3DRS_CCW_STENCILFUNC] = D3DCMP_ALWAYS,
    [D3DRS_COLORWRITEENABLE1] = 0x0000000F,
    [D3DRS_COLORWRITEENABLE2] = 0x0000000F,
    [D3DRS_COLORWRITEENABLE3] = 0x0000000F,
    [D3DRS_BLENDFACTOR] = 0xFFFFFFFF,
    [D3DRS_SRGBWRITEENABLE] = 0,
    [D3DRS_DEPTHBIAS] = 0,
    [D3DRS_WRAP8] = 0,
    [D3DRS_WRAP9] = 0,
    [D3DRS_WRAP10] = 0,
    [D3DRS_WRAP11] = 0,
    [D3DRS_WRAP12] = 0,
    [D3DRS_WRAP13] = 0,
    [D3DRS_WRAP14] = 0,
    [D3DRS_WRAP15] = 0,
    [D3DRS_SEPARATEALPHABLENDENABLE] = FALSE,
    [D3DRS_SRCBLENDALPHA] = D3DBLEND_ONE,
    [D3DRS_DESTBLENDALPHA] = D3DBLEND_ZERO,
    [D3DRS_BLENDOPALPHA] = D3DBLENDOP_ADD,
    /* Nine-internal render states (NINED3DRS_*). */
    [NINED3DRS_VSPOINTSIZE] = FALSE,
    [NINED3DRS_RTMASK] = 0xf,
    [NINED3DRS_ALPHACOVERAGE] = FALSE,
    [NINED3DRS_MULTISAMPLE] = FALSE
};
/* Default texture stage states, indexed by D3DTSS_*.
 *
 * nine_state_set_defaults copies this table into every texture stage and
 * then overrides D3DTSS_TEXCOORDINDEX with the stage number and, for
 * stage 0 only, D3DTSS_COLOROP / D3DTSS_ALPHAOP. */
static const DWORD nine_tex_stage_state_defaults[NINED3DTSS_LAST + 1] =
{
    [D3DTSS_COLOROP] = D3DTOP_DISABLE,
    [D3DTSS_ALPHAOP] = D3DTOP_DISABLE,
    [D3DTSS_COLORARG1] = D3DTA_TEXTURE,
    [D3DTSS_COLORARG2] = D3DTA_CURRENT,
    [D3DTSS_COLORARG0] = D3DTA_CURRENT,
    [D3DTSS_ALPHAARG1] = D3DTA_TEXTURE,
    [D3DTSS_ALPHAARG2] = D3DTA_CURRENT,
    [D3DTSS_ALPHAARG0] = D3DTA_CURRENT,
    [D3DTSS_RESULTARG] = D3DTA_CURRENT,
    [D3DTSS_BUMPENVMAT00] = 0,
    [D3DTSS_BUMPENVMAT01] = 0,
    [D3DTSS_BUMPENVMAT10] = 0,
    [D3DTSS_BUMPENVMAT11] = 0,
    [D3DTSS_BUMPENVLSCALE] = 0,
    [D3DTSS_BUMPENVLOFFSET] = 0,
    [D3DTSS_TEXCOORDINDEX] = 0,
    [D3DTSS_TEXTURETRANSFORMFLAGS] = D3DTTFF_DISABLE,
};
/* Default sampler states, indexed by D3DSAMP_* / NINED3DSAMP_*. */
static const DWORD nine_samp_state_defaults[NINED3DSAMP_LAST + 1] =
{
    [D3DSAMP_ADDRESSU] = D3DTADDRESS_WRAP,
    [D3DSAMP_ADDRESSV] = D3DTADDRESS_WRAP,
    [D3DSAMP_ADDRESSW] = D3DTADDRESS_WRAP,
    [D3DSAMP_BORDERCOLOR] = 0,
    [D3DSAMP_MAGFILTER] = D3DTEXF_POINT,
    [D3DSAMP_MINFILTER] = D3DTEXF_POINT,
    [D3DSAMP_MIPFILTER] = D3DTEXF_NONE,
    [D3DSAMP_MIPMAPLODBIAS] = 0,
    [D3DSAMP_MAXMIPLEVEL] = 0,
    [D3DSAMP_MAXANISOTROPY] = 1,
    [D3DSAMP_SRGBTEXTURE] = 0,
    [D3DSAMP_ELEMENTINDEX] = 0,
    [D3DSAMP_DMAPOFFSET] = 0,
    /* Nine-internal sampler states (NINED3DSAMP_*). */
    [NINED3DSAMP_MINLOD] = 0,
    [NINED3DSAMP_SHADOW] = 0,
    [NINED3DSAMP_CUBETEX] = 0
};
   2997 
/* Note: The following 4 functions assume there are no
 * pending commands. */
   3000 
   3001 void nine_state_restore_non_cso(struct NineDevice9 *device)
   3002 {
   3003     struct nine_context *context = &device->context;
   3004 
   3005     context->changed.group = NINE_STATE_ALL;
   3006     context->changed.vtxbuf = (1ULL << device->caps.MaxStreams) - 1;
   3007     context->changed.ucp = TRUE;
   3008     context->commit |= NINE_STATE_COMMIT_CONST_VS | NINE_STATE_COMMIT_CONST_PS;
   3009 }
   3010 
   3011 void
   3012 nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps,
   3013                         boolean is_reset)
   3014 {
   3015     struct nine_state *state = &device->state;
   3016     struct nine_context *context = &device->context;
   3017     unsigned s;
   3018 
   3019     /* Initialize defaults.
   3020      */
   3021     memcpy(context->rs, nine_render_state_defaults, sizeof(context->rs));
   3022 
   3023     for (s = 0; s < ARRAY_SIZE(state->ff.tex_stage); ++s) {
   3024         memcpy(&state->ff.tex_stage[s], nine_tex_stage_state_defaults,
   3025                sizeof(state->ff.tex_stage[s]));
   3026         state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] = s;
   3027     }
   3028     state->ff.tex_stage[0][D3DTSS_COLOROP] = D3DTOP_MODULATE;
   3029     state->ff.tex_stage[0][D3DTSS_ALPHAOP] = D3DTOP_SELECTARG1;
   3030 
   3031     for (s = 0; s < ARRAY_SIZE(state->ff.tex_stage); ++s)
   3032         memcpy(&context->ff.tex_stage[s], state->ff.tex_stage[s],
   3033                sizeof(state->ff.tex_stage[s]));
   3034 
   3035     memset(&context->bumpmap_vars, 0, sizeof(context->bumpmap_vars));
   3036 
   3037     for (s = 0; s < NINE_MAX_SAMPLERS; ++s) {
   3038         memcpy(&context->samp[s], nine_samp_state_defaults,
   3039                sizeof(context->samp[s]));
   3040         memcpy(&state->samp_advertised[s], nine_samp_state_defaults,
   3041                sizeof(state->samp_advertised[s]));
   3042     }
   3043 
   3044     memset(state->vs_const_f, 0, VS_CONST_F_SIZE(device));
   3045     memset(context->vs_const_f, 0, device->vs_const_size);
   3046     if (context->vs_const_f_swvp)
   3047         memset(context->vs_const_f_swvp, 0, NINE_MAX_CONST_F_SWVP * sizeof(float[4]));
   3048     memset(state->vs_const_i, 0, VS_CONST_I_SIZE(device));
   3049     memset(context->vs_const_i, 0, VS_CONST_I_SIZE(device));
   3050     memset(state->vs_const_b, 0, VS_CONST_B_SIZE(device));
   3051     memset(context->vs_const_b, 0, VS_CONST_B_SIZE(device));
   3052     memset(state->ps_const_f, 0, device->ps_const_size);
   3053     memset(context->ps_const_f, 0, device->ps_const_size);
   3054     memset(state->ps_const_i, 0, sizeof(state->ps_const_i));
   3055     memset(context->ps_const_i, 0, sizeof(context->ps_const_i));
   3056     memset(state->ps_const_b, 0, sizeof(state->ps_const_b));
   3057     memset(context->ps_const_b, 0, sizeof(context->ps_const_b));
   3058 
   3059     /* Cap dependent initial state:
   3060      */
   3061     context->rs[D3DRS_POINTSIZE_MAX] = fui(caps->MaxPointSize);
   3062 
   3063     memcpy(state->rs_advertised, context->rs, sizeof(context->rs));
   3064 
   3065     /* Set changed flags to initialize driver.
   3066      */
   3067     context->changed.group = NINE_STATE_ALL;
   3068     context->changed.vtxbuf = (1ULL << device->caps.MaxStreams) - 1;
   3069     context->changed.ucp = TRUE;
   3070 
   3071     context->ff.changed.transform[0] = ~0;
   3072     context->ff.changed.transform[D3DTS_WORLD / 32] |= 1 << (D3DTS_WORLD % 32);
   3073 
   3074     if (!is_reset) {
   3075         state->viewport.MinZ = context->viewport.MinZ = 0.0f;
   3076         state->viewport.MaxZ = context->viewport.MaxZ = 1.0f;
   3077     }
   3078 
   3079     for (s = 0; s < NINE_MAX_SAMPLERS; ++s)
   3080         context->changed.sampler[s] = ~0;
   3081 
   3082     if (!is_reset) {
   3083         context->dummy_vbo_bound_at = -1;
   3084         context->vbo_bound_done = FALSE;
   3085     }
   3086 }
   3087 
   3088 void
   3089 nine_state_clear(struct nine_state *state, const boolean device)
   3090 {
   3091     unsigned i;
   3092 
   3093     for (i = 0; i < ARRAY_SIZE(state->rt); ++i)
   3094        nine_bind(&state->rt[i], NULL);
   3095     nine_bind(&state->ds, NULL);
   3096     nine_bind(&state->vs, NULL);
   3097     nine_bind(&state->ps, NULL);
   3098     nine_bind(&state->vdecl, NULL);
   3099     for (i = 0; i < PIPE_MAX_ATTRIBS; ++i)
   3100         nine_bind(&state->stream[i], NULL);
   3101 
   3102     nine_bind(&state->idxbuf, NULL);
   3103     for (i = 0; i < NINE_MAX_SAMPLERS; ++i) {
   3104         if (device &&
   3105             state->texture[i] &&
   3106           --state->texture[i]->bind_count == 0)
   3107             list_delinit(&state->texture[i]->list);
   3108         nine_bind(&state->texture[i], NULL);
   3109     }
   3110 }
   3111 
   3112 void
   3113 nine_context_clear(struct NineDevice9 *device)
   3114 {
   3115     struct nine_context *context = &device->context;
   3116     struct pipe_context *pipe = context->pipe;
   3117     struct cso_context *cso = context->cso;
   3118     unsigned i;
   3119 
   3120     /* Early device ctor failure. Nothing to do */
   3121     if (!pipe || !cso)
   3122         return;
   3123 
   3124     pipe->bind_vs_state(pipe, NULL);
   3125     pipe->bind_fs_state(pipe, NULL);
   3126 
   3127     /* Don't unbind constant buffers, they're device-private and
   3128      * do not change on Reset.
   3129      */
   3130 
   3131     cso_set_samplers(cso, PIPE_SHADER_VERTEX, 0, NULL);
   3132     cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, 0, NULL);
   3133 
   3134     cso_set_sampler_views(cso, PIPE_SHADER_VERTEX, 0, NULL);
   3135     cso_set_sampler_views(cso, PIPE_SHADER_FRAGMENT, 0, NULL);
   3136 
   3137     pipe->set_vertex_buffers(pipe, 0, device->caps.MaxStreams, NULL);
   3138     pipe->set_index_buffer(pipe, NULL);
   3139 
   3140     for (i = 0; i < ARRAY_SIZE(context->rt); ++i)
   3141        nine_bind(&context->rt[i], NULL);
   3142     nine_bind(&context->ds, NULL);
   3143     nine_bind(&context->vs, NULL);
   3144     nine_bind(&context->ps, NULL);
   3145     nine_bind(&context->vdecl, NULL);
   3146     for (i = 0; i < PIPE_MAX_ATTRIBS; ++i)
   3147         pipe_resource_reference(&context->vtxbuf[i].buffer, NULL);
   3148     pipe_resource_reference(&context->idxbuf.buffer, NULL);
   3149 
   3150     for (i = 0; i < NINE_MAX_SAMPLERS; ++i) {
   3151         context->texture[i].enabled = FALSE;
   3152         pipe_resource_reference(&context->texture[i].resource,
   3153                                 NULL);
   3154         pipe_sampler_view_reference(&context->texture[i].view[0],
   3155                                     NULL);
   3156         pipe_sampler_view_reference(&context->texture[i].view[1],
   3157                                     NULL);
   3158     }
   3159 }
   3160 
   3161 void
   3162 nine_state_init_sw(struct NineDevice9 *device)
   3163 {
   3164     struct pipe_context *pipe_sw = device->pipe_sw;
   3165     struct pipe_rasterizer_state rast;
   3166     struct pipe_blend_state blend;
   3167     struct pipe_depth_stencil_alpha_state dsa;
   3168     struct pipe_framebuffer_state fb;
   3169 
   3170     /* Only used with Streamout */
   3171     memset(&rast, 0, sizeof(rast));
   3172     rast.rasterizer_discard = true;
   3173     rast.point_quad_rasterization = 1; /* to make llvmpipe happy */
   3174     cso_set_rasterizer(device->cso_sw, &rast);
   3175 
   3176     /* dummy settings */
   3177     memset(&blend, 0, sizeof(blend));
   3178     memset(&dsa, 0, sizeof(dsa));
   3179     memset(&fb, 0, sizeof(fb));
   3180     cso_set_blend(device->cso_sw, &blend);
   3181     cso_set_depth_stencil_alpha(device->cso_sw, &dsa);
   3182     cso_set_framebuffer(device->cso_sw, &fb);
   3183     cso_set_viewport_dims(device->cso_sw, 1.0, 1.0, false);
   3184     cso_set_fragment_shader_handle(device->cso_sw, util_make_empty_fragment_shader(pipe_sw));
   3185 }
   3186 
   3187 /* There is duplication with update_vertex_elements.
   3188  * TODO: Share the code */
   3189 
   3190 static void
   3191 update_vertex_elements_sw(struct NineDevice9 *device)
   3192 {
   3193     struct nine_state *state = &device->state;
   3194     const struct NineVertexDeclaration9 *vdecl = device->state.vdecl;
   3195     const struct NineVertexShader9 *vs;
   3196     unsigned n, b, i;
   3197     int index;
   3198     char vdecl_index_map[16]; /* vs->num_inputs <= 16 */
   3199     char used_streams[device->caps.MaxStreams];
   3200     int dummy_vbo_stream = -1;
   3201     BOOL need_dummy_vbo = FALSE;
   3202     struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];
   3203     bool programmable_vs = state->vs && !(state->vdecl && state->vdecl->position_t);
   3204 
   3205     memset(vdecl_index_map, -1, 16);
   3206     memset(used_streams, 0, device->caps.MaxStreams);
   3207     vs = programmable_vs ? device->state.vs : device->ff.vs;
   3208 
   3209     if (vdecl) {
   3210         for (n = 0; n < vs->num_inputs; ++n) {
   3211             DBG("looking up input %u (usage %u) from vdecl(%p)\n",
   3212                 n, vs->input_map[n].ndecl, vdecl);
   3213 
   3214             for (i = 0; i < vdecl->nelems; i++) {
   3215                 if (vdecl->usage_map[i] == vs->input_map[n].ndecl) {
   3216                     vdecl_index_map[n] = i;
   3217                     used_streams[vdecl->elems[i].vertex_buffer_index] = 1;
   3218                     break;
   3219                 }
   3220             }
   3221             if (vdecl_index_map[n] < 0)
   3222                 need_dummy_vbo = TRUE;
   3223         }
   3224     } else {
   3225         /* No vertex declaration. Likely will never happen in practice,
   3226          * but we need not crash on this */
   3227         need_dummy_vbo = TRUE;
   3228     }
   3229 
   3230     if (need_dummy_vbo) {
   3231         for (i = 0; i < device->caps.MaxStreams; i++ ) {
   3232             if (!used_streams[i]) {
   3233                 dummy_vbo_stream = i;
   3234                 break;
   3235             }
   3236         }
   3237     }
   3238     /* TODO handle dummy_vbo */
   3239     assert (!need_dummy_vbo);
   3240 
   3241     for (n = 0; n < vs->num_inputs; ++n) {
   3242         index = vdecl_index_map[n];
   3243         if (index >= 0) {
   3244             ve[n] = vdecl->elems[index];
   3245             b = ve[n].vertex_buffer_index;
   3246             /* XXX wine just uses 1 here: */
   3247             if (state->stream_freq[b] & D3DSTREAMSOURCE_INSTANCEDATA)
   3248                 ve[n].instance_divisor = state->stream_freq[b] & 0x7FFFFF;
   3249         } else {
   3250             /* if the vertex declaration is incomplete compared to what the
   3251              * vertex shader needs, we bind a dummy vbo with 0 0 0 0.
   3252              * This is not precised by the spec, but is the behaviour
   3253              * tested on win */
   3254             ve[n].vertex_buffer_index = dummy_vbo_stream;
   3255             ve[n].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
   3256             ve[n].src_offset = 0;
   3257             ve[n].instance_divisor = 0;
   3258         }
   3259     }
   3260 
   3261     cso_set_vertex_elements(device->cso_sw, vs->num_inputs, ve);
   3262 }
   3263 
   3264 static void
   3265 update_vertex_buffers_sw(struct NineDevice9 *device, int start_vertice, int num_vertices)
   3266 {
   3267     struct pipe_context *pipe = nine_context_get_pipe_acquire(device);
   3268     struct pipe_context *pipe_sw = device->pipe_sw;
   3269     struct nine_state *state = &device->state;
   3270     struct nine_state_sw_internal *sw_internal = &device->state_sw_internal;
   3271     struct pipe_vertex_buffer vtxbuf;
   3272     uint32_t mask = 0xf;
   3273     unsigned i;
   3274 
   3275     DBG("mask=%x\n", mask);
   3276 
   3277     /* TODO: handle dummy_vbo_bound_at */
   3278 
   3279     for (i = 0; mask; mask >>= 1, ++i) {
   3280         if (mask & 1) {
   3281             if (state->stream[i]) {
   3282                 unsigned offset;
   3283                 struct pipe_resource *buf;
   3284                 struct pipe_box box;
   3285 
   3286                 vtxbuf = state->vtxbuf[i];
   3287                 vtxbuf.buffer = NineVertexBuffer9_GetResource(state->stream[i], &offset);
   3288 
   3289                 DBG("Locking %p (offset %d, length %d)\n", vtxbuf.buffer,
   3290                     vtxbuf.buffer_offset, num_vertices * vtxbuf.stride);
   3291 
   3292                 u_box_1d(vtxbuf.buffer_offset + offset + start_vertice * vtxbuf.stride,
   3293                          num_vertices * vtxbuf.stride, &box);
   3294                 buf = vtxbuf.buffer;
   3295                 vtxbuf.user_buffer = pipe->transfer_map(pipe, buf, 0, PIPE_TRANSFER_READ, &box,
   3296                                                         &(sw_internal->transfers_so[i]));
   3297                 vtxbuf.buffer = NULL;
   3298                 if (!device->driver_caps.user_sw_vbufs) {
   3299                     u_upload_data(device->vertex_sw_uploader,
   3300                                   0,
   3301                                   box.width,
   3302                                   16,
   3303                                   vtxbuf.user_buffer,
   3304                                   &(vtxbuf.buffer_offset),
   3305                                   &(vtxbuf.buffer));
   3306                     u_upload_unmap(device->vertex_sw_uploader);
   3307                     vtxbuf.user_buffer = NULL;
   3308                 }
   3309                 pipe_sw->set_vertex_buffers(pipe_sw, i, 1, &vtxbuf);
   3310                 if (vtxbuf.buffer)
   3311                     pipe_resource_reference(&vtxbuf.buffer, NULL);
   3312             } else
   3313                 pipe_sw->set_vertex_buffers(pipe_sw, i, 1, NULL);
   3314         }
   3315     }
   3316     nine_context_get_pipe_release(device);
   3317 }
   3318 
   3319 static void
   3320 update_vs_constants_sw(struct NineDevice9 *device)
   3321 {
   3322     struct nine_state *state = &device->state;
   3323     struct pipe_context *pipe_sw = device->pipe_sw;
   3324 
   3325     DBG("updating\n");
   3326 
   3327     {
   3328         struct pipe_constant_buffer cb;
   3329         const void *buf;
   3330 
   3331         cb.buffer = NULL;
   3332         cb.buffer_offset = 0;
   3333         cb.buffer_size = 4096 * sizeof(float[4]);
   3334         cb.user_buffer = state->vs_const_f;
   3335 
   3336         if (state->vs->lconstf.ranges) {
   3337             const struct nine_lconstf *lconstf =  &device->state.vs->lconstf;
   3338             const struct nine_range *r = lconstf->ranges;
   3339             unsigned n = 0;
   3340             float *dst = device->state.vs_lconstf_temp;
   3341             float *src = (float *)cb.user_buffer;
   3342             memcpy(dst, src, 8192 * sizeof(float[4]));
   3343             while (r) {
   3344                 unsigned p = r->bgn;
   3345                 unsigned c = r->end - r->bgn;
   3346                 memcpy(&dst[p * 4], &lconstf->data[n * 4], c * 4 * sizeof(float));
   3347                 n += c;
   3348                 r = r->next;
   3349             }
   3350             cb.user_buffer = dst;
   3351         }
   3352 
   3353         buf = cb.user_buffer;
   3354         if (!device->driver_caps.user_sw_cbufs) {
   3355             u_upload_data(device->constbuf_sw_uploader,
   3356                           0,
   3357                           cb.buffer_size,
   3358                           16,
   3359                           cb.user_buffer,
   3360                           &(cb.buffer_offset),
   3361                           &(cb.buffer));
   3362             u_upload_unmap(device->constbuf_sw_uploader);
   3363             cb.user_buffer = NULL;
   3364         }
   3365 
   3366         pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 0, &cb);
   3367         if (cb.buffer)
   3368             pipe_resource_reference(&cb.buffer, NULL);
   3369 
   3370         cb.user_buffer = (char *)buf + 4096 * sizeof(float[4]);
   3371         if (!device->driver_caps.user_sw_cbufs) {
   3372             u_upload_data(device->constbuf_sw_uploader,
   3373                           0,
   3374                           cb.buffer_size,
   3375                           16,
   3376                           cb.user_buffer,
   3377                           &(cb.buffer_offset),
   3378                           &(cb.buffer));
   3379             u_upload_unmap(device->constbuf_sw_uploader);
   3380             cb.user_buffer = NULL;
   3381         }
   3382 
   3383         pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 1, &cb);
   3384         if (cb.buffer)
   3385             pipe_resource_reference(&cb.buffer, NULL);
   3386     }
   3387 
   3388     {
   3389         struct pipe_constant_buffer cb;
   3390 
   3391         cb.buffer = NULL;
   3392         cb.buffer_offset = 0;
   3393         cb.buffer_size = 2048 * sizeof(float[4]);
   3394         cb.user_buffer = state->vs_const_i;
   3395 
   3396         if (!device->driver_caps.user_sw_cbufs) {
   3397             u_upload_data(device->constbuf_sw_uploader,
   3398                           0,
   3399                           cb.buffer_size,
   3400                           16,
   3401                           cb.user_buffer,
   3402                           &(cb.buffer_offset),
   3403                           &(cb.buffer));
   3404             u_upload_unmap(device->constbuf_sw_uploader);
   3405             cb.user_buffer = NULL;
   3406         }
   3407 
   3408         pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 2, &cb);
   3409         if (cb.buffer)
   3410             pipe_resource_reference(&cb.buffer, NULL);
   3411     }
   3412 
   3413     {
   3414         struct pipe_constant_buffer cb;
   3415 
   3416         cb.buffer = NULL;
   3417         cb.buffer_offset = 0;
   3418         cb.buffer_size = 512 * sizeof(float[4]);
   3419         cb.user_buffer = state->vs_const_b;
   3420 
   3421         if (!device->driver_caps.user_sw_cbufs) {
   3422             u_upload_data(device->constbuf_sw_uploader,
   3423                           0,
   3424                           cb.buffer_size,
   3425                           16,
   3426                           cb.user_buffer,
   3427                           &(cb.buffer_offset),
   3428                           &(cb.buffer));
   3429             u_upload_unmap(device->constbuf_sw_uploader);
   3430             cb.user_buffer = NULL;
   3431         }
   3432 
   3433         pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 3, &cb);
   3434         if (cb.buffer)
   3435             pipe_resource_reference(&cb.buffer, NULL);
   3436     }
   3437 
   3438     {
   3439         struct pipe_constant_buffer cb;
   3440         const D3DVIEWPORT9 *vport = &device->state.viewport;
   3441         float viewport_data[8] = {(float)vport->Width * 0.5f,
   3442             (float)vport->Height * -0.5f, vport->MaxZ - vport->MinZ, 0.f,
   3443             (float)vport->Width * 0.5f + (float)vport->X,
   3444             (float)vport->Height * 0.5f + (float)vport->Y,
   3445             vport->MinZ, 0.f};
   3446 
   3447         cb.buffer = NULL;
   3448         cb.buffer_offset = 0;
   3449         cb.buffer_size = 2 * sizeof(float[4]);
   3450         cb.user_buffer = viewport_data;
   3451 
   3452         {
   3453             u_upload_data(device->constbuf_sw_uploader,
   3454                           0,
   3455                           cb.buffer_size,
   3456                           16,
   3457                           cb.user_buffer,
   3458                           &(cb.buffer_offset),
   3459                           &(cb.buffer));
   3460             u_upload_unmap(device->constbuf_sw_uploader);
   3461             cb.user_buffer = NULL;
   3462         }
   3463 
   3464         pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 4, &cb);
   3465         if (cb.buffer)
   3466             pipe_resource_reference(&cb.buffer, NULL);
   3467     }
   3468 
   3469 }
   3470 
   3471 void
   3472 nine_state_prepare_draw_sw(struct NineDevice9 *device, struct NineVertexDeclaration9 *vdecl_out,
   3473                            int start_vertice, int num_vertices, struct pipe_stream_output_info *so)
   3474 {
   3475     struct nine_state *state = &device->state;
   3476     bool programmable_vs = state->vs && !(state->vdecl && state->vdecl->position_t);
   3477     struct NineVertexShader9 *vs = programmable_vs ? device->state.vs : device->ff.vs;
   3478 
   3479     assert(programmable_vs);
   3480 
   3481     DBG("Preparing draw\n");
   3482     cso_set_vertex_shader_handle(device->cso_sw,
   3483                                  NineVertexShader9_GetVariantProcessVertices(vs, vdecl_out, so));
   3484     update_vertex_elements_sw(device);
   3485     update_vertex_buffers_sw(device, start_vertice, num_vertices);
   3486     update_vs_constants_sw(device);
   3487     DBG("Preparation succeeded\n");
   3488 }
   3489 
   3490 void
   3491 nine_state_after_draw_sw(struct NineDevice9 *device)
   3492 {
   3493     struct nine_state_sw_internal *sw_internal = &device->state_sw_internal;
   3494     struct pipe_context *pipe = nine_context_get_pipe_acquire(device);
   3495     struct pipe_context *pipe_sw = device->pipe_sw;
   3496     int i;
   3497 
   3498     for (i = 0; i < 4; i++) {
   3499         pipe_sw->set_vertex_buffers(pipe_sw, i, 1, NULL);
   3500         if (sw_internal->transfers_so[i])
   3501             pipe->transfer_unmap(pipe, sw_internal->transfers_so[i]);
   3502         sw_internal->transfers_so[i] = NULL;
   3503     }
   3504     nine_context_get_pipe_release(device);
   3505 }
   3506 
   3507 void
   3508 nine_state_destroy_sw(struct NineDevice9 *device)
   3509 {
   3510     (void) device;
   3511     /* Everything destroyed with cso */
   3512 }
   3513 
   3514 /*
   3515 static const DWORD nine_render_states_pixel[] =
   3516 {
   3517     D3DRS_ALPHABLENDENABLE,
   3518     D3DRS_ALPHAFUNC,
   3519     D3DRS_ALPHAREF,
   3520     D3DRS_ALPHATESTENABLE,
   3521     D3DRS_ANTIALIASEDLINEENABLE,
   3522     D3DRS_BLENDFACTOR,
   3523     D3DRS_BLENDOP,
   3524     D3DRS_BLENDOPALPHA,
   3525     D3DRS_CCW_STENCILFAIL,
   3526     D3DRS_CCW_STENCILPASS,
   3527     D3DRS_CCW_STENCILZFAIL,
   3528     D3DRS_COLORWRITEENABLE,
   3529     D3DRS_COLORWRITEENABLE1,
   3530     D3DRS_COLORWRITEENABLE2,
   3531     D3DRS_COLORWRITEENABLE3,
   3532     D3DRS_DEPTHBIAS,
   3533     D3DRS_DESTBLEND,
   3534     D3DRS_DESTBLENDALPHA,
   3535     D3DRS_DITHERENABLE,
   3536     D3DRS_FILLMODE,
   3537     D3DRS_FOGDENSITY,
   3538     D3DRS_FOGEND,
   3539     D3DRS_FOGSTART,
   3540     D3DRS_LASTPIXEL,
   3541     D3DRS_SCISSORTESTENABLE,
   3542     D3DRS_SEPARATEALPHABLENDENABLE,
   3543     D3DRS_SHADEMODE,
   3544     D3DRS_SLOPESCALEDEPTHBIAS,
   3545     D3DRS_SRCBLEND,
   3546     D3DRS_SRCBLENDALPHA,
   3547     D3DRS_SRGBWRITEENABLE,
   3548     D3DRS_STENCILENABLE,
   3549     D3DRS_STENCILFAIL,
   3550     D3DRS_STENCILFUNC,
   3551     D3DRS_STENCILMASK,
   3552     D3DRS_STENCILPASS,
   3553     D3DRS_STENCILREF,
   3554     D3DRS_STENCILWRITEMASK,
   3555     D3DRS_STENCILZFAIL,
   3556     D3DRS_TEXTUREFACTOR,
   3557     D3DRS_TWOSIDEDSTENCILMODE,
   3558     D3DRS_WRAP0,
   3559     D3DRS_WRAP1,
   3560     D3DRS_WRAP10,
   3561     D3DRS_WRAP11,
   3562     D3DRS_WRAP12,
   3563     D3DRS_WRAP13,
   3564     D3DRS_WRAP14,
   3565     D3DRS_WRAP15,
   3566     D3DRS_WRAP2,
   3567     D3DRS_WRAP3,
   3568     D3DRS_WRAP4,
   3569     D3DRS_WRAP5,
   3570     D3DRS_WRAP6,
   3571     D3DRS_WRAP7,
   3572     D3DRS_WRAP8,
   3573     D3DRS_WRAP9,
   3574     D3DRS_ZENABLE,
   3575     D3DRS_ZFUNC,
   3576     D3DRS_ZWRITEENABLE
   3577 };
   3578 */
   3579 const uint32_t nine_render_states_pixel[(NINED3DRS_LAST + 31) / 32] =
   3580 {
   3581     0x0f99c380, 0x1ff00070, 0x00000000, 0x00000000,
   3582     0x000000ff, 0xde01c900, 0x0003ffcf
   3583 };
   3584 
   3585 /*
   3586 static const DWORD nine_render_states_vertex[] =
   3587 {
   3588     D3DRS_ADAPTIVETESS_W,
   3589     D3DRS_ADAPTIVETESS_X,
   3590     D3DRS_ADAPTIVETESS_Y,
   3591     D3DRS_ADAPTIVETESS_Z,
   3592     D3DRS_AMBIENT,
   3593     D3DRS_AMBIENTMATERIALSOURCE,
   3594     D3DRS_CLIPPING,
   3595     D3DRS_CLIPPLANEENABLE,
   3596     D3DRS_COLORVERTEX,
   3597     D3DRS_CULLMODE,
   3598     D3DRS_DIFFUSEMATERIALSOURCE,
   3599     D3DRS_EMISSIVEMATERIALSOURCE,
   3600     D3DRS_ENABLEADAPTIVETESSELLATION,
   3601     D3DRS_FOGCOLOR,
   3602     D3DRS_FOGDENSITY,
   3603     D3DRS_FOGENABLE,
   3604     D3DRS_FOGEND,
   3605     D3DRS_FOGSTART,
   3606     D3DRS_FOGTABLEMODE,
   3607     D3DRS_FOGVERTEXMODE,
   3608     D3DRS_INDEXEDVERTEXBLENDENABLE,
   3609     D3DRS_LIGHTING,
   3610     D3DRS_LOCALVIEWER,
   3611     D3DRS_MAXTESSELLATIONLEVEL,
   3612     D3DRS_MINTESSELLATIONLEVEL,
   3613     D3DRS_MULTISAMPLEANTIALIAS,
   3614     D3DRS_MULTISAMPLEMASK,
   3615     D3DRS_NORMALDEGREE,
   3616     D3DRS_NORMALIZENORMALS,
   3617     D3DRS_PATCHEDGESTYLE,
   3618     D3DRS_POINTSCALE_A,
   3619     D3DRS_POINTSCALE_B,
   3620     D3DRS_POINTSCALE_C,
   3621     D3DRS_POINTSCALEENABLE,
   3622     D3DRS_POINTSIZE,
   3623     D3DRS_POINTSIZE_MAX,
   3624     D3DRS_POINTSIZE_MIN,
   3625     D3DRS_POINTSPRITEENABLE,
   3626     D3DRS_POSITIONDEGREE,
   3627     D3DRS_RANGEFOGENABLE,
   3628     D3DRS_SHADEMODE,
   3629     D3DRS_SPECULARENABLE,
   3630     D3DRS_SPECULARMATERIALSOURCE,
   3631     D3DRS_TWEENFACTOR,
   3632     D3DRS_VERTEXBLEND
   3633 };
   3634 */
   3635 const uint32_t nine_render_states_vertex[(NINED3DRS_LAST + 31) / 32] =
   3636 {
   3637     0x30400200, 0x0001007c, 0x00000000, 0x00000000,
   3638     0xfd9efb00, 0x01fc34cf, 0x00000000
   3639 };
   3640 
   3641 /* TODO: put in the right values */
   3642 const uint32_t nine_render_state_group[NINED3DRS_LAST + 1] =
   3643 {
   3644     [D3DRS_ZENABLE] = NINE_STATE_DSA | NINE_STATE_MULTISAMPLE,
   3645     [D3DRS_FILLMODE] = NINE_STATE_RASTERIZER,
   3646     [D3DRS_SHADEMODE] = NINE_STATE_RASTERIZER,
   3647     [D3DRS_ZWRITEENABLE] = NINE_STATE_DSA,
   3648     [D3DRS_ALPHATESTENABLE] = NINE_STATE_DSA,
   3649     [D3DRS_LASTPIXEL] = NINE_STATE_RASTERIZER,
   3650     [D3DRS_SRCBLEND] = NINE_STATE_BLEND,
   3651     [D3DRS_DESTBLEND] = NINE_STATE_BLEND,
   3652     [D3DRS_CULLMODE] = NINE_STATE_RASTERIZER,
   3653     [D3DRS_ZFUNC] = NINE_STATE_DSA,
   3654     [D3DRS_ALPHAREF] = NINE_STATE_DSA,
   3655     [D3DRS_ALPHAFUNC] = NINE_STATE_DSA,
   3656     [D3DRS_DITHERENABLE] = NINE_STATE_BLEND,
   3657     [D3DRS_ALPHABLENDENABLE] = NINE_STATE_BLEND,
   3658     [D3DRS_FOGENABLE] = NINE_STATE_FF_OTHER | NINE_STATE_FOG_SHADER | NINE_STATE_PS_CONST,
   3659     [D3DRS_SPECULARENABLE] = NINE_STATE_FF_LIGHTING,
   3660     [D3DRS_FOGCOLOR] = NINE_STATE_FF_OTHER | NINE_STATE_PS_CONST,
   3661     [D3DRS_FOGTABLEMODE] = NINE_STATE_FF_OTHER | NINE_STATE_FOG_SHADER | NINE_STATE_PS_CONST,
   3662     [D3DRS_FOGSTART] = NINE_STATE_FF_OTHER | NINE_STATE_PS_CONST,
   3663     [D3DRS_FOGEND] = NINE_STATE_FF_OTHER | NINE_STATE_PS_CONST,
   3664     [D3DRS_FOGDENSITY] = NINE_STATE_FF_OTHER | NINE_STATE_PS_CONST,
   3665     [D3DRS_RANGEFOGENABLE] = NINE_STATE_FF_OTHER,
   3666     [D3DRS_STENCILENABLE] = NINE_STATE_DSA | NINE_STATE_MULTISAMPLE,
   3667     [D3DRS_STENCILFAIL] = NINE_STATE_DSA,
   3668     [D3DRS_STENCILZFAIL] = NINE_STATE_DSA,
   3669     [D3DRS_STENCILPASS] = NINE_STATE_DSA,
   3670     [D3DRS_STENCILFUNC] = NINE_STATE_DSA,
   3671     [D3DRS_STENCILREF] = NINE_STATE_STENCIL_REF,
   3672     [D3DRS_STENCILMASK] = NINE_STATE_DSA,
   3673     [D3DRS_STENCILWRITEMASK] = NINE_STATE_DSA,
   3674     [D3DRS_TEXTUREFACTOR] = NINE_STATE_FF_PSSTAGES,
   3675     [D3DRS_WRAP0] = NINE_STATE_UNHANDLED, /* cylindrical wrap is crazy */
   3676     [D3DRS_WRAP1] = NINE_STATE_UNHANDLED,
   3677     [D3DRS_WRAP2] = NINE_STATE_UNHANDLED,
   3678     [D3DRS_WRAP3] = NINE_STATE_UNHANDLED,
   3679     [D3DRS_WRAP4] = NINE_STATE_UNHANDLED,
   3680     [D3DRS_WRAP5] = NINE_STATE_UNHANDLED,
   3681     [D3DRS_WRAP6] = NINE_STATE_UNHANDLED,
   3682     [D3DRS_WRAP7] = NINE_STATE_UNHANDLED,
   3683     [D3DRS_CLIPPING] = 0, /* software vertex processing only */
   3684     [D3DRS_LIGHTING] = NINE_STATE_FF_LIGHTING,
   3685     [D3DRS_AMBIENT] = NINE_STATE_FF_LIGHTING | NINE_STATE_FF_MATERIAL,
   3686     [D3DRS_FOGVERTEXMODE] = NINE_STATE_FF_OTHER,
   3687     [D3DRS_COLORVERTEX] = NINE_STATE_FF_LIGHTING,
   3688     [D3DRS_LOCALVIEWER] = NINE_STATE_FF_LIGHTING,
   3689     [D3DRS_NORMALIZENORMALS] = NINE_STATE_FF_OTHER,
   3690     [D3DRS_DIFFUSEMATERIALSOURCE] = NINE_STATE_FF_LIGHTING,
   3691     [D3DRS_SPECULARMATERIALSOURCE] = NINE_STATE_FF_LIGHTING,
   3692     [D3DRS_AMBIENTMATERIALSOURCE] = NINE_STATE_FF_LIGHTING,
   3693     [D3DRS_EMISSIVEMATERIALSOURCE] = NINE_STATE_FF_LIGHTING,
   3694     [D3DRS_VERTEXBLEND] = NINE_STATE_FF_OTHER,
   3695     [D3DRS_CLIPPLANEENABLE] = NINE_STATE_RASTERIZER,
   3696     [D3DRS_POINTSIZE] = NINE_STATE_RASTERIZER,
   3697     [D3DRS_POINTSIZE_MIN] = NINE_STATE_RASTERIZER | NINE_STATE_POINTSIZE_SHADER,
   3698     [D3DRS_POINTSPRITEENABLE] = NINE_STATE_RASTERIZER,
   3699     [D3DRS_POINTSCALEENABLE] = NINE_STATE_FF_OTHER,
   3700     [D3DRS_POINTSCALE_A] = NINE_STATE_FF_OTHER,
   3701     [D3DRS_POINTSCALE_B] = NINE_STATE_FF_OTHER,
   3702     [D3DRS_POINTSCALE_C] = NINE_STATE_FF_OTHER,
   3703     [D3DRS_MULTISAMPLEANTIALIAS] = NINE_STATE_MULTISAMPLE,
   3704     [D3DRS_MULTISAMPLEMASK] = NINE_STATE_SAMPLE_MASK,
   3705     [D3DRS_PATCHEDGESTYLE] = NINE_STATE_UNHANDLED,
   3706     [D3DRS_DEBUGMONITORTOKEN] = NINE_STATE_UNHANDLED,
   3707     [D3DRS_POINTSIZE_MAX] = NINE_STATE_RASTERIZER | NINE_STATE_POINTSIZE_SHADER,
   3708     [D3DRS_INDEXEDVERTEXBLENDENABLE] = NINE_STATE_FF_OTHER,
   3709     [D3DRS_COLORWRITEENABLE] = NINE_STATE_BLEND,
   3710     [D3DRS_TWEENFACTOR] = NINE_STATE_FF_OTHER,
   3711     [D3DRS_BLENDOP] = NINE_STATE_BLEND,
   3712     [D3DRS_POSITIONDEGREE] = NINE_STATE_UNHANDLED,
   3713     [D3DRS_NORMALDEGREE] = NINE_STATE_UNHANDLED,
   3714     [D3DRS_SCISSORTESTENABLE] = NINE_STATE_RASTERIZER,
   3715     [D3DRS_SLOPESCALEDEPTHBIAS] = NINE_STATE_RASTERIZER,
   3716     [D3DRS_ANTIALIASEDLINEENABLE] = NINE_STATE_RASTERIZER,
   3717     [D3DRS_MINTESSELLATIONLEVEL] = NINE_STATE_UNHANDLED,
   3718     [D3DRS_MAXTESSELLATIONLEVEL] = NINE_STATE_UNHANDLED,
   3719     [D3DRS_ADAPTIVETESS_X] = NINE_STATE_UNHANDLED,
   3720     [D3DRS_ADAPTIVETESS_Y] = NINE_STATE_UNHANDLED,
   3721     [D3DRS_ADAPTIVETESS_Z] = NINE_STATE_UNHANDLED,
   3722     [D3DRS_ADAPTIVETESS_W] = NINE_STATE_UNHANDLED,
   3723     [D3DRS_ENABLEADAPTIVETESSELLATION] = NINE_STATE_UNHANDLED,
   3724     [D3DRS_TWOSIDEDSTENCILMODE] = NINE_STATE_DSA,
   3725     [D3DRS_CCW_STENCILFAIL] = NINE_STATE_DSA,
   3726     [D3DRS_CCW_STENCILZFAIL] = NINE_STATE_DSA,
   3727     [D3DRS_CCW_STENCILPASS] = NINE_STATE_DSA,
   3728     [D3DRS_CCW_STENCILFUNC] = NINE_STATE_DSA,
   3729     [D3DRS_COLORWRITEENABLE1] = NINE_STATE_BLEND,
   3730     [D3DRS_COLORWRITEENABLE2] = NINE_STATE_BLEND,
   3731     [D3DRS_COLORWRITEENABLE3] = NINE_STATE_BLEND,
   3732     [D3DRS_BLENDFACTOR] = NINE_STATE_BLEND_COLOR,
   3733     [D3DRS_SRGBWRITEENABLE] = NINE_STATE_FB,
   3734     [D3DRS_DEPTHBIAS] = NINE_STATE_RASTERIZER,
   3735     [D3DRS_WRAP8] = NINE_STATE_UNHANDLED, /* cylwrap has to be done via GP */
   3736     [D3DRS_WRAP9] = NINE_STATE_UNHANDLED,
   3737     [D3DRS_WRAP10] = NINE_STATE_UNHANDLED,
   3738     [D3DRS_WRAP11] = NINE_STATE_UNHANDLED,
   3739     [D3DRS_WRAP12] = NINE_STATE_UNHANDLED,
   3740     [D3DRS_WRAP13] = NINE_STATE_UNHANDLED,
   3741     [D3DRS_WRAP14] = NINE_STATE_UNHANDLED,
   3742     [D3DRS_WRAP15] = NINE_STATE_UNHANDLED,
   3743     [D3DRS_SEPARATEALPHABLENDENABLE] = NINE_STATE_BLEND,
   3744     [D3DRS_SRCBLENDALPHA] = NINE_STATE_BLEND,
   3745     [D3DRS_DESTBLENDALPHA] = NINE_STATE_BLEND,
   3746     [D3DRS_BLENDOPALPHA] = NINE_STATE_BLEND
   3747 };
   3748 
   3749 /* Misc */
   3750 
   3751 D3DMATRIX *
   3752 nine_state_access_transform(struct nine_ff_state *ff_state, D3DTRANSFORMSTATETYPE t,
   3753                             boolean alloc)
   3754 {
   3755     static D3DMATRIX Identity = { .m[0] = { 1, 0, 0, 0 },
   3756                                   .m[1] = { 0, 1, 0, 0 },
   3757                                   .m[2] = { 0, 0, 1, 0 },
   3758                                   .m[3] = { 0, 0, 0, 1 } };
   3759     unsigned index;
   3760 
   3761     switch (t) {
   3762     case D3DTS_VIEW: index = 0; break;
   3763     case D3DTS_PROJECTION: index = 1; break;
   3764     case D3DTS_TEXTURE0: index = 2; break;
   3765     case D3DTS_TEXTURE1: index = 3; break;
   3766     case D3DTS_TEXTURE2: index = 4; break;
   3767     case D3DTS_TEXTURE3: index = 5; break;
   3768     case D3DTS_TEXTURE4: index = 6; break;
   3769     case D3DTS_TEXTURE5: index = 7; break;
   3770     case D3DTS_TEXTURE6: index = 8; break;
   3771     case D3DTS_TEXTURE7: index = 9; break;
   3772     default:
   3773         if (!(t >= D3DTS_WORLDMATRIX(0) && t <= D3DTS_WORLDMATRIX(255)))
   3774             return NULL;
   3775         index = 10 + (t - D3DTS_WORLDMATRIX(0));
   3776         break;
   3777     }
   3778 
   3779     if (index >= ff_state->num_transforms) {
   3780         unsigned N = index + 1;
   3781         unsigned n = ff_state->num_transforms;
   3782 
   3783         if (!alloc)
   3784             return &Identity;
   3785         ff_state->transform = REALLOC(ff_state->transform,
   3786                                       n * sizeof(D3DMATRIX),
   3787                                       N * sizeof(D3DMATRIX));
   3788         for (; n < N; ++n)
   3789             ff_state->transform[n] = Identity;
   3790         ff_state->num_transforms = N;
   3791     }
   3792     return &ff_state->transform[index];
   3793 }
   3794 
   3795 HRESULT
   3796 nine_state_set_light(struct nine_ff_state *ff_state, DWORD Index,
   3797                      const D3DLIGHT9 *pLight)
   3798 {
   3799     if (Index >= ff_state->num_lights) {
   3800         unsigned n = ff_state->num_lights;
   3801         unsigned N = Index + 1;
   3802 
   3803         ff_state->light = REALLOC(ff_state->light, n * sizeof(D3DLIGHT9),
   3804                                                    N * sizeof(D3DLIGHT9));
   3805         if (!ff_state->light)
   3806             return E_OUTOFMEMORY;
   3807         ff_state->num_lights = N;
   3808 
   3809         for (; n < Index; ++n) {
   3810             memset(&ff_state->light[n], 0, sizeof(D3DLIGHT9));
   3811             ff_state->light[n].Type = (D3DLIGHTTYPE)NINED3DLIGHT_INVALID;
   3812         }
   3813     }
   3814     ff_state->light[Index] = *pLight;
   3815 
   3816     if (pLight->Type == D3DLIGHT_SPOT && pLight->Theta >= pLight->Phi) {
   3817         DBG("Warning: clamping D3DLIGHT9.Theta\n");
   3818         ff_state->light[Index].Theta = ff_state->light[Index].Phi;
   3819     }
   3820     return D3D_OK;
   3821 }
   3822 
   3823 HRESULT
   3824 nine_state_light_enable(struct nine_ff_state *ff_state, uint32_t *change_group,
   3825                         DWORD Index, BOOL Enable)
   3826 {
   3827     unsigned i;
   3828 
   3829     user_assert(Index < ff_state->num_lights, D3DERR_INVALIDCALL);
   3830 
   3831     for (i = 0; i < ff_state->num_lights_active; ++i) {
   3832         if (ff_state->active_light[i] == Index)
   3833             break;
   3834     }
   3835 
   3836     if (Enable) {
   3837         if (i < ff_state->num_lights_active)
   3838             return D3D_OK;
   3839         /* XXX wine thinks this should still succeed:
   3840          */
   3841         user_assert(i < NINE_MAX_LIGHTS_ACTIVE, D3DERR_INVALIDCALL);
   3842 
   3843         ff_state->active_light[i] = Index;
   3844         ff_state->num_lights_active++;
   3845     } else {
   3846         if (i == ff_state->num_lights_active)
   3847             return D3D_OK;
   3848         --ff_state->num_lights_active;
   3849         for (; i < ff_state->num_lights_active; ++i)
   3850             ff_state->active_light[i] = ff_state->active_light[i + 1];
   3851     }
   3852 
   3853     *change_group |= NINE_STATE_FF_LIGHTING;
   3854 
   3855     return D3D_OK;
   3856 }
   3857 
   3858 #define D3DRS_TO_STRING_CASE(n) case D3DRS_##n: return "D3DRS_"#n
   3859 const char *nine_d3drs_to_string(DWORD State)
   3860 {
   3861     switch (State) {
   3862     D3DRS_TO_STRING_CASE(ZENABLE);
   3863     D3DRS_TO_STRING_CASE(FILLMODE);
   3864     D3DRS_TO_STRING_CASE(SHADEMODE);
   3865     D3DRS_TO_STRING_CASE(ZWRITEENABLE);
   3866     D3DRS_TO_STRING_CASE(ALPHATESTENABLE);
   3867     D3DRS_TO_STRING_CASE(LASTPIXEL);
   3868     D3DRS_TO_STRING_CASE(SRCBLEND);
   3869     D3DRS_TO_STRING_CASE(DESTBLEND);
   3870     D3DRS_TO_STRING_CASE(CULLMODE);
   3871     D3DRS_TO_STRING_CASE(ZFUNC);
   3872     D3DRS_TO_STRING_CASE(ALPHAREF);
   3873     D3DRS_TO_STRING_CASE(ALPHAFUNC);
   3874     D3DRS_TO_STRING_CASE(DITHERENABLE);
   3875     D3DRS_TO_STRING_CASE(ALPHABLENDENABLE);
   3876     D3DRS_TO_STRING_CASE(FOGENABLE);
   3877     D3DRS_TO_STRING_CASE(SPECULARENABLE);
   3878     D3DRS_TO_STRING_CASE(FOGCOLOR);
   3879     D3DRS_TO_STRING_CASE(FOGTABLEMODE);
   3880     D3DRS_TO_STRING_CASE(FOGSTART);
   3881     D3DRS_TO_STRING_CASE(FOGEND);
   3882     D3DRS_TO_STRING_CASE(FOGDENSITY);
   3883     D3DRS_TO_STRING_CASE(RANGEFOGENABLE);
   3884     D3DRS_TO_STRING_CASE(STENCILENABLE);
   3885     D3DRS_TO_STRING_CASE(STENCILFAIL);
   3886     D3DRS_TO_STRING_CASE(STENCILZFAIL);
   3887     D3DRS_TO_STRING_CASE(STENCILPASS);
   3888     D3DRS_TO_STRING_CASE(STENCILFUNC);
   3889     D3DRS_TO_STRING_CASE(STENCILREF);
   3890     D3DRS_TO_STRING_CASE(STENCILMASK);
   3891     D3DRS_TO_STRING_CASE(STENCILWRITEMASK);
   3892     D3DRS_TO_STRING_CASE(TEXTUREFACTOR);
   3893     D3DRS_TO_STRING_CASE(WRAP0);
   3894     D3DRS_TO_STRING_CASE(WRAP1);
   3895     D3DRS_TO_STRING_CASE(WRAP2);
   3896     D3DRS_TO_STRING_CASE(WRAP3);
   3897     D3DRS_TO_STRING_CASE(WRAP4);
   3898     D3DRS_TO_STRING_CASE(WRAP5);
   3899     D3DRS_TO_STRING_CASE(WRAP6);
   3900     D3DRS_TO_STRING_CASE(WRAP7);
   3901     D3DRS_TO_STRING_CASE(CLIPPING);
   3902     D3DRS_TO_STRING_CASE(LIGHTING);
   3903     D3DRS_TO_STRING_CASE(AMBIENT);
   3904     D3DRS_TO_STRING_CASE(FOGVERTEXMODE);
   3905     D3DRS_TO_STRING_CASE(COLORVERTEX);
   3906     D3DRS_TO_STRING_CASE(LOCALVIEWER);
   3907     D3DRS_TO_STRING_CASE(NORMALIZENORMALS);
   3908     D3DRS_TO_STRING_CASE(DIFFUSEMATERIALSOURCE);
   3909     D3DRS_TO_STRING_CASE(SPECULARMATERIALSOURCE);
   3910     D3DRS_TO_STRING_CASE(AMBIENTMATERIALSOURCE);
   3911     D3DRS_TO_STRING_CASE(EMISSIVEMATERIALSOURCE);
   3912     D3DRS_TO_STRING_CASE(VERTEXBLEND);
   3913     D3DRS_TO_STRING_CASE(CLIPPLANEENABLE);
   3914     D3DRS_TO_STRING_CASE(POINTSIZE);
   3915     D3DRS_TO_STRING_CASE(POINTSIZE_MIN);
   3916     D3DRS_TO_STRING_CASE(POINTSPRITEENABLE);
   3917     D3DRS_TO_STRING_CASE(POINTSCALEENABLE);
   3918     D3DRS_TO_STRING_CASE(POINTSCALE_A);
   3919     D3DRS_TO_STRING_CASE(POINTSCALE_B);
   3920     D3DRS_TO_STRING_CASE(POINTSCALE_C);
   3921     D3DRS_TO_STRING_CASE(MULTISAMPLEANTIALIAS);
   3922     D3DRS_TO_STRING_CASE(MULTISAMPLEMASK);
   3923     D3DRS_TO_STRING_CASE(PATCHEDGESTYLE);
   3924     D3DRS_TO_STRING_CASE(DEBUGMONITORTOKEN);
   3925     D3DRS_TO_STRING_CASE(POINTSIZE_MAX);
   3926     D3DRS_TO_STRING_CASE(INDEXEDVERTEXBLENDENABLE);
   3927     D3DRS_TO_STRING_CASE(COLORWRITEENABLE);
   3928     D3DRS_TO_STRING_CASE(TWEENFACTOR);
   3929     D3DRS_TO_STRING_CASE(BLENDOP);
   3930     D3DRS_TO_STRING_CASE(POSITIONDEGREE);
   3931     D3DRS_TO_STRING_CASE(NORMALDEGREE);
   3932     D3DRS_TO_STRING_CASE(SCISSORTESTENABLE);
   3933     D3DRS_TO_STRING_CASE(SLOPESCALEDEPTHBIAS);
   3934     D3DRS_TO_STRING_CASE(ANTIALIASEDLINEENABLE);
   3935     D3DRS_TO_STRING_CASE(MINTESSELLATIONLEVEL);
   3936     D3DRS_TO_STRING_CASE(MAXTESSELLATIONLEVEL);
   3937     D3DRS_TO_STRING_CASE(ADAPTIVETESS_X);
   3938     D3DRS_TO_STRING_CASE(ADAPTIVETESS_Y);
   3939     D3DRS_TO_STRING_CASE(ADAPTIVETESS_Z);
   3940     D3DRS_TO_STRING_CASE(ADAPTIVETESS_W);
   3941     D3DRS_TO_STRING_CASE(ENABLEADAPTIVETESSELLATION);
   3942     D3DRS_TO_STRING_CASE(TWOSIDEDSTENCILMODE);
   3943     D3DRS_TO_STRING_CASE(CCW_STENCILFAIL);
   3944     D3DRS_TO_STRING_CASE(CCW_STENCILZFAIL);
   3945     D3DRS_TO_STRING_CASE(CCW_STENCILPASS);
   3946     D3DRS_TO_STRING_CASE(CCW_STENCILFUNC);
   3947     D3DRS_TO_STRING_CASE(COLORWRITEENABLE1);
   3948     D3DRS_TO_STRING_CASE(COLORWRITEENABLE2);
   3949     D3DRS_TO_STRING_CASE(COLORWRITEENABLE3);
   3950     D3DRS_TO_STRING_CASE(BLENDFACTOR);
   3951     D3DRS_TO_STRING_CASE(SRGBWRITEENABLE);
   3952     D3DRS_TO_STRING_CASE(DEPTHBIAS);
   3953     D3DRS_TO_STRING_CASE(WRAP8);
   3954     D3DRS_TO_STRING_CASE(WRAP9);
   3955     D3DRS_TO_STRING_CASE(WRAP10);
   3956     D3DRS_TO_STRING_CASE(WRAP11);
   3957     D3DRS_TO_STRING_CASE(WRAP12);
   3958     D3DRS_TO_STRING_CASE(WRAP13);
   3959     D3DRS_TO_STRING_CASE(WRAP14);
   3960     D3DRS_TO_STRING_CASE(WRAP15);
   3961     D3DRS_TO_STRING_CASE(SEPARATEALPHABLENDENABLE);
   3962     D3DRS_TO_STRING_CASE(SRCBLENDALPHA);
   3963     D3DRS_TO_STRING_CASE(DESTBLENDALPHA);
   3964     D3DRS_TO_STRING_CASE(BLENDOPALPHA);
   3965     default:
   3966         return "(invalid)";
   3967     }
   3968 }
   3969