Home | History | Annotate | Download | only in nvc0
      1 /*
      2  * Copyright 2008 Ben Skeggs
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice shall be included in
     12  * all copies or substantial portions of the Software.
     13  *
     14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
     18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     20  * OTHER DEALINGS IN THE SOFTWARE.
     21  */
     22 
     23 #include "nvc0/nvc0_context.h"
     24 #include "nvc0/nvc0_resource.h"
     25 #include "nvc0/gm107_texture.xml.h"
     26 #include "nvc0/nvc0_compute.xml.h"
     27 #include "nv50/g80_texture.xml.h"
     28 #include "nv50/g80_defs.xml.h"
     29 
     30 #include "util/u_format.h"
     31 
/*
 * Masks for the two fields packed into each nvc0->tex_handles[s][i] word:
 * nve4_validate_tic() ORs the TIC id into the low 20 bits and
 * nve4_validate_tsc() ORs the TSC id in shifted left by 20.  OR-ing one of
 * these masks into a handle sets its field to all-ones, which is how those
 * functions mark a slot without a bound view / sampler.
 */
#define NVE4_TIC_ENTRY_INVALID 0x000fffff
#define NVE4_TSC_ENTRY_INVALID 0xfff00000
     34 
     35 static inline uint32_t
     36 nv50_tic_swizzle(const struct nvc0_format *fmt, unsigned swz, bool tex_int)
     37 {
     38    switch (swz) {
     39    case PIPE_SWIZZLE_X  : return fmt->tic.src_x;
     40    case PIPE_SWIZZLE_Y: return fmt->tic.src_y;
     41    case PIPE_SWIZZLE_Z : return fmt->tic.src_z;
     42    case PIPE_SWIZZLE_W: return fmt->tic.src_w;
     43    case PIPE_SWIZZLE_1:
     44       return tex_int ? G80_TIC_SOURCE_ONE_INT : G80_TIC_SOURCE_ONE_FLOAT;
     45    case PIPE_SWIZZLE_0:
     46    default:
     47       return G80_TIC_SOURCE_ZERO;
     48    }
     49 }
     50 
     51 struct pipe_sampler_view *
     52 nvc0_create_sampler_view(struct pipe_context *pipe,
     53                          struct pipe_resource *res,
     54                          const struct pipe_sampler_view *templ)
     55 {
     56    uint32_t flags = 0;
     57 
     58    if (templ->target == PIPE_TEXTURE_RECT || templ->target == PIPE_BUFFER)
     59       flags |= NV50_TEXVIEW_SCALED_COORDS;
     60 
     61    return nvc0_create_texture_view(pipe, res, templ, flags, templ->target);
     62 }
     63 
     64 static struct pipe_sampler_view *
     65 gm107_create_texture_view(struct pipe_context *pipe,
     66                           struct pipe_resource *texture,
     67                           const struct pipe_sampler_view *templ,
     68                           uint32_t flags,
     69                           enum pipe_texture_target target)
     70 {
     71    const struct util_format_description *desc;
     72    const struct nvc0_format *fmt;
     73    uint64_t address;
     74    uint32_t *tic;
     75    uint32_t swz[4];
     76    uint32_t width, height;
     77    uint32_t depth;
     78    struct nv50_tic_entry *view;
     79    struct nv50_miptree *mt;
     80    bool tex_int;
     81 
     82    view = MALLOC_STRUCT(nv50_tic_entry);
     83    if (!view)
     84       return NULL;
     85    mt = nv50_miptree(texture);
     86 
     87    view->pipe = *templ;
     88    view->pipe.reference.count = 1;
     89    view->pipe.texture = NULL;
     90    view->pipe.context = pipe;
     91 
     92    view->id = -1;
     93 
     94    pipe_resource_reference(&view->pipe.texture, texture);
     95 
     96    tic = &view->tic[0];
     97 
     98    desc = util_format_description(view->pipe.format);
     99    tex_int = util_format_is_pure_integer(view->pipe.format);
    100 
    101    fmt = &nvc0_format_table[view->pipe.format];
    102    swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int);
    103    swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int);
    104    swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int);
    105    swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int);
    106 
    107    tic[0]  = fmt->tic.format << GM107_TIC2_0_COMPONENTS_SIZES__SHIFT;
    108    tic[0] |= fmt->tic.type_r << GM107_TIC2_0_R_DATA_TYPE__SHIFT;
    109    tic[0] |= fmt->tic.type_g << GM107_TIC2_0_G_DATA_TYPE__SHIFT;
    110    tic[0] |= fmt->tic.type_b << GM107_TIC2_0_B_DATA_TYPE__SHIFT;
    111    tic[0] |= fmt->tic.type_a << GM107_TIC2_0_A_DATA_TYPE__SHIFT;
    112    tic[0] |= swz[0] << GM107_TIC2_0_X_SOURCE__SHIFT;
    113    tic[0] |= swz[1] << GM107_TIC2_0_Y_SOURCE__SHIFT;
    114    tic[0] |= swz[2] << GM107_TIC2_0_Z_SOURCE__SHIFT;
    115    tic[0] |= swz[3] << GM107_TIC2_0_W_SOURCE__SHIFT;
    116 
    117    address = mt->base.address;
    118 
    119    tic[3]  = GM107_TIC2_3_LOD_ANISO_QUALITY_2;
    120    tic[4]  = GM107_TIC2_4_SECTOR_PROMOTION_PROMOTE_TO_2_V;
    121    tic[4] |= GM107_TIC2_4_BORDER_SIZE_SAMPLER_COLOR;
    122 
    123    if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
    124       tic[4] |= GM107_TIC2_4_SRGB_CONVERSION;
    125 
    126    if (!(flags & NV50_TEXVIEW_SCALED_COORDS))
    127       tic[5] = GM107_TIC2_5_NORMALIZED_COORDS;
    128    else
    129       tic[5] = 0;
    130 
    131    /* check for linear storage type */
    132    if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) {
    133       if (texture->target == PIPE_BUFFER) {
    134          assert(!(tic[5] & GM107_TIC2_5_NORMALIZED_COORDS));
    135          width = view->pipe.u.buf.size / (desc->block.bits / 8) - 1;
    136          address +=
    137             view->pipe.u.buf.offset;
    138          tic[2]  = GM107_TIC2_2_HEADER_VERSION_ONE_D_BUFFER;
    139          tic[3] |= width >> 16;
    140          tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D_BUFFER;
    141          tic[4] |= width & 0xffff;
    142       } else {
    143          assert(!(mt->level[0].pitch & 0x1f));
    144          /* must be 2D texture without mip maps */
    145          tic[2]  = GM107_TIC2_2_HEADER_VERSION_PITCH;
    146          tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D_NO_MIPMAP;
    147          tic[3] |= mt->level[0].pitch >> 5;
    148          tic[4] |= mt->base.base.width0 - 1;
    149          tic[5] |= 0 << GM107_TIC2_5_DEPTH_MINUS_ONE__SHIFT;
    150          tic[5] |= mt->base.base.height0 - 1;
    151       }
    152       tic[1]  = address;
    153       tic[2] |= address >> 32;
    154       tic[6]  = 0;
    155       tic[7]  = 0;
    156       return &view->pipe;
    157    }
    158 
    159    tic[2]  = GM107_TIC2_2_HEADER_VERSION_BLOCKLINEAR;
    160    tic[3] |=
    161       ((mt->level[0].tile_mode & 0x0f0) >> 4 << 3) |
    162       ((mt->level[0].tile_mode & 0xf00) >> 8 << 6);
    163 
    164    depth = MAX2(mt->base.base.array_size, mt->base.base.depth0);
    165 
    166    if (mt->base.base.array_size > 1) {
    167       /* there doesn't seem to be a base layer field in TIC */
    168       address += view->pipe.u.tex.first_layer * mt->layer_stride;
    169       depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;
    170    }
    171    tic[1]  = address;
    172    tic[2] |= address >> 32;
    173 
    174    switch (target) {
    175    case PIPE_TEXTURE_1D:
    176       tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D;
    177       break;
    178    case PIPE_TEXTURE_2D:
    179       tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D;
    180       break;
    181    case PIPE_TEXTURE_RECT:
    182       tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D;
    183       break;
    184    case PIPE_TEXTURE_3D:
    185       tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_THREE_D;
    186       break;
    187    case PIPE_TEXTURE_CUBE:
    188       depth /= 6;
    189       tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_CUBEMAP;
    190       break;
    191    case PIPE_TEXTURE_1D_ARRAY:
    192       tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D_ARRAY;
    193       break;
    194    case PIPE_TEXTURE_2D_ARRAY:
    195       tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D_ARRAY;
    196       break;
    197    case PIPE_TEXTURE_CUBE_ARRAY:
    198       depth /= 6;
    199       tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_CUBE_ARRAY;
    200       break;
    201    default:
    202       unreachable("unexpected/invalid texture target");
    203    }
    204 
    205    tic[3] |= (flags & NV50_TEXVIEW_FILTER_MSAA8) ?
    206              GM107_TIC2_3_USE_HEADER_OPT_CONTROL :
    207              GM107_TIC2_3_LOD_ANISO_QUALITY_HIGH |
    208              GM107_TIC2_3_LOD_ISO_QUALITY_HIGH;
    209 
    210    if (flags & NV50_TEXVIEW_ACCESS_RESOLVE) {
    211       width = mt->base.base.width0 << mt->ms_x;
    212       height = mt->base.base.height0 << mt->ms_y;
    213    } else {
    214       width = mt->base.base.width0;
    215       height = mt->base.base.height0;
    216    }
    217 
    218    tic[4] |= width - 1;
    219 
    220    tic[5] |= (height - 1) & 0xffff;
    221    tic[5] |= (depth - 1) << GM107_TIC2_5_DEPTH_MINUS_ONE__SHIFT;
    222    tic[3] |= mt->base.base.last_level << GM107_TIC2_3_MAX_MIP_LEVEL__SHIFT;
    223 
    224    /* sampling points: (?) */
    225    if ((flags & NV50_TEXVIEW_ACCESS_RESOLVE) && mt->ms_x > 1) {
    226       tic[6]  = GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER_CONST_TWO;
    227       tic[6] |= GM107_TIC2_6_MAX_ANISOTROPY_2_TO_1;
    228    } else {
    229       tic[6]  = GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC_TWO;
    230       tic[6] |= GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC_ONE;
    231    }
    232 
    233    tic[7]  = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
    234    tic[7] |= mt->ms_mode << GM107_TIC2_7_MULTI_SAMPLE_COUNT__SHIFT;
    235 
    236    return &view->pipe;
    237 }
    238 
    239 struct pipe_sampler_view *
    240 gm107_create_texture_view_from_image(struct pipe_context *pipe,
    241                                      const struct pipe_image_view *view)
    242 {
    243    struct nv04_resource *res = nv04_resource(view->resource);
    244    struct pipe_sampler_view templ = {};
    245    enum pipe_texture_target target;
    246    uint32_t flags = 0;
    247 
    248    if (!res)
    249       return NULL;
    250    target = res->base.target;
    251 
    252    if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY)
    253       target = PIPE_TEXTURE_2D_ARRAY;
    254 
    255    templ.format = view->format;
    256    templ.swizzle_r = PIPE_SWIZZLE_X;
    257    templ.swizzle_g = PIPE_SWIZZLE_Y;
    258    templ.swizzle_b = PIPE_SWIZZLE_Z;
    259    templ.swizzle_a = PIPE_SWIZZLE_W;
    260 
    261    if (target == PIPE_BUFFER) {
    262       templ.u.buf.offset = view->u.buf.offset;
    263       templ.u.buf.size = view->u.buf.size;
    264    } else {
    265       templ.u.tex.first_layer = view->u.tex.first_layer;
    266       templ.u.tex.last_layer = view->u.tex.last_layer;
    267       templ.u.tex.first_level = templ.u.tex.last_level = view->u.tex.level;
    268    }
    269 
    270    flags = NV50_TEXVIEW_SCALED_COORDS;
    271 
    272    return nvc0_create_texture_view(pipe, &res->base, &templ, flags, target);
    273 }
    274 
    275 static struct pipe_sampler_view *
    276 gf100_create_texture_view(struct pipe_context *pipe,
    277                           struct pipe_resource *texture,
    278                           const struct pipe_sampler_view *templ,
    279                           uint32_t flags,
    280                           enum pipe_texture_target target)
    281 {
    282    const struct util_format_description *desc;
    283    const struct nvc0_format *fmt;
    284    uint64_t address;
    285    uint32_t *tic;
    286    uint32_t swz[4];
    287    uint32_t width, height;
    288    uint32_t depth;
    289    uint32_t tex_fmt;
    290    struct nv50_tic_entry *view;
    291    struct nv50_miptree *mt;
    292    bool tex_int;
    293 
    294    view = MALLOC_STRUCT(nv50_tic_entry);
    295    if (!view)
    296       return NULL;
    297    mt = nv50_miptree(texture);
    298 
    299    view->pipe = *templ;
    300    view->pipe.reference.count = 1;
    301    view->pipe.texture = NULL;
    302    view->pipe.context = pipe;
    303 
    304    view->id = -1;
    305 
    306    pipe_resource_reference(&view->pipe.texture, texture);
    307 
    308    tic = &view->tic[0];
    309 
    310    desc = util_format_description(view->pipe.format);
    311 
    312    fmt = &nvc0_format_table[view->pipe.format];
    313 
    314    tex_int = util_format_is_pure_integer(view->pipe.format);
    315    tex_fmt = fmt->tic.format & 0x3f;
    316 
    317    swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int);
    318    swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int);
    319    swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int);
    320    swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int);
    321    tic[0] = (tex_fmt << G80_TIC_0_COMPONENTS_SIZES__SHIFT) |
    322             (fmt->tic.type_r << G80_TIC_0_R_DATA_TYPE__SHIFT) |
    323             (fmt->tic.type_g << G80_TIC_0_G_DATA_TYPE__SHIFT) |
    324             (fmt->tic.type_b << G80_TIC_0_B_DATA_TYPE__SHIFT) |
    325             (fmt->tic.type_a << G80_TIC_0_A_DATA_TYPE__SHIFT) |
    326             (swz[0] << G80_TIC_0_X_SOURCE__SHIFT) |
    327             (swz[1] << G80_TIC_0_Y_SOURCE__SHIFT) |
    328             (swz[2] << G80_TIC_0_Z_SOURCE__SHIFT) |
    329             (swz[3] << G80_TIC_0_W_SOURCE__SHIFT) |
    330             ((fmt->tic.format & 0x40) << (GK20A_TIC_0_USE_COMPONENT_SIZES_EXTENDED__SHIFT - 6));
    331 
    332    address = mt->base.address;
    333 
    334    tic[2] = 0x10001000 | G80_TIC_2_BORDER_SOURCE_COLOR;
    335 
    336    if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
    337       tic[2] |= G80_TIC_2_SRGB_CONVERSION;
    338 
    339    if (!(flags & NV50_TEXVIEW_SCALED_COORDS))
    340       tic[2] |= G80_TIC_2_NORMALIZED_COORDS;
    341 
    342    /* check for linear storage type */
    343    if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) {
    344       if (texture->target == PIPE_BUFFER) {
    345          assert(!(tic[2] & G80_TIC_2_NORMALIZED_COORDS));
    346          address +=
    347             view->pipe.u.buf.offset;
    348          tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_ONE_D_BUFFER;
    349          tic[3] = 0;
    350          tic[4] = /* width */
    351             view->pipe.u.buf.size / (desc->block.bits / 8);
    352          tic[5] = 0;
    353       } else {
    354          /* must be 2D texture without mip maps */
    355          tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_TWO_D_NO_MIPMAP;
    356          tic[3] = mt->level[0].pitch;
    357          tic[4] = mt->base.base.width0;
    358          tic[5] = (1 << 16) | mt->base.base.height0;
    359       }
    360       tic[6] =
    361       tic[7] = 0;
    362       tic[1] = address;
    363       tic[2] |= address >> 32;
    364       return &view->pipe;
    365    }
    366 
    367    tic[2] |=
    368       ((mt->level[0].tile_mode & 0x0f0) << (22 - 4)) |
    369       ((mt->level[0].tile_mode & 0xf00) << (25 - 8));
    370 
    371    depth = MAX2(mt->base.base.array_size, mt->base.base.depth0);
    372 
    373    if (mt->base.base.array_size > 1) {
    374       /* there doesn't seem to be a base layer field in TIC */
    375       address += view->pipe.u.tex.first_layer * mt->layer_stride;
    376       depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;
    377    }
    378    tic[1] = address;
    379    tic[2] |= address >> 32;
    380 
    381    switch (target) {
    382    case PIPE_TEXTURE_1D:
    383       tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D;
    384       break;
    385    case PIPE_TEXTURE_2D:
    386       tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D;
    387       break;
    388    case PIPE_TEXTURE_RECT:
    389       tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D;
    390       break;
    391    case PIPE_TEXTURE_3D:
    392       tic[2] |= G80_TIC_2_TEXTURE_TYPE_THREE_D;
    393       break;
    394    case PIPE_TEXTURE_CUBE:
    395       depth /= 6;
    396       tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBEMAP;
    397       break;
    398    case PIPE_TEXTURE_1D_ARRAY:
    399       tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D_ARRAY;
    400       break;
    401    case PIPE_TEXTURE_2D_ARRAY:
    402       tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D_ARRAY;
    403       break;
    404    case PIPE_TEXTURE_CUBE_ARRAY:
    405       depth /= 6;
    406       tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBE_ARRAY;
    407       break;
    408    default:
    409       unreachable("unexpected/invalid texture target");
    410    }
    411 
    412    tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000;
    413 
    414    if (flags & NV50_TEXVIEW_ACCESS_RESOLVE) {
    415       width = mt->base.base.width0 << mt->ms_x;
    416       height = mt->base.base.height0 << mt->ms_y;
    417    } else {
    418       width = mt->base.base.width0;
    419       height = mt->base.base.height0;
    420    }
    421 
    422    tic[4] = (1 << 31) | width;
    423 
    424    tic[5] = height & 0xffff;
    425    tic[5] |= depth << 16;
    426    tic[5] |= mt->base.base.last_level << 28;
    427 
    428    /* sampling points: (?) */
    429    if (flags & NV50_TEXVIEW_ACCESS_RESOLVE)
    430       tic[6] = (mt->ms_x > 1) ? 0x88000000 : 0x03000000;
    431    else
    432       tic[6] = 0x03000000;
    433 
    434    tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
    435    tic[7] |= mt->ms_mode << 12;
    436 
    437    return &view->pipe;
    438 }
    439 
    440 struct pipe_sampler_view *
    441 nvc0_create_texture_view(struct pipe_context *pipe,
    442                          struct pipe_resource *texture,
    443                          const struct pipe_sampler_view *templ,
    444                          uint32_t flags,
    445                          enum pipe_texture_target target)
    446 {
    447    if (nvc0_context(pipe)->screen->tic.maxwell)
    448       return gm107_create_texture_view(pipe, texture, templ, flags, target);
    449    return gf100_create_texture_view(pipe, texture, templ, flags, target);
    450 }
    451 
    452 void
    453 nvc0_update_tic(struct nvc0_context *nvc0, struct nv50_tic_entry *tic,
    454                 struct nv04_resource *res)
    455 {
    456    uint64_t address = res->address;
    457    if (res->base.target != PIPE_BUFFER)
    458       return;
    459    address += tic->pipe.u.buf.offset;
    460    if (tic->tic[1] == (uint32_t)address &&
    461        (tic->tic[2] & 0xff) == address >> 32)
    462       return;
    463 
    464    nvc0_screen_tic_unlock(nvc0->screen, tic);
    465    tic->id = -1;
    466    tic->tic[1] = address;
    467    tic->tic[2] &= 0xffffff00;
    468    tic->tic[2] |= address >> 32;
    469 }
    470 
    471 bool
    472 nvc0_validate_tic(struct nvc0_context *nvc0, int s)
    473 {
    474    uint32_t commands[32];
    475    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    476    unsigned i;
    477    unsigned n = 0;
    478    bool need_flush = false;
    479 
    480    for (i = 0; i < nvc0->num_textures[s]; ++i) {
    481       struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
    482       struct nv04_resource *res;
    483       const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));
    484 
    485       if (!tic) {
    486          if (dirty)
    487             commands[n++] = (i << 1) | 0;
    488          continue;
    489       }
    490       res = nv04_resource(tic->pipe.texture);
    491       nvc0_update_tic(nvc0, tic, res);
    492 
    493       if (tic->id < 0) {
    494          tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
    495 
    496          nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->txc, tic->id * 32,
    497                                NV_VRAM_DOMAIN(&nvc0->screen->base), 32,
    498                                tic->tic);
    499          need_flush = true;
    500       } else
    501       if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
    502          if (unlikely(s == 5))
    503             BEGIN_NVC0(push, NVC0_CP(TEX_CACHE_CTL), 1);
    504          else
    505             BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
    506          PUSH_DATA (push, (tic->id << 4) | 1);
    507          NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_cache_flush_count, 1);
    508       }
    509       nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
    510 
    511       res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
    512       res->status |=  NOUVEAU_BUFFER_STATUS_GPU_READING;
    513 
    514       if (!dirty)
    515          continue;
    516       commands[n++] = (tic->id << 9) | (i << 1) | 1;
    517 
    518       if (unlikely(s == 5))
    519          BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD);
    520       else
    521          BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD);
    522    }
    523    for (; i < nvc0->state.num_textures[s]; ++i)
    524       commands[n++] = (i << 1) | 0;
    525 
    526    nvc0->state.num_textures[s] = nvc0->num_textures[s];
    527 
    528    if (n) {
    529       if (unlikely(s == 5))
    530          BEGIN_NIC0(push, NVC0_CP(BIND_TIC), n);
    531       else
    532          BEGIN_NIC0(push, NVC0_3D(BIND_TIC(s)), n);
    533       PUSH_DATAp(push, commands, n);
    534    }
    535    nvc0->textures_dirty[s] = 0;
    536 
    537    return need_flush;
    538 }
    539 
    540 static bool
    541 nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)
    542 {
    543    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    544    unsigned i;
    545    bool need_flush = false;
    546 
    547    for (i = 0; i < nvc0->num_textures[s]; ++i) {
    548       struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
    549       struct nv04_resource *res;
    550       const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));
    551 
    552       if (!tic) {
    553          nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
    554          continue;
    555       }
    556       res = nv04_resource(tic->pipe.texture);
    557       nvc0_update_tic(nvc0, tic, res);
    558 
    559       if (tic->id < 0) {
    560          tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
    561 
    562          nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc, tic->id * 32,
    563                                NV_VRAM_DOMAIN(&nvc0->screen->base), 32,
    564                                tic->tic);
    565          need_flush = true;
    566       } else
    567       if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
    568          BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
    569          PUSH_DATA (push, (tic->id << 4) | 1);
    570       }
    571       nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
    572 
    573       res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
    574       res->status |=  NOUVEAU_BUFFER_STATUS_GPU_READING;
    575 
    576       nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID;
    577       nvc0->tex_handles[s][i] |= tic->id;
    578       if (dirty)
    579          BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD);
    580    }
    581    for (; i < nvc0->state.num_textures[s]; ++i) {
    582       nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
    583       nvc0->textures_dirty[s] |= 1 << i;
    584    }
    585 
    586    nvc0->state.num_textures[s] = nvc0->num_textures[s];
    587 
    588    return need_flush;
    589 }
    590 
    591 void nvc0_validate_textures(struct nvc0_context *nvc0)
    592 {
    593    bool need_flush = false;
    594    int i;
    595 
    596    for (i = 0; i < 5; i++) {
    597       if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
    598          need_flush |= nve4_validate_tic(nvc0, i);
    599       else
    600          need_flush |= nvc0_validate_tic(nvc0, i);
    601    }
    602 
    603    if (need_flush) {
    604       BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TIC_FLUSH), 1);
    605       PUSH_DATA (nvc0->base.pushbuf, 0);
    606    }
    607 
    608    /* Invalidate all CP textures because they are aliased. */
    609    for (int i = 0; i < nvc0->num_textures[5]; i++)
    610       nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_TEX(i));
    611    nvc0->textures_dirty[5] = ~0;
    612    nvc0->dirty_cp |= NVC0_NEW_CP_TEXTURES;
    613 }
    614 
    615 bool
    616 nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
    617 {
    618    uint32_t commands[16];
    619    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    620    unsigned i;
    621    unsigned n = 0;
    622    bool need_flush = false;
    623 
    624    for (i = 0; i < nvc0->num_samplers[s]; ++i) {
    625       struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);
    626 
    627       if (!(nvc0->samplers_dirty[s] & (1 << i)))
    628          continue;
    629       if (!tsc) {
    630          commands[n++] = (i << 4) | 0;
    631          continue;
    632       }
    633       nvc0->seamless_cube_map = tsc->seamless_cube_map;
    634       if (tsc->id < 0) {
    635          tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);
    636 
    637          nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->txc,
    638                                65536 + tsc->id * 32, NV_VRAM_DOMAIN(&nvc0->screen->base),
    639                                32, tsc->tsc);
    640          need_flush = true;
    641       }
    642       nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
    643 
    644       commands[n++] = (tsc->id << 12) | (i << 4) | 1;
    645    }
    646    for (; i < nvc0->state.num_samplers[s]; ++i)
    647       commands[n++] = (i << 4) | 0;
    648 
    649    nvc0->state.num_samplers[s] = nvc0->num_samplers[s];
    650 
    651    if (n) {
    652       if (unlikely(s == 5))
    653          BEGIN_NIC0(push, NVC0_CP(BIND_TSC), n);
    654       else
    655          BEGIN_NIC0(push, NVC0_3D(BIND_TSC(s)), n);
    656       PUSH_DATAp(push, commands, n);
    657    }
    658    nvc0->samplers_dirty[s] = 0;
    659 
    660    return need_flush;
    661 }
    662 
    663 bool
    664 nve4_validate_tsc(struct nvc0_context *nvc0, int s)
    665 {
    666    unsigned i;
    667    bool need_flush = false;
    668 
    669    for (i = 0; i < nvc0->num_samplers[s]; ++i) {
    670       struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);
    671 
    672       if (!tsc) {
    673          nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID;
    674          continue;
    675       }
    676       if (tsc->id < 0) {
    677          tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);
    678 
    679          nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc,
    680                                65536 + tsc->id * 32,
    681                                NV_VRAM_DOMAIN(&nvc0->screen->base),
    682                                32, tsc->tsc);
    683          need_flush = true;
    684       }
    685       nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
    686 
    687       nvc0->tex_handles[s][i] &= ~NVE4_TSC_ENTRY_INVALID;
    688       nvc0->tex_handles[s][i] |= tsc->id << 20;
    689    }
    690    for (; i < nvc0->state.num_samplers[s]; ++i) {
    691       nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID;
    692       nvc0->samplers_dirty[s] |= 1 << i;
    693    }
    694 
    695    nvc0->state.num_samplers[s] = nvc0->num_samplers[s];
    696 
    697    return need_flush;
    698 }
    699 
    700 void nvc0_validate_samplers(struct nvc0_context *nvc0)
    701 {
    702    bool need_flush = false;
    703    int i;
    704 
    705    for (i = 0; i < 5; i++) {
    706       if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
    707          need_flush |= nve4_validate_tsc(nvc0, i);
    708       else
    709          need_flush |= nvc0_validate_tsc(nvc0, i);
    710    }
    711 
    712    if (need_flush) {
    713       BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TSC_FLUSH), 1);
    714       PUSH_DATA (nvc0->base.pushbuf, 0);
    715    }
    716 
    717    /* Invalidate all CP samplers because they are aliased. */
    718    nvc0->samplers_dirty[5] = ~0;
    719    nvc0->dirty_cp |= NVC0_NEW_CP_SAMPLERS;
    720 }
    721 
    722 /* Upload the "diagonal" entries for the possible texture sources ($t == $s).
    723  * At some point we might want to get a list of the combinations used by a
    724  * shader and fill in those entries instead of having it extract the handles.
    725  */
    726 void
    727 nve4_set_tex_handles(struct nvc0_context *nvc0)
    728 {
    729    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    730    struct nvc0_screen *screen = nvc0->screen;
    731    unsigned s;
    732 
    733    if (nvc0->screen->base.class_3d < NVE4_3D_CLASS)
    734       return;
    735 
    736    for (s = 0; s < 5; ++s) {
    737       uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s];
    738       if (!dirty)
    739          continue;
    740       BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
    741       PUSH_DATA (push, NVC0_CB_AUX_SIZE);
    742       PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
    743       PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
    744       do {
    745          int i = ffs(dirty) - 1;
    746          dirty &= ~(1 << i);
    747 
    748          BEGIN_NVC0(push, NVC0_3D(CB_POS), 2);
    749          PUSH_DATA (push, (8 + i) * 4);
    750          PUSH_DATA (push, nvc0->tex_handles[s][i]);
    751       } while (dirty);
    752 
    753       nvc0->textures_dirty[s] = 0;
    754       nvc0->samplers_dirty[s] = 0;
    755    }
    756 }
    757 
    758 
/*
 * Forward declarations of per-format lookup tables, one entry per
 * PIPE_FORMAT_*.  nve4_set_surface_info() indexes nve4_su_format_map by the
 * image view format and treats a zero entry as an unsupported format, and
 * adds an nve4_suldp_lib_offset entry to screen->lib_code->start.
 * NOTE(review): the initialised definitions are not visible in this part of
 * the file; presumably they follow further down.
 */
static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT];
static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT];
static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT];
    762 
    763 static void
    764 nvc0_get_surface_dims(struct pipe_image_view *view, int *width, int *height,
    765                       int *depth)
    766 {
    767    struct nv04_resource *res = nv04_resource(view->resource);
    768    int level;
    769 
    770    *width = *height = *depth = 1;
    771    if (res->base.target == PIPE_BUFFER) {
    772       *width = view->u.buf.size / util_format_get_blocksize(view->format);
    773       return;
    774    }
    775 
    776    level = view->u.tex.level;
    777    *width = u_minify(view->resource->width0, level);
    778    *height = u_minify(view->resource->height0, level);
    779    *depth = u_minify(view->resource->depth0, level);
    780 
    781    switch (res->base.target) {
    782    case PIPE_TEXTURE_1D_ARRAY:
    783    case PIPE_TEXTURE_2D_ARRAY:
    784    case PIPE_TEXTURE_CUBE:
    785    case PIPE_TEXTURE_CUBE_ARRAY:
    786       *depth = view->u.tex.last_layer - view->u.tex.first_layer + 1;
    787       break;
    788    case PIPE_TEXTURE_1D:
    789    case PIPE_TEXTURE_2D:
    790    case PIPE_TEXTURE_RECT:
    791    case PIPE_TEXTURE_3D:
    792       break;
    793    default:
    794       assert(!"unexpected texture target");
    795       break;
    796    }
    797 }
    798 
    799 void
    800 nvc0_mark_image_range_valid(const struct pipe_image_view *view)
    801 {
    802    struct nv04_resource *res = (struct nv04_resource *)view->resource;
    803 
    804    assert(view->resource->target == PIPE_BUFFER);
    805 
    806    util_range_add(&res->valid_buffer_range,
    807                   view->u.buf.offset,
    808                   view->u.buf.offset + view->u.buf.size);
    809 }
    810 
    811 void
    812 nve4_set_surface_info(struct nouveau_pushbuf *push,
    813                       struct pipe_image_view *view,
    814                       struct nvc0_context *nvc0)
    815 {
    816    struct nvc0_screen *screen = nvc0->screen;
    817    struct nv04_resource *res;
    818    uint64_t address;
    819    uint32_t *const info = push->cur;
    820    int width, height, depth;
    821    uint8_t log2cpp;
    822 
    823    if (view && !nve4_su_format_map[view->format])
    824       NOUVEAU_ERR("unsupported surface format, try is_format_supported() !\n");
    825 
    826    push->cur += 16;
    827 
    828    if (!view || !nve4_su_format_map[view->format]) {
    829       memset(info, 0, 16 * sizeof(*info));
    830 
    831       info[0] = 0xbadf0000;
    832       info[1] = 0x80004000;
    833       info[12] = nve4_suldp_lib_offset[PIPE_FORMAT_R32G32B32A32_UINT] +
    834          screen->lib_code->start;
    835       return;
    836    }
    837    res = nv04_resource(view->resource);
    838 
    839    address = res->address;
    840 
    841    /* get surface dimensions based on the target. */
    842    nvc0_get_surface_dims(view, &width, &height, &depth);
    843 
    844    info[8] = width;
    845    info[9] = height;
    846    info[10] = depth;
    847    switch (res->base.target) {
    848    case PIPE_TEXTURE_1D_ARRAY:
    849       info[11] = 1;
    850       break;
    851    case PIPE_TEXTURE_2D:
    852    case PIPE_TEXTURE_RECT:
    853       info[11] = 2;
    854       break;
    855    case PIPE_TEXTURE_3D:
    856       info[11] = 3;
    857       break;
    858    case PIPE_TEXTURE_2D_ARRAY:
    859    case PIPE_TEXTURE_CUBE:
    860    case PIPE_TEXTURE_CUBE_ARRAY:
    861       info[11] = 4;
    862       break;
    863    default:
    864       info[11] = 0;
    865       break;
    866    }
    867    log2cpp = (0xf000 & nve4_su_format_aux_map[view->format]) >> 12;
    868 
    869    /* Stick the blockwidth (ie. number of bytes per pixel) to check if the
    870     * format doesn't mismatch. */
    871    info[12] = util_format_get_blocksize(view->format);
    872 
    873    /* limit in bytes for raw access */
    874    info[13] = (0x06 << 22) | ((width << log2cpp) - 1);
    875 
    876    info[1] = nve4_su_format_map[view->format];
    877 
    878 #if 0
    879    switch (util_format_get_blocksizebits(view->format)) {
    880    case  16: info[1] |= 1 << 16; break;
    881    case  32: info[1] |= 2 << 16; break;
    882    case  64: info[1] |= 3 << 16; break;
    883    case 128: info[1] |= 4 << 16; break;
    884    default:
    885       break;
    886    }
    887 #else
    888    info[1] |= log2cpp << 16;
    889    info[1] |=  0x4000;
    890    info[1] |= (0x0f00 & nve4_su_format_aux_map[view->format]);
    891 #endif
    892 
    893    if (res->base.target == PIPE_BUFFER) {
    894       address += view->u.buf.offset;
    895 
    896       info[0]  = address >> 8;
    897       info[2]  = width - 1;
    898       info[2] |= (0xff & nve4_su_format_aux_map[view->format]) << 22;
    899       info[3]  = 0;
    900       info[4]  = 0;
    901       info[5]  = 0;
    902       info[6]  = 0;
    903       info[7]  = 0;
    904       info[14] = 0;
    905       info[15] = 0;
    906    } else {
    907       struct nv50_miptree *mt = nv50_miptree(&res->base);
    908       struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
    909       const unsigned z = view->u.tex.first_layer;
    910 
    911       if (z) {
    912          if (mt->layout_3d) {
    913             address += nvc0_mt_zslice_offset(mt, view->u.tex.level, z);
    914             /* doesn't work if z passes z-tile boundary */
    915             if (depth > 1) {
    916                pipe_debug_message(&nvc0->base.debug, CONFORMANCE,
    917                                   "3D images are not really supported!");
    918                debug_printf("3D images are not really supported!\n");
    919             }
    920          } else {
    921             address += mt->layer_stride * z;
    922          }
    923       }
    924       address += lvl->offset;
    925 
    926       info[0]  = address >> 8;
    927       info[2]  = (width << mt->ms_x) - 1;
    928       /* NOTE: this is really important: */
    929       info[2] |= (0xff & nve4_su_format_aux_map[view->format]) << 22;
    930       info[3]  = (0x88 << 24) | (lvl->pitch / 64);
    931       info[4]  = (height << mt->ms_y) - 1;
    932       info[4] |= (lvl->tile_mode & 0x0f0) << 25;
    933       info[4] |= NVC0_TILE_SHIFT_Y(lvl->tile_mode) << 22;
    934       info[5]  = mt->layer_stride >> 8;
    935       info[6]  = depth - 1;
    936       info[6] |= (lvl->tile_mode & 0xf00) << 21;
    937       info[6] |= NVC0_TILE_SHIFT_Z(lvl->tile_mode) << 22;
    938       info[7]  = 0;
    939       info[14] = mt->ms_x;
    940       info[15] = mt->ms_y;
    941    }
    942 }
    943 
    944 static inline void
    945 nvc0_set_surface_info(struct nouveau_pushbuf *push,
    946                       struct pipe_image_view *view, uint64_t address,
    947                       int width, int height, int depth)
    948 {
    949    struct nv04_resource *res;
    950    uint32_t *const info = push->cur;
    951 
    952    push->cur += 16;
    953 
    954    /* Make sure to always initialize the surface information area because it's
    955     * used to check if the given image is bound or not. */
    956    memset(info, 0, 16 * sizeof(*info));
    957 
    958    if (!view || !view->resource)
    959       return;
    960    res = nv04_resource(view->resource);
    961 
    962    /* Stick the image dimensions for the imageSize() builtin. */
    963    info[8] = width;
    964    info[9] = height;
    965    info[10] = depth;
    966 
    967    /* Stick the blockwidth (ie. number of bytes per pixel) to calculate pixel
    968     * offset and to check if the format doesn't mismatch. */
    969    info[12] = util_format_get_blocksize(view->format);
    970 
    971    if (res->base.target == PIPE_BUFFER) {
    972       info[0]  = address >> 8;
    973       info[2]  = width;
    974    } else {
    975       struct nv50_miptree *mt = nv50_miptree(&res->base);
    976 
    977       info[0]  = address >> 8;
    978       info[2]  = width;
    979       info[4]  = height;
    980       info[5]  = mt->layer_stride >> 8;
    981       info[6]  = depth;
    982       info[14] = mt->ms_x;
    983       info[15] = mt->ms_y;
    984    }
    985 }
    986 
    987 void
    988 nvc0_validate_suf(struct nvc0_context *nvc0, int s)
    989 {
    990    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    991    struct nvc0_screen *screen = nvc0->screen;
    992 
    993    for (int i = 0; i < NVC0_MAX_IMAGES; ++i) {
    994       struct pipe_image_view *view = &nvc0->images[s][i];
    995       int width, height, depth;
    996       uint64_t address = 0;
    997 
    998       if (s == 5)
    999          BEGIN_NVC0(push, NVC0_CP(IMAGE(i)), 6);
   1000       else
   1001          BEGIN_NVC0(push, NVC0_3D(IMAGE(i)), 6);
   1002 
   1003       if (view->resource) {
   1004          struct nv04_resource *res = nv04_resource(view->resource);
   1005          unsigned rt = nvc0_format_table[view->format].rt;
   1006 
   1007          if (util_format_is_depth_or_stencil(view->format))
   1008             rt = rt << 12;
   1009          else
   1010             rt = (rt << 4) | (0x14 << 12);
   1011 
   1012          /* get surface dimensions based on the target. */
   1013          nvc0_get_surface_dims(view, &width, &height, &depth);
   1014 
   1015          address = res->address;
   1016          if (res->base.target == PIPE_BUFFER) {
   1017             unsigned blocksize = util_format_get_blocksize(view->format);
   1018 
   1019             address += view->u.buf.offset;
   1020             assert(!(address & 0xff));
   1021 
   1022             if (view->access & PIPE_IMAGE_ACCESS_WRITE)
   1023                nvc0_mark_image_range_valid(view);
   1024 
   1025             PUSH_DATAh(push, address);
   1026             PUSH_DATA (push, address);
   1027             PUSH_DATA (push, align(width * blocksize, 0x100));
   1028             PUSH_DATA (push, NVC0_3D_IMAGE_HEIGHT_LINEAR | 1);
   1029             PUSH_DATA (push, rt);
   1030             PUSH_DATA (push, 0);
   1031          } else {
   1032             struct nv50_miptree *mt = nv50_miptree(view->resource);
   1033             struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
   1034             const unsigned z = view->u.tex.first_layer;
   1035 
   1036             if (mt->layout_3d) {
   1037                address += nvc0_mt_zslice_offset(mt, view->u.tex.level, z);
   1038                if (depth >= 1) {
   1039                   pipe_debug_message(&nvc0->base.debug, CONFORMANCE,
   1040                                      "3D images are not supported!");
   1041                   debug_printf("3D images are not supported!\n");
   1042                }
   1043             } else {
   1044                address += mt->layer_stride * z;
   1045             }
   1046             address += lvl->offset;
   1047 
   1048             PUSH_DATAh(push, address);
   1049             PUSH_DATA (push, address);
   1050             PUSH_DATA (push, width << mt->ms_x);
   1051             PUSH_DATA (push, height << mt->ms_y);
   1052             PUSH_DATA (push, rt);
   1053             PUSH_DATA (push, lvl->tile_mode & 0xff); /* mask out z-tiling */
   1054          }
   1055 
   1056          if (s == 5)
   1057             BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR);
   1058          else
   1059             BCTX_REFN(nvc0->bufctx_3d, 3D_SUF, res, RDWR);
   1060       } else {
   1061          PUSH_DATA(push, 0);
   1062          PUSH_DATA(push, 0);
   1063          PUSH_DATA(push, 0);
   1064          PUSH_DATA(push, 0);
   1065          PUSH_DATA(push, 0x14000);
   1066          PUSH_DATA(push, 0);
   1067       }
   1068 
   1069       /* stick surface information into the driver constant buffer */
   1070       if (s == 5)
   1071          BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
   1072       else
   1073          BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
   1074       PUSH_DATA (push, NVC0_CB_AUX_SIZE);
   1075       PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
   1076       PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
   1077       if (s == 5)
   1078          BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 16);
   1079       else
   1080          BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 16);
   1081       PUSH_DATA (push, NVC0_CB_AUX_SU_INFO(i));
   1082 
   1083       nvc0_set_surface_info(push, view, address, width, height, depth);
   1084    }
   1085 }
   1086 
   1087 static inline void
   1088 nvc0_update_surface_bindings(struct nvc0_context *nvc0)
   1089 {
   1090    nvc0_validate_suf(nvc0, 4);
   1091 
   1092    /* Invalidate all COMPUTE images because they are aliased with FRAGMENT. */
   1093    nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);
   1094    nvc0->dirty_cp |= NVC0_NEW_CP_SURFACES;
   1095    nvc0->images_dirty[5] |= nvc0->images_valid[5];
   1096 }
   1097 
   1098 static void
   1099 gm107_validate_surfaces(struct nvc0_context *nvc0,
   1100                         struct pipe_image_view *view, int stage, int slot)
   1101 {
   1102    struct nv04_resource *res = nv04_resource(view->resource);
   1103    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   1104    struct nvc0_screen *screen = nvc0->screen;
   1105    struct nv50_tic_entry *tic;
   1106 
   1107    tic = nv50_tic_entry(nvc0->images_tic[stage][slot]);
   1108 
   1109    res = nv04_resource(tic->pipe.texture);
   1110    nvc0_update_tic(nvc0, tic, res);
   1111 
   1112    if (tic->id < 0) {
   1113       tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
   1114 
   1115       /* upload the texture view */
   1116       nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc, tic->id * 32,
   1117                             NV_VRAM_DOMAIN(&nvc0->screen->base), 32, tic->tic);
   1118 
   1119       BEGIN_NVC0(push, NVC0_3D(TIC_FLUSH), 1);
   1120       PUSH_DATA (push, 0);
   1121    } else
   1122    if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
   1123       BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
   1124       PUSH_DATA (push, (tic->id << 4) | 1);
   1125    }
   1126    nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
   1127 
   1128    res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
   1129    res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
   1130 
   1131    BCTX_REFN(nvc0->bufctx_3d, 3D_SUF, res, RD);
   1132 
   1133    /* upload the texture handle */
   1134    BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
   1135    PUSH_DATA (push, NVC0_CB_AUX_SIZE);
   1136    PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(stage));
   1137    PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(stage));
   1138    BEGIN_NVC0(push, NVC0_3D(CB_POS), 2);
   1139    PUSH_DATA (push, NVC0_CB_AUX_TEX_INFO(slot + 32));
   1140    PUSH_DATA (push, tic->id);
   1141 }
   1142 
   1143 static inline void
   1144 nve4_update_surface_bindings(struct nvc0_context *nvc0)
   1145 {
   1146    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   1147    struct nvc0_screen *screen = nvc0->screen;
   1148    int i, j, s;
   1149 
   1150    for (s = 0; s < 5; s++) {
   1151       if (!nvc0->images_dirty[s])
   1152          continue;
   1153 
   1154       for (i = 0; i < NVC0_MAX_IMAGES; ++i) {
   1155          struct pipe_image_view *view = &nvc0->images[s][i];
   1156 
   1157          BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
   1158          PUSH_DATA (push, NVC0_CB_AUX_SIZE);
   1159          PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
   1160          PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
   1161          BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 16);
   1162          PUSH_DATA (push, NVC0_CB_AUX_SU_INFO(i));
   1163 
   1164          if (view->resource) {
   1165             struct nv04_resource *res = nv04_resource(view->resource);
   1166 
   1167             if (res->base.target == PIPE_BUFFER) {
   1168                if (view->access & PIPE_IMAGE_ACCESS_WRITE)
   1169                   nvc0_mark_image_range_valid(view);
   1170             }
   1171 
   1172             nve4_set_surface_info(push, view, nvc0);
   1173             BCTX_REFN(nvc0->bufctx_3d, 3D_SUF, res, RDWR);
   1174 
   1175             if (nvc0->screen->base.class_3d >= GM107_3D_CLASS)
   1176                gm107_validate_surfaces(nvc0, view, s, i);
   1177          } else {
   1178             for (j = 0; j < 16; j++)
   1179                PUSH_DATA(push, 0);
   1180          }
   1181       }
   1182    }
   1183 }
   1184 
   1185 void
   1186 nvc0_validate_surfaces(struct nvc0_context *nvc0)
   1187 {
   1188    if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
   1189       nve4_update_surface_bindings(nvc0);
   1190    } else {
   1191       nvc0_update_surface_bindings(nvc0);
   1192    }
   1193 }
   1194 
   1195 
        /* Maps a pipe format to the GK104 image format code that
         * nve4_set_surface_info() stores in info[1].
         * Formats without an explicit entry are zero, which
         * nve4_set_surface_info() treats as "unsupported surface format".
         */
   1196 static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT] =
   1197 {
   1198    [PIPE_FORMAT_R32G32B32A32_FLOAT] = GK104_IMAGE_FORMAT_RGBA32_FLOAT,
   1199    [PIPE_FORMAT_R32G32B32A32_SINT] = GK104_IMAGE_FORMAT_RGBA32_SINT,
   1200    [PIPE_FORMAT_R32G32B32A32_UINT] = GK104_IMAGE_FORMAT_RGBA32_UINT,
   1201    [PIPE_FORMAT_R16G16B16A16_FLOAT] = GK104_IMAGE_FORMAT_RGBA16_FLOAT,
   1202    [PIPE_FORMAT_R16G16B16A16_UNORM] = GK104_IMAGE_FORMAT_RGBA16_UNORM,
   1203    [PIPE_FORMAT_R16G16B16A16_SNORM] = GK104_IMAGE_FORMAT_RGBA16_SNORM,
   1204    [PIPE_FORMAT_R16G16B16A16_SINT] = GK104_IMAGE_FORMAT_RGBA16_SINT,
   1205    [PIPE_FORMAT_R16G16B16A16_UINT] = GK104_IMAGE_FORMAT_RGBA16_UINT,
   1206    [PIPE_FORMAT_B8G8R8A8_UNORM] = GK104_IMAGE_FORMAT_BGRA8_UNORM,
   1207    [PIPE_FORMAT_R8G8B8A8_UNORM] = GK104_IMAGE_FORMAT_RGBA8_UNORM,
   1208    [PIPE_FORMAT_R8G8B8A8_SNORM] = GK104_IMAGE_FORMAT_RGBA8_SNORM,
   1209    [PIPE_FORMAT_R8G8B8A8_SINT] = GK104_IMAGE_FORMAT_RGBA8_SINT,
   1210    [PIPE_FORMAT_R8G8B8A8_UINT] = GK104_IMAGE_FORMAT_RGBA8_UINT,
   1211    [PIPE_FORMAT_R11G11B10_FLOAT] = GK104_IMAGE_FORMAT_R11G11B10_FLOAT,
   1212    [PIPE_FORMAT_R10G10B10A2_UNORM] = GK104_IMAGE_FORMAT_RGB10_A2_UNORM,
   1213    [PIPE_FORMAT_R10G10B10A2_UINT] = GK104_IMAGE_FORMAT_RGB10_A2_UINT,
   1214    [PIPE_FORMAT_R32G32_FLOAT] = GK104_IMAGE_FORMAT_RG32_FLOAT,
   1215    [PIPE_FORMAT_R32G32_SINT] = GK104_IMAGE_FORMAT_RG32_SINT,
   1216    [PIPE_FORMAT_R32G32_UINT] = GK104_IMAGE_FORMAT_RG32_UINT,
   1217    [PIPE_FORMAT_R16G16_FLOAT] = GK104_IMAGE_FORMAT_RG16_FLOAT,
   1218    [PIPE_FORMAT_R16G16_UNORM] = GK104_IMAGE_FORMAT_RG16_UNORM,
   1219    [PIPE_FORMAT_R16G16_SNORM] = GK104_IMAGE_FORMAT_RG16_SNORM,
   1220    [PIPE_FORMAT_R16G16_SINT] = GK104_IMAGE_FORMAT_RG16_SINT,
   1221    [PIPE_FORMAT_R16G16_UINT] = GK104_IMAGE_FORMAT_RG16_UINT,
   1222    [PIPE_FORMAT_R8G8_UNORM] = GK104_IMAGE_FORMAT_RG8_UNORM,
   1223    [PIPE_FORMAT_R8G8_SNORM] = GK104_IMAGE_FORMAT_RG8_SNORM,
   1224    [PIPE_FORMAT_R8G8_SINT] = GK104_IMAGE_FORMAT_RG8_SINT,
   1225    [PIPE_FORMAT_R8G8_UINT] = GK104_IMAGE_FORMAT_RG8_UINT,
   1226    [PIPE_FORMAT_R32_FLOAT] = GK104_IMAGE_FORMAT_R32_FLOAT,
   1227    [PIPE_FORMAT_R32_SINT] = GK104_IMAGE_FORMAT_R32_SINT,
   1228    [PIPE_FORMAT_R32_UINT] = GK104_IMAGE_FORMAT_R32_UINT,
   1229    [PIPE_FORMAT_R16_FLOAT] = GK104_IMAGE_FORMAT_R16_FLOAT,
   1230    [PIPE_FORMAT_R16_UNORM] = GK104_IMAGE_FORMAT_R16_UNORM,
   1231    [PIPE_FORMAT_R16_SNORM] = GK104_IMAGE_FORMAT_R16_SNORM,
   1232    [PIPE_FORMAT_R16_SINT] = GK104_IMAGE_FORMAT_R16_SINT,
   1233    [PIPE_FORMAT_R16_UINT] = GK104_IMAGE_FORMAT_R16_UINT,
   1234    [PIPE_FORMAT_R8_UNORM] = GK104_IMAGE_FORMAT_R8_UNORM,
   1235    [PIPE_FORMAT_R8_SNORM] = GK104_IMAGE_FORMAT_R8_SNORM,
   1236    [PIPE_FORMAT_R8_SINT] = GK104_IMAGE_FORMAT_R8_SINT,
   1237    [PIPE_FORMAT_R8_UINT] = GK104_IMAGE_FORMAT_R8_UINT,
   1238 };
   1239 
   1240 /* Auxiliary format description values for surface instructions.
   1241  * (log2(bytes per pixel) << 12) | (unk8 << 8) | unk22
         *
         * How nve4_set_surface_info() consumes an entry:
         *   bits 15:12  log2(bytes per pixel); placed at bit 16 of info[1] and
         *               used to scale the raw access limit in info[13]
         *   bits 11:8   (unk8) OR'ed, unshifted, into info[1]
         *   bits  7:0   (unk22) shifted left by 22 and OR'ed into info[2]
   1242  */
   1243 static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT] =
   1244 {
   1245    [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x4842,
   1246    [PIPE_FORMAT_R32G32B32A32_SINT] = 0x4842,
   1247    [PIPE_FORMAT_R32G32B32A32_UINT] = 0x4842,
   1248 
   1249    [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x3933,
   1250    [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x3933,
   1251    [PIPE_FORMAT_R16G16B16A16_SINT] = 0x3933,
   1252    [PIPE_FORMAT_R16G16B16A16_UINT] = 0x3933,
   1253    [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3933,
   1254 
   1255    [PIPE_FORMAT_R32G32_FLOAT] = 0x3433,
   1256    [PIPE_FORMAT_R32G32_SINT] = 0x3433,
   1257    [PIPE_FORMAT_R32G32_UINT] = 0x3433,
   1258 
   1259    [PIPE_FORMAT_R10G10B10A2_UNORM] = 0x2a24,
   1260    [PIPE_FORMAT_R10G10B10A2_UINT] = 0x2a24,
   1261    [PIPE_FORMAT_B8G8R8A8_UNORM] = 0x2a24,
   1262    [PIPE_FORMAT_R8G8B8A8_UNORM] = 0x2a24,
   1263    [PIPE_FORMAT_R8G8B8A8_SNORM] = 0x2a24,
   1264    [PIPE_FORMAT_R8G8B8A8_SINT] = 0x2a24,
   1265    [PIPE_FORMAT_R8G8B8A8_UINT] = 0x2a24,
   1266    [PIPE_FORMAT_R11G11B10_FLOAT] = 0x2a24,
   1267 
   1268    [PIPE_FORMAT_R16G16_UNORM] = 0x2524,
   1269    [PIPE_FORMAT_R16G16_SNORM] = 0x2524,
   1270    [PIPE_FORMAT_R16G16_SINT] = 0x2524,
   1271    [PIPE_FORMAT_R16G16_UINT] = 0x2524,
   1272    [PIPE_FORMAT_R16G16_FLOAT] = 0x2524,
   1273 
   1274    [PIPE_FORMAT_R32_SINT] = 0x2024,
   1275    [PIPE_FORMAT_R32_UINT] = 0x2024,
   1276    [PIPE_FORMAT_R32_FLOAT] = 0x2024,
   1277 
   1278    [PIPE_FORMAT_R8G8_UNORM] = 0x1615,
   1279    [PIPE_FORMAT_R8G8_SNORM] = 0x1615,
   1280    [PIPE_FORMAT_R8G8_SINT] = 0x1615,
   1281    [PIPE_FORMAT_R8G8_UINT] = 0x1615,
   1282 
   1283    [PIPE_FORMAT_R16_UNORM] = 0x1115,
   1284    [PIPE_FORMAT_R16_SNORM] = 0x1115,
   1285    [PIPE_FORMAT_R16_SINT] = 0x1115,
   1286    [PIPE_FORMAT_R16_UINT] = 0x1115,
   1287    [PIPE_FORMAT_R16_FLOAT] = 0x1115,
   1288 
   1289    [PIPE_FORMAT_R8_UNORM] = 0x0206,
   1290    [PIPE_FORMAT_R8_SNORM] = 0x0206,
   1291    [PIPE_FORMAT_R8_SINT] = 0x0206,
   1292    [PIPE_FORMAT_R8_UINT] = 0x0206
   1293 };
   1294 
   1295 /* NOTE: These are hardcoded offsets for the shader library.
   1296  * TODO: Automate them.
         *
         * The table is indexed by pipe format.  In the code visible in this part
         * of the file only the PIPE_FORMAT_R32G32B32A32_UINT entry is read:
         * nve4_set_surface_info() adds it to screen->lib_code->start and stores
         * the sum in info[12] of the descriptor it writes for a missing or
         * unsupported view.
   1297  */
   1298 static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT] =
   1299 {
   1300    [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x218,
   1301    [PIPE_FORMAT_R32G32B32A32_SINT]  = 0x218,
   1302    [PIPE_FORMAT_R32G32B32A32_UINT]  = 0x218,
   1303    [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x248,
   1304    [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x2b8,
   1305    [PIPE_FORMAT_R16G16B16A16_SINT]  = 0x330,
   1306    [PIPE_FORMAT_R16G16B16A16_UINT]  = 0x388,
   1307    [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3d8,
   1308    [PIPE_FORMAT_R32G32_FLOAT]       = 0x428,
   1309    [PIPE_FORMAT_R32G32_SINT]        = 0x468,
   1310    [PIPE_FORMAT_R32G32_UINT]        = 0x468,
   1311    [PIPE_FORMAT_R10G10B10A2_UNORM]  = 0x4a8,
   1312    [PIPE_FORMAT_R10G10B10A2_UINT]   = 0x530,
   1313    [PIPE_FORMAT_R8G8B8A8_UNORM]     = 0x588,
   1314    [PIPE_FORMAT_R8G8B8A8_SNORM]     = 0x5f8,
   1315    [PIPE_FORMAT_R8G8B8A8_SINT]      = 0x670,
   1316    [PIPE_FORMAT_R8G8B8A8_UINT]      = 0x6c8,
   1317    [PIPE_FORMAT_B5G6R5_UNORM]       = 0x718,
   1318    [PIPE_FORMAT_B5G5R5X1_UNORM]     = 0x7a0,
   1319    [PIPE_FORMAT_R16G16_UNORM]       = 0x828,
   1320    [PIPE_FORMAT_R16G16_SNORM]       = 0x890,
   1321    [PIPE_FORMAT_R16G16_SINT]        = 0x8f0,
   1322    [PIPE_FORMAT_R16G16_UINT]        = 0x948,
   1323    [PIPE_FORMAT_R16G16_FLOAT]       = 0x998,
   1324    [PIPE_FORMAT_R32_FLOAT]          = 0x9e8,
   1325    [PIPE_FORMAT_R32_SINT]           = 0xa30,
   1326    [PIPE_FORMAT_R32_UINT]           = 0xa30,
   1327    [PIPE_FORMAT_R8G8_UNORM]         = 0xa78,
   1328    [PIPE_FORMAT_R8G8_SNORM]         = 0xae0,
   1329    [PIPE_FORMAT_R8G8_UINT]          = 0xb48,
   1330    [PIPE_FORMAT_R8G8_SINT]          = 0xb98,
   1331    [PIPE_FORMAT_R16_UNORM]          = 0xbe8,
   1332    [PIPE_FORMAT_R16_SNORM]          = 0xc48,
   1333    [PIPE_FORMAT_R16_SINT]           = 0xca0,
   1334    [PIPE_FORMAT_R16_UINT]           = 0xce8,
   1335    [PIPE_FORMAT_R16_FLOAT]          = 0xd30,
   1336    [PIPE_FORMAT_R8_UNORM]           = 0xd88,
   1337    [PIPE_FORMAT_R8_SNORM]           = 0xde0,
   1338    [PIPE_FORMAT_R8_SINT]            = 0xe38,
   1339    [PIPE_FORMAT_R8_UINT]            = 0xe88,
   1340    [PIPE_FORMAT_R11G11B10_FLOAT]    = 0xed0
   1341 };
   1342