Home | History | Annotate | Download | only in nv50
      1 /*
      2  * Copyright 2010 Christoph Bumiller
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice shall be included in
     12  * all copies or substantial portions of the Software.
     13  *
     14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17  * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
     18  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
     19  * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     20  * SOFTWARE.
     21  */
     22 
     23 #include "util/u_format.h"
     24 #include "util/u_format_s3tc.h"
     25 #include "pipe/p_screen.h"
     26 
     27 #include "nv50_context.h"
     28 #include "nv50_screen.h"
     29 
     30 #include "nouveau/nv_object.xml.h"
     31 #include <errno.h>
     32 
     33 #ifndef NOUVEAU_GETPARAM_GRAPH_UNITS
     34 # define NOUVEAU_GETPARAM_GRAPH_UNITS 13
     35 #endif
     36 
     37 /* affected by LOCAL_WARPS_LOG_ALLOC / LOCAL_WARPS_NO_CLAMP */
     38 #define LOCAL_WARPS_ALLOC 32
     39 /* affected by STACK_WARPS_LOG_ALLOC / STACK_WARPS_NO_CLAMP */
     40 #define STACK_WARPS_ALLOC 32
     41 
     42 #define THREADS_IN_WARP 32
     43 
     44 #define ONE_TEMP_SIZE (4/*vector*/ * sizeof(float))
     45 
     46 static boolean
     47 nv50_screen_is_format_supported(struct pipe_screen *pscreen,
     48                                 enum pipe_format format,
     49                                 enum pipe_texture_target target,
     50                                 unsigned sample_count,
     51                                 unsigned bindings)
     52 {
     53    if (sample_count > 8)
     54       return FALSE;
     55    if (!(0x117 & (1 << sample_count))) /* 0, 1, 2, 4 or 8 */
     56       return FALSE;
     57    if (sample_count == 8 && util_format_get_blocksizebits(format) >= 128)
     58       return FALSE;
     59 
     60    if (!util_format_is_supported(format, bindings))
     61       return FALSE;
     62 
     63    switch (format) {
     64    case PIPE_FORMAT_Z16_UNORM:
     65       if (nv50_screen(pscreen)->tesla->oclass < NVA0_3D_CLASS)
     66          return FALSE;
     67       break;
     68    case PIPE_FORMAT_R8G8B8A8_UNORM:
     69    case PIPE_FORMAT_R8G8B8X8_UNORM:
     70       /* HACK: GL requires equal formats for MS resolve and window is BGRA */
     71       if (bindings & PIPE_BIND_RENDER_TARGET)
     72          return FALSE;
     73    default:
     74       break;
     75    }
     76 
     77    /* transfers & shared are always supported */
     78    bindings &= ~(PIPE_BIND_TRANSFER_READ |
     79                  PIPE_BIND_TRANSFER_WRITE |
     80                  PIPE_BIND_SHARED);
     81 
     82    return (nv50_format_table[format].usage & bindings) == bindings;
     83 }
     84 
     85 static int
     86 nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
     87 {
     88    const uint16_t class_3d = nouveau_screen(pscreen)->class_3d;
     89 
     90    switch (param) {
     91    case PIPE_CAP_MAX_COMBINED_SAMPLERS:
     92       return 64;
     93    case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
     94       return 14;
     95    case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
     96       return 12;
     97    case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
     98       return 14;
     99    case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
    100       return 512;
    101    case PIPE_CAP_MIN_TEXEL_OFFSET:
    102       return -8;
    103    case PIPE_CAP_MAX_TEXEL_OFFSET:
    104       return 7;
    105    case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
    106    case PIPE_CAP_TEXTURE_SWIZZLE:
    107    case PIPE_CAP_TEXTURE_SHADOW_MAP:
    108    case PIPE_CAP_NPOT_TEXTURES:
    109    case PIPE_CAP_ANISOTROPIC_FILTER:
    110    case PIPE_CAP_SCALED_RESOLVE:
    111       return 1;
    112    case PIPE_CAP_SEAMLESS_CUBE_MAP:
    113       return nv50_screen(pscreen)->tesla->oclass >= NVA0_3D_CLASS;
    114    case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
    115       return 0;
    116    case PIPE_CAP_TWO_SIDED_STENCIL:
    117    case PIPE_CAP_DEPTH_CLIP_DISABLE:
    118    case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
    119    case PIPE_CAP_POINT_SPRITE:
    120       return 1;
    121    case PIPE_CAP_SM3:
    122       return 1;
    123    case PIPE_CAP_GLSL_FEATURE_LEVEL:
    124       return 130;
    125    case PIPE_CAP_MAX_RENDER_TARGETS:
    126       return 8;
    127    case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
    128       return 1;
    129    case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
    130    case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
    131    case PIPE_CAP_VERTEX_COLOR_CLAMPED:
    132       return 1;
    133    case PIPE_CAP_QUERY_TIMESTAMP:
    134    case PIPE_CAP_TIMER_QUERY:
    135    case PIPE_CAP_OCCLUSION_QUERY:
    136       return 1;
    137    case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
    138       return 4;
    139    case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
    140    case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
    141       return 64;
    142    case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
    143       return (class_3d >= NVA0_3D_CLASS) ? 1 : 0;
    144    case PIPE_CAP_BLEND_EQUATION_SEPARATE:
    145    case PIPE_CAP_INDEP_BLEND_ENABLE:
    146       return 1;
    147    case PIPE_CAP_INDEP_BLEND_FUNC:
    148       return nv50_screen(pscreen)->tesla->oclass >= NVA3_3D_CLASS;
    149    case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
    150    case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
    151       return 1;
    152    case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
    153    case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
    154       return 0;
    155    case PIPE_CAP_SHADER_STENCIL_EXPORT:
    156       return 0;
    157    case PIPE_CAP_PRIMITIVE_RESTART:
    158    case PIPE_CAP_TGSI_INSTANCEID:
    159    case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
    160    case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
    161    case PIPE_CAP_CONDITIONAL_RENDER:
    162    case PIPE_CAP_TEXTURE_BARRIER:
    163    case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
    164    case PIPE_CAP_START_INSTANCE:
    165       return 1;
    166    case PIPE_CAP_TGSI_CAN_COMPACT_VARYINGS:
    167    case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
    168       return 0; /* state trackers will know better */
    169    case PIPE_CAP_USER_CONSTANT_BUFFERS:
    170    case PIPE_CAP_USER_INDEX_BUFFERS:
    171    case PIPE_CAP_USER_VERTEX_BUFFERS:
    172       return 1;
    173    case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
    174       return 256;
    175    case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
    176    case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
    177    case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
    178       return 0;
    179    default:
    180       NOUVEAU_ERR("unknown PIPE_CAP %d\n", param);
    181       return 0;
    182    }
    183 }
    184 
    185 static int
    186 nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
    187                              enum pipe_shader_cap param)
    188 {
    189    switch (shader) {
    190    case PIPE_SHADER_VERTEX:
    191    case PIPE_SHADER_GEOMETRY:
    192    case PIPE_SHADER_FRAGMENT:
    193       break;
    194    default:
    195       return 0;
    196    }
    197 
    198    switch (param) {
    199    case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
    200    case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
    201    case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
    202    case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
    203       return 16384;
    204    case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
    205       return 4;
    206    case PIPE_SHADER_CAP_MAX_INPUTS:
    207       if (shader == PIPE_SHADER_VERTEX)
    208          return 32;
    209       return 0x300 / 16;
    210    case PIPE_SHADER_CAP_MAX_CONSTS:
    211       return 65536 / 16;
    212    case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
    213       return NV50_MAX_PIPE_CONSTBUFS;
    214    case PIPE_SHADER_CAP_MAX_ADDRS:
    215       return 1;
    216    case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
    217    case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
    218       return shader != PIPE_SHADER_FRAGMENT;
    219    case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
    220    case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
    221       return 1;
    222    case PIPE_SHADER_CAP_MAX_PREDS:
    223       return 0;
    224    case PIPE_SHADER_CAP_MAX_TEMPS:
    225       return nv50_screen(pscreen)->max_tls_space / ONE_TEMP_SIZE;
    226    case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
    227       return 1;
    228    case PIPE_SHADER_CAP_SUBROUTINES:
    229       return 0; /* please inline, or provide function declarations */
    230    case PIPE_SHADER_CAP_INTEGERS:
    231       return 1;
    232    case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
    233       return 32;
    234    default:
    235       NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
    236       return 0;
    237    }
    238 }
    239 
    240 static float
    241 nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
    242 {
    243    switch (param) {
    244    case PIPE_CAPF_MAX_LINE_WIDTH:
    245    case PIPE_CAPF_MAX_LINE_WIDTH_AA:
    246       return 10.0f;
    247    case PIPE_CAPF_MAX_POINT_WIDTH:
    248    case PIPE_CAPF_MAX_POINT_WIDTH_AA:
    249       return 64.0f;
    250    case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
    251       return 16.0f;
    252    case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
    253       return 4.0f;
    254    default:
    255       NOUVEAU_ERR("unknown PIPE_CAP %d\n", param);
    256       return 0.0f;
    257    }
    258 }
    259 
    260 static void
    261 nv50_screen_destroy(struct pipe_screen *pscreen)
    262 {
    263    struct nv50_screen *screen = nv50_screen(pscreen);
    264 
    265    if (screen->base.fence.current) {
    266       nouveau_fence_wait(screen->base.fence.current);
    267       nouveau_fence_ref (NULL, &screen->base.fence.current);
    268    }
    269    if (screen->base.pushbuf)
    270       screen->base.pushbuf->user_priv = NULL;
    271 
    272    if (screen->blitctx)
    273       FREE(screen->blitctx);
    274 
    275    nouveau_bo_ref(NULL, &screen->code);
    276    nouveau_bo_ref(NULL, &screen->tls_bo);
    277    nouveau_bo_ref(NULL, &screen->stack_bo);
    278    nouveau_bo_ref(NULL, &screen->txc);
    279    nouveau_bo_ref(NULL, &screen->uniforms);
    280    nouveau_bo_ref(NULL, &screen->fence.bo);
    281 
    282    nouveau_heap_destroy(&screen->vp_code_heap);
    283    nouveau_heap_destroy(&screen->gp_code_heap);
    284    nouveau_heap_destroy(&screen->fp_code_heap);
    285 
    286    if (screen->tic.entries)
    287       FREE(screen->tic.entries);
    288 
    289    nouveau_object_del(&screen->tesla);
    290    nouveau_object_del(&screen->eng2d);
    291    nouveau_object_del(&screen->m2mf);
    292    nouveau_object_del(&screen->sync);
    293 
    294    nouveau_screen_fini(&screen->base);
    295 
    296    FREE(screen);
    297 }
    298 
    299 static void
    300 nv50_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence)
    301 {
    302    struct nv50_screen *screen = nv50_screen(pscreen);
    303    struct nouveau_pushbuf *push = screen->base.pushbuf;
    304 
    305    /* we need to do it after possible flush in MARK_RING */
    306    *sequence = ++screen->base.fence.sequence;
    307 
    308    PUSH_DATA (push, NV50_FIFO_PKHDR(NV50_3D(QUERY_ADDRESS_HIGH), 4));
    309    PUSH_DATAh(push, screen->fence.bo->offset);
    310    PUSH_DATA (push, screen->fence.bo->offset);
    311    PUSH_DATA (push, *sequence);
    312    PUSH_DATA (push, NV50_3D_QUERY_GET_MODE_WRITE_UNK0 |
    313                     NV50_3D_QUERY_GET_UNK4 |
    314                     NV50_3D_QUERY_GET_UNIT_CROP |
    315                     NV50_3D_QUERY_GET_TYPE_QUERY |
    316                     NV50_3D_QUERY_GET_QUERY_SELECT_ZERO |
    317                     NV50_3D_QUERY_GET_SHORT);
    318 }
    319 
    320 static u32
    321 nv50_screen_fence_update(struct pipe_screen *pscreen)
    322 {
    323    return nv50_screen(pscreen)->fence.map[0];
    324 }
    325 
    326 static void
    327 nv50_screen_init_hwctx(struct nv50_screen *screen)
    328 {
    329    struct nouveau_pushbuf *push = screen->base.pushbuf;
    330    struct nv04_fifo *fifo;
    331    unsigned i;
    332 
    333    fifo = (struct nv04_fifo *)screen->base.channel->data;
    334 
    335    BEGIN_NV04(push, SUBC_M2MF(NV01_SUBCHAN_OBJECT), 1);
    336    PUSH_DATA (push, screen->m2mf->handle);
    337    BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_DMA_NOTIFY), 3);
    338    PUSH_DATA (push, screen->sync->handle);
    339    PUSH_DATA (push, fifo->vram);
    340    PUSH_DATA (push, fifo->vram);
    341 
    342    BEGIN_NV04(push, SUBC_2D(NV01_SUBCHAN_OBJECT), 1);
    343    PUSH_DATA (push, screen->eng2d->handle);
    344    BEGIN_NV04(push, NV50_2D(DMA_NOTIFY), 4);
    345    PUSH_DATA (push, screen->sync->handle);
    346    PUSH_DATA (push, fifo->vram);
    347    PUSH_DATA (push, fifo->vram);
    348    PUSH_DATA (push, fifo->vram);
    349    BEGIN_NV04(push, NV50_2D(OPERATION), 1);
    350    PUSH_DATA (push, NV50_2D_OPERATION_SRCCOPY);
    351    BEGIN_NV04(push, NV50_2D(CLIP_ENABLE), 1);
    352    PUSH_DATA (push, 0);
    353    BEGIN_NV04(push, NV50_2D(COLOR_KEY_ENABLE), 1);
    354    PUSH_DATA (push, 0);
    355    BEGIN_NV04(push, SUBC_2D(0x0888), 1);
    356    PUSH_DATA (push, 1);
    357 
    358    BEGIN_NV04(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1);
    359    PUSH_DATA (push, screen->tesla->handle);
    360 
    361    BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
    362    PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);
    363 
    364    BEGIN_NV04(push, NV50_3D(DMA_NOTIFY), 1);
    365    PUSH_DATA (push, screen->sync->handle);
    366    BEGIN_NV04(push, NV50_3D(DMA_ZETA), 11);
    367    for (i = 0; i < 11; ++i)
    368       PUSH_DATA(push, fifo->vram);
    369    BEGIN_NV04(push, NV50_3D(DMA_COLOR(0)), NV50_3D_DMA_COLOR__LEN);
    370    for (i = 0; i < NV50_3D_DMA_COLOR__LEN; ++i)
    371       PUSH_DATA(push, fifo->vram);
    372 
    373    BEGIN_NV04(push, NV50_3D(REG_MODE), 1);
    374    PUSH_DATA (push, NV50_3D_REG_MODE_STRIPED);
    375    BEGIN_NV04(push, NV50_3D(UNK1400_LANES), 1);
    376    PUSH_DATA (push, 0xf);
    377 
    378    if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", TRUE)) {
    379       BEGIN_NV04(push, NV50_3D(WATCHDOG_TIMER), 1);
    380       PUSH_DATA (push, 0x18);
    381    }
    382 
    383    BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
    384    PUSH_DATA (push, 1);
    385 
    386    BEGIN_NV04(push, NV50_3D(CSAA_ENABLE), 1);
    387    PUSH_DATA (push, 0);
    388    BEGIN_NV04(push, NV50_3D(MULTISAMPLE_ENABLE), 1);
    389    PUSH_DATA (push, 0);
    390    BEGIN_NV04(push, NV50_3D(MULTISAMPLE_MODE), 1);
    391    PUSH_DATA (push, NV50_3D_MULTISAMPLE_MODE_MS1);
    392    BEGIN_NV04(push, NV50_3D(MULTISAMPLE_CTRL), 1);
    393    PUSH_DATA (push, 0);
    394    BEGIN_NV04(push, NV50_3D(LINE_LAST_PIXEL), 1);
    395    PUSH_DATA (push, 0);
    396    BEGIN_NV04(push, NV50_3D(BLEND_SEPARATE_ALPHA), 1);
    397    PUSH_DATA (push, 1);
    398 
    399    if (screen->tesla->oclass >= NVA0_3D_CLASS) {
    400       BEGIN_NV04(push, SUBC_3D(NVA0_3D_TEX_MISC), 1);
    401       PUSH_DATA (push, NVA0_3D_TEX_MISC_SEAMLESS_CUBE_MAP);
    402    }
    403 
    404    BEGIN_NV04(push, NV50_3D(SCREEN_Y_CONTROL), 1);
    405    PUSH_DATA (push, 0);
    406    BEGIN_NV04(push, NV50_3D(WINDOW_OFFSET_X), 2);
    407    PUSH_DATA (push, 0);
    408    PUSH_DATA (push, 0);
    409    BEGIN_NV04(push, NV50_3D(ZCULL_REGION), 1);
    410    PUSH_DATA (push, 0x3f);
    411 
    412    BEGIN_NV04(push, NV50_3D(VP_ADDRESS_HIGH), 2);
    413    PUSH_DATAh(push, screen->code->offset + (0 << NV50_CODE_BO_SIZE_LOG2));
    414    PUSH_DATA (push, screen->code->offset + (0 << NV50_CODE_BO_SIZE_LOG2));
    415 
    416    BEGIN_NV04(push, NV50_3D(FP_ADDRESS_HIGH), 2);
    417    PUSH_DATAh(push, screen->code->offset + (1 << NV50_CODE_BO_SIZE_LOG2));
    418    PUSH_DATA (push, screen->code->offset + (1 << NV50_CODE_BO_SIZE_LOG2));
    419 
    420    BEGIN_NV04(push, NV50_3D(GP_ADDRESS_HIGH), 2);
    421    PUSH_DATAh(push, screen->code->offset + (2 << NV50_CODE_BO_SIZE_LOG2));
    422    PUSH_DATA (push, screen->code->offset + (2 << NV50_CODE_BO_SIZE_LOG2));
    423 
    424    BEGIN_NV04(push, NV50_3D(LOCAL_ADDRESS_HIGH), 3);
    425    PUSH_DATAh(push, screen->tls_bo->offset);
    426    PUSH_DATA (push, screen->tls_bo->offset);
    427    PUSH_DATA (push, util_logbase2(screen->cur_tls_space / 8));
    428 
    429    BEGIN_NV04(push, NV50_3D(STACK_ADDRESS_HIGH), 3);
    430    PUSH_DATAh(push, screen->stack_bo->offset);
    431    PUSH_DATA (push, screen->stack_bo->offset);
    432    PUSH_DATA (push, 4);
    433 
    434    BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
    435    PUSH_DATAh(push, screen->uniforms->offset + (0 << 16));
    436    PUSH_DATA (push, screen->uniforms->offset + (0 << 16));
    437    PUSH_DATA (push, (NV50_CB_PVP << 16) | 0x0000);
    438 
    439    BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
    440    PUSH_DATAh(push, screen->uniforms->offset + (1 << 16));
    441    PUSH_DATA (push, screen->uniforms->offset + (1 << 16));
    442    PUSH_DATA (push, (NV50_CB_PGP << 16) | 0x0000);
    443 
    444    BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
    445    PUSH_DATAh(push, screen->uniforms->offset + (2 << 16));
    446    PUSH_DATA (push, screen->uniforms->offset + (2 << 16));
    447    PUSH_DATA (push, (NV50_CB_PFP << 16) | 0x0000);
    448 
    449    BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
    450    PUSH_DATAh(push, screen->uniforms->offset + (3 << 16));
    451    PUSH_DATA (push, screen->uniforms->offset + (3 << 16));
    452    PUSH_DATA (push, (NV50_CB_AUX << 16) | 0x0200);
    453 
    454    BEGIN_NI04(push, NV50_3D(SET_PROGRAM_CB), 3);
    455    PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf01);
    456    PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf21);
    457    PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf31);
    458 
    459    /* return { 0.0, 0.0, 0.0, 0.0 } on out-of-bounds vtxbuf access */
    460    BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
    461    PUSH_DATA (push, ((1 << 9) << 6) | NV50_CB_AUX);
    462    BEGIN_NI04(push, NV50_3D(CB_DATA(0)), 4);
    463    PUSH_DATAf(push, 0.0f);
    464    PUSH_DATAf(push, 0.0f);
    465    PUSH_DATAf(push, 0.0f);
    466    PUSH_DATAf(push, 0.0f);
    467    BEGIN_NV04(push, NV50_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);
    468    PUSH_DATAh(push, screen->uniforms->offset + (3 << 16) + (1 << 9));
    469    PUSH_DATA (push, screen->uniforms->offset + (3 << 16) + (1 << 9));
    470 
    471    /* max TIC (bits 4:8) & TSC bindings, per program type */
    472    for (i = 0; i < 3; ++i) {
    473       BEGIN_NV04(push, NV50_3D(TEX_LIMITS(i)), 1);
    474       PUSH_DATA (push, 0x54);
    475    }
    476 
    477    BEGIN_NV04(push, NV50_3D(TIC_ADDRESS_HIGH), 3);
    478    PUSH_DATAh(push, screen->txc->offset);
    479    PUSH_DATA (push, screen->txc->offset);
    480    PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1);
    481 
    482    BEGIN_NV04(push, NV50_3D(TSC_ADDRESS_HIGH), 3);
    483    PUSH_DATAh(push, screen->txc->offset + 65536);
    484    PUSH_DATA (push, screen->txc->offset + 65536);
    485    PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1);
    486 
    487    BEGIN_NV04(push, NV50_3D(LINKED_TSC), 1);
    488    PUSH_DATA (push, 0);
    489 
    490    BEGIN_NV04(push, NV50_3D(CLIP_RECTS_EN), 1);
    491    PUSH_DATA (push, 0);
    492    BEGIN_NV04(push, NV50_3D(CLIP_RECTS_MODE), 1);
    493    PUSH_DATA (push, NV50_3D_CLIP_RECTS_MODE_INSIDE_ANY);
    494    BEGIN_NV04(push, NV50_3D(CLIP_RECT_HORIZ(0)), 8 * 2);
    495    for (i = 0; i < 8 * 2; ++i)
    496       PUSH_DATA(push, 0);
    497    BEGIN_NV04(push, NV50_3D(CLIPID_ENABLE), 1);
    498    PUSH_DATA (push, 0);
    499 
    500    BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSFORM_EN), 1);
    501    PUSH_DATA (push, 1);
    502    BEGIN_NV04(push, NV50_3D(DEPTH_RANGE_NEAR(0)), 2);
    503    PUSH_DATAf(push, 0.0f);
    504    PUSH_DATAf(push, 1.0f);
    505 
    506    BEGIN_NV04(push, NV50_3D(VIEW_VOLUME_CLIP_CTRL), 1);
    507 #ifdef NV50_SCISSORS_CLIPPING
    508    PUSH_DATA (push, 0x0000);
    509 #else
    510    PUSH_DATA (push, 0x1080);
    511 #endif
    512 
    513    BEGIN_NV04(push, NV50_3D(CLEAR_FLAGS), 1);
    514    PUSH_DATA (push, NV50_3D_CLEAR_FLAGS_CLEAR_RECT_VIEWPORT);
    515 
    516    /* We use scissors instead of exact view volume clipping,
    517     * so they're always enabled.
    518     */
    519    BEGIN_NV04(push, NV50_3D(SCISSOR_ENABLE(0)), 3);
    520    PUSH_DATA (push, 1);
    521    PUSH_DATA (push, 8192 << 16);
    522    PUSH_DATA (push, 8192 << 16);
    523 
    524    BEGIN_NV04(push, NV50_3D(RASTERIZE_ENABLE), 1);
    525    PUSH_DATA (push, 1);
    526    BEGIN_NV04(push, NV50_3D(POINT_RASTER_RULES), 1);
    527    PUSH_DATA (push, NV50_3D_POINT_RASTER_RULES_OGL);
    528    BEGIN_NV04(push, NV50_3D(FRAG_COLOR_CLAMP_EN), 1);
    529    PUSH_DATA (push, 0x11111111);
    530    BEGIN_NV04(push, NV50_3D(EDGEFLAG), 1);
    531    PUSH_DATA (push, 1);
    532 
    533    PUSH_KICK (push);
    534 }
    535 
    536 static int nv50_tls_alloc(struct nv50_screen *screen, unsigned tls_space,
    537       uint64_t *tls_size)
    538 {
    539    struct nouveau_device *dev = screen->base.device;
    540    int ret;
    541 
    542    screen->cur_tls_space = util_next_power_of_two(tls_space / ONE_TEMP_SIZE) *
    543          ONE_TEMP_SIZE;
    544    if (nouveau_mesa_debug)
    545       debug_printf("allocating space for %u temps\n",
    546             util_next_power_of_two(tls_space / ONE_TEMP_SIZE));
    547    *tls_size = screen->cur_tls_space * util_next_power_of_two(screen->TPs) *
    548          screen->MPsInTP * LOCAL_WARPS_ALLOC * THREADS_IN_WARP;
    549 
    550    ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16,
    551                         *tls_size, NULL, &screen->tls_bo);
    552    if (ret) {
    553       NOUVEAU_ERR("Failed to allocate local bo: %d\n", ret);
    554       return ret;
    555    }
    556 
    557    return 0;
    558 }
    559 
    560 int nv50_tls_realloc(struct nv50_screen *screen, unsigned tls_space)
    561 {
    562    struct nouveau_pushbuf *push = screen->base.pushbuf;
    563    int ret;
    564    uint64_t tls_size;
    565 
    566    if (tls_space < screen->cur_tls_space)
    567       return 0;
    568    if (tls_space > screen->max_tls_space) {
    569       /* fixable by limiting number of warps (LOCAL_WARPS_LOG_ALLOC /
    570        * LOCAL_WARPS_NO_CLAMP) */
    571       NOUVEAU_ERR("Unsupported number of temporaries (%u > %u). Fixable if someone cares.\n",
    572             (unsigned)(tls_space / ONE_TEMP_SIZE),
    573             (unsigned)(screen->max_tls_space / ONE_TEMP_SIZE));
    574       return -ENOMEM;
    575    }
    576 
    577    nouveau_bo_ref(NULL, &screen->tls_bo);
    578    ret = nv50_tls_alloc(screen, tls_space, &tls_size);
    579    if (ret)
    580       return ret;
    581 
    582    BEGIN_NV04(push, NV50_3D(LOCAL_ADDRESS_HIGH), 3);
    583    PUSH_DATAh(push, screen->tls_bo->offset);
    584    PUSH_DATA (push, screen->tls_bo->offset);
    585    PUSH_DATA (push, util_logbase2(screen->cur_tls_space / 8));
    586 
    587    return 1;
    588 }
    589 
    590 struct pipe_screen *
    591 nv50_screen_create(struct nouveau_device *dev)
    592 {
    593    struct nv50_screen *screen;
    594    struct pipe_screen *pscreen;
    595    struct nouveau_object *chan;
    596    uint64_t value;
    597    uint32_t tesla_class;
    598    unsigned stack_size;
    599    int ret;
    600 
    601    screen = CALLOC_STRUCT(nv50_screen);
    602    if (!screen)
    603       return NULL;
    604    pscreen = &screen->base.base;
    605 
    606    ret = nouveau_screen_init(&screen->base, dev);
    607    if (ret) {
    608       NOUVEAU_ERR("nouveau_screen_init failed: %d\n", ret);
    609       goto fail;
    610    }
    611 
    612    /* TODO: Prevent FIFO prefetch before transfer of index buffers and
    613     *  admit them to VRAM.
    614     */
    615    screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER |
    616       PIPE_BIND_VERTEX_BUFFER;
    617    screen->base.sysmem_bindings |=
    618       PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER;
    619 
    620    screen->base.pushbuf->user_priv = screen;
    621    screen->base.pushbuf->rsvd_kick = 5;
    622 
    623    chan = screen->base.channel;
    624 
    625    pscreen->destroy = nv50_screen_destroy;
    626    pscreen->context_create = nv50_create;
    627    pscreen->is_format_supported = nv50_screen_is_format_supported;
    628    pscreen->get_param = nv50_screen_get_param;
    629    pscreen->get_shader_param = nv50_screen_get_shader_param;
    630    pscreen->get_paramf = nv50_screen_get_paramf;
    631 
    632    nv50_screen_init_resource_functions(pscreen);
    633 
    634    nouveau_screen_init_vdec(&screen->base);
    635 
    636    ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096,
    637                         NULL, &screen->fence.bo);
    638    if (ret) {
    639       NOUVEAU_ERR("Failed to allocate fence bo: %d\n", ret);
    640       goto fail;
    641    }
    642 
    643    nouveau_bo_map(screen->fence.bo, 0, NULL);
    644    screen->fence.map = screen->fence.bo->map;
    645    screen->base.fence.emit = nv50_screen_fence_emit;
    646    screen->base.fence.update = nv50_screen_fence_update;
    647 
    648    ret = nouveau_object_new(chan, 0xbeef0301, NOUVEAU_NOTIFIER_CLASS,
    649                             &(struct nv04_notify){ .length = 32 },
    650                             sizeof(struct nv04_notify), &screen->sync);
    651    if (ret) {
    652       NOUVEAU_ERR("Failed to allocate notifier: %d\n", ret);
    653       goto fail;
    654    }
    655 
    656    ret = nouveau_object_new(chan, 0xbeef5039, NV50_M2MF_CLASS,
    657                             NULL, 0, &screen->m2mf);
    658    if (ret) {
    659       NOUVEAU_ERR("Failed to allocate PGRAPH context for M2MF: %d\n", ret);
    660       goto fail;
    661    }
    662 
    663    ret = nouveau_object_new(chan, 0xbeef502d, NV50_2D_CLASS,
    664                             NULL, 0, &screen->eng2d);
    665    if (ret) {
    666       NOUVEAU_ERR("Failed to allocate PGRAPH context for 2D: %d\n", ret);
    667       goto fail;
    668    }
    669 
    670    switch (dev->chipset & 0xf0) {
    671    case 0x50:
    672       tesla_class = NV50_3D_CLASS;
    673       break;
    674    case 0x80:
    675    case 0x90:
    676       tesla_class = NV84_3D_CLASS;
    677       break;
    678    case 0xa0:
    679       switch (dev->chipset) {
    680       case 0xa0:
    681       case 0xaa:
    682       case 0xac:
    683          tesla_class = NVA0_3D_CLASS;
    684          break;
    685       case 0xaf:
    686          tesla_class = NVAF_3D_CLASS;
    687          break;
    688       default:
    689          tesla_class = NVA3_3D_CLASS;
    690          break;
    691       }
    692       break;
    693    default:
    694       NOUVEAU_ERR("Not a known NV50 chipset: NV%02x\n", dev->chipset);
    695       goto fail;
    696    }
    697    screen->base.class_3d = tesla_class;
    698 
    699    ret = nouveau_object_new(chan, 0xbeef5097, tesla_class,
    700                             NULL, 0, &screen->tesla);
    701    if (ret) {
    702       NOUVEAU_ERR("Failed to allocate PGRAPH context for 3D: %d\n", ret);
    703       goto fail;
    704    }
    705 
    706    ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16,
    707                         3 << NV50_CODE_BO_SIZE_LOG2, NULL, &screen->code);
    708    if (ret) {
    709       NOUVEAU_ERR("Failed to allocate code bo: %d\n", ret);
    710       goto fail;
    711    }
    712 
    713    nouveau_heap_init(&screen->vp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);
    714    nouveau_heap_init(&screen->gp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);
    715    nouveau_heap_init(&screen->fp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);
    716 
    717    nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
    718 
    719    screen->TPs = util_bitcount(value & 0xffff);
    720    screen->MPsInTP = util_bitcount((value >> 24) & 0xf);
    721 
    722    stack_size = util_next_power_of_two(screen->TPs) * screen->MPsInTP *
    723          STACK_WARPS_ALLOC * 64 * 8;
    724 
    725    ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, stack_size, NULL,
    726                         &screen->stack_bo);
    727    if (ret) {
    728       NOUVEAU_ERR("Failed to allocate stack bo: %d\n", ret);
    729       goto fail;
    730    }
    731 
    732    uint64_t size_of_one_temp = util_next_power_of_two(screen->TPs) *
    733          screen->MPsInTP * LOCAL_WARPS_ALLOC *  THREADS_IN_WARP *
    734          ONE_TEMP_SIZE;
    735    screen->max_tls_space = dev->vram_size / size_of_one_temp * ONE_TEMP_SIZE;
    736    screen->max_tls_space /= 2; /* half of vram */
    737 
    738    /* hw can address max 64 KiB */
    739    screen->max_tls_space = MIN2(screen->max_tls_space, 64 << 10);
    740 
    741    uint64_t tls_size;
    742    unsigned tls_space = 4/*temps*/ * ONE_TEMP_SIZE;
    743    ret = nv50_tls_alloc(screen, tls_space, &tls_size);
    744    if (ret)
    745       goto fail;
    746 
    747    if (nouveau_mesa_debug)
    748       debug_printf("TPs = %u, MPsInTP = %u, VRAM = %"PRIu64" MiB, tls_size = %"PRIu64" KiB\n",
    749             screen->TPs, screen->MPsInTP, dev->vram_size >> 20, tls_size >> 10);
    750 
    751    ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 4 << 16, NULL,
    752                         &screen->uniforms);
    753    if (ret) {
    754       NOUVEAU_ERR("Failed to allocate uniforms bo: %d\n", ret);
    755       goto fail;
    756    }
    757 
    758    ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 3 << 16, NULL,
    759                         &screen->txc);
    760    if (ret) {
    761       NOUVEAU_ERR("Failed to allocate TIC/TSC bo: %d\n", ret);
    762       goto fail;
    763    }
    764 
    765    screen->tic.entries = CALLOC(4096, sizeof(void *));
    766    screen->tsc.entries = screen->tic.entries + 2048;
    767 
    768    if (!nv50_blitctx_create(screen))
    769       goto fail;
    770 
    771    nv50_screen_init_hwctx(screen);
    772 
    773    nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE);
    774 
    775    return pscreen;
    776 
    777 fail:
    778    nv50_screen_destroy(pscreen);
    779    return NULL;
    780 }
    781 
    782 int
    783 nv50_screen_tic_alloc(struct nv50_screen *screen, void *entry)
    784 {
    785    int i = screen->tic.next;
    786 
    787    while (screen->tic.lock[i / 32] & (1 << (i % 32)))
    788       i = (i + 1) & (NV50_TIC_MAX_ENTRIES - 1);
    789 
    790    screen->tic.next = (i + 1) & (NV50_TIC_MAX_ENTRIES - 1);
    791 
    792    if (screen->tic.entries[i])
    793       nv50_tic_entry(screen->tic.entries[i])->id = -1;
    794 
    795    screen->tic.entries[i] = entry;
    796    return i;
    797 }
    798 
    799 int
    800 nv50_screen_tsc_alloc(struct nv50_screen *screen, void *entry)
    801 {
    802    int i = screen->tsc.next;
    803 
    804    while (screen->tsc.lock[i / 32] & (1 << (i % 32)))
    805       i = (i + 1) & (NV50_TSC_MAX_ENTRIES - 1);
    806 
    807    screen->tsc.next = (i + 1) & (NV50_TSC_MAX_ENTRIES - 1);
    808 
    809    if (screen->tsc.entries[i])
    810       nv50_tsc_entry(screen->tsc.entries[i])->id = -1;
    811 
    812    screen->tsc.entries[i] = entry;
    813    return i;
    814 }
    815