Home | History | Annotate | Download | only in nv50
      1 /*
      2  * Copyright 2011 Christoph Bumiller
      3  * Copyright 2015 Samuel Pitoiset
      4  *
      5  * Permission is hereby granted, free of charge, to any person obtaining a
      6  * copy of this software and associated documentation files (the "Software"),
      7  * to deal in the Software without restriction, including without limitation
      8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      9  * and/or sell copies of the Software, and to permit persons to whom the
     10  * Software is furnished to do so, subject to the following conditions:
     11  *
     12  * The above copyright notice and this permission notice shall be included in
     13  * all copies or substantial portions of the Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
     19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     21  * OTHER DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 #define NV50_PUSH_EXPLICIT_SPACE_CHECKING
     25 
     26 #include "nv50/nv50_context.h"
     27 #include "nv50/nv50_query_hw.h"
     28 #include "nv50/nv50_query_hw_metric.h"
     29 #include "nv50/nv50_query_hw_sm.h"
     30 #include "nv_object.xml.h"
     31 
     32 #define NV50_HW_QUERY_STATE_READY   0
     33 #define NV50_HW_QUERY_STATE_ACTIVE  1
     34 #define NV50_HW_QUERY_STATE_ENDED   2
     35 #define NV50_HW_QUERY_STATE_FLUSHED 3
     36 
     37 /* XXX: Nested queries, and simultaneous queries on multiple gallium contexts
     38  * (since we use only a single GPU channel per screen) will not work properly.
     39  *
     40  * The first is not that big of an issue because OpenGL does not allow nested
     41  * queries anyway.
     42  */
     43 
     44 #define NV50_HW_QUERY_ALLOC_SPACE 256
     45 
     46 bool
     47 nv50_hw_query_allocate(struct nv50_context *nv50, struct nv50_query *q,
     48                        int size)
     49 {
     50    struct nv50_screen *screen = nv50->screen;
     51    struct nv50_hw_query *hq = nv50_hw_query(q);
     52    int ret;
     53 
     54    if (hq->bo) {
     55       nouveau_bo_ref(NULL, &hq->bo);
     56       if (hq->mm) {
     57          if (hq->state == NV50_HW_QUERY_STATE_READY)
     58             nouveau_mm_free(hq->mm);
     59          else
     60             nouveau_fence_work(screen->base.fence.current,
     61                                nouveau_mm_free_work, hq->mm);
     62       }
     63    }
     64    if (size) {
     65       hq->mm = nouveau_mm_allocate(screen->base.mm_GART, size,
     66                                    &hq->bo, &hq->base_offset);
     67       if (!hq->bo)
     68          return false;
     69       hq->offset = hq->base_offset;
     70 
     71       ret = nouveau_bo_map(hq->bo, 0, screen->base.client);
     72       if (ret) {
     73          nv50_hw_query_allocate(nv50, q, 0);
     74          return false;
     75       }
     76       hq->data = (uint32_t *)((uint8_t *)hq->bo->map + hq->base_offset);
     77    }
     78    return true;
     79 }
     80 
     81 static void
     82 nv50_hw_query_get(struct nouveau_pushbuf *push, struct nv50_query *q,
     83                unsigned offset, uint32_t get)
     84 {
     85    struct nv50_hw_query *hq = nv50_hw_query(q);
     86 
     87    offset += hq->offset;
     88 
     89    PUSH_SPACE(push, 5);
     90    PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
     91    BEGIN_NV04(push, NV50_3D(QUERY_ADDRESS_HIGH), 4);
     92    PUSH_DATAh(push, hq->bo->offset + offset);
     93    PUSH_DATA (push, hq->bo->offset + offset);
     94    PUSH_DATA (push, hq->sequence);
     95    PUSH_DATA (push, get);
     96 }
     97 
     98 static inline void
     99 nv50_hw_query_update(struct nv50_query *q)
    100 {
    101    struct nv50_hw_query *hq = nv50_hw_query(q);
    102 
    103    if (hq->is64bit) {
    104       if (nouveau_fence_signalled(hq->fence))
    105          hq->state = NV50_HW_QUERY_STATE_READY;
    106    } else {
    107       if (hq->data[0] == hq->sequence)
    108          hq->state = NV50_HW_QUERY_STATE_READY;
    109    }
    110 }
    111 
    112 static void
    113 nv50_hw_destroy_query(struct nv50_context *nv50, struct nv50_query *q)
    114 {
    115    struct nv50_hw_query *hq = nv50_hw_query(q);
    116 
    117    if (hq->funcs && hq->funcs->destroy_query) {
    118       hq->funcs->destroy_query(nv50, hq);
    119       return;
    120    }
    121 
    122    nv50_hw_query_allocate(nv50, q, 0);
    123    nouveau_fence_ref(NULL, &hq->fence);
    124    FREE(hq);
    125 }
    126 
/* Begin a HW query: rotate/reserve result storage as needed and emit the
 * initial counter read(s), so the final result can later be computed as
 * (value at end) - (value at begin).
 */
static boolean
nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_hw_query *hq = nv50_hw_query(q);

   /* Derived query types (SM counters / metrics) implement their own begin. */
   if (hq->funcs && hq->funcs->begin_query)
      return hq->funcs->begin_query(nv50, hq);

   /* For occlusion queries we have to change the storage, because a previous
    * query might set the initial render condition to false even *after* we re-
    * initialized it to true.
    */
   if (hq->rotate) {
      hq->offset += hq->rotate;
      hq->data += hq->rotate / sizeof(*hq->data);
      /* Advanced past the end of the current slice: get a fresh buffer. */
      if (hq->offset - hq->base_offset == NV50_HW_QUERY_ALLOC_SPACE)
         nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE);

      /* XXX: can we do this with the GPU, and sync with respect to a previous
       *  query ?
       */
      hq->data[0] = hq->sequence; /* initialize sequence */
      hq->data[1] = 1; /* initial render condition = true */
      hq->data[4] = hq->sequence + 1; /* for comparison COND_MODE */
      hq->data[5] = 0;
   }
   if (!hq->is64bit)
      hq->data[0] = hq->sequence++; /* the previously used one */

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      /* nesting != 0 means an occlusion query is already active; in that
       * case snapshot the running sample counter instead of resetting it.
       */
      hq->nesting = nv50->screen->num_occlusion_queries_active++;
      if (hq->nesting) {
         nv50_hw_query_get(push, q, 0x10, 0x0100f002);
      } else {
         PUSH_SPACE(push, 4);
         BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
         PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT);
         BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
         PUSH_DATA (push, 1);
      }
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      nv50_hw_query_get(push, q, 0x10, 0x06805002);
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      nv50_hw_query_get(push, q, 0x10, 0x05805002);
      break;
   case PIPE_QUERY_SO_STATISTICS:
      nv50_hw_query_get(push, q, 0x20, 0x05805002);
      nv50_hw_query_get(push, q, 0x30, 0x06805002);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      /* Initial snapshot of the eight pipeline counters; the end-query
       * snapshots land in the first half of the buffer (see end_query).
       */
      nv50_hw_query_get(push, q, 0x80, 0x00801002); /* VFETCH, VERTICES */
      nv50_hw_query_get(push, q, 0x90, 0x01801002); /* VFETCH, PRIMS */
      nv50_hw_query_get(push, q, 0xa0, 0x02802002); /* VP, LAUNCHES */
      nv50_hw_query_get(push, q, 0xb0, 0x03806002); /* GP, LAUNCHES */
      nv50_hw_query_get(push, q, 0xc0, 0x04806002); /* GP, PRIMS_OUT */
      nv50_hw_query_get(push, q, 0xd0, 0x07804002); /* RAST, PRIMS_IN */
      nv50_hw_query_get(push, q, 0xe0, 0x08804002); /* RAST, PRIMS_OUT */
      nv50_hw_query_get(push, q, 0xf0, 0x0980a002); /* ROP, PIXELS */
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      nv50_hw_query_get(push, q, 0x10, 0x00005002);
      break;
   default:
      /* Query types with no begin-side work (e.g. TIMESTAMP) should never
       * reach this function.
       */
      assert(0);
      return false;
   }
   hq->state = NV50_HW_QUERY_STATE_ACTIVE;
   return true;
}
    201 
/* End a HW query: emit the final counter read(s) at the base of the result
 * buffer and, for 64-bit queries, keep a reference to the current fence so
 * completion can be detected later.
 */
static void
nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_hw_query *hq = nv50_hw_query(q);

   /* Derived query types (SM counters / metrics) implement their own end. */
   if (hq->funcs && hq->funcs->end_query) {
      hq->funcs->end_query(nv50, hq);
      return;
   }

   hq->state = NV50_HW_QUERY_STATE_ENDED;

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      nv50_hw_query_get(push, q, 0, 0x0100f002);
      /* Turn sample counting back off once the last active occlusion
       * query has ended.
       */
      if (--nv50->screen->num_occlusion_queries_active == 0) {
         PUSH_SPACE(push, 2);
         BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
         PUSH_DATA (push, 0);
      }
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      nv50_hw_query_get(push, q, 0, 0x06805002);
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      nv50_hw_query_get(push, q, 0, 0x05805002);
      break;
   case PIPE_QUERY_SO_STATISTICS:
      nv50_hw_query_get(push, q, 0x00, 0x05805002);
      nv50_hw_query_get(push, q, 0x10, 0x06805002);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      /* Final snapshots of the eight pipeline counters; paired with the
       * begin-side snapshots at offsets 0x80..0xf0.
       */
      nv50_hw_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
      nv50_hw_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
      nv50_hw_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */
      nv50_hw_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */
      nv50_hw_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
      nv50_hw_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
      nv50_hw_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
      nv50_hw_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */
      break;
   case PIPE_QUERY_TIMESTAMP:
      hq->sequence++;
      /* fall through */
   case PIPE_QUERY_TIME_ELAPSED:
      nv50_hw_query_get(push, q, 0, 0x00005002);
      break;
   case PIPE_QUERY_GPU_FINISHED:
      hq->sequence++;
      nv50_hw_query_get(push, q, 0, 0x1000f010);
      break;
   case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
      hq->sequence++;
      /* The stream index is encoded into the query-get command. */
      nv50_hw_query_get(push, q, 0, 0x0d005002 | (q->index << 5));
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* This query is not issued on GPU because disjoint is forced to false */
      hq->state = NV50_HW_QUERY_STATE_READY;
      break;
   default:
      assert(0);
      break;
   }
   if (hq->is64bit)
      nouveau_fence_ref(nv50->screen->base.fence.current, &hq->fence);
}
    270 
/* Fetch the result of a HW query, translating the raw begin/end snapshots
 * in the query buffer into the pipe_query_result union.
 *
 * With wait == false, returns false while the GPU has not caught up yet;
 * in that case the pushbuf is kicked once (FLUSHED state) so that broken
 * apps spinning on query availability eventually make progress.
 */
static boolean
nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q,
                         boolean wait, union pipe_query_result *result)
{
   struct nv50_hw_query *hq = nv50_hw_query(q);
   uint64_t *res64 = (uint64_t *)result;
   uint32_t *res32 = (uint32_t *)result;
   uint8_t *res8 = (uint8_t *)result;
   uint64_t *data64 = (uint64_t *)hq->data;
   int i;

   /* Derived query types (SM counters / metrics) decode their own results. */
   if (hq->funcs && hq->funcs->get_query_result)
      return hq->funcs->get_query_result(nv50, hq, wait, result);

   if (hq->state != NV50_HW_QUERY_STATE_READY)
      nv50_hw_query_update(q);

   if (hq->state != NV50_HW_QUERY_STATE_READY) {
      if (!wait) {
         /* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */
         if (hq->state != NV50_HW_QUERY_STATE_FLUSHED) {
            hq->state = NV50_HW_QUERY_STATE_FLUSHED;
            PUSH_KICK(nv50->base.pushbuf);
         }
         return false;
      }
      /* Blocking path: wait for the GPU to finish writing the buffer. */
      if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nv50->screen->base.client))
         return false;
   }
   hq->state = NV50_HW_QUERY_STATE_READY;

   switch (q->type) {
   case PIPE_QUERY_GPU_FINISHED:
      res8[0] = true;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
      /* data[1] = count at end, data[5] = count at begin. */
      res64[0] = hq->data[1] - hq->data[5];
      break;
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      res8[0] = hq->data[1] != hq->data[5];
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
   case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
      res64[0] = data64[0] - data64[2];
      break;
   case PIPE_QUERY_SO_STATISTICS:
      res64[0] = data64[0] - data64[4];
      res64[1] = data64[2] - data64[6];
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      /* Eight (value, timestamp) pairs at end, then eight at begin. */
      for (i = 0; i < 8; ++i)
         res64[i] = data64[i * 2] - data64[16 + i * 2];
      break;
   case PIPE_QUERY_TIMESTAMP:
      res64[0] = data64[1];
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* Fixed 1 GHz frequency; disjoint is always reported as false. */
      res64[0] = 1000000000;
      res8[8] = false;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      res64[0] = data64[1] - data64[3];
      break;
   case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
      res32[0] = hq->data[1];
      break;
   default:
      assert(0);
      return false;
   }

   return true;
}
    344 
/* Vtable for plain HW queries; derived types (SM counters / metrics)
 * override individual entries via hq->funcs.
 */
static const struct nv50_query_funcs hw_query_funcs = {
   .destroy_query = nv50_hw_destroy_query,
   .begin_query = nv50_hw_begin_query,
   .end_query = nv50_hw_end_query,
   .get_query_result = nv50_hw_get_query_result,
};
    351 
    352 struct nv50_query *
    353 nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index)
    354 {
    355    struct nv50_hw_query *hq;
    356    struct nv50_query *q;
    357 
    358    hq = nv50_hw_sm_create_query(nv50, type);
    359    if (hq) {
    360       hq->base.funcs = &hw_query_funcs;
    361       return (struct nv50_query *)hq;
    362    }
    363 
    364    hq = nv50_hw_metric_create_query(nv50, type);
    365    if (hq) {
    366       hq->base.funcs = &hw_query_funcs;
    367       return (struct nv50_query *)hq;
    368    }
    369 
    370    hq = CALLOC_STRUCT(nv50_hw_query);
    371    if (!hq)
    372       return NULL;
    373 
    374    q = &hq->base;
    375    q->funcs = &hw_query_funcs;
    376    q->type = type;
    377 
    378    switch (q->type) {
    379    case PIPE_QUERY_OCCLUSION_COUNTER:
    380    case PIPE_QUERY_OCCLUSION_PREDICATE:
    381       hq->rotate = 32;
    382       break;
    383    case PIPE_QUERY_PRIMITIVES_GENERATED:
    384    case PIPE_QUERY_PRIMITIVES_EMITTED:
    385    case PIPE_QUERY_SO_STATISTICS:
    386    case PIPE_QUERY_PIPELINE_STATISTICS:
    387       hq->is64bit = true;
    388       break;
    389    case PIPE_QUERY_TIME_ELAPSED:
    390    case PIPE_QUERY_TIMESTAMP:
    391    case PIPE_QUERY_TIMESTAMP_DISJOINT:
    392    case PIPE_QUERY_GPU_FINISHED:
    393    case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
    394       break;
    395    default:
    396       debug_printf("invalid query type: %u\n", type);
    397       FREE(q);
    398       return NULL;
    399    }
    400 
    401    if (!nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE)) {
    402       FREE(hq);
    403       return NULL;
    404    }
    405 
    406    if (hq->rotate) {
    407       /* we advance before query_begin ! */
    408       hq->offset -= hq->rotate;
    409       hq->data -= hq->rotate / sizeof(*hq->data);
    410    }
    411 
    412    return q;
    413 }
    414 
    415 int
    416 nv50_hw_get_driver_query_info(struct nv50_screen *screen, unsigned id,
    417                               struct pipe_driver_query_info *info)
    418 {
    419    int num_hw_sm_queries = 0, num_hw_metric_queries = 0;
    420 
    421    num_hw_sm_queries = nv50_hw_sm_get_driver_query_info(screen, 0, NULL);
    422    num_hw_metric_queries =
    423       nv50_hw_metric_get_driver_query_info(screen, 0, NULL);
    424 
    425    if (!info)
    426       return num_hw_sm_queries + num_hw_metric_queries;
    427 
    428    if (id < num_hw_sm_queries)
    429       return nv50_hw_sm_get_driver_query_info(screen, id, info);
    430 
    431    return nv50_hw_metric_get_driver_query_info(screen,
    432                                                id - num_hw_sm_queries, info);
    433 }
    434 
/* Push a 32-bit word of the query result as the data of the given 3D
 * method, syncing with the GPU first if the result is not ready yet.
 *
 * NOTE(review): no PUSH_SPACE before BEGIN_NV04 here despite
 * NV50_PUSH_EXPLICIT_SPACE_CHECKING — presumably the caller reserves
 * the 2 dwords; confirm against call sites.
 */
void
nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method,
                             struct nv50_query *q, unsigned result_offset)
{
   struct nv50_hw_query *hq = nv50_hw_query(q);

   nv50_hw_query_update(q);
   if (hq->state != NV50_HW_QUERY_STATE_READY)
      nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, push->client);
   hq->state = NV50_HW_QUERY_STATE_READY;

   BEGIN_NV04(push, SUBC_3D(method), 1);
   PUSH_DATA (push, hq->data[result_offset / 4]);
}
    449 
    450 void
    451 nv84_hw_query_fifo_wait(struct nouveau_pushbuf *push, struct nv50_query *q)
    452 {
    453    struct nv50_hw_query *hq = nv50_hw_query(q);
    454    unsigned offset = hq->offset;
    455 
    456    PUSH_SPACE(push, 5);
    457    PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
    458    BEGIN_NV04(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
    459    PUSH_DATAh(push, hq->bo->offset + offset);
    460    PUSH_DATA (push, hq->bo->offset + offset);
    461    PUSH_DATA (push, hq->sequence);
    462    PUSH_DATA (push, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
    463 }
    464