      1 
      2 #include "util/u_inlines.h"
      3 #include "util/u_memory.h"
      4 #include "util/u_math.h"
      5 #include "util/u_surface.h"
      6 
      7 #include "nouveau_screen.h"
      8 #include "nouveau_context.h"
      9 #include "nouveau_winsys.h"
     10 #include "nouveau_fence.h"
     11 #include "nouveau_buffer.h"
     12 #include "nouveau_mm.h"
     13 
     14 #define NOUVEAU_TRANSFER_PUSHBUF_THRESHOLD 192
     15 
     16 struct nouveau_transfer {
     17    struct pipe_transfer base;
     18 
     19    uint8_t *map;
     20    struct nouveau_bo *bo;
     21    struct nouveau_mm_allocation *mm;
     22    uint32_t offset;
     23 };
     24 
     25 static inline struct nouveau_transfer *
     26 nouveau_transfer(struct pipe_transfer *transfer)
     27 {
     28    return (struct nouveau_transfer *)transfer;
     29 }
     30 
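         /* Make sure a malloc'd system-memory area for the buffer contents
          * (buf->data) exists, allocating it on demand. */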
     31 static inline bool
     32 nouveau_buffer_malloc(struct nv04_resource *buf)
     33 {
     34    if (!buf->data)
     35       buf->data = align_malloc(buf->base.width0, NOUVEAU_MIN_BUFFER_MAP_ALIGN);
     36    return !!buf->data;
     37 }
     38 
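         /* Allocate backing storage for the buffer in the requested domain:
          * VRAM (falling back to GART if the sub-allocation fails), GART, or,
          * for domain == 0, plain system memory. */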
     39 static inline bool
     40 nouveau_buffer_allocate(struct nouveau_screen *screen,
     41                         struct nv04_resource *buf, unsigned domain)
     42 {
     43    uint32_t size = align(buf->base.width0, 0x100);
     44 
     45    if (domain == NOUVEAU_BO_VRAM) {
     46       buf->mm = nouveau_mm_allocate(screen->mm_VRAM, size,
     47                                     &buf->bo, &buf->offset);
     48       if (!buf->bo)
     49          return nouveau_buffer_allocate(screen, buf, NOUVEAU_BO_GART);
     50       NOUVEAU_DRV_STAT(screen, buf_obj_current_bytes_vid, buf->base.width0);
     51    } else
     52    if (domain == NOUVEAU_BO_GART) {
     53       buf->mm = nouveau_mm_allocate(screen->mm_GART, size,
     54                                     &buf->bo, &buf->offset);
     55       if (!buf->bo)
     56          return false;
     57       NOUVEAU_DRV_STAT(screen, buf_obj_current_bytes_sys, buf->base.width0);
     58    } else {
     59       assert(domain == 0);
     60       if (!nouveau_buffer_malloc(buf))
     61          return false;
     62    }
     63    buf->domain = domain;
     64    if (buf->bo)
     65       buf->address = buf->bo->offset + buf->offset;
     66 
     67    util_range_set_empty(&buf->valid_buffer_range);
     68 
     69    return true;
     70 }
     71 
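         /* Return the sub-allocation to its slab once @fence has signalled. */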
     72 static inline void
     73 release_allocation(struct nouveau_mm_allocation **mm,
     74                    struct nouveau_fence *fence)
     75 {
     76    nouveau_fence_work(fence, nouveau_mm_free_work, *mm);
     77    (*mm) = NULL;
     78 }
     79 
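         /* Give up the buffer's GPU storage (bo and sub-allocation). If
          * commands referencing the bo have not been flushed yet, the release
          * is deferred through the buffer's fence. */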
     80 inline void
     81 nouveau_buffer_release_gpu_storage(struct nv04_resource *buf)
     82 {
     83    if (buf->fence && buf->fence->state < NOUVEAU_FENCE_STATE_FLUSHED) {
     84       nouveau_fence_work(buf->fence, nouveau_fence_unref_bo, buf->bo);
     85       buf->bo = NULL;
     86    } else {
     87       nouveau_bo_ref(NULL, &buf->bo);
     88    }
     89 
     90    if (buf->mm)
     91       release_allocation(&buf->mm, buf->fence);
     92 
     93    if (buf->domain == NOUVEAU_BO_VRAM)
     94       NOUVEAU_DRV_STAT_RES(buf, buf_obj_current_bytes_vid, -(uint64_t)buf->base.width0);
     95    if (buf->domain == NOUVEAU_BO_GART)
     96       NOUVEAU_DRV_STAT_RES(buf, buf_obj_current_bytes_sys, -(uint64_t)buf->base.width0);
     97 
     98    buf->domain = 0;
     99 }
    100 
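         /* Throw away the current backing storage and fences, then allocate
          * fresh storage in @domain. */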
    101 static inline bool
    102 nouveau_buffer_reallocate(struct nouveau_screen *screen,
    103                           struct nv04_resource *buf, unsigned domain)
    104 {
    105    nouveau_buffer_release_gpu_storage(buf);
    106 
    107    nouveau_fence_ref(NULL, &buf->fence);
    108    nouveau_fence_ref(NULL, &buf->fence_wr);
    109 
    110    buf->status &= NOUVEAU_BUFFER_STATUS_REALLOC_MASK;
    111 
    112    return nouveau_buffer_allocate(screen, buf, domain);
    113 }
    114 
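         /* u_resource_vtbl::resource_destroy implementation for buffers. */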
    115 static void
    116 nouveau_buffer_destroy(struct pipe_screen *pscreen,
    117                        struct pipe_resource *presource)
    118 {
    119    struct nv04_resource *res = nv04_resource(presource);
    120 
    121    nouveau_buffer_release_gpu_storage(res);
    122 
    123    if (res->data && !(res->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY))
    124       align_free(res->data);
    125 
    126    nouveau_fence_ref(NULL, &res->fence);
    127    nouveau_fence_ref(NULL, &res->fence_wr);
    128 
    129    util_range_destroy(&res->valid_buffer_range);
    130 
    131    FREE(res);
    132 
    133    NOUVEAU_DRV_STAT(nouveau_screen(pscreen), buf_obj_current_count, -1);
    134 }
    135 
    136 /* Set up a staging area for the transfer. This is either done in "regular"
    137  * system memory if the driver supports push_data (nv50+) and the data is
    138  * small enough (and permit_pb == true), or in GART memory.
    139  */
    140 static uint8_t *
    141 nouveau_transfer_staging(struct nouveau_context *nv,
    142                          struct nouveau_transfer *tx, bool permit_pb)
    143 {
    144    const unsigned adj = tx->base.box.x & NOUVEAU_MIN_BUFFER_MAP_ALIGN_MASK;
    145    const unsigned size = align(tx->base.box.width, 4) + adj;
    146 
    147    if (!nv->push_data)
    148       permit_pb = false;
    149 
    150    if ((size <= NOUVEAU_TRANSFER_PUSHBUF_THRESHOLD) && permit_pb) {
    151       tx->map = align_malloc(size, NOUVEAU_MIN_BUFFER_MAP_ALIGN);
    152       if (tx->map)
    153          tx->map += adj;
    154    } else {
    155       tx->mm =
    156          nouveau_mm_allocate(nv->screen->mm_GART, size, &tx->bo, &tx->offset);
    157       if (tx->bo) {
    158          tx->offset += adj;
    159          if (!nouveau_bo_map(tx->bo, 0, NULL))
    160             tx->map = (uint8_t *)tx->bo->map + tx->offset;
    161       }
    162    }
    163    return tx->map;
    164 }
    165 
    166 /* Copies data from the resource into the transfer's temporary GART
    167  * buffer. Also updates buf->data if present.
    168  *
    169  * Maybe just migrate to GART right away if we actually need to do this. */
    170 static bool
    171 nouveau_transfer_read(struct nouveau_context *nv, struct nouveau_transfer *tx)
    172 {
    173    struct nv04_resource *buf = nv04_resource(tx->base.resource);
    174    const unsigned base = tx->base.box.x;
    175    const unsigned size = tx->base.box.width;
    176 
    177    NOUVEAU_DRV_STAT(nv->screen, buf_read_bytes_staging_vid, size);
    178 
    179    nv->copy_data(nv, tx->bo, tx->offset, NOUVEAU_BO_GART,
    180                  buf->bo, buf->offset + base, buf->domain, size);
    181 
    182    if (nouveau_bo_wait(tx->bo, NOUVEAU_BO_RD, nv->client))
    183       return false;
    184 
    185    if (buf->data)
    186       memcpy(buf->data + base, tx->map, size);
    187 
    188    return true;
    189 }
    190 
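         /* Write a sub-range of the transfer back to the resource. If the
          * buffer has a system-memory copy, the user's writes landed there, so
          * the staging data is refreshed from buf->data first; otherwise the
          * resource is flagged dirty. The data is then transferred with a GPU
          * copy (staging bo), push_cb (dword-aligned updates) or push_data,
          * and the buffer's fences are updated. */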
    191 static void
    192 nouveau_transfer_write(struct nouveau_context *nv, struct nouveau_transfer *tx,
    193                        unsigned offset, unsigned size)
    194 {
    195    struct nv04_resource *buf = nv04_resource(tx->base.resource);
    196    uint8_t *data = tx->map + offset;
    197    const unsigned base = tx->base.box.x + offset;
    198    const bool can_cb = !((base | size) & 3);
    199 
    200    if (buf->data)
    201       memcpy(data, buf->data + base, size);
    202    else
    203       buf->status |= NOUVEAU_BUFFER_STATUS_DIRTY;
    204 
    205    if (buf->domain == NOUVEAU_BO_VRAM)
    206       NOUVEAU_DRV_STAT(nv->screen, buf_write_bytes_staging_vid, size);
    207    if (buf->domain == NOUVEAU_BO_GART)
    208       NOUVEAU_DRV_STAT(nv->screen, buf_write_bytes_staging_sys, size);
    209 
    210    if (tx->bo)
    211       nv->copy_data(nv, buf->bo, buf->offset + base, buf->domain,
    212                     tx->bo, tx->offset + offset, NOUVEAU_BO_GART, size);
    213    else
    214    if (nv->push_cb && can_cb)
    215       nv->push_cb(nv, buf,
    216                   base, size / 4, (const uint32_t *)data);
    217    else
    218       nv->push_data(nv, buf->bo, buf->offset + base, buf->domain, size, data);
    219 
    220    nouveau_fence_ref(nv->screen->fence.current, &buf->fence);
    221    nouveau_fence_ref(nv->screen->fence.current, &buf->fence_wr);
    222 }
    223 
    224 /* Does a CPU wait for the buffer's backing data to become reliably accessible
    225  * for write/read by waiting on the buffer's relevant fences.
    226  */
    227 static inline bool
    228 nouveau_buffer_sync(struct nouveau_context *nv,
    229                     struct nv04_resource *buf, unsigned rw)
    230 {
    231    if (rw == PIPE_TRANSFER_READ) {
    232       if (!buf->fence_wr)
    233          return true;
    234       NOUVEAU_DRV_STAT_RES(buf, buf_non_kernel_fence_sync_count,
    235                            !nouveau_fence_signalled(buf->fence_wr));
    236       if (!nouveau_fence_wait(buf->fence_wr, &nv->debug))
    237          return false;
    238    } else {
    239       if (!buf->fence)
    240          return true;
    241       NOUVEAU_DRV_STAT_RES(buf, buf_non_kernel_fence_sync_count,
    242                            !nouveau_fence_signalled(buf->fence));
    243       if (!nouveau_fence_wait(buf->fence, &nv->debug))
    244          return false;
    245 
    246       nouveau_fence_ref(NULL, &buf->fence);
    247    }
    248    nouveau_fence_ref(NULL, &buf->fence_wr);
    249 
    250    return true;
    251 }
    252 
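         /* Check whether the GPU may still be accessing the buffer in a way
          * that conflicts with the intended access: an unsignalled write fence
          * blocks reads, any unsignalled fence blocks writes. */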
    253 static inline bool
    254 nouveau_buffer_busy(struct nv04_resource *buf, unsigned rw)
    255 {
    256    if (rw == PIPE_TRANSFER_READ)
    257       return (buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr));
    258    else
    259       return (buf->fence && !nouveau_fence_signalled(buf->fence));
    260 }
    261 
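         /* Fill in the pipe_transfer fields for a 1D buffer mapping of @box. */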
    262 static inline void
    263 nouveau_buffer_transfer_init(struct nouveau_transfer *tx,
    264                              struct pipe_resource *resource,
    265                              const struct pipe_box *box,
    266                              unsigned usage)
    267 {
    268    tx->base.resource = resource;
    269    tx->base.level = 0;
    270    tx->base.usage = usage;
    271    tx->base.box.x = box->x;
    272    tx->base.box.y = 0;
    273    tx->base.box.z = 0;
    274    tx->base.box.width = box->width;
    275    tx->base.box.height = 1;
    276    tx->base.box.depth = 1;
    277    tx->base.stride = 0;
    278    tx->base.layer_stride = 0;
    279 
    280    tx->bo = NULL;
    281    tx->map = NULL;
    282 }
    283 
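         /* Release the transfer's staging storage: the staging bo and
          * sub-allocation are freed through the current fence, a malloc'd
          * staging area is freed immediately. */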
    284 static inline void
    285 nouveau_buffer_transfer_del(struct nouveau_context *nv,
    286                             struct nouveau_transfer *tx)
    287 {
    288    if (tx->map) {
    289       if (likely(tx->bo)) {
    290          nouveau_fence_work(nv->screen->fence.current,
    291                             nouveau_fence_unref_bo, tx->bo);
    292          if (tx->mm)
    293             release_allocation(&tx->mm, nv->screen->fence.current);
    294       } else {
    295          align_free(tx->map -
    296                     (tx->base.box.x & NOUVEAU_MIN_BUFFER_MAP_ALIGN_MASK));
    297       }
    298    }
    299 }
    300 
    301 /* Creates a cache in system memory of the buffer data. */
    302 static bool
    303 nouveau_buffer_cache(struct nouveau_context *nv, struct nv04_resource *buf)
    304 {
    305    struct nouveau_transfer tx;
    306    bool ret;
    307    tx.base.resource = &buf->base;
    308    tx.base.box.x = 0;
    309    tx.base.box.width = buf->base.width0;
    310    tx.bo = NULL;
    311    tx.map = NULL;
    312 
    313    if (!buf->data)
    314       if (!nouveau_buffer_malloc(buf))
    315          return false;
    316    if (!(buf->status & NOUVEAU_BUFFER_STATUS_DIRTY))
    317       return true;
    318    nv->stats.buf_cache_count++;
    319 
    320    if (!nouveau_transfer_staging(nv, &tx, false))
    321       return false;
    322 
    323    ret = nouveau_transfer_read(nv, &tx);
    324    if (ret) {
    325       buf->status &= ~NOUVEAU_BUFFER_STATUS_DIRTY;
    326       memcpy(buf->data, tx.map, buf->base.width0);
    327    }
    328    nouveau_buffer_transfer_del(nv, &tx);
    329    return ret;
    330 }
    331 
    332 
    333 #define NOUVEAU_TRANSFER_DISCARD \
    334    (PIPE_TRANSFER_DISCARD_RANGE | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)
    335 
    336 /* Checks whether it is possible to completely discard the memory backing this
    337  * resource. This can be useful if we would otherwise have to wait for a read
    338  * operation to complete on this data.
    339  */
    340 static inline bool
    341 nouveau_buffer_should_discard(struct nv04_resource *buf, unsigned usage)
    342 {
    343    if (!(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE))
    344       return false;
    345    if (unlikely(buf->base.bind & PIPE_BIND_SHARED))
    346       return false;
    347    if (unlikely(usage & PIPE_TRANSFER_PERSISTENT))
    348       return false;
    349    return buf->mm && nouveau_buffer_busy(buf, PIPE_TRANSFER_WRITE);
    350 }
    351 
    352 /* Returns a pointer to a memory area representing a window into the
    353  * resource's data.
    354  *
    355  * This may or may not be the _actual_ memory area of the resource. However
    356  * when calling nouveau_buffer_transfer_unmap, if it wasn't the actual memory
    357  * area, the contents of the returned map are copied over to the resource.
    358  *
    359  * The usage indicates what the caller plans to do with the map:
    360  *
    361  *   WRITE means that the user plans to write to it
    362  *
    363  *   READ means that the user plans on reading from it
    364  *
    365  *   DISCARD_WHOLE_RESOURCE means that the whole resource is going to be
    366  *   potentially overwritten, and even if it isn't, the bits that aren't don't
    367  *   need to be maintained.
    368  *
    369  *   DISCARD_RANGE means that all the data in the specified range is going to
    370  *   be overwritten.
    371  *
    372  * The strategy for determining what kind of memory area to return is complex,
    373  * see comments inside of the function.
    374  */
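         /* Illustrative sketch only (not part of this file): a state tracker
          * typically reaches this function through the u_inlines helpers,
          * e.g. assuming "pipe" is a pipe_context and "res" a PIPE_BUFFER:
          *
          *    struct pipe_transfer *transfer;
          *    uint32_t *map = pipe_buffer_map(pipe, res,
          *                                    PIPE_TRANSFER_WRITE |
          *                                    PIPE_TRANSFER_DISCARD_RANGE,
          *                                    &transfer);
          *    if (map) {
          *       map[0] = 0;
          *       pipe_buffer_unmap(pipe, transfer);
          *    }
          */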
    375 static void *
    376 nouveau_buffer_transfer_map(struct pipe_context *pipe,
    377                             struct pipe_resource *resource,
    378                             unsigned level, unsigned usage,
    379                             const struct pipe_box *box,
    380                             struct pipe_transfer **ptransfer)
    381 {
    382    struct nouveau_context *nv = nouveau_context(pipe);
    383    struct nv04_resource *buf = nv04_resource(resource);
    384    struct nouveau_transfer *tx = MALLOC_STRUCT(nouveau_transfer);
    385    uint8_t *map;
    386    int ret;
    387 
    388    if (!tx)
    389       return NULL;
    390    nouveau_buffer_transfer_init(tx, resource, box, usage);
    391    *ptransfer = &tx->base;
    392 
    393    if (usage & PIPE_TRANSFER_READ)
    394       NOUVEAU_DRV_STAT(nv->screen, buf_transfers_rd, 1);
    395    if (usage & PIPE_TRANSFER_WRITE)
    396       NOUVEAU_DRV_STAT(nv->screen, buf_transfers_wr, 1);
    397 
    398    /* If we are trying to write to an uninitialized range, the user shouldn't
    399     * care what was there before. So we can treat the write as if the target
    400     * range were being discarded. Furthermore, since we know that even if this
    401     * buffer is busy due to GPU activity, because the contents were
    402     * uninitialized, the GPU can't care what was there, and so we can treat
    403     * the write as being unsynchronized.
    404     */
    405    if ((usage & PIPE_TRANSFER_WRITE) &&
    406        !util_ranges_intersect(&buf->valid_buffer_range, box->x, box->x + box->width))
    407       usage |= PIPE_TRANSFER_DISCARD_RANGE | PIPE_TRANSFER_UNSYNCHRONIZED;
    408 
    409    if (buf->domain == NOUVEAU_BO_VRAM) {
    410       if (usage & NOUVEAU_TRANSFER_DISCARD) {
    411          /* Set up a staging area for the user to write to. It will be copied
    412           * back into VRAM on unmap. */
    413          if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)
    414             buf->status &= NOUVEAU_BUFFER_STATUS_REALLOC_MASK;
    415          nouveau_transfer_staging(nv, tx, true);
    416       } else {
    417          if (buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
    418             /* The GPU is currently writing to this buffer. Copy its current
     419              * contents to a staging area in GART. This is necessary because
     420              * the range being mapped is not being discarded in its entirety.
    421              */
    422             if (buf->data) {
    423                align_free(buf->data);
    424                buf->data = NULL;
    425             }
    426             nouveau_transfer_staging(nv, tx, false);
    427             nouveau_transfer_read(nv, tx);
    428          } else {
    429             /* The buffer is currently idle. Create a staging area for writes,
    430              * and make sure that the cached data is up-to-date. */
    431             if (usage & PIPE_TRANSFER_WRITE)
    432                nouveau_transfer_staging(nv, tx, true);
    433             if (!buf->data)
    434                nouveau_buffer_cache(nv, buf);
    435          }
    436       }
    437       return buf->data ? (buf->data + box->x) : tx->map;
    438    } else
    439    if (unlikely(buf->domain == 0)) {
    440       return buf->data + box->x;
    441    }
    442 
    443    /* At this point, buf->domain == GART */
    444 
    445    if (nouveau_buffer_should_discard(buf, usage)) {
    446       int ref = buf->base.reference.count - 1;
    447       nouveau_buffer_reallocate(nv->screen, buf, buf->domain);
     448       if (ref > 0) /* are any references held inside the context? */
    449          nv->invalidate_resource_storage(nv, &buf->base, ref);
    450    }
    451 
    452    /* Note that nouveau_bo_map ends up doing a nouveau_bo_wait with the
    453     * relevant flags. If buf->mm is set, that means this resource is part of a
    454     * larger slab bo that holds multiple resources. So in that case, don't
    455     * wait on the whole slab and instead use the logic below to return a
    456     * reasonable buffer for that case.
    457     */
    458    ret = nouveau_bo_map(buf->bo,
    459                         buf->mm ? 0 : nouveau_screen_transfer_flags(usage),
    460                         nv->client);
    461    if (ret) {
    462       FREE(tx);
    463       return NULL;
    464    }
    465    map = (uint8_t *)buf->bo->map + buf->offset + box->x;
    466 
    467    /* using kernel fences only if !buf->mm */
    468    if ((usage & PIPE_TRANSFER_UNSYNCHRONIZED) || !buf->mm)
    469       return map;
    470 
    471    /* If the GPU is currently reading/writing this buffer, we shouldn't
    472     * interfere with its progress. So instead we either wait for the GPU to
    473     * complete its operation, or set up a staging area to perform our work in.
    474     */
    475    if (nouveau_buffer_busy(buf, usage & PIPE_TRANSFER_READ_WRITE)) {
    476       if (unlikely(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
    477                             PIPE_TRANSFER_PERSISTENT))) {
    478          /* Discarding was not possible, must sync because
    479           * subsequent transfers might use UNSYNCHRONIZED. */
    480          nouveau_buffer_sync(nv, buf, usage & PIPE_TRANSFER_READ_WRITE);
    481       } else
    482       if (usage & PIPE_TRANSFER_DISCARD_RANGE) {
    483          /* The whole range is being discarded, so it doesn't matter what was
    484           * there before. No need to copy anything over. */
    485          nouveau_transfer_staging(nv, tx, true);
    486          map = tx->map;
    487       } else
    488       if (nouveau_buffer_busy(buf, PIPE_TRANSFER_READ)) {
    489          if (usage & PIPE_TRANSFER_DONTBLOCK)
    490             map = NULL;
    491          else
    492             nouveau_buffer_sync(nv, buf, usage & PIPE_TRANSFER_READ_WRITE);
    493       } else {
    494          /* It is expected that the returned buffer be a representation of the
    495           * data in question, so we must copy it over from the buffer. */
    496          nouveau_transfer_staging(nv, tx, true);
    497          if (tx->map)
    498             memcpy(tx->map, map, box->width);
    499          map = tx->map;
    500       }
    501    }
    502    if (!map)
    503       FREE(tx);
    504    return map;
    505 }
    506 
    507 
    508 
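         /* Explicit flush of a sub-range of a mapped transfer: write the range
          * back to the resource and extend its valid buffer range. */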
    509 static void
    510 nouveau_buffer_transfer_flush_region(struct pipe_context *pipe,
    511                                      struct pipe_transfer *transfer,
    512                                      const struct pipe_box *box)
    513 {
    514    struct nouveau_transfer *tx = nouveau_transfer(transfer);
    515    struct nv04_resource *buf = nv04_resource(transfer->resource);
    516 
    517    if (tx->map)
    518       nouveau_transfer_write(nouveau_context(pipe), tx, box->x, box->width);
    519 
    520    util_range_add(&buf->valid_buffer_range,
    521                   tx->base.box.x + box->x,
    522                   tx->base.box.x + box->x + box->width);
    523 }
    524 
    525 /* Unmap stage of the transfer. If it was a WRITE transfer and the map that
    526  * was returned was not the real resource's data, this needs to transfer the
    527  * data back to the resource.
    528  *
     529  * Also marks the context's vbo state dirty based on the buffer's bindings.
    530  */
    531 static void
    532 nouveau_buffer_transfer_unmap(struct pipe_context *pipe,
    533                               struct pipe_transfer *transfer)
    534 {
    535    struct nouveau_context *nv = nouveau_context(pipe);
    536    struct nouveau_transfer *tx = nouveau_transfer(transfer);
    537    struct nv04_resource *buf = nv04_resource(transfer->resource);
    538 
    539    if (tx->base.usage & PIPE_TRANSFER_WRITE) {
    540       if (!(tx->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
    541          if (tx->map)
    542             nouveau_transfer_write(nv, tx, 0, tx->base.box.width);
    543 
    544          util_range_add(&buf->valid_buffer_range,
    545                         tx->base.box.x, tx->base.box.x + tx->base.box.width);
    546       }
    547 
    548       if (likely(buf->domain)) {
    549          const uint8_t bind = buf->base.bind;
    550          /* make sure we invalidate dedicated caches */
    551          if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER))
    552             nv->vbo_dirty = true;
    553       }
    554    }
    555 
    556    if (!tx->bo && (tx->base.usage & PIPE_TRANSFER_WRITE))
    557       NOUVEAU_DRV_STAT(nv->screen, buf_write_bytes_direct, tx->base.box.width);
    558 
    559    nouveau_buffer_transfer_del(nv, tx);
    560    FREE(tx);
    561 }
    562 
    563 
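         /* Copy @size bytes from src+srcx to dst+dstx. Uses the GPU copy
          * engine when both buffers have GPU storage, otherwise falls back to
          * util_resource_copy_region; dst's valid range is extended either
          * way. */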
    564 void
    565 nouveau_copy_buffer(struct nouveau_context *nv,
    566                     struct nv04_resource *dst, unsigned dstx,
    567                     struct nv04_resource *src, unsigned srcx, unsigned size)
    568 {
    569    assert(dst->base.target == PIPE_BUFFER && src->base.target == PIPE_BUFFER);
    570 
    571    if (likely(dst->domain) && likely(src->domain)) {
    572       nv->copy_data(nv,
    573                     dst->bo, dst->offset + dstx, dst->domain,
    574                     src->bo, src->offset + srcx, src->domain, size);
    575 
    576       dst->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
    577       nouveau_fence_ref(nv->screen->fence.current, &dst->fence);
    578       nouveau_fence_ref(nv->screen->fence.current, &dst->fence_wr);
    579 
    580       src->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
    581       nouveau_fence_ref(nv->screen->fence.current, &src->fence);
    582    } else {
    583       struct pipe_box src_box;
    584       src_box.x = srcx;
    585       src_box.y = 0;
    586       src_box.z = 0;
    587       src_box.width = size;
    588       src_box.height = 1;
    589       src_box.depth = 1;
    590       util_resource_copy_region(&nv->pipe,
    591                                 &dst->base, 0, dstx, 0, 0,
    592                                 &src->base, 0, &src_box);
    593    }
    594 
    595    util_range_add(&dst->valid_buffer_range, dstx, dstx + size);
    596 }
    597 
    598 
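         /* Return a CPU pointer to the resource's data at @offset. User and
          * VRAM-domain buffers are served from the system-memory copy (cached
          * first if necessary); GART buffers are synchronized and mapped. */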
    599 void *
    600 nouveau_resource_map_offset(struct nouveau_context *nv,
    601                             struct nv04_resource *res, uint32_t offset,
    602                             uint32_t flags)
    603 {
    604    if (unlikely(res->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY))
    605       return res->data + offset;
    606 
    607    if (res->domain == NOUVEAU_BO_VRAM) {
    608       if (!res->data || (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING))
    609          nouveau_buffer_cache(nv, res);
    610    }
    611    if (res->domain != NOUVEAU_BO_GART)
    612       return res->data + offset;
    613 
    614    if (res->mm) {
    615       unsigned rw;
    616       rw = (flags & NOUVEAU_BO_WR) ? PIPE_TRANSFER_WRITE : PIPE_TRANSFER_READ;
    617       nouveau_buffer_sync(nv, res, rw);
    618       if (nouveau_bo_map(res->bo, 0, NULL))
    619          return NULL;
    620    } else {
    621       if (nouveau_bo_map(res->bo, flags, nv->client))
    622          return NULL;
    623    }
    624    return (uint8_t *)res->bo->map + res->offset + offset;
    625 }
    626 
    627 
    628 const struct u_resource_vtbl nouveau_buffer_vtbl =
    629 {
     630    u_default_resource_get_handle,        /* get_handle */
    631    nouveau_buffer_destroy,               /* resource_destroy */
    632    nouveau_buffer_transfer_map,          /* transfer_map */
    633    nouveau_buffer_transfer_flush_region, /* transfer_flush_region */
    634    nouveau_buffer_transfer_unmap,        /* transfer_unmap */
    635 };
    636 
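         /* resource_create for PIPE_BUFFER: pick a placement domain from the
          * resource flags, bind and usage fields, then allocate the storage. */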
    637 struct pipe_resource *
    638 nouveau_buffer_create(struct pipe_screen *pscreen,
    639                       const struct pipe_resource *templ)
    640 {
    641    struct nouveau_screen *screen = nouveau_screen(pscreen);
    642    struct nv04_resource *buffer;
    643    bool ret;
    644 
    645    buffer = CALLOC_STRUCT(nv04_resource);
    646    if (!buffer)
    647       return NULL;
    648 
    649    buffer->base = *templ;
    650    buffer->vtbl = &nouveau_buffer_vtbl;
    651    pipe_reference_init(&buffer->base.reference, 1);
    652    buffer->base.screen = pscreen;
    653 
    654    if (buffer->base.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
    655                              PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
    656       buffer->domain = NOUVEAU_BO_GART;
    657    } else if (buffer->base.bind == 0 || (buffer->base.bind &
    658               (screen->vidmem_bindings & screen->sysmem_bindings))) {
    659       switch (buffer->base.usage) {
    660       case PIPE_USAGE_DEFAULT:
    661       case PIPE_USAGE_IMMUTABLE:
    662          buffer->domain = NV_VRAM_DOMAIN(screen);
    663          break;
    664       case PIPE_USAGE_DYNAMIC:
    665          /* For most apps, we'd have to do staging transfers to avoid sync
    666           * with this usage, and GART -> GART copies would be suboptimal.
    667           */
    668          buffer->domain = NV_VRAM_DOMAIN(screen);
    669          break;
    670       case PIPE_USAGE_STAGING:
    671       case PIPE_USAGE_STREAM:
    672          buffer->domain = NOUVEAU_BO_GART;
    673          break;
    674       default:
    675          assert(0);
    676          break;
    677       }
    678    } else {
    679       if (buffer->base.bind & screen->vidmem_bindings)
    680          buffer->domain = NV_VRAM_DOMAIN(screen);
    681       else
    682       if (buffer->base.bind & screen->sysmem_bindings)
    683          buffer->domain = NOUVEAU_BO_GART;
    684    }
    685 
    686    ret = nouveau_buffer_allocate(screen, buffer, buffer->domain);
    687 
    688    if (ret == false)
    689       goto fail;
    690 
    691    if (buffer->domain == NOUVEAU_BO_VRAM && screen->hint_buf_keep_sysmem_copy)
    692       nouveau_buffer_cache(NULL, buffer);
    693 
    694    NOUVEAU_DRV_STAT(screen, buf_obj_current_count, 1);
    695 
    696    util_range_init(&buffer->valid_buffer_range);
    697 
    698    return &buffer->base;
    699 
    700 fail:
    701    FREE(buffer);
    702    return NULL;
    703 }
    704 
    705 
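         /* Wrap an existing user pointer in a pipe_resource. No GPU storage is
          * allocated and the whole range is marked valid. */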
    706 struct pipe_resource *
    707 nouveau_user_buffer_create(struct pipe_screen *pscreen, void *ptr,
    708                            unsigned bytes, unsigned bind)
    709 {
    710    struct nv04_resource *buffer;
    711 
    712    buffer = CALLOC_STRUCT(nv04_resource);
    713    if (!buffer)
    714       return NULL;
    715 
    716    pipe_reference_init(&buffer->base.reference, 1);
    717    buffer->vtbl = &nouveau_buffer_vtbl;
    718    buffer->base.screen = pscreen;
    719    buffer->base.format = PIPE_FORMAT_R8_UNORM;
    720    buffer->base.usage = PIPE_USAGE_IMMUTABLE;
    721    buffer->base.bind = bind;
    722    buffer->base.width0 = bytes;
    723    buffer->base.height0 = 1;
    724    buffer->base.depth0 = 1;
    725 
    726    buffer->data = ptr;
    727    buffer->status = NOUVEAU_BUFFER_STATUS_USER_MEMORY;
    728 
    729    util_range_init(&buffer->valid_buffer_range);
    730    util_range_add(&buffer->valid_buffer_range, 0, bytes);
    731 
    732    return &buffer->base;
    733 }
    734 
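         /* Read @size bytes at @offset of @bo back into the buffer's
          * system-memory copy. */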
    735 static inline bool
    736 nouveau_buffer_data_fetch(struct nouveau_context *nv, struct nv04_resource *buf,
    737                           struct nouveau_bo *bo, unsigned offset, unsigned size)
    738 {
    739    if (!nouveau_buffer_malloc(buf))
    740       return false;
    741    if (nouveau_bo_map(bo, NOUVEAU_BO_RD, nv->client))
    742       return false;
    743    memcpy(buf->data, (uint8_t *)bo->map + offset, size);
    744    return true;
    745 }
    746 
    747 /* Migrate a linear buffer (vertex, index, constants) USER -> GART -> VRAM. */
    748 bool
    749 nouveau_buffer_migrate(struct nouveau_context *nv,
    750                        struct nv04_resource *buf, const unsigned new_domain)
    751 {
    752    struct nouveau_screen *screen = nv->screen;
    753    struct nouveau_bo *bo;
    754    const unsigned old_domain = buf->domain;
    755    unsigned size = buf->base.width0;
    756    unsigned offset;
    757    int ret;
    758 
    759    assert(new_domain != old_domain);
    760 
    761    if (new_domain == NOUVEAU_BO_GART && old_domain == 0) {
    762       if (!nouveau_buffer_allocate(screen, buf, new_domain))
    763          return false;
    764       ret = nouveau_bo_map(buf->bo, 0, nv->client);
    765       if (ret)
     766          return false;
    767       memcpy((uint8_t *)buf->bo->map + buf->offset, buf->data, size);
    768       align_free(buf->data);
    769    } else
    770    if (old_domain != 0 && new_domain != 0) {
    771       struct nouveau_mm_allocation *mm = buf->mm;
    772 
    773       if (new_domain == NOUVEAU_BO_VRAM) {
    774          /* keep a system memory copy of our data in case we hit a fallback */
    775          if (!nouveau_buffer_data_fetch(nv, buf, buf->bo, buf->offset, size))
    776             return false;
    777          if (nouveau_mesa_debug)
    778             debug_printf("migrating %u KiB to VRAM\n", size / 1024);
    779       }
    780 
    781       offset = buf->offset;
    782       bo = buf->bo;
    783       buf->bo = NULL;
    784       buf->mm = NULL;
    785       nouveau_buffer_allocate(screen, buf, new_domain);
    786 
    787       nv->copy_data(nv, buf->bo, buf->offset, new_domain,
    788                     bo, offset, old_domain, buf->base.width0);
    789 
    790       nouveau_fence_work(screen->fence.current, nouveau_fence_unref_bo, bo);
    791       if (mm)
    792          release_allocation(&mm, screen->fence.current);
    793    } else
    794    if (new_domain == NOUVEAU_BO_VRAM && old_domain == 0) {
    795       struct nouveau_transfer tx;
    796       if (!nouveau_buffer_allocate(screen, buf, NOUVEAU_BO_VRAM))
    797          return false;
    798       tx.base.resource = &buf->base;
    799       tx.base.box.x = 0;
    800       tx.base.box.width = buf->base.width0;
    801       tx.bo = NULL;
    802       tx.map = NULL;
    803       if (!nouveau_transfer_staging(nv, &tx, false))
    804          return false;
    805       nouveau_transfer_write(nv, &tx, 0, tx.base.box.width);
    806       nouveau_buffer_transfer_del(nv, &tx);
    807    } else
    808       return false;
    809 
    810    assert(buf->domain == new_domain);
    811    return true;
    812 }
    813 
    814 /* Migrate data from glVertexAttribPointer(non-VBO) user buffers to GART.
    815  * We'd like to only allocate @size bytes here, but then we'd have to rebase
    816  * the vertex indices ...
    817  */
    818 bool
    819 nouveau_user_buffer_upload(struct nouveau_context *nv,
    820                            struct nv04_resource *buf,
    821                            unsigned base, unsigned size)
    822 {
    823    struct nouveau_screen *screen = nouveau_screen(buf->base.screen);
    824    int ret;
    825 
    826    assert(buf->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY);
    827 
    828    buf->base.width0 = base + size;
    829    if (!nouveau_buffer_reallocate(screen, buf, NOUVEAU_BO_GART))
    830       return false;
    831 
    832    ret = nouveau_bo_map(buf->bo, 0, nv->client);
    833    if (ret)
    834       return false;
    835    memcpy((uint8_t *)buf->bo->map + buf->offset + base, buf->data + base, size);
    836 
    837    return true;
    838 }
    839 
    840 /* Invalidate underlying buffer storage, reset fences, reallocate to non-busy
    841  * buffer.
    842  */
    843 void
    844 nouveau_buffer_invalidate(struct pipe_context *pipe,
    845                           struct pipe_resource *resource)
    846 {
    847    struct nouveau_context *nv = nouveau_context(pipe);
    848    struct nv04_resource *buf = nv04_resource(resource);
    849    int ref = buf->base.reference.count - 1;
    850 
    851    /* Shared buffers shouldn't get reallocated */
    852    if (unlikely(buf->base.bind & PIPE_BIND_SHARED))
    853       return;
    854 
    855    /* We can't touch persistent/coherent buffers */
    856    if (buf->base.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
    857                           PIPE_RESOURCE_FLAG_MAP_COHERENT))
    858       return;
    859 
    860    /* If the buffer is sub-allocated and not currently being written, just
    861     * wipe the valid buffer range. Otherwise we have to create fresh
    862     * storage. (We don't keep track of fences for non-sub-allocated BO's.)
    863     */
    864    if (buf->mm && !nouveau_buffer_busy(buf, PIPE_TRANSFER_WRITE)) {
    865       util_range_set_empty(&buf->valid_buffer_range);
    866    } else {
    867       nouveau_buffer_reallocate(nv->screen, buf, buf->domain);
     868       if (ref > 0) /* are any references held inside the context? */
    869          nv->invalidate_resource_storage(nv, &buf->base, ref);
    870    }
    871 }
    872 
    873 
    874 /* Scratch data allocation. */
    875 
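         /* Allocate a 4 KiB aligned, CPU-mappable GART bo for scratch use. */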
    876 static inline int
    877 nouveau_scratch_bo_alloc(struct nouveau_context *nv, struct nouveau_bo **pbo,
    878                          unsigned size)
    879 {
    880    return nouveau_bo_new(nv->screen->device, NOUVEAU_BO_GART | NOUVEAU_BO_MAP,
    881                          4096, size, NULL, pbo);
    882 }
    883 
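         /* Fence-work callback: drop the references to all bos recorded in a
          * runout list and free the list. */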
    884 static void
    885 nouveau_scratch_unref_bos(void *d)
    886 {
    887    struct runout *b = d;
    888    int i;
    889 
    890    for (i = 0; i < b->nr; ++i)
    891       nouveau_bo_ref(NULL, &b->bo[i]);
    892 
    893    FREE(b);
    894 }
    895 
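         /* Schedule the runout bos for release after the current fence; if
          * that succeeds, forget the runout list. */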
    896 void
    897 nouveau_scratch_runout_release(struct nouveau_context *nv)
    898 {
    899    if (!nv->scratch.runout)
    900       return;
    901 
    902    if (!nouveau_fence_work(nv->screen->fence.current, nouveau_scratch_unref_bos,
    903          nv->scratch.runout))
    904       return;
    905 
    906    nv->scratch.end = 0;
    907    nv->scratch.runout = NULL;
    908 }
    909 
    910 /* Allocate an extra bo if we can't fit everything we need simultaneously.
    911  * (Could happen for very large user arrays.)
    912  */
    913 static inline bool
    914 nouveau_scratch_runout(struct nouveau_context *nv, unsigned size)
    915 {
    916    int ret;
    917    unsigned n;
    918 
    919    if (nv->scratch.runout)
    920       n = nv->scratch.runout->nr;
    921    else
    922       n = 0;
    923    nv->scratch.runout = REALLOC(nv->scratch.runout, n == 0 ? 0 :
    924                                 (sizeof(*nv->scratch.runout) + (n + 0) * sizeof(void *)),
    925                                  sizeof(*nv->scratch.runout) + (n + 1) * sizeof(void *));
    926    nv->scratch.runout->nr = n + 1;
    927    nv->scratch.runout->bo[n] = NULL;
    928 
    929    ret = nouveau_scratch_bo_alloc(nv, &nv->scratch.runout->bo[n], size);
    930    if (!ret) {
    931       ret = nouveau_bo_map(nv->scratch.runout->bo[n], 0, NULL);
    932       if (ret)
    933          nouveau_bo_ref(NULL, &nv->scratch.runout->bo[--nv->scratch.runout->nr]);
    934    }
    935    if (!ret) {
    936       nv->scratch.current = nv->scratch.runout->bo[n];
    937       nv->scratch.offset = 0;
    938       nv->scratch.end = size;
    939       nv->scratch.map = nv->scratch.current->map;
    940    }
    941    return !ret;
    942 }
    943 
    944 /* Continue to next scratch buffer, if available (no wrapping, large enough).
    945  * Allocate it if it has not yet been created.
    946  */
    947 static inline bool
    948 nouveau_scratch_next(struct nouveau_context *nv, unsigned size)
    949 {
    950    struct nouveau_bo *bo;
    951    int ret;
    952    const unsigned i = (nv->scratch.id + 1) % NOUVEAU_MAX_SCRATCH_BUFS;
    953 
    954    if ((size > nv->scratch.bo_size) || (i == nv->scratch.wrap))
    955       return false;
    956    nv->scratch.id = i;
    957 
    958    bo = nv->scratch.bo[i];
    959    if (!bo) {
    960       ret = nouveau_scratch_bo_alloc(nv, &bo, nv->scratch.bo_size);
    961       if (ret)
    962          return false;
    963       nv->scratch.bo[i] = bo;
    964    }
    965    nv->scratch.current = bo;
    966    nv->scratch.offset = 0;
    967    nv->scratch.end = nv->scratch.bo_size;
    968 
    969    ret = nouveau_bo_map(bo, NOUVEAU_BO_WR, nv->client);
    970    if (!ret)
    971       nv->scratch.map = bo->map;
    972    return !ret;
    973 }
    974 
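         /* Get more scratch space: try switching to the next regular scratch
          * buffer first, and fall back to a dedicated runout allocation. */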
    975 static bool
    976 nouveau_scratch_more(struct nouveau_context *nv, unsigned min_size)
    977 {
    978    bool ret;
    979 
    980    ret = nouveau_scratch_next(nv, min_size);
    981    if (!ret)
    982       ret = nouveau_scratch_runout(nv, min_size);
    983    return ret;
    984 }
    985 
    986 
     987 /* Copy data to a scratch buffer and return the address & bo the data resides in. */
    988 uint64_t
    989 nouveau_scratch_data(struct nouveau_context *nv,
    990                      const void *data, unsigned base, unsigned size,
    991                      struct nouveau_bo **bo)
    992 {
    993    unsigned bgn = MAX2(base, nv->scratch.offset);
    994    unsigned end = bgn + size;
    995 
    996    if (end >= nv->scratch.end) {
    997       end = base + size;
    998       if (!nouveau_scratch_more(nv, end))
    999          return 0;
   1000       bgn = base;
   1001    }
   1002    nv->scratch.offset = align(end, 4);
   1003 
   1004    memcpy(nv->scratch.map + bgn, (const uint8_t *)data + base, size);
   1005 
   1006    *bo = nv->scratch.current;
   1007    return (*bo)->offset + (bgn - base);
   1008 }
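
         /* Illustrative sketch only (not from this file): uploading a user
          * vertex array through the scratch allocator, assuming "nv" is the
          * nouveau_context and verts/bytes describe the array:
          *
          *    struct nouveau_bo *bo;
          *    uint64_t addr = nouveau_scratch_data(nv, verts, 0, bytes, &bo);
          *    if (!addr)
          *       return false; // scratch exhausted, caller must fall back
          */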
   1009 
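         /* Reserve @size bytes of scratch space; returns a CPU pointer to it
          * and hands back the GPU address and bo. */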
   1010 void *
   1011 nouveau_scratch_get(struct nouveau_context *nv,
   1012                     unsigned size, uint64_t *gpu_addr, struct nouveau_bo **pbo)
   1013 {
   1014    unsigned bgn = nv->scratch.offset;
   1015    unsigned end = nv->scratch.offset + size;
   1016 
   1017    if (end >= nv->scratch.end) {
   1018       end = size;
   1019       if (!nouveau_scratch_more(nv, end))
   1020          return NULL;
   1021       bgn = 0;
   1022    }
   1023    nv->scratch.offset = align(end, 4);
   1024 
   1025    *pbo = nv->scratch.current;
   1026    *gpu_addr = nv->scratch.current->offset + bgn;
   1027    return nv->scratch.map + bgn;
   1028 }
   1029