#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_surface.h"

#include "nouveau_screen.h"
#include "nouveau_context.h"
#include "nouveau_winsys.h"
#include "nouveau_fence.h"
#include "nouveau_buffer.h"
#include "nouveau_mm.h"

#define NOUVEAU_TRANSFER_PUSHBUF_THRESHOLD 192

struct nouveau_transfer {
   struct pipe_transfer base;

   uint8_t *map;
   struct nouveau_bo *bo;
   struct nouveau_mm_allocation *mm;
   uint32_t offset;
};

static inline struct nouveau_transfer *
nouveau_transfer(struct pipe_transfer *transfer)
{
   return (struct nouveau_transfer *)transfer;
}

static inline bool
nouveau_buffer_malloc(struct nv04_resource *buf)
{
   if (!buf->data)
      buf->data = align_malloc(buf->base.width0, NOUVEAU_MIN_BUFFER_MAP_ALIGN);
   return !!buf->data;
}

static inline bool
nouveau_buffer_allocate(struct nouveau_screen *screen,
                        struct nv04_resource *buf, unsigned domain)
{
   uint32_t size = align(buf->base.width0, 0x100);

   if (domain == NOUVEAU_BO_VRAM) {
      buf->mm = nouveau_mm_allocate(screen->mm_VRAM, size,
                                    &buf->bo, &buf->offset);
      if (!buf->bo)
         return nouveau_buffer_allocate(screen, buf, NOUVEAU_BO_GART);
      NOUVEAU_DRV_STAT(screen, buf_obj_current_bytes_vid, buf->base.width0);
   } else
   if (domain == NOUVEAU_BO_GART) {
      buf->mm = nouveau_mm_allocate(screen->mm_GART, size,
                                    &buf->bo, &buf->offset);
      if (!buf->bo)
         return false;
      NOUVEAU_DRV_STAT(screen, buf_obj_current_bytes_sys, buf->base.width0);
   } else {
      assert(domain == 0);
      if (!nouveau_buffer_malloc(buf))
         return false;
   }
   buf->domain = domain;
   if (buf->bo)
      buf->address = buf->bo->offset + buf->offset;

   util_range_set_empty(&buf->valid_buffer_range);

   return true;
}
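
/* Note on sub-allocation (illustrative numbers, not from the original
 * source): nouveau_mm_allocate() carves the buffer out of a larger slab BO,
 * so buf->bo may be shared with other resources and buf->offset is the
 * position inside that slab.  E.g. if the slab BO sits at GPU offset
 * 0x20000000 and buf->offset == 0x300, then buf->address == 0x20000300,
 * and the reserved size is padded to a 0x100 multiple (a 1000-byte buffer
 * reserves 1024 bytes).
 */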

static inline void
release_allocation(struct nouveau_mm_allocation **mm,
                   struct nouveau_fence *fence)
{
   nouveau_fence_work(fence, nouveau_mm_free_work, *mm);
   (*mm) = NULL;
}

inline void
nouveau_buffer_release_gpu_storage(struct nv04_resource *buf)
{
   if (buf->fence && buf->fence->state < NOUVEAU_FENCE_STATE_FLUSHED) {
      nouveau_fence_work(buf->fence, nouveau_fence_unref_bo, buf->bo);
      buf->bo = NULL;
   } else {
      nouveau_bo_ref(NULL, &buf->bo);
   }

   if (buf->mm)
      release_allocation(&buf->mm, buf->fence);

   if (buf->domain == NOUVEAU_BO_VRAM)
      NOUVEAU_DRV_STAT_RES(buf, buf_obj_current_bytes_vid, -(uint64_t)buf->base.width0);
   if (buf->domain == NOUVEAU_BO_GART)
      NOUVEAU_DRV_STAT_RES(buf, buf_obj_current_bytes_sys, -(uint64_t)buf->base.width0);

   buf->domain = 0;
}

static inline bool
nouveau_buffer_reallocate(struct nouveau_screen *screen,
                          struct nv04_resource *buf, unsigned domain)
{
   nouveau_buffer_release_gpu_storage(buf);

   nouveau_fence_ref(NULL, &buf->fence);
   nouveau_fence_ref(NULL, &buf->fence_wr);

   buf->status &= NOUVEAU_BUFFER_STATUS_REALLOC_MASK;

   return nouveau_buffer_allocate(screen, buf, domain);
}

static void
nouveau_buffer_destroy(struct pipe_screen *pscreen,
                       struct pipe_resource *presource)
{
   struct nv04_resource *res = nv04_resource(presource);

   nouveau_buffer_release_gpu_storage(res);

   if (res->data && !(res->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY))
      align_free(res->data);

   nouveau_fence_ref(NULL, &res->fence);
   nouveau_fence_ref(NULL, &res->fence_wr);

   util_range_destroy(&res->valid_buffer_range);

   FREE(res);

   NOUVEAU_DRV_STAT(nouveau_screen(pscreen), buf_obj_current_count, -1);
}

/* Set up a staging area for the transfer. This is either done in "regular"
 * system memory if the driver supports push_data (nv50+) and the data is
 * small enough (and permit_pb == true), or in GART memory.
 */
static uint8_t *
nouveau_transfer_staging(struct nouveau_context *nv,
                         struct nouveau_transfer *tx, bool permit_pb)
{
   const unsigned adj = tx->base.box.x & NOUVEAU_MIN_BUFFER_MAP_ALIGN_MASK;
   const unsigned size = align(tx->base.box.width, 4) + adj;

   if (!nv->push_data)
      permit_pb = false;

   if ((size <= NOUVEAU_TRANSFER_PUSHBUF_THRESHOLD) && permit_pb) {
      tx->map = align_malloc(size, NOUVEAU_MIN_BUFFER_MAP_ALIGN);
      if (tx->map)
         tx->map += adj;
   } else {
      tx->mm =
         nouveau_mm_allocate(nv->screen->mm_GART, size, &tx->bo, &tx->offset);
      if (tx->bo) {
         tx->offset += adj;
         if (!nouveau_bo_map(tx->bo, 0, NULL))
            tx->map = (uint8_t *)tx->bo->map + tx->offset;
      }
   }
   return tx->map;
}
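
/* Worked example for the threshold above (illustrative numbers, not from
 * the original source): for a small write with box.width == 100 and
 * adj == 0x24, size == align(100, 4) + 0x24 == 136.  Since
 * 136 <= NOUVEAU_TRANSFER_PUSHBUF_THRESHOLD and push_data is available,
 * the transfer is bounced through a malloc'd buffer and pushed through the
 * command stream on flush/unmap; a 4 KiB write would instead get a GART
 * staging BO that is copied back with the DMA engine.
 */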

/* Copies data from the resource into the transfer's temporary GART
 * buffer. Also updates buf->data if present.
 *
 * Maybe just migrate to GART right away if we actually need to do this. */
static bool
nouveau_transfer_read(struct nouveau_context *nv, struct nouveau_transfer *tx)
{
   struct nv04_resource *buf = nv04_resource(tx->base.resource);
   const unsigned base = tx->base.box.x;
   const unsigned size = tx->base.box.width;

   NOUVEAU_DRV_STAT(nv->screen, buf_read_bytes_staging_vid, size);

   nv->copy_data(nv, tx->bo, tx->offset, NOUVEAU_BO_GART,
                 buf->bo, buf->offset + base, buf->domain, size);

   if (nouveau_bo_wait(tx->bo, NOUVEAU_BO_RD, nv->client))
      return false;

   if (buf->data)
      memcpy(buf->data + base, tx->map, size);

   return true;
}

static void
nouveau_transfer_write(struct nouveau_context *nv, struct nouveau_transfer *tx,
                       unsigned offset, unsigned size)
{
   struct nv04_resource *buf = nv04_resource(tx->base.resource);
   uint8_t *data = tx->map + offset;
   const unsigned base = tx->base.box.x + offset;
   const bool can_cb = !((base | size) & 3);

   if (buf->data)
      memcpy(data, buf->data + base, size);
   else
      buf->status |= NOUVEAU_BUFFER_STATUS_DIRTY;

   if (buf->domain == NOUVEAU_BO_VRAM)
      NOUVEAU_DRV_STAT(nv->screen, buf_write_bytes_staging_vid, size);
   if (buf->domain == NOUVEAU_BO_GART)
      NOUVEAU_DRV_STAT(nv->screen, buf_write_bytes_staging_sys, size);

   if (tx->bo)
      nv->copy_data(nv, buf->bo, buf->offset + base, buf->domain,
                    tx->bo, tx->offset + offset, NOUVEAU_BO_GART, size);
   else
   if (nv->push_cb && can_cb)
      nv->push_cb(nv, buf,
                  base, size / 4, (const uint32_t *)data);
   else
      nv->push_data(nv, buf->bo, buf->offset + base, buf->domain, size, data);

   nouveau_fence_ref(nv->screen->fence.current, &buf->fence);
   nouveau_fence_ref(nv->screen->fence.current, &buf->fence_wr);
}

/* Does a CPU wait for the buffer's backing data to become reliably accessible
 * for write/read by waiting on the buffer's relevant fences.
 */
static inline bool
nouveau_buffer_sync(struct nouveau_context *nv,
                    struct nv04_resource *buf, unsigned rw)
{
   if (rw == PIPE_TRANSFER_READ) {
      if (!buf->fence_wr)
         return true;
      NOUVEAU_DRV_STAT_RES(buf, buf_non_kernel_fence_sync_count,
                           !nouveau_fence_signalled(buf->fence_wr));
      if (!nouveau_fence_wait(buf->fence_wr, &nv->debug))
         return false;
   } else {
      if (!buf->fence)
         return true;
      NOUVEAU_DRV_STAT_RES(buf, buf_non_kernel_fence_sync_count,
                           !nouveau_fence_signalled(buf->fence));
      if (!nouveau_fence_wait(buf->fence, &nv->debug))
         return false;

      nouveau_fence_ref(NULL, &buf->fence);
   }
   nouveau_fence_ref(NULL, &buf->fence_wr);

   return true;
}

static inline bool
nouveau_buffer_busy(struct nv04_resource *buf, unsigned rw)
{
   if (rw == PIPE_TRANSFER_READ)
      return (buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr));
   else
      return (buf->fence && !nouveau_fence_signalled(buf->fence));
}

static inline void
nouveau_buffer_transfer_init(struct nouveau_transfer *tx,
                             struct pipe_resource *resource,
                             const struct pipe_box *box,
                             unsigned usage)
{
   tx->base.resource = resource;
   tx->base.level = 0;
   tx->base.usage = usage;
   tx->base.box.x = box->x;
   tx->base.box.y = 0;
   tx->base.box.z = 0;
   tx->base.box.width = box->width;
   tx->base.box.height = 1;
   tx->base.box.depth = 1;
   tx->base.stride = 0;
   tx->base.layer_stride = 0;

   tx->bo = NULL;
   tx->map = NULL;
}

static inline void
nouveau_buffer_transfer_del(struct nouveau_context *nv,
                            struct nouveau_transfer *tx)
{
   if (tx->map) {
      if (likely(tx->bo)) {
         nouveau_fence_work(nv->screen->fence.current,
                            nouveau_fence_unref_bo, tx->bo);
         if (tx->mm)
            release_allocation(&tx->mm, nv->screen->fence.current);
      } else {
         align_free(tx->map -
                    (tx->base.box.x & NOUVEAU_MIN_BUFFER_MAP_ALIGN_MASK));
      }
   }
}

/* Creates a cache in system memory of the buffer data. */
static bool
nouveau_buffer_cache(struct nouveau_context *nv, struct nv04_resource *buf)
{
   struct nouveau_transfer tx;
   bool ret;
   tx.base.resource = &buf->base;
   tx.base.box.x = 0;
   tx.base.box.width = buf->base.width0;
   tx.bo = NULL;
   tx.map = NULL;

   if (!buf->data)
      if (!nouveau_buffer_malloc(buf))
         return false;
   if (!(buf->status & NOUVEAU_BUFFER_STATUS_DIRTY))
      return true;
   nv->stats.buf_cache_count++;

   if (!nouveau_transfer_staging(nv, &tx, false))
      return false;

   ret = nouveau_transfer_read(nv, &tx);
   if (ret) {
      buf->status &= ~NOUVEAU_BUFFER_STATUS_DIRTY;
      memcpy(buf->data, tx.map, buf->base.width0);
   }
   nouveau_buffer_transfer_del(nv, &tx);
   return ret;
}


#define NOUVEAU_TRANSFER_DISCARD \
   (PIPE_TRANSFER_DISCARD_RANGE | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)

/* Checks whether it is possible to completely discard the memory backing this
 * resource. This can be useful if we would otherwise have to wait for a read
 * operation to complete on this data.
 */
static inline bool
nouveau_buffer_should_discard(struct nv04_resource *buf, unsigned usage)
{
   if (!(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE))
      return false;
   if (unlikely(buf->base.bind & PIPE_BIND_SHARED))
      return false;
   if (unlikely(usage & PIPE_TRANSFER_PERSISTENT))
      return false;
   return buf->mm && nouveau_buffer_busy(buf, PIPE_TRANSFER_WRITE);
}

/* Returns a pointer to a memory area representing a window into the
 * resource's data.
 *
 * This may or may not be the _actual_ memory area of the resource. However
 * when calling nouveau_buffer_transfer_unmap, if it wasn't the actual memory
 * area, the contents of the returned map are copied over to the resource.
 *
 * The usage indicates what the caller plans to do with the map:
 *
 * WRITE means that the user plans to write to it
 *
 * READ means that the user plans on reading from it
 *
 * DISCARD_WHOLE_RESOURCE means that the whole resource may be overwritten,
 * and even the parts that are not overwritten do not need to be preserved.
 *
 * DISCARD_RANGE means that all the data in the specified range is going to
 * be overwritten.
 *
 * The strategy for determining what kind of memory area to return is complex,
 * see comments inside of the function.
 */
static void *
nouveau_buffer_transfer_map(struct pipe_context *pipe,
                            struct pipe_resource *resource,
                            unsigned level, unsigned usage,
                            const struct pipe_box *box,
                            struct pipe_transfer **ptransfer)
{
   struct nouveau_context *nv = nouveau_context(pipe);
   struct nv04_resource *buf = nv04_resource(resource);
   struct nouveau_transfer *tx = MALLOC_STRUCT(nouveau_transfer);
   uint8_t *map;
   int ret;

   if (!tx)
      return NULL;
   nouveau_buffer_transfer_init(tx, resource, box, usage);
   *ptransfer = &tx->base;

   if (usage & PIPE_TRANSFER_READ)
      NOUVEAU_DRV_STAT(nv->screen, buf_transfers_rd, 1);
   if (usage & PIPE_TRANSFER_WRITE)
      NOUVEAU_DRV_STAT(nv->screen, buf_transfers_wr, 1);

   /* If we are trying to write to an uninitialized range, the user shouldn't
    * care what was there before. So we can treat the write as if the target
    * range were being discarded. Furthermore, since we know that even if this
    * buffer is busy due to GPU activity, because the contents were
    * uninitialized, the GPU can't care what was there, and so we can treat
    * the write as being unsynchronized.
    */
   if ((usage & PIPE_TRANSFER_WRITE) &&
       !util_ranges_intersect(&buf->valid_buffer_range, box->x, box->x + box->width))
      usage |= PIPE_TRANSFER_DISCARD_RANGE | PIPE_TRANSFER_UNSYNCHRONIZED;

   if (buf->domain == NOUVEAU_BO_VRAM) {
      if (usage & NOUVEAU_TRANSFER_DISCARD) {
         /* Set up a staging area for the user to write to. It will be copied
          * back into VRAM on unmap. */
         if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)
            buf->status &= NOUVEAU_BUFFER_STATUS_REALLOC_MASK;
         nouveau_transfer_staging(nv, tx, true);
      } else {
         if (buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
            /* The GPU is currently writing to this buffer. Copy its current
             * contents to a staging area in the GART. This is necessary since
             * not the whole area being mapped is being discarded.
             */
            if (buf->data) {
               align_free(buf->data);
               buf->data = NULL;
            }
            nouveau_transfer_staging(nv, tx, false);
            nouveau_transfer_read(nv, tx);
         } else {
            /* The buffer is currently idle. Create a staging area for writes,
             * and make sure that the cached data is up-to-date. */
            if (usage & PIPE_TRANSFER_WRITE)
               nouveau_transfer_staging(nv, tx, true);
            if (!buf->data)
               nouveau_buffer_cache(nv, buf);
         }
      }
      return buf->data ? (buf->data + box->x) : tx->map;
   } else
   if (unlikely(buf->domain == 0)) {
      return buf->data + box->x;
   }

   /* At this point, buf->domain == GART */

   if (nouveau_buffer_should_discard(buf, usage)) {
      int ref = buf->base.reference.count - 1;
      nouveau_buffer_reallocate(nv->screen, buf, buf->domain);
      if (ref > 0) /* any references inside context possible ? */
         nv->invalidate_resource_storage(nv, &buf->base, ref);
   }

   /* Note that nouveau_bo_map ends up doing a nouveau_bo_wait with the
    * relevant flags. If buf->mm is set, that means this resource is part of a
    * larger slab bo that holds multiple resources. So in that case, don't
    * wait on the whole slab and instead use the logic below to return a
    * reasonable buffer for that case.
    */
   ret = nouveau_bo_map(buf->bo,
                        buf->mm ? 0 : nouveau_screen_transfer_flags(usage),
                        nv->client);
   if (ret) {
      FREE(tx);
      return NULL;
   }
   map = (uint8_t *)buf->bo->map + buf->offset + box->x;

   /* using kernel fences only if !buf->mm */
   if ((usage & PIPE_TRANSFER_UNSYNCHRONIZED) || !buf->mm)
      return map;

   /* If the GPU is currently reading/writing this buffer, we shouldn't
    * interfere with its progress. So instead we either wait for the GPU to
    * complete its operation, or set up a staging area to perform our work in.
    */
   if (nouveau_buffer_busy(buf, usage & PIPE_TRANSFER_READ_WRITE)) {
      if (unlikely(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
                            PIPE_TRANSFER_PERSISTENT))) {
         /* Discarding was not possible, must sync because
          * subsequent transfers might use UNSYNCHRONIZED. */
         nouveau_buffer_sync(nv, buf, usage & PIPE_TRANSFER_READ_WRITE);
      } else
      if (usage & PIPE_TRANSFER_DISCARD_RANGE) {
         /* The whole range is being discarded, so it doesn't matter what was
          * there before. No need to copy anything over. */
         nouveau_transfer_staging(nv, tx, true);
         map = tx->map;
      } else
      if (nouveau_buffer_busy(buf, PIPE_TRANSFER_READ)) {
         if (usage & PIPE_TRANSFER_DONTBLOCK)
            map = NULL;
         else
            nouveau_buffer_sync(nv, buf, usage & PIPE_TRANSFER_READ_WRITE);
      } else {
         /* The returned map is expected to reflect the buffer's current
          * contents, so copy them over into the staging area. */
         nouveau_transfer_staging(nv, tx, true);
         if (tx->map)
            memcpy(tx->map, map, box->width);
         map = tx->map;
      }
   }
   if (!map)
      FREE(tx);
   return map;
}

static void
nouveau_buffer_transfer_flush_region(struct pipe_context *pipe,
                                     struct pipe_transfer *transfer,
                                     const struct pipe_box *box)
{
   struct nouveau_transfer *tx = nouveau_transfer(transfer);
   struct nv04_resource *buf = nv04_resource(transfer->resource);

   if (tx->map)
      nouveau_transfer_write(nouveau_context(pipe), tx, box->x, box->width);

   util_range_add(&buf->valid_buffer_range,
                  tx->base.box.x + box->x,
                  tx->base.box.x + box->x + box->width);
}

/* Unmap stage of the transfer. If it was a WRITE transfer and the map that
 * was returned was not the real resource's data, this needs to transfer the
 * data back to the resource.
 *
 * Also marks vbo dirty based on the buffer's binding.
 */
static void
nouveau_buffer_transfer_unmap(struct pipe_context *pipe,
                              struct pipe_transfer *transfer)
{
   struct nouveau_context *nv = nouveau_context(pipe);
   struct nouveau_transfer *tx = nouveau_transfer(transfer);
   struct nv04_resource *buf = nv04_resource(transfer->resource);

   if (tx->base.usage & PIPE_TRANSFER_WRITE) {
      if (!(tx->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
         if (tx->map)
            nouveau_transfer_write(nv, tx, 0, tx->base.box.width);

         util_range_add(&buf->valid_buffer_range,
                        tx->base.box.x, tx->base.box.x + tx->base.box.width);
      }

      if (likely(buf->domain)) {
         const uint8_t bind = buf->base.bind;
         /* make sure we invalidate dedicated caches */
         if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER))
            nv->vbo_dirty = true;
      }
   }

   if (!tx->bo && (tx->base.usage & PIPE_TRANSFER_WRITE))
      NOUVEAU_DRV_STAT(nv->screen, buf_write_bytes_direct, tx->base.box.width);

   nouveau_buffer_transfer_del(nv, tx);
   FREE(tx);
}
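
/* Usage sketch from the Gallium side (illustrative, not part of the
 * original file; 'res', 'offset', 'bytes' and 'vertices' are placeholders):
 *
 *    struct pipe_transfer *xfer;
 *    struct pipe_box box;
 *    u_box_1d(offset, bytes, &box);
 *    void *p = pipe->transfer_map(pipe, res, 0,
 *                                 PIPE_TRANSFER_WRITE |
 *                                 PIPE_TRANSFER_DISCARD_RANGE, &box, &xfer);
 *    memcpy(p, vertices, bytes);
 *    pipe->transfer_unmap(pipe, xfer);
 *
 * This dispatches to nouveau_buffer_transfer_map/_unmap above through the
 * resource vtbl and may hand back either the buffer's own storage or a
 * staging area that gets copied back on unmap.
 */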

void
nouveau_copy_buffer(struct nouveau_context *nv,
                    struct nv04_resource *dst, unsigned dstx,
                    struct nv04_resource *src, unsigned srcx, unsigned size)
{
   assert(dst->base.target == PIPE_BUFFER && src->base.target == PIPE_BUFFER);

   if (likely(dst->domain) && likely(src->domain)) {
      nv->copy_data(nv,
                    dst->bo, dst->offset + dstx, dst->domain,
                    src->bo, src->offset + srcx, src->domain, size);

      dst->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      nouveau_fence_ref(nv->screen->fence.current, &dst->fence);
      nouveau_fence_ref(nv->screen->fence.current, &dst->fence_wr);

      src->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
      nouveau_fence_ref(nv->screen->fence.current, &src->fence);
   } else {
      struct pipe_box src_box;
      src_box.x = srcx;
      src_box.y = 0;
      src_box.z = 0;
      src_box.width = size;
      src_box.height = 1;
      src_box.depth = 1;
      util_resource_copy_region(&nv->pipe,
                                &dst->base, 0, dstx, 0, 0,
                                &src->base, 0, &src_box);
   }

   util_range_add(&dst->valid_buffer_range, dstx, dstx + size);
}

void *
nouveau_resource_map_offset(struct nouveau_context *nv,
                            struct nv04_resource *res, uint32_t offset,
                            uint32_t flags)
{
   if (unlikely(res->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY))
      return res->data + offset;

   if (res->domain == NOUVEAU_BO_VRAM) {
      if (!res->data || (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING))
         nouveau_buffer_cache(nv, res);
   }
   if (res->domain != NOUVEAU_BO_GART)
      return res->data + offset;

   if (res->mm) {
      unsigned rw;
      rw = (flags & NOUVEAU_BO_WR) ? PIPE_TRANSFER_WRITE : PIPE_TRANSFER_READ;
      nouveau_buffer_sync(nv, res, rw);
      if (nouveau_bo_map(res->bo, 0, NULL))
         return NULL;
   } else {
      if (nouveau_bo_map(res->bo, flags, nv->client))
         return NULL;
   }
   return (uint8_t *)res->bo->map + res->offset + offset;
}

const struct u_resource_vtbl nouveau_buffer_vtbl =
{
   u_default_resource_get_handle,        /* get_handle */
   nouveau_buffer_destroy,               /* resource_destroy */
   nouveau_buffer_transfer_map,          /* transfer_map */
   nouveau_buffer_transfer_flush_region, /* transfer_flush_region */
   nouveau_buffer_transfer_unmap,        /* transfer_unmap */
};

struct pipe_resource *
nouveau_buffer_create(struct pipe_screen *pscreen,
                      const struct pipe_resource *templ)
{
   struct nouveau_screen *screen = nouveau_screen(pscreen);
   struct nv04_resource *buffer;
   bool ret;

   buffer = CALLOC_STRUCT(nv04_resource);
   if (!buffer)
      return NULL;

   buffer->base = *templ;
   buffer->vtbl = &nouveau_buffer_vtbl;
   pipe_reference_init(&buffer->base.reference, 1);
   buffer->base.screen = pscreen;

   if (buffer->base.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
                             PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
      buffer->domain = NOUVEAU_BO_GART;
   } else if (buffer->base.bind == 0 || (buffer->base.bind &
              (screen->vidmem_bindings & screen->sysmem_bindings))) {
      switch (buffer->base.usage) {
      case PIPE_USAGE_DEFAULT:
      case PIPE_USAGE_IMMUTABLE:
         buffer->domain = NV_VRAM_DOMAIN(screen);
         break;
      case PIPE_USAGE_DYNAMIC:
         /* For most apps, we'd have to do staging transfers to avoid sync
          * with this usage, and GART -> GART copies would be suboptimal.
          */
         buffer->domain = NV_VRAM_DOMAIN(screen);
         break;
      case PIPE_USAGE_STAGING:
      case PIPE_USAGE_STREAM:
         buffer->domain = NOUVEAU_BO_GART;
         break;
      default:
         assert(0);
         break;
      }
   } else {
      if (buffer->base.bind & screen->vidmem_bindings)
         buffer->domain = NV_VRAM_DOMAIN(screen);
      else
      if (buffer->base.bind & screen->sysmem_bindings)
         buffer->domain = NOUVEAU_BO_GART;
   }

   ret = nouveau_buffer_allocate(screen, buffer, buffer->domain);

   if (ret == false)
      goto fail;

   if (buffer->domain == NOUVEAU_BO_VRAM && screen->hint_buf_keep_sysmem_copy)
      nouveau_buffer_cache(NULL, buffer);

   NOUVEAU_DRV_STAT(screen, buf_obj_current_count, 1);

   util_range_init(&buffer->valid_buffer_range);

   return &buffer->base;

fail:
   FREE(buffer);
   return NULL;
}
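
/* Placement example (illustrative, not from the original source; assumes
 * PIPE_BIND_VERTEX_BUFFER is covered by both vidmem_bindings and
 * sysmem_bindings): a template like
 *
 *    struct pipe_resource templ = {
 *       .target = PIPE_BUFFER,
 *       .format = PIPE_FORMAT_R8_UNORM,
 *       .bind = PIPE_BIND_VERTEX_BUFFER,
 *       .usage = PIPE_USAGE_STREAM,
 *       .width0 = 64 * 1024,
 *       .height0 = 1,
 *       .depth0 = 1,
 *       .array_size = 1,
 *    };
 *
 * passed to pscreen->resource_create() ends up in GART, while the same
 * template with PIPE_USAGE_DEFAULT or PIPE_USAGE_IMMUTABLE is placed in
 * VRAM by the switch above.
 */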

struct pipe_resource *
nouveau_user_buffer_create(struct pipe_screen *pscreen, void *ptr,
                           unsigned bytes, unsigned bind)
{
   struct nv04_resource *buffer;

   buffer = CALLOC_STRUCT(nv04_resource);
   if (!buffer)
      return NULL;

   pipe_reference_init(&buffer->base.reference, 1);
   buffer->vtbl = &nouveau_buffer_vtbl;
   buffer->base.screen = pscreen;
   buffer->base.format = PIPE_FORMAT_R8_UNORM;
   buffer->base.usage = PIPE_USAGE_IMMUTABLE;
   buffer->base.bind = bind;
   buffer->base.width0 = bytes;
   buffer->base.height0 = 1;
   buffer->base.depth0 = 1;

   buffer->data = ptr;
   buffer->status = NOUVEAU_BUFFER_STATUS_USER_MEMORY;

   util_range_init(&buffer->valid_buffer_range);
   util_range_add(&buffer->valid_buffer_range, 0, bytes);

   return &buffer->base;
}

static inline bool
nouveau_buffer_data_fetch(struct nouveau_context *nv, struct nv04_resource *buf,
                          struct nouveau_bo *bo, unsigned offset, unsigned size)
{
   if (!nouveau_buffer_malloc(buf))
      return false;
   if (nouveau_bo_map(bo, NOUVEAU_BO_RD, nv->client))
      return false;
   memcpy(buf->data, (uint8_t *)bo->map + offset, size);
   return true;
}

/* Migrate a linear buffer (vertex, index, constants) USER -> GART -> VRAM. */
bool
nouveau_buffer_migrate(struct nouveau_context *nv,
                       struct nv04_resource *buf, const unsigned new_domain)
{
   struct nouveau_screen *screen = nv->screen;
   struct nouveau_bo *bo;
   const unsigned old_domain = buf->domain;
   unsigned size = buf->base.width0;
   unsigned offset;
   int ret;

   assert(new_domain != old_domain);

   if (new_domain == NOUVEAU_BO_GART && old_domain == 0) {
      if (!nouveau_buffer_allocate(screen, buf, new_domain))
         return false;
      ret = nouveau_bo_map(buf->bo, 0, nv->client);
      if (ret)
         return false;
      memcpy((uint8_t *)buf->bo->map + buf->offset, buf->data, size);
      align_free(buf->data);
   } else
   if (old_domain != 0 && new_domain != 0) {
      struct nouveau_mm_allocation *mm = buf->mm;

      if (new_domain == NOUVEAU_BO_VRAM) {
         /* keep a system memory copy of our data in case we hit a fallback */
         if (!nouveau_buffer_data_fetch(nv, buf, buf->bo, buf->offset, size))
            return false;
         if (nouveau_mesa_debug)
            debug_printf("migrating %u KiB to VRAM\n", size / 1024);
      }

      offset = buf->offset;
      bo = buf->bo;
      buf->bo = NULL;
      buf->mm = NULL;
      nouveau_buffer_allocate(screen, buf, new_domain);

      nv->copy_data(nv, buf->bo, buf->offset, new_domain,
                    bo, offset, old_domain, buf->base.width0);

      nouveau_fence_work(screen->fence.current, nouveau_fence_unref_bo, bo);
      if (mm)
         release_allocation(&mm, screen->fence.current);
   } else
   if (new_domain == NOUVEAU_BO_VRAM && old_domain == 0) {
      struct nouveau_transfer tx;
      if (!nouveau_buffer_allocate(screen, buf, NOUVEAU_BO_VRAM))
         return false;
      tx.base.resource = &buf->base;
      tx.base.box.x = 0;
      tx.base.box.width = buf->base.width0;
      tx.bo = NULL;
      tx.map = NULL;
      if (!nouveau_transfer_staging(nv, &tx, false))
         return false;
      nouveau_transfer_write(nv, &tx, 0, tx.base.box.width);
      nouveau_buffer_transfer_del(nv, &tx);
   } else
      return false;

   assert(buf->domain == new_domain);
   return true;
}

/* Migrate data from glVertexAttribPointer(non-VBO) user buffers to GART.
 * We'd like to only allocate @size bytes here, but then we'd have to rebase
 * the vertex indices ...
 */
bool
nouveau_user_buffer_upload(struct nouveau_context *nv,
                           struct nv04_resource *buf,
                           unsigned base, unsigned size)
{
   struct nouveau_screen *screen = nouveau_screen(buf->base.screen);
   int ret;

   assert(buf->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY);

   buf->base.width0 = base + size;
   if (!nouveau_buffer_reallocate(screen, buf, NOUVEAU_BO_GART))
      return false;

   ret = nouveau_bo_map(buf->bo, 0, nv->client);
   if (ret)
      return false;
   memcpy((uint8_t *)buf->bo->map + buf->offset + base, buf->data + base, size);

   return true;
}

/* Invalidate underlying buffer storage, reset fences, reallocate to non-busy
 * buffer.
 */
void
nouveau_buffer_invalidate(struct pipe_context *pipe,
                          struct pipe_resource *resource)
{
   struct nouveau_context *nv = nouveau_context(pipe);
   struct nv04_resource *buf = nv04_resource(resource);
   int ref = buf->base.reference.count - 1;

   /* Shared buffers shouldn't get reallocated */
   if (unlikely(buf->base.bind & PIPE_BIND_SHARED))
      return;

   /* We can't touch persistent/coherent buffers */
   if (buf->base.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
                          PIPE_RESOURCE_FLAG_MAP_COHERENT))
      return;

   /* If the buffer is sub-allocated and not currently being written, just
    * wipe the valid buffer range. Otherwise we have to create fresh
    * storage. (We don't keep track of fences for non-sub-allocated BO's.)
    */
   if (buf->mm && !nouveau_buffer_busy(buf, PIPE_TRANSFER_WRITE)) {
      util_range_set_empty(&buf->valid_buffer_range);
   } else {
      nouveau_buffer_reallocate(nv->screen, buf, buf->domain);
      if (ref > 0) /* any references inside context possible ? */
         nv->invalidate_resource_storage(nv, &buf->base, ref);
   }
}


/* Scratch data allocation. */

static inline int
nouveau_scratch_bo_alloc(struct nouveau_context *nv, struct nouveau_bo **pbo,
                         unsigned size)
{
   return nouveau_bo_new(nv->screen->device, NOUVEAU_BO_GART | NOUVEAU_BO_MAP,
                         4096, size, NULL, pbo);
}

static void
nouveau_scratch_unref_bos(void *d)
{
   struct runout *b = d;
   int i;

   for (i = 0; i < b->nr; ++i)
      nouveau_bo_ref(NULL, &b->bo[i]);

   FREE(b);
}

void
nouveau_scratch_runout_release(struct nouveau_context *nv)
{
   if (!nv->scratch.runout)
      return;

   if (!nouveau_fence_work(nv->screen->fence.current, nouveau_scratch_unref_bos,
                           nv->scratch.runout))
      return;

   nv->scratch.end = 0;
   nv->scratch.runout = NULL;
}

/* Allocate an extra bo if we can't fit everything we need simultaneously.
 * (Could happen for very large user arrays.)
 */
static inline bool
nouveau_scratch_runout(struct nouveau_context *nv, unsigned size)
{
   int ret;
   unsigned n;

   if (nv->scratch.runout)
      n = nv->scratch.runout->nr;
   else
      n = 0;
   nv->scratch.runout = REALLOC(nv->scratch.runout, n == 0 ? 0 :
                                (sizeof(*nv->scratch.runout) + (n + 0) * sizeof(void *)),
                                 sizeof(*nv->scratch.runout) + (n + 1) * sizeof(void *));
   nv->scratch.runout->nr = n + 1;
   nv->scratch.runout->bo[n] = NULL;

   ret = nouveau_scratch_bo_alloc(nv, &nv->scratch.runout->bo[n], size);
   if (!ret) {
      ret = nouveau_bo_map(nv->scratch.runout->bo[n], 0, NULL);
      if (ret)
         nouveau_bo_ref(NULL, &nv->scratch.runout->bo[--nv->scratch.runout->nr]);
   }
   if (!ret) {
      nv->scratch.current = nv->scratch.runout->bo[n];
      nv->scratch.offset = 0;
      nv->scratch.end = size;
      nv->scratch.map = nv->scratch.current->map;
   }
   return !ret;
}

/* Continue to next scratch buffer, if available (no wrapping, large enough).
 * Allocate it if it has not yet been created.
 */
static inline bool
nouveau_scratch_next(struct nouveau_context *nv, unsigned size)
{
   struct nouveau_bo *bo;
   int ret;
   const unsigned i = (nv->scratch.id + 1) % NOUVEAU_MAX_SCRATCH_BUFS;

   if ((size > nv->scratch.bo_size) || (i == nv->scratch.wrap))
      return false;
   nv->scratch.id = i;

   bo = nv->scratch.bo[i];
   if (!bo) {
      ret = nouveau_scratch_bo_alloc(nv, &bo, nv->scratch.bo_size);
      if (ret)
         return false;
      nv->scratch.bo[i] = bo;
   }
   nv->scratch.current = bo;
   nv->scratch.offset = 0;
   nv->scratch.end = nv->scratch.bo_size;

   ret = nouveau_bo_map(bo, NOUVEAU_BO_WR, nv->client);
   if (!ret)
      nv->scratch.map = bo->map;
   return !ret;
}

static bool
nouveau_scratch_more(struct nouveau_context *nv, unsigned min_size)
{
   bool ret;

   ret = nouveau_scratch_next(nv, min_size);
   if (!ret)
      ret = nouveau_scratch_runout(nv, min_size);
   return ret;
}


/* Copy data to a scratch buffer and return the address & bo the data resides in. */
uint64_t
nouveau_scratch_data(struct nouveau_context *nv,
                     const void *data, unsigned base, unsigned size,
                     struct nouveau_bo **bo)
{
   unsigned bgn = MAX2(base, nv->scratch.offset);
   unsigned end = bgn + size;

   if (end >= nv->scratch.end) {
      end = base + size;
      if (!nouveau_scratch_more(nv, end))
         return 0;
      bgn = base;
   }
   nv->scratch.offset = align(end, 4);

   memcpy(nv->scratch.map + bgn, (const uint8_t *)data + base, size);

   *bo = nv->scratch.current;
   return (*bo)->offset + (bgn - base);
}
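
/* Worked example for the address math above (illustrative numbers, not from
 * the original source): with nv->scratch.offset == 0x1000 and a call with
 * base == 0x40, size == 0x100, we get bgn == MAX2(0x40, 0x1000) == 0x1000,
 * copy the 0x100 bytes at data + 0x40 to map + 0x1000, and return
 * bo->offset + 0x1000 - 0x40.  Adding the caller's original base of 0x40 to
 * that address lands exactly on the copied bytes, so vertex/index offsets
 * relative to the start of the user array keep working unchanged.
 */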

void *
nouveau_scratch_get(struct nouveau_context *nv,
                    unsigned size, uint64_t *gpu_addr, struct nouveau_bo **pbo)
{
   unsigned bgn = nv->scratch.offset;
   unsigned end = nv->scratch.offset + size;

   if (end >= nv->scratch.end) {
      end = size;
      if (!nouveau_scratch_more(nv, end))
         return NULL;
      bgn = 0;
   }
   nv->scratch.offset = align(end, 4);

   *pbo = nv->scratch.current;
   *gpu_addr = nv->scratch.current->offset + bgn;
   return nv->scratch.map + bgn;
}