/*
 * Copyright 2007 Red Hat Inc.
 * Copyright 2007-2017 Intel Corporation
 * Copyright 2006 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/*
 * Authors: Thomas Hellström <thellstrom (at) vmware.com>
 *          Keith Whitwell <keithw (at) vmware.com>
 *          Eric Anholt <eric (at) anholt.net>
 *          Dave Airlie <airlied (at) linux.ie>
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <xf86drm.h>
#include <util/u_atomic.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <stdbool.h>

#include "errno.h"
#ifndef ETIME
#define ETIME ETIMEDOUT
#endif
#include "common/gen_clflush.h"
#include "common/gen_debug.h"
#include "common/gen_device_info.h"
#include "libdrm_macros.h"
#include "main/macros.h"
#include "util/macros.h"
#include "util/hash_table.h"
#include "util/list.h"
#include "brw_bufmgr.h"
#include "brw_context.h"
#include "string.h"

#include "i915_drm.h"

#ifdef HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#else
#define VG(x)
#endif

/* VALGRIND_FREELIKE_BLOCK unfortunately does not actually undo the earlier
 * VALGRIND_MALLOCLIKE_BLOCK but instead leaves vg convinced the memory is
 * leaked.  All because it does not call VG(cli_free) from its
 * VG_USERREQ__FREELIKE_BLOCK handler.  Instead of treating the memory like
 * an allocation, we mark it available for use upon mmapping and remove
 * it upon unmapping.
 */
#define VG_DEFINED(ptr, size) VG(VALGRIND_MAKE_MEM_DEFINED(ptr, size))
#define VG_NOACCESS(ptr, size) VG(VALGRIND_MAKE_MEM_NOACCESS(ptr, size))

#define PAGE_SIZE 4096

#define FILE_DEBUG_FLAG DEBUG_BUFMGR

/* Atomically add "add" to *v unless *v == unless; returns true if *v already
 * equalled "unless" and the addition was therefore skipped.
 */
static inline int
atomic_add_unless(int *v, int add, int unless)
{
   int c, old;
   c = p_atomic_read(v);
   while (c != unless && (old = p_atomic_cmpxchg(v, c, c + add)) != c)
      c = old;
   return c == unless;
}

struct bo_cache_bucket {
   struct list_head head;
   uint64_t size;
};

struct brw_bufmgr {
   int fd;

   mtx_t lock;

   /** Array of lists of cached gem objects of power-of-two sizes */
   struct bo_cache_bucket cache_bucket[14 * 4];
   int num_buckets;
   time_t time;

   struct hash_table *name_table;
   struct hash_table *handle_table;

   bool has_llc:1;
   bool has_mmap_wc:1;
   bool bo_reuse:1;
};

static int bo_set_tiling_internal(struct brw_bo *bo, uint32_t tiling_mode,
                                  uint32_t stride);

static void bo_free(struct brw_bo *bo);

static uint32_t
key_hash_uint(const void *key)
{
   return _mesa_hash_data(key, 4);
}

static bool
key_uint_equal(const void *a, const void *b)
{
   return *((unsigned *) a) == *((unsigned *) b);
}

static struct brw_bo *
hash_find_bo(struct hash_table *ht, unsigned int key)
{
   struct hash_entry *entry = _mesa_hash_table_search(ht, &key);
   return entry ? (struct brw_bo *) entry->data : NULL;
}

static uint64_t
bo_tile_size(struct brw_bufmgr *bufmgr, uint64_t size, uint32_t tiling)
{
   if (tiling == I915_TILING_NONE)
      return size;

   /* 965+ just need multiples of page size for tiling */
   return ALIGN(size, 4096);
}

/*
 * Round a given pitch up to the minimum required for X tiling on a
 * given chip.  We use 512 as the minimum to allow for a later tiling
 * change.
 */
static uint32_t
bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t pitch, uint32_t tiling)
{
   unsigned long tile_width;

   /* If untiled, then just align it so that we can do rendering
    * to it with the 3D engine.
    */
   if (tiling == I915_TILING_NONE)
      return ALIGN(pitch, 64);

   if (tiling == I915_TILING_X)
      tile_width = 512;
   else
      tile_width = 128;

   /* 965 is flexible */
   return ALIGN(pitch, tile_width);
}

/**
 * This function finds the correct bucket fit for the input size.
 * It works in O(1), computing the bucket index directly from the
 * requested size rather than iterating over all the buckets.
 */
static struct bo_cache_bucket *
bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size)
{
   /* Calculating the pages and rounding up to the page size. */
   const unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;

   /* Row  Bucket sizes    clz((x-1) | 3)   Row    Column
    *      in pages                       stride   size
    *   0:   1  2  3  4 -> 30 30 30 30      4       1
    *   1:   5  6  7  8 -> 29 29 29 29      4       1
    *   2:  10 12 14 16 -> 28 28 28 28      8       2
    *   3:  20 24 28 32 -> 27 27 27 27     16       4
    */
   const unsigned row = 30 - __builtin_clz((pages - 1) | 3);
   const unsigned row_max_pages = 4 << row;

   /* The '& ~2' is the special case for row 1.  In row 1, max pages /
    * 2 is 2, but the previous row maximum is zero (because there is
    * no previous row).  All row maximum sizes are power of 2, so that
    * is the only case where that bit will be set.
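    *
    * Illustrative example, following the arithmetic above: a 10-page
    * request gives clz((10 - 1) | 3) = clz(11) = 28, so row = 2 and
    * row_max_pages = 16.  Then prev_row_max_pages = (16 / 2) & ~2 = 8,
    * col_size_log2 = 1 and col = (10 - 8 + 1) >> 1 = 1, so the index is
    * (2 * 4) + (1 - 1) = 8, i.e. the 40960-byte (10-page) bucket set up
    * by init_cache_buckets().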
    */
   const unsigned prev_row_max_pages = (row_max_pages / 2) & ~2;
   int col_size_log2 = row - 1;
   col_size_log2 += (col_size_log2 < 0);

   const unsigned col = (pages - prev_row_max_pages +
                        ((1 << col_size_log2) - 1)) >> col_size_log2;

   /* Calculating the index based on the row and column. */
   const unsigned index = (row * 4) + (col - 1);

   return (index < bufmgr->num_buckets) ?
          &bufmgr->cache_bucket[index] : NULL;
}

int
brw_bo_busy(struct brw_bo *bo)
{
   struct brw_bufmgr *bufmgr = bo->bufmgr;
   struct drm_i915_gem_busy busy = { .handle = bo->gem_handle };

   int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
   if (ret == 0) {
      bo->idle = !busy.busy;
      return busy.busy;
   }
   return false;
}

int
brw_bo_madvise(struct brw_bo *bo, int state)
{
   struct drm_i915_gem_madvise madv = {
      .handle = bo->gem_handle,
      .madv = state,
      .retained = 1,
   };

   drmIoctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);

   return madv.retained;
}

/* drop the oldest entries that have been purged by the kernel */
static void
brw_bo_cache_purge_bucket(struct brw_bufmgr *bufmgr,
                          struct bo_cache_bucket *bucket)
{
   list_for_each_entry_safe(struct brw_bo, bo, &bucket->head, head) {
      if (brw_bo_madvise(bo, I915_MADV_DONTNEED))
         break;

      list_del(&bo->head);
      bo_free(bo);
   }
}

static struct brw_bo *
bo_alloc_internal(struct brw_bufmgr *bufmgr,
                  const char *name,
                  uint64_t size,
                  unsigned flags,
                  uint32_t tiling_mode,
                  uint32_t stride, uint64_t alignment)
{
   struct brw_bo *bo;
   unsigned int page_size = getpagesize();
   int ret;
   struct bo_cache_bucket *bucket;
   bool alloc_from_cache;
   uint64_t bo_size;
   bool busy = false;
   bool zeroed = false;

   if (flags & BO_ALLOC_BUSY)
      busy = true;

   if (flags & BO_ALLOC_ZEROED)
      zeroed = true;

   /* BUSY doesn't really jive with ZEROED as we have to wait for it to
    * be idle before we can memset.  Just disallow that combination.
    */
   assert(!(busy && zeroed));

   /* Round the allocated size up to a power of two number of pages. */
   bucket = bucket_for_size(bufmgr, size);

   /* If we don't have caching at this size, don't actually round the
    * allocation up.
    */
   if (bucket == NULL) {
      bo_size = size;
      if (bo_size < page_size)
         bo_size = page_size;
   } else {
      bo_size = bucket->size;
   }

   mtx_lock(&bufmgr->lock);
   /* Get a buffer out of the cache if available */
retry:
   alloc_from_cache = false;
   if (bucket != NULL && !list_empty(&bucket->head)) {
      if (busy && !zeroed) {
         /* Allocate new render-target BOs from the tail (MRU)
          * of the list, as it will likely be hot in the GPU
          * cache and in the aperture for us.  If the caller
          * asked us to zero the buffer, we don't want this
          * because we are going to mmap it.
          */
         bo = LIST_ENTRY(struct brw_bo, bucket->head.prev, head);
         list_del(&bo->head);
         alloc_from_cache = true;
         bo->align = alignment;
      } else {
         assert(alignment == 0);
         /* For non-render-target BOs (where we're probably
          * going to map it first thing in order to fill it
          * with data), check if the last BO in the cache is
          * unbusy, and only reuse in that case.  Otherwise,
          * allocating a new buffer is probably faster than
          * waiting for the GPU to finish.
          */
         bo = LIST_ENTRY(struct brw_bo, bucket->head.next, head);
         if (!brw_bo_busy(bo)) {
            alloc_from_cache = true;
            list_del(&bo->head);
         }
      }

      if (alloc_from_cache) {
         if (!brw_bo_madvise(bo, I915_MADV_WILLNEED)) {
            bo_free(bo);
            brw_bo_cache_purge_bucket(bufmgr, bucket);
            goto retry;
         }

         if (bo_set_tiling_internal(bo, tiling_mode, stride)) {
            bo_free(bo);
            goto retry;
         }

         if (zeroed) {
            void *map = brw_bo_map(NULL, bo, MAP_WRITE | MAP_RAW);
            if (!map) {
               bo_free(bo);
               goto retry;
            }
            memset(map, 0, bo_size);
         }
      }
   }

   if (!alloc_from_cache) {
      bo = calloc(1, sizeof(*bo));
      if (!bo)
         goto err;

      bo->size = bo_size;
      bo->idle = true;

      struct drm_i915_gem_create create = { .size = bo_size };

      /* All new BOs we get from the kernel are zeroed, so we don't need to
       * worry about that here.
       */
      ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CREATE, &create);
      if (ret != 0) {
         free(bo);
         goto err;
      }

      bo->gem_handle = create.handle;

      bo->bufmgr = bufmgr;
      bo->align = alignment;

      bo->tiling_mode = I915_TILING_NONE;
      bo->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
      bo->stride = 0;

      if (bo_set_tiling_internal(bo, tiling_mode, stride))
         goto err_free;

      /* Calling set_domain() will allocate pages for the BO outside of the
       * struct mutex lock in the kernel, which is more efficient than waiting
       * to create them during the first execbuf that uses the BO.
       */
      struct drm_i915_gem_set_domain sd = {
         .handle = bo->gem_handle,
         .read_domains = I915_GEM_DOMAIN_CPU,
         .write_domain = 0,
      };

      if (drmIoctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd) != 0)
         goto err_free;
   }

   bo->name = name;
   p_atomic_set(&bo->refcount, 1);
   bo->reusable = true;
   bo->cache_coherent = bufmgr->has_llc;
   bo->index = -1;

   mtx_unlock(&bufmgr->lock);

   DBG("bo_create: buf %d (%s) %llub\n", bo->gem_handle, bo->name,
       (unsigned long long) size);

   return bo;

err_free:
   bo_free(bo);
err:
   mtx_unlock(&bufmgr->lock);
   return NULL;
}

struct brw_bo *
brw_bo_alloc(struct brw_bufmgr *bufmgr,
             const char *name, uint64_t size, uint64_t alignment)
{
   return bo_alloc_internal(bufmgr, name, size, 0, I915_TILING_NONE, 0, 0);
}

struct brw_bo *
brw_bo_alloc_tiled(struct brw_bufmgr *bufmgr, const char *name,
                   uint64_t size, uint32_t tiling_mode, uint32_t pitch,
                   unsigned flags)
{
   return bo_alloc_internal(bufmgr, name, size, flags, tiling_mode, pitch, 0);
}

struct brw_bo *
brw_bo_alloc_tiled_2d(struct brw_bufmgr *bufmgr, const char *name,
                      int x, int y, int cpp, uint32_t tiling,
                      uint32_t *pitch, unsigned flags)
{
   uint64_t size;
   uint32_t stride;
   unsigned long aligned_y, height_alignment;

   /* If we're tiled, our allocations are in 8 or 32-row blocks,
    * so failure to align our height means that we won't allocate
    * enough pages.
    *
    * If we're untiled, we still have to align to 2 rows high
    * because the data port accesses 2x2 blocks even if the
    * bottom row isn't to be rendered, so failure to align means
    * we could walk off the end of the GTT and fault.  This is
    * documented on 965, and may be the case on older chipsets
    * too so we try to be careful.
    */
   aligned_y = y;
   height_alignment = 2;

   if (tiling == I915_TILING_X)
      height_alignment = 8;
   else if (tiling == I915_TILING_Y)
      height_alignment = 32;
   aligned_y = ALIGN(y, height_alignment);

   stride = x * cpp;
   stride = bo_tile_pitch(bufmgr, stride, tiling);
   size = stride * aligned_y;
   size = bo_tile_size(bufmgr, size, tiling);
   *pitch = stride;

   if (tiling == I915_TILING_NONE)
      stride = 0;

   return bo_alloc_internal(bufmgr, name, size, flags, tiling, stride, 0);
}

/**
 * Returns a brw_bo wrapping the given buffer object handle.
 *
 * This can be used when one application needs to pass a buffer object
 * to another.
 */
struct brw_bo *
brw_bo_gem_create_from_name(struct brw_bufmgr *bufmgr,
                            const char *name, unsigned int handle)
{
   struct brw_bo *bo;

   /* At the moment most applications only have a few named BOs.
    * For instance, in a DRI client only the render buffers passed
    * between X and the client are named.  And since X returns the
    * alternating names for the front/back buffer a linear search
    * provides a sufficiently fast match.
    */
   mtx_lock(&bufmgr->lock);
   bo = hash_find_bo(bufmgr->name_table, handle);
   if (bo) {
      brw_bo_reference(bo);
      goto out;
   }

   struct drm_gem_open open_arg = { .name = handle };
   int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_OPEN, &open_arg);
   if (ret != 0) {
      DBG("Couldn't reference %s handle 0x%08x: %s\n",
          name, handle, strerror(errno));
      bo = NULL;
      goto out;
   }
   /* Now see if someone has used a prime handle to get this
    * object from the kernel before by looking through the list
    * again for a matching gem_handle
    */
   bo = hash_find_bo(bufmgr->handle_table, open_arg.handle);
   if (bo) {
      brw_bo_reference(bo);
      goto out;
   }

   bo = calloc(1, sizeof(*bo));
   if (!bo)
      goto out;

   p_atomic_set(&bo->refcount, 1);

   bo->size = open_arg.size;
   bo->gtt_offset = 0;
   bo->bufmgr = bufmgr;
   bo->gem_handle = open_arg.handle;
   bo->name = name;
   bo->global_name = handle;
   bo->reusable = false;
   bo->external = true;

   _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
   _mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo);

   struct drm_i915_gem_get_tiling get_tiling = { .handle = bo->gem_handle };
   ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling);
   if (ret != 0)
      goto err_unref;

   bo->tiling_mode = get_tiling.tiling_mode;
   bo->swizzle_mode = get_tiling.swizzle_mode;
   /* XXX stride is unknown */
   DBG("bo_create_from_handle: %d (%s)\n", handle, bo->name);

out:
   mtx_unlock(&bufmgr->lock);
   return bo;

err_unref:
   bo_free(bo);
   mtx_unlock(&bufmgr->lock);
   return NULL;
}

static void
bo_free(struct brw_bo *bo)
{
   struct brw_bufmgr *bufmgr = bo->bufmgr;

   if (bo->map_cpu) {
      VG_NOACCESS(bo->map_cpu, bo->size);
      drm_munmap(bo->map_cpu, bo->size);
   }
   if (bo->map_wc) {
      VG_NOACCESS(bo->map_wc, bo->size);
      drm_munmap(bo->map_wc, bo->size);
   }
   if (bo->map_gtt) {
      VG_NOACCESS(bo->map_gtt, bo->size);
      drm_munmap(bo->map_gtt, bo->size);
   }

   if (bo->external) {
      struct hash_entry *entry;

      if (bo->global_name) {
         entry = _mesa_hash_table_search(bufmgr->name_table, &bo->global_name);
         _mesa_hash_table_remove(bufmgr->name_table, entry);
      }

      entry = _mesa_hash_table_search(bufmgr->handle_table, &bo->gem_handle);
      _mesa_hash_table_remove(bufmgr->handle_table, entry);
   }

   /* Close this object */
   struct drm_gem_close close = { .handle = bo->gem_handle };
   int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &close);
   if (ret != 0) {
      DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
          bo->gem_handle, bo->name, strerror(errno));
   }
   free(bo);
}

/** Frees all cached buffers significantly older than @time. */
static void
cleanup_bo_cache(struct brw_bufmgr *bufmgr, time_t time)
{
   int i;

   if (bufmgr->time == time)
      return;

   for (i = 0; i < bufmgr->num_buckets; i++) {
      struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];

      list_for_each_entry_safe(struct brw_bo, bo, &bucket->head, head) {
         if (time - bo->free_time <= 1)
            break;

         list_del(&bo->head);

         bo_free(bo);
      }
   }

   bufmgr->time = time;
}

static void
bo_unreference_final(struct brw_bo *bo, time_t time)
{
   struct brw_bufmgr *bufmgr = bo->bufmgr;
   struct bo_cache_bucket *bucket;

   DBG("bo_unreference final: %d (%s)\n", bo->gem_handle, bo->name);

   bucket = bucket_for_size(bufmgr, bo->size);
   /* Put the buffer into our internal cache for reuse if we can. */
   if (bufmgr->bo_reuse && bo->reusable && bucket != NULL &&
       brw_bo_madvise(bo, I915_MADV_DONTNEED)) {
      bo->free_time = time;

      bo->name = NULL;
      bo->kflags = 0;

      list_addtail(&bo->head, &bucket->head);
   } else {
      bo_free(bo);
   }
}

void
brw_bo_unreference(struct brw_bo *bo)
{
   if (bo == NULL)
      return;

   assert(p_atomic_read(&bo->refcount) > 0);

   if (atomic_add_unless(&bo->refcount, -1, 1)) {
      struct brw_bufmgr *bufmgr = bo->bufmgr;
      struct timespec time;

      clock_gettime(CLOCK_MONOTONIC, &time);

      mtx_lock(&bufmgr->lock);

      if (p_atomic_dec_zero(&bo->refcount)) {
         bo_unreference_final(bo, time.tv_sec);
         cleanup_bo_cache(bufmgr, time.tv_sec);
      }

      mtx_unlock(&bufmgr->lock);
   }
}

static void
bo_wait_with_stall_warning(struct brw_context *brw,
                           struct brw_bo *bo,
                           const char *action)
{
   bool busy = brw && brw->perf_debug && !bo->idle;
   double elapsed = unlikely(busy) ? -get_time() : 0.0;

   brw_bo_wait_rendering(bo);

   if (unlikely(busy)) {
      elapsed += get_time();
      if (elapsed > 1e-5) /* 0.01ms */
         perf_debug("%s a busy \"%s\" BO stalled and took %.03f ms.\n",
                    action, bo->name, elapsed * 1000);
   }
}

static void
print_flags(unsigned flags)
{
   if (flags & MAP_READ)
      DBG("READ ");
   if (flags & MAP_WRITE)
      DBG("WRITE ");
   if (flags & MAP_ASYNC)
      DBG("ASYNC ");
   if (flags & MAP_PERSISTENT)
      DBG("PERSISTENT ");
   if (flags & MAP_COHERENT)
      DBG("COHERENT ");
   if (flags & MAP_RAW)
      DBG("RAW ");
   DBG("\n");
}

static void *
brw_bo_map_cpu(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
{
   struct brw_bufmgr *bufmgr = bo->bufmgr;

   /* We disallow CPU maps for writing to non-coherent buffers, as the
    * CPU map can become invalidated when a batch is flushed out, which
    * can happen at unpredictable times.  You should use WC maps instead.
    */
   assert(bo->cache_coherent || !(flags & MAP_WRITE));

   if (!bo->map_cpu) {
      DBG("brw_bo_map_cpu: %d (%s)\n", bo->gem_handle, bo->name);

      struct drm_i915_gem_mmap mmap_arg = {
         .handle = bo->gem_handle,
         .size = bo->size,
      };
      int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg);
      if (ret != 0) {
         ret = -errno;
         DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
             __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
         return NULL;
      }
      void *map = (void *) (uintptr_t) mmap_arg.addr_ptr;
      VG_DEFINED(map, bo->size);

      if (p_atomic_cmpxchg(&bo->map_cpu, NULL, map)) {
         VG_NOACCESS(map, bo->size);
         drm_munmap(map, bo->size);
      }
   }
   assert(bo->map_cpu);

   DBG("brw_bo_map_cpu: %d (%s) -> %p, ", bo->gem_handle, bo->name,
       bo->map_cpu);
   print_flags(flags);

   if (!(flags & MAP_ASYNC)) {
      bo_wait_with_stall_warning(brw, bo, "CPU mapping");
   }

   if (!bo->cache_coherent && !bo->bufmgr->has_llc) {
      /* If we're reusing an existing CPU mapping, the CPU caches may
       * contain stale data from the last time we read from that mapping.
       * (With the BO cache, it might even be data from a previous buffer!)
       * Even if it's a brand new mapping, the kernel may have zeroed the
       * buffer via CPU writes.
       *
       * We need to invalidate those cachelines so that we see the latest
       * contents, and so long as we only read from the CPU mmap we do not
       * need to write those cachelines back afterwards.
       *
       * On LLC, the empirical evidence suggests that writes from the GPU
       * that bypass the LLC (i.e. for scanout) do *invalidate* the CPU
       * cachelines. (Other reads, such as the display engine, bypass the
       * LLC entirely requiring us to keep dirty pixels for the scanout
       * out of any cache.)
       */
      gen_invalidate_range(bo->map_cpu, bo->size);
   }

   return bo->map_cpu;
}

static void *
brw_bo_map_wc(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
{
   struct brw_bufmgr *bufmgr = bo->bufmgr;

   if (!bufmgr->has_mmap_wc)
      return NULL;

   if (!bo->map_wc) {
      DBG("brw_bo_map_wc: %d (%s)\n", bo->gem_handle, bo->name);

      struct drm_i915_gem_mmap mmap_arg = {
         .handle = bo->gem_handle,
         .size = bo->size,
         .flags = I915_MMAP_WC,
      };
      int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg);
      if (ret != 0) {
         ret = -errno;
         DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
             __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
         return NULL;
      }

      void *map = (void *) (uintptr_t) mmap_arg.addr_ptr;
      VG_DEFINED(map, bo->size);

      if (p_atomic_cmpxchg(&bo->map_wc, NULL, map)) {
         VG_NOACCESS(map, bo->size);
         drm_munmap(map, bo->size);
      }
   }
   assert(bo->map_wc);

   DBG("brw_bo_map_wc: %d (%s) -> %p\n", bo->gem_handle, bo->name, bo->map_wc);
   print_flags(flags);

   if (!(flags & MAP_ASYNC)) {
      bo_wait_with_stall_warning(brw, bo, "WC mapping");
   }

   return bo->map_wc;
}

/**
 * Perform an uncached mapping via the GTT.
 *
 * Write access through the GTT is not quite fully coherent.  On low power
 * systems especially, like modern Atoms, we can observe reads from RAM before
 * the write via GTT has landed.  A write memory barrier that flushes the Write
 * Combining Buffer (i.e. sfence/mfence) is not sufficient to order the later
 * read after the write as the GTT write suffers a small delay through the GTT
 * indirection.  The kernel uses an uncached mmio read to ensure the GTT write
 * is ordered with reads (either by the GPU, WB or WC) and unconditionally
 * flushes prior to execbuf submission.  However, if we are not informing the
 * kernel about our GTT writes, it will not flush before earlier access, such
 * as when using the cmdparser.  Similarly, we need to be careful if we should
 * ever issue a CPU read immediately following a GTT write.
 *
 * Telling the kernel about write access also has one more important
 * side-effect.  Upon receiving notification about the write, it cancels any
 * scanout buffering for FBC/PSR and friends.  Later FBC/PSR is then flushed by
 * either SW_FINISH or DIRTYFB.  The presumption is that we never write to the
 * actual scanout via a mmapping, only to a backbuffer and so all the FBC/PSR
 * tracking is handled on the buffer exchange instead.
 */
static void *
brw_bo_map_gtt(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
{
   struct brw_bufmgr *bufmgr = bo->bufmgr;

   /* Get a mapping of the buffer if we haven't before. */
   if (bo->map_gtt == NULL) {
      DBG("bo_map_gtt: mmap %d (%s)\n", bo->gem_handle, bo->name);

      struct drm_i915_gem_mmap_gtt mmap_arg = { .handle = bo->gem_handle };

      /* Get the fake offset back... */
      int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg);
      if (ret != 0) {
         DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
             __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
         return NULL;
      }

      /* and mmap it. */
      void *map = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
                           MAP_SHARED, bufmgr->fd, mmap_arg.offset);
      if (map == MAP_FAILED) {
         DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
             __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
         return NULL;
      }

      /* We don't need to use VALGRIND_MALLOCLIKE_BLOCK because Valgrind will
       * already intercept this mmap call.  However, for consistency between
       * all the mmap paths, we mark the pointer as defined now and mark it
       * as inaccessible afterwards.
       */
      VG_DEFINED(map, bo->size);

      if (p_atomic_cmpxchg(&bo->map_gtt, NULL, map)) {
         VG_NOACCESS(map, bo->size);
         drm_munmap(map, bo->size);
      }
   }
   assert(bo->map_gtt);

   DBG("bo_map_gtt: %d (%s) -> %p, ", bo->gem_handle, bo->name, bo->map_gtt);
   print_flags(flags);

   if (!(flags & MAP_ASYNC)) {
      bo_wait_with_stall_warning(brw, bo, "GTT mapping");
   }

   return bo->map_gtt;
}

static bool
can_map_cpu(struct brw_bo *bo, unsigned flags)
{
   if (bo->cache_coherent)
      return true;

   /* Even if the buffer itself is not cache-coherent (such as a scanout), on
    * an LLC platform reads always are coherent (as they are performed via the
    * central system agent).  It is just the writes that we need to take
    * special care to ensure that land in main memory and not stick in the CPU
    * cache.
    */
   if (!(flags & MAP_WRITE) && bo->bufmgr->has_llc)
      return true;

   /* If PERSISTENT or COHERENT are set, the mmapping needs to remain valid
    * across batch flushes where the kernel will change cache domains of the
    * bo, invalidating continued access to the CPU mmap on a non-LLC device.
    *
    * Similarly, ASYNC typically means that the buffer will be accessed via
    * both the CPU and the GPU simultaneously.  Batches may be executed that
    * use the BO even while it is mapped.  While OpenGL technically disallows
    * most drawing while non-persistent mappings are active, we may still use
    * the GPU for blits or other operations, causing batches to happen at
    * inconvenient times.
    */
   if (flags & (MAP_PERSISTENT | MAP_COHERENT | MAP_ASYNC))
      return false;

   return !(flags & MAP_WRITE);
}

void *
brw_bo_map(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
{
   if (bo->tiling_mode != I915_TILING_NONE && !(flags & MAP_RAW))
      return brw_bo_map_gtt(brw, bo, flags);

   void *map;

   if (can_map_cpu(bo, flags))
      map = brw_bo_map_cpu(brw, bo, flags);
   else
      map = brw_bo_map_wc(brw, bo, flags);

   /* Allow the attempt to fail by falling back to the GTT where necessary.
    *
    * Not every buffer can be mmaped directly using the CPU (or WC), for
    * example buffers that wrap stolen memory or are imported from other
    * devices.  For those, we have little choice but to use a GTT mmapping.
    * However, if we use a slow GTT mmapping for reads where we expected fast
    * access, that order of magnitude difference in throughput will be clearly
    * expressed by angry users.
    *
    * We skip MAP_RAW because we want to avoid map_gtt's fence detiling.
    */
   if (!map && !(flags & MAP_RAW)) {
      if (brw) {
         perf_debug("Fallback GTT mapping for %s with access flags %x\n",
                    bo->name, flags);
      }
      map = brw_bo_map_gtt(brw, bo, flags);
   }

   return map;
}

int
brw_bo_subdata(struct brw_bo *bo, uint64_t offset,
               uint64_t size, const void *data)
{
   struct brw_bufmgr *bufmgr = bo->bufmgr;

   struct drm_i915_gem_pwrite pwrite = {
      .handle = bo->gem_handle,
      .offset = offset,
      .size = size,
      .data_ptr = (uint64_t) (uintptr_t) data,
   };

   int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
   if (ret != 0) {
      ret = -errno;
      DBG("%s:%d: Error writing data to buffer %d: "
          "(%"PRIu64" %"PRIu64") %s .\n",
          __FILE__, __LINE__, bo->gem_handle, offset, size, strerror(errno));
   }

   return ret;
}

/** Waits for all GPU rendering with the object to have completed. */
void
brw_bo_wait_rendering(struct brw_bo *bo)
{
   /* We require a kernel recent enough for WAIT_IOCTL support.
    * See intel_init_bufmgr()
    */
   brw_bo_wait(bo, -1);
}

/**
 * Waits on a BO for the given amount of time.
 *
 * @bo: buffer object to wait for
 * @timeout_ns: amount of time to wait in nanoseconds.
 *   If value is less than 0, an infinite wait will occur.
 *
 * Returns 0 if the wait was successful, i.e. the last batch referencing the
 * object has completed within the allotted time.  Otherwise some negative
 * return value describes the error.  Of particular interest is -ETIME when
 * the wait has failed to yield the desired result.
 *
 * Similar to brw_bo_wait_rendering except a timeout parameter allows
 * the operation to give up after a certain amount of time.  Another subtle
 * difference is that the internal locking semantics are different (this
 * variant does not hold the lock for the duration of the wait).  This makes
 * the wait subject to a larger userspace race window.
 *
 * The implementation shall wait until the object is no longer actively
 * referenced within a batch buffer at the time of the call.  The wait will
 * not guarantee that the buffer is re-issued via another thread, or a flinked
 * handle.  Userspace must make sure this race does not occur if such precision
 * is important.
 *
 * Note that some kernels have broken the infinite wait for negative values
 * promise; upgrade to the latest stable kernels if this is the case.
 */
int
brw_bo_wait(struct brw_bo *bo, int64_t timeout_ns)
{
   struct brw_bufmgr *bufmgr = bo->bufmgr;

   /* If we know it's idle, don't bother with the kernel round trip */
   if (bo->idle && !bo->external)
      return 0;

   struct drm_i915_gem_wait wait = {
      .bo_handle = bo->gem_handle,
      .timeout_ns = timeout_ns,
   };
   int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
   if (ret != 0)
      return -errno;

   bo->idle = true;

   return ret;
}

void
brw_bufmgr_destroy(struct brw_bufmgr *bufmgr)
{
   mtx_destroy(&bufmgr->lock);

   /* Free any cached buffer objects we were going to reuse */
   for (int i = 0; i < bufmgr->num_buckets; i++) {
      struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];

      list_for_each_entry_safe(struct brw_bo, bo, &bucket->head, head) {
         list_del(&bo->head);

         bo_free(bo);
      }
   }

   _mesa_hash_table_destroy(bufmgr->name_table, NULL);
   _mesa_hash_table_destroy(bufmgr->handle_table, NULL);

   free(bufmgr);
}

static int
bo_set_tiling_internal(struct brw_bo *bo, uint32_t tiling_mode,
                       uint32_t stride)
{
   struct brw_bufmgr *bufmgr = bo->bufmgr;
   struct drm_i915_gem_set_tiling set_tiling;
   int ret;

   if (bo->global_name == 0 &&
       tiling_mode == bo->tiling_mode && stride == bo->stride)
      return 0;

   memset(&set_tiling, 0, sizeof(set_tiling));
   do {
      /* set_tiling is slightly broken and overwrites the
       * input on the error path, so we have to open code
       * drmIoctl.
       */
      set_tiling.handle = bo->gem_handle;
      set_tiling.tiling_mode = tiling_mode;
      set_tiling.stride = stride;

      ret = ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
   } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
   if (ret == -1)
      return -errno;

   bo->tiling_mode = set_tiling.tiling_mode;
   bo->swizzle_mode = set_tiling.swizzle_mode;
   bo->stride = set_tiling.stride;
   return 0;
}

int
brw_bo_get_tiling(struct brw_bo *bo, uint32_t *tiling_mode,
                  uint32_t *swizzle_mode)
{
   *tiling_mode = bo->tiling_mode;
   *swizzle_mode = bo->swizzle_mode;
   return 0;
}

static struct brw_bo *
brw_bo_gem_create_from_prime_internal(struct brw_bufmgr *bufmgr, int prime_fd,
                                      int tiling_mode, uint32_t stride)
{
   uint32_t handle;
   struct brw_bo *bo;

   mtx_lock(&bufmgr->lock);
   int ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle);
   if (ret) {
      DBG("create_from_prime: failed to obtain handle from fd: %s\n",
          strerror(errno));
      mtx_unlock(&bufmgr->lock);
      return NULL;
   }

   /*
    * See if the kernel has already returned this buffer to us.  Just as
    * for named buffers, we must not create two BOs pointing at the same
    * kernel object.
    */
   bo = hash_find_bo(bufmgr->handle_table, handle);
   if (bo) {
      brw_bo_reference(bo);
      goto out;
   }

   bo = calloc(1, sizeof(*bo));
   if (!bo)
      goto out;

   p_atomic_set(&bo->refcount, 1);

   /* Determine size of bo.  The fd-to-handle ioctl really should
    * return the size, but it doesn't.  If we have kernel 3.12 or
    * later, we can lseek on the prime fd to get the size.  Older
    * kernels will just fail, in which case we fall back to the
    * provided (estimated or guessed) size. */
   ret = lseek(prime_fd, 0, SEEK_END);
   if (ret != -1)
      bo->size = ret;

   bo->bufmgr = bufmgr;

   bo->gem_handle = handle;
   _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);

   bo->name = "prime";
   bo->reusable = false;
   bo->external = true;

   if (tiling_mode < 0) {
      struct drm_i915_gem_get_tiling get_tiling = { .handle = bo->gem_handle };
      if (drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling))
         goto err;

      bo->tiling_mode = get_tiling.tiling_mode;
      bo->swizzle_mode = get_tiling.swizzle_mode;
      /* XXX stride is unknown */
   } else {
      bo_set_tiling_internal(bo, tiling_mode, stride);
   }

out:
   mtx_unlock(&bufmgr->lock);
   return bo;

err:
   bo_free(bo);
   mtx_unlock(&bufmgr->lock);
   return NULL;
}

struct brw_bo *
brw_bo_gem_create_from_prime(struct brw_bufmgr *bufmgr, int prime_fd)
{
   return brw_bo_gem_create_from_prime_internal(bufmgr, prime_fd, -1, 0);
}

struct brw_bo *
brw_bo_gem_create_from_prime_tiled(struct brw_bufmgr *bufmgr, int prime_fd,
                                   uint32_t tiling_mode, uint32_t stride)
{
   assert(tiling_mode == I915_TILING_NONE ||
          tiling_mode == I915_TILING_X ||
          tiling_mode == I915_TILING_Y);

   return brw_bo_gem_create_from_prime_internal(bufmgr, prime_fd,
                                                tiling_mode, stride);
}

static void
brw_bo_make_external(struct brw_bo *bo)
{
   struct brw_bufmgr *bufmgr = bo->bufmgr;

   if (!bo->external) {
      mtx_lock(&bufmgr->lock);
      if (!bo->external) {
         _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
         bo->external = true;
      }
      mtx_unlock(&bufmgr->lock);
   }
}

int
brw_bo_gem_export_to_prime(struct brw_bo *bo, int *prime_fd)
{
   struct brw_bufmgr *bufmgr = bo->bufmgr;

   brw_bo_make_external(bo);

   if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
                          DRM_CLOEXEC, prime_fd) != 0)
      return -errno;

   bo->reusable = false;

   return 0;
}

uint32_t
brw_bo_export_gem_handle(struct brw_bo *bo)
{
   brw_bo_make_external(bo);

   return bo->gem_handle;
}

int
brw_bo_flink(struct brw_bo *bo, uint32_t *name)
{
   struct brw_bufmgr *bufmgr = bo->bufmgr;

   if (!bo->global_name) {
      struct drm_gem_flink flink = { .handle = bo->gem_handle };

      if (drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink))
         return -errno;

      brw_bo_make_external(bo);
      mtx_lock(&bufmgr->lock);
      if (!bo->global_name) {
         bo->global_name = flink.name;
         _mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo);
      }
      mtx_unlock(&bufmgr->lock);

      bo->reusable = false;
   }

   *name = bo->global_name;
   return 0;
}

/**
 * Enables unlimited caching of buffer objects for reuse.
 *
 * This is potentially very memory expensive, as the cache at each bucket
 * size is only bounded by how many buffers of that size we've managed to have
 * in flight at once.
 */
void
brw_bufmgr_enable_reuse(struct brw_bufmgr *bufmgr)
{
   bufmgr->bo_reuse = true;
}

static void
add_bucket(struct brw_bufmgr *bufmgr, int size)
{
   unsigned int i = bufmgr->num_buckets;

   assert(i < ARRAY_SIZE(bufmgr->cache_bucket));

   list_inithead(&bufmgr->cache_bucket[i].head);
   bufmgr->cache_bucket[i].size = size;
   bufmgr->num_buckets++;

   assert(bucket_for_size(bufmgr, size) == &bufmgr->cache_bucket[i]);
   assert(bucket_for_size(bufmgr, size - 2048) == &bufmgr->cache_bucket[i]);
   assert(bucket_for_size(bufmgr, size + 1) != &bufmgr->cache_bucket[i]);
}

static void
init_cache_buckets(struct brw_bufmgr *bufmgr)
{
   uint64_t size, cache_max_size = 64 * 1024 * 1024;

   /* OK, so power of two buckets was too wasteful of memory.
    * Give 3 other sizes between each power of two, to hopefully
    * cover things accurately enough.  (The alternative is
    * probably to just go for exact matching of sizes, and assume
    * that for things like composited window resize the tiled
    * width/height alignment and rounding of sizes to pages will
    * get us useful cache hit rates anyway)
    */
   add_bucket(bufmgr, 4096);
   add_bucket(bufmgr, 4096 * 2);
   add_bucket(bufmgr, 4096 * 3);

   /* Initialize the linked lists for BO reuse cache. */
   for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
      add_bucket(bufmgr, size);

      add_bucket(bufmgr, size + size * 1 / 4);
      add_bucket(bufmgr, size + size * 2 / 4);
      add_bucket(bufmgr, size + size * 3 / 4);
   }
}

uint32_t
brw_create_hw_context(struct brw_bufmgr *bufmgr)
{
   struct drm_i915_gem_context_create create = { };
   int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
   if (ret != 0) {
      DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n", strerror(errno));
      return 0;
   }

   return create.ctx_id;
}

int
brw_hw_context_set_priority(struct brw_bufmgr *bufmgr,
                            uint32_t ctx_id,
                            int priority)
{
   struct drm_i915_gem_context_param p = {
      .ctx_id = ctx_id,
      .param = I915_CONTEXT_PARAM_PRIORITY,
      .value = priority,
   };
   int err;

   err = 0;
   if (drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p))
      err = -errno;

   return err;
}

void
brw_destroy_hw_context(struct brw_bufmgr *bufmgr, uint32_t ctx_id)
{
   struct drm_i915_gem_context_destroy d = { .ctx_id = ctx_id };

   if (ctx_id != 0 &&
       drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &d) != 0) {
      fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
              strerror(errno));
   }
}

int
brw_reg_read(struct brw_bufmgr *bufmgr, uint32_t offset, uint64_t *result)
{
   struct drm_i915_reg_read reg_read = { .offset = offset };
   int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_REG_READ, &reg_read);

   *result = reg_read.val;
   return ret;
}

static int
gem_param(int fd, int name)
{
   int v = -1; /* No param uses (yet) the sign bit, reserve it for errors */

   struct drm_i915_getparam gp = { .param = name, .value = &v };
   if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
      return -1;

   return v;
}

/**
 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
 * and manage buffer objects.
 *
 * \param fd File descriptor of the opened DRM device.
 */
struct brw_bufmgr *
brw_bufmgr_init(struct gen_device_info *devinfo, int fd)
{
   struct brw_bufmgr *bufmgr;

   bufmgr = calloc(1, sizeof(*bufmgr));
   if (bufmgr == NULL)
      return NULL;

   /* Handles to buffer objects belong to the device fd and are not
    * reference counted by the kernel.  If the same fd is used by
    * multiple parties (threads sharing the same screen bufmgr, or
    * even worse the same device fd passed to multiple libraries)
    * ownership of those handles is shared by those independent parties.
    *
    * Don't do this!  Ensure that each library/bufmgr has its own device
    * fd so that its namespace does not clash with another.
    */
   bufmgr->fd = fd;

   if (mtx_init(&bufmgr->lock, mtx_plain) != 0) {
      free(bufmgr);
      return NULL;
   }

   bufmgr->has_llc = devinfo->has_llc;
   bufmgr->has_mmap_wc = gem_param(fd, I915_PARAM_MMAP_VERSION) > 0;

   init_cache_buckets(bufmgr);

   bufmgr->name_table =
      _mesa_hash_table_create(NULL, key_hash_uint, key_uint_equal);
   bufmgr->handle_table =
      _mesa_hash_table_create(NULL, key_hash_uint, key_uint_equal);

   return bufmgr;
}