/**************************************************************************
 *
 * Copyright 2007 Red Hat Inc.
 * Copyright 2007 Intel Corporation
 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 *
 **************************************************************************/
/*
 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
 *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
 *          Eric Anholt <eric (at) anholt.net>
 *          Dave Airlie <airlied (at) linux.ie>
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <xf86drm.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>

#include "errno.h"
#include "libdrm_lists.h"
#include "intel_bufmgr.h"
#include "intel_bufmgr_priv.h"
#include "intel_chipset.h"
#include "string.h"

#include "i915_drm.h"

#define DBG(...) do {                           \
    if (bufmgr_gem->bufmgr.debug)               \
        fprintf(stderr, __VA_ARGS__);           \
} while (0)

typedef struct _drm_intel_bo_gem drm_intel_bo_gem;

struct drm_intel_gem_bo_bucket {
    drmMMListHead head;
    unsigned long size;
};

/* Only cache objects up to 64MB. Bigger than that, and the rounding of the
 * size makes many operations fail that wouldn't otherwise.
 */
#define DRM_INTEL_GEM_BO_BUCKETS 14
typedef struct _drm_intel_bufmgr_gem {
    drm_intel_bufmgr bufmgr;

    int fd;

    int max_relocs;

    pthread_mutex_t lock;

    struct drm_i915_gem_exec_object *exec_objects;
    drm_intel_bo **exec_bos;
    int exec_size;
    int exec_count;

    /** Array of lists of cached gem objects of power-of-two sizes */
    struct drm_intel_gem_bo_bucket cache_bucket[DRM_INTEL_GEM_BO_BUCKETS];

    uint64_t gtt_size;
    int available_fences;
    int pci_device;
    char bo_reuse;
} drm_intel_bufmgr_gem;

struct _drm_intel_bo_gem {
    drm_intel_bo bo;

    int refcount;
    /** GEM object handle returned by the kernel for this buffer. */
    uint32_t gem_handle;
    const char *name;

    /**
     * Kernel-assigned global name for this object
     */
    unsigned int global_name;

    /**
     * Index of the buffer within the validation list while preparing a
     * batchbuffer execution.
     */
    int validate_index;

    /**
     * Current tiling mode
     */
    uint32_t tiling_mode;
    uint32_t swizzle_mode;

    time_t free_time;

    /** Array passed to the DRM containing relocation information. */
    struct drm_i915_gem_relocation_entry *relocs;
    /** Array of bos corresponding to relocs[i].target_handle */
    drm_intel_bo **reloc_target_bo;
    /** Number of entries in relocs */
    int reloc_count;
    /** Mapped address for the buffer, saved across map/unmap cycles */
    void *mem_virtual;
    /** GTT virtual address for the buffer, saved across map/unmap cycles */
    void *gtt_virtual;

    /** BO cache list */
    drmMMListHead head;

    /**
     * Boolean of whether this BO and its children have been included in
     * the current drm_intel_bufmgr_check_aperture_space() total.
     */
    char included_in_check_aperture;

    /**
     * Boolean of whether this buffer has been used as a relocation
     * target and had its size accounted for, and thus can't have any
     * further relocations added to it.
     */
    char used_as_reloc_target;

    /**
     * Boolean of whether this buffer can be re-used
     */
    char reusable;

    /**
     * Size in bytes of this buffer and its relocation descendents.
     *
     * Used to avoid costly tree walking in drm_intel_bufmgr_check_aperture in
     * the common case.
     */
    int reloc_tree_size;
    /**
     * Number of potential fence registers required by this buffer and its
     * relocations.
     */
    int reloc_tree_fences;
};

static void drm_intel_gem_bo_reference_locked(drm_intel_bo *bo);

static unsigned int
drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count);

static unsigned int
drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count);

static int
drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
                            uint32_t *swizzle_mode);

static int
drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
                            uint32_t stride);

static void
drm_intel_gem_bo_unreference(drm_intel_bo *bo);

static struct drm_intel_gem_bo_bucket *
drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
                                 unsigned long size)
{
    int i;

    for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
        struct drm_intel_gem_bo_bucket *bucket = &bufmgr_gem->cache_bucket[i];
        if (bucket->size >= size) {
            return bucket;
        }
    }

    return NULL;
}

static void drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
{
    int i, j;

    for (i = 0; i < bufmgr_gem->exec_count; i++) {
        drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;

        if (bo_gem->relocs == NULL) {
            DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle, bo_gem->name);
            continue;
        }

        for (j = 0; j < bo_gem->reloc_count; j++) {
            drm_intel_bo *target_bo = bo_gem->reloc_target_bo[j];
            drm_intel_bo_gem *target_gem = (drm_intel_bo_gem *)target_bo;

            DBG("%2d: %d (%s)@0x%08llx -> %d (%s)@0x%08lx + 0x%08x\n",
                i,
                bo_gem->gem_handle, bo_gem->name,
                (unsigned long long)bo_gem->relocs[j].offset,
                target_gem->gem_handle, target_gem->name, target_bo->offset,
                bo_gem->relocs[j].delta);
        }
    }
}

/**
 * Adds the given buffer to the list of buffers to be validated (moved into the
 * appropriate memory type) with the next batch submission.
 *
 * If a buffer is validated multiple times in a batch submission, it ends up
 * with the intersection of the memory type flags and the union of the
 * access flags.
 */
static void
drm_intel_add_validate_buffer(drm_intel_bo *bo)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    int index;

    if (bo_gem->validate_index != -1)
        return;

    /* Extend the array of validation entries as necessary. */
    if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
        int new_size = bufmgr_gem->exec_size * 2;

        if (new_size == 0)
            new_size = 5;

        bufmgr_gem->exec_objects =
            realloc(bufmgr_gem->exec_objects,
                    sizeof(*bufmgr_gem->exec_objects) * new_size);
        bufmgr_gem->exec_bos =
            realloc(bufmgr_gem->exec_bos,
                    sizeof(*bufmgr_gem->exec_bos) * new_size);
        bufmgr_gem->exec_size = new_size;
    }

    index = bufmgr_gem->exec_count;
    bo_gem->validate_index = index;
    /* Fill in array entry */
    bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle;
    bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count;
    bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
    bufmgr_gem->exec_objects[index].alignment = 0;
    bufmgr_gem->exec_objects[index].offset = 0;
    bufmgr_gem->exec_bos[index] = bo;
    drm_intel_gem_bo_reference_locked(bo);
    bufmgr_gem->exec_count++;
}


#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
    sizeof(uint32_t))

static int
drm_intel_setup_reloc_list(drm_intel_bo *bo)
{
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;

    bo_gem->relocs = malloc(bufmgr_gem->max_relocs *
                            sizeof(struct drm_i915_gem_relocation_entry));
    bo_gem->reloc_target_bo = malloc(bufmgr_gem->max_relocs *
                                     sizeof(drm_intel_bo *));

    return 0;
}

static int
drm_intel_gem_bo_busy(drm_intel_bo *bo)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    struct drm_i915_gem_busy busy;
    int ret;

    memset(&busy, 0, sizeof(busy));
    busy.handle = bo_gem->gem_handle;

    ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);

    return (ret == 0 && busy.busy);
}

static drm_intel_bo *
drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, const char *name,
                                unsigned long size, unsigned int alignment,
                                int for_render)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
    drm_intel_bo_gem *bo_gem;
    unsigned int page_size = getpagesize();
    int ret;
    struct drm_intel_gem_bo_bucket *bucket;
    int alloc_from_cache = 0;
    unsigned long bo_size;

    /* Round the allocated size up to a power of two number of pages. */
    bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);

    /* If we don't have caching at this size, don't actually round the
     * allocation up.
     */
    if (bucket == NULL) {
        bo_size = size;
        if (bo_size < page_size)
            bo_size = page_size;
    } else {
        bo_size = bucket->size;
    }

    pthread_mutex_lock(&bufmgr_gem->lock);
    /* Get a buffer out of the cache if available */
    if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) {
        if (for_render) {
            /* Allocate new render-target BOs from the tail (MRU)
             * of the list, as it will likely be hot in the GPU cache
             * and in the aperture for us.
             */
            bo_gem = DRMLISTENTRY(drm_intel_bo_gem, bucket->head.prev, head);
            DRMLISTDEL(&bo_gem->head);
            alloc_from_cache = 1;
        } else {
            /* For non-render-target BOs (where we're probably going to map it
             * first thing in order to fill it with data), check if the
             * last BO in the cache is unbusy, and only reuse in that case.
             * Otherwise, allocating a new buffer is probably faster than
             * waiting for the GPU to finish.
             */
            bo_gem = DRMLISTENTRY(drm_intel_bo_gem, bucket->head.next, head);

            if (!drm_intel_gem_bo_busy(&bo_gem->bo)) {
                alloc_from_cache = 1;
                DRMLISTDEL(&bo_gem->head);
            }
        }
    }
    pthread_mutex_unlock(&bufmgr_gem->lock);

    if (!alloc_from_cache) {
        struct drm_i915_gem_create create;

        bo_gem = calloc(1, sizeof(*bo_gem));
        if (!bo_gem)
            return NULL;

        bo_gem->bo.size = bo_size;
        memset(&create, 0, sizeof(create));
        create.size = bo_size;

        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CREATE, &create);
        bo_gem->gem_handle = create.handle;
        bo_gem->bo.handle = bo_gem->gem_handle;
        if (ret != 0) {
            free(bo_gem);
            return NULL;
        }
        bo_gem->bo.bufmgr = bufmgr;
    }

    bo_gem->name = name;
    bo_gem->refcount = 1;
    bo_gem->validate_index = -1;
    bo_gem->reloc_tree_size = bo_gem->bo.size;
    bo_gem->reloc_tree_fences = 0;
    bo_gem->used_as_reloc_target = 0;
    bo_gem->tiling_mode = I915_TILING_NONE;
    bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
    bo_gem->reusable = 1;

    DBG("bo_create: buf %d (%s) %ldb\n",
        bo_gem->gem_handle, bo_gem->name, size);

    return &bo_gem->bo;
}

static drm_intel_bo *
drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, const char *name,
                                  unsigned long size, unsigned int alignment)
{
    return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, alignment, 1);
}

static drm_intel_bo *
drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
                       unsigned long size, unsigned int alignment)
{
    return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, alignment, 0);
}

/**
 * Returns a drm_intel_bo wrapping the given buffer object handle.
 *
 * This can be used when one application needs to pass a buffer object
 * to another.
 */
drm_intel_bo *
drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, const char *name,
                                  unsigned int handle)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
    drm_intel_bo_gem *bo_gem;
    int ret;
    struct drm_gem_open open_arg;
    struct drm_i915_gem_get_tiling get_tiling;

    bo_gem = calloc(1, sizeof(*bo_gem));
    if (!bo_gem)
        return NULL;

    memset(&open_arg, 0, sizeof(open_arg));
    open_arg.name = handle;
    ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_OPEN, &open_arg);
    if (ret != 0) {
        fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n",
                name, handle, strerror(errno));
        free(bo_gem);
        return NULL;
    }
    bo_gem->bo.size = open_arg.size;
    bo_gem->bo.offset = 0;
    bo_gem->bo.virtual = NULL;
    bo_gem->bo.bufmgr = bufmgr;
    bo_gem->name = name;
    bo_gem->refcount = 1;
    bo_gem->validate_index = -1;
    bo_gem->gem_handle = open_arg.handle;
    bo_gem->global_name = handle;
    bo_gem->reusable = 0;

    memset(&get_tiling, 0, sizeof(get_tiling));
    get_tiling.handle = bo_gem->gem_handle;
    ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling);
    if (ret != 0) {
        drm_intel_gem_bo_unreference(&bo_gem->bo);
        return NULL;
    }
    bo_gem->tiling_mode = get_tiling.tiling_mode;
    bo_gem->swizzle_mode = get_tiling.swizzle_mode;
    if (bo_gem->tiling_mode == I915_TILING_NONE)
        bo_gem->reloc_tree_fences = 0;
    else
        bo_gem->reloc_tree_fences = 1;

    DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);

    return &bo_gem->bo;
}
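
/*
 * Illustrative sketch (not part of this file): how two processes might share
 * a buffer through the global-name path above.  The transport used to hand
 * the name from one process to the other (send_name_to_peer /
 * receive_name_from_peer) is hypothetical; only drm_intel_bo_flink() and
 * drm_intel_bo_gem_create_from_name() are real libdrm entry points.
 *
 *     // Process A: export an existing bo.
 *     uint32_t name;
 *     if (drm_intel_bo_flink(bo, &name) == 0)
 *             send_name_to_peer(name);          // hypothetical IPC
 *
 *     // Process B: import it.  "shared" is only a debug label.
 *     uint32_t name = receive_name_from_peer(); // hypothetical IPC
 *     drm_intel_bo *bo = drm_intel_bo_gem_create_from_name(bufmgr,
 *                                                          "shared", name);
 *
 * Note that the imported bo is marked non-reusable, so it is freed rather
 * than returned to the cache when its last reference goes away.
 */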

static void
drm_intel_gem_bo_reference(drm_intel_bo *bo)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;

    assert(bo_gem->refcount > 0);
    pthread_mutex_lock(&bufmgr_gem->lock);
    bo_gem->refcount++;
    pthread_mutex_unlock(&bufmgr_gem->lock);
}

static void
drm_intel_gem_bo_reference_locked(drm_intel_bo *bo)
{
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;

    assert(bo_gem->refcount > 0);
    bo_gem->refcount++;
}

static void
drm_intel_gem_bo_free(drm_intel_bo *bo)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    struct drm_gem_close close;
    int ret;

    if (bo_gem->mem_virtual)
        munmap(bo_gem->mem_virtual, bo_gem->bo.size);
    if (bo_gem->gtt_virtual)
        munmap(bo_gem->gtt_virtual, bo_gem->bo.size);

    /* Close this object */
    memset(&close, 0, sizeof(close));
    close.handle = bo_gem->gem_handle;
    ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close);
    if (ret != 0) {
        fprintf(stderr,
                "DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
                bo_gem->gem_handle, bo_gem->name, strerror(errno));
    }
    free(bo);
}

/** Frees all cached buffers significantly older than @time. */
static void
drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
{
    int i;

    for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
        struct drm_intel_gem_bo_bucket *bucket = &bufmgr_gem->cache_bucket[i];

        while (!DRMLISTEMPTY(&bucket->head)) {
            drm_intel_bo_gem *bo_gem;

            bo_gem = DRMLISTENTRY(drm_intel_bo_gem, bucket->head.next, head);
            if (time - bo_gem->free_time <= 1)
                break;

            DRMLISTDEL(&bo_gem->head);

            drm_intel_gem_bo_free(&bo_gem->bo);
        }
    }
}

static void
drm_intel_gem_bo_unreference_locked(drm_intel_bo *bo)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;

    assert(bo_gem->refcount > 0);
    if (--bo_gem->refcount == 0) {
        struct drm_intel_gem_bo_bucket *bucket;
        uint32_t tiling_mode;

        if (bo_gem->relocs != NULL) {
            int i;

            /* Unreference all the target buffers */
            for (i = 0; i < bo_gem->reloc_count; i++)
                drm_intel_gem_bo_unreference_locked(bo_gem->reloc_target_bo[i]);
            free(bo_gem->reloc_target_bo);
            free(bo_gem->relocs);
        }

        DBG("bo_unreference final: %d (%s)\n",
            bo_gem->gem_handle, bo_gem->name);

        bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
        /* Put the buffer into our internal cache for reuse if we can. */
        tiling_mode = I915_TILING_NONE;
        if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
            drm_intel_gem_bo_set_tiling(bo, &tiling_mode, 0) == 0)
        {
            struct timespec time;

            clock_gettime(CLOCK_MONOTONIC, &time);
            bo_gem->free_time = time.tv_sec;

            bo_gem->name = NULL;
            bo_gem->validate_index = -1;
            bo_gem->relocs = NULL;
            bo_gem->reloc_target_bo = NULL;
            bo_gem->reloc_count = 0;

            DRMLISTADDTAIL(&bo_gem->head, &bucket->head);

            drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec);
        } else {
            drm_intel_gem_bo_free(bo);
        }
    }
}

static void
drm_intel_gem_bo_unreference(drm_intel_bo *bo)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;

    pthread_mutex_lock(&bufmgr_gem->lock);
    drm_intel_gem_bo_unreference_locked(bo);
    pthread_mutex_unlock(&bufmgr_gem->lock);
}

static int
drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    struct drm_i915_gem_set_domain set_domain;
    int ret;

    pthread_mutex_lock(&bufmgr_gem->lock);

    /* Allow recursive mapping. Mesa may recursively map buffers with
     * nested display loops.
     */
    if (!bo_gem->mem_virtual) {
        struct drm_i915_gem_mmap mmap_arg;

        DBG("bo_map: %d (%s)\n", bo_gem->gem_handle, bo_gem->name);

        memset(&mmap_arg, 0, sizeof(mmap_arg));
        mmap_arg.handle = bo_gem->gem_handle;
        mmap_arg.offset = 0;
        mmap_arg.size = bo->size;
        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg);
        if (ret != 0) {
            fprintf(stderr, "%s:%d: Error mapping buffer %d (%s): %s .\n",
                    __FILE__, __LINE__,
                    bo_gem->gem_handle, bo_gem->name, strerror(errno));
            pthread_mutex_unlock(&bufmgr_gem->lock);
            return ret;
        }
        bo_gem->mem_virtual = (void *)(uintptr_t)mmap_arg.addr_ptr;
    }
    DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
        bo_gem->mem_virtual);
    bo->virtual = bo_gem->mem_virtual;

    set_domain.handle = bo_gem->gem_handle;
    set_domain.read_domains = I915_GEM_DOMAIN_CPU;
    if (write_enable)
        set_domain.write_domain = I915_GEM_DOMAIN_CPU;
    else
        set_domain.write_domain = 0;
    do {
        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN,
                    &set_domain);
    } while (ret == -1 && errno == EINTR);
    if (ret != 0) {
        fprintf(stderr, "%s:%d: Error setting to CPU domain %d: %s\n",
                __FILE__, __LINE__, bo_gem->gem_handle, strerror(errno));
        pthread_mutex_unlock(&bufmgr_gem->lock);
        return ret;
    }

    pthread_mutex_unlock(&bufmgr_gem->lock);

    return 0;
}

int
drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    struct drm_i915_gem_set_domain set_domain;
    int ret;

    pthread_mutex_lock(&bufmgr_gem->lock);

    /* Get a mapping of the buffer if we haven't before. */
    if (bo_gem->gtt_virtual == NULL) {
        struct drm_i915_gem_mmap_gtt mmap_arg;

        DBG("bo_map_gtt: %d (%s)\n", bo_gem->gem_handle, bo_gem->name);

        memset(&mmap_arg, 0, sizeof(mmap_arg));
        mmap_arg.handle = bo_gem->gem_handle;

        /* Get the fake offset back... */
        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg);
        if (ret != 0) {
            fprintf(stderr,
                    "%s:%d: Error preparing buffer map %d (%s): %s .\n",
                    __FILE__, __LINE__,
                    bo_gem->gem_handle, bo_gem->name,
                    strerror(errno));
            pthread_mutex_unlock(&bufmgr_gem->lock);
            return ret;
        }

        /* and mmap it */
        bo_gem->gtt_virtual = mmap(0, bo->size, PROT_READ | PROT_WRITE,
                                   MAP_SHARED, bufmgr_gem->fd,
                                   mmap_arg.offset);
        if (bo_gem->gtt_virtual == MAP_FAILED) {
            fprintf(stderr,
                    "%s:%d: Error mapping buffer %d (%s): %s .\n",
                    __FILE__, __LINE__,
                    bo_gem->gem_handle, bo_gem->name,
                    strerror(errno));
            pthread_mutex_unlock(&bufmgr_gem->lock);
            return errno;
        }
    }

    bo->virtual = bo_gem->gtt_virtual;

    DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
        bo_gem->gtt_virtual);

    /* Now move it to the GTT domain so that the CPU caches are flushed */
    set_domain.handle = bo_gem->gem_handle;
    set_domain.read_domains = I915_GEM_DOMAIN_GTT;
    set_domain.write_domain = I915_GEM_DOMAIN_GTT;
    do {
        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN,
                    &set_domain);
    } while (ret == -1 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "%s:%d: Error setting domain %d: %s\n",
                __FILE__, __LINE__, bo_gem->gem_handle, strerror(errno));
    }

    pthread_mutex_unlock(&bufmgr_gem->lock);

    return 0;
}

int
drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    int ret = 0;

    if (bo == NULL)
        return 0;

    assert(bo_gem->gtt_virtual != NULL);

    pthread_mutex_lock(&bufmgr_gem->lock);
    bo->virtual = NULL;
    pthread_mutex_unlock(&bufmgr_gem->lock);

    return ret;
}

static int
drm_intel_gem_bo_unmap(drm_intel_bo *bo)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    struct drm_i915_gem_sw_finish sw_finish;
    int ret;

    if (bo == NULL)
        return 0;

    assert(bo_gem->mem_virtual != NULL);

    pthread_mutex_lock(&bufmgr_gem->lock);

    /* Cause a flush to happen if the buffer's pinned for scanout, so the
     * results show up in a timely manner.
     */
    sw_finish.handle = bo_gem->gem_handle;
    do {
        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_SW_FINISH,
                    &sw_finish);
    } while (ret == -1 && errno == EINTR);

    bo->virtual = NULL;
    pthread_mutex_unlock(&bufmgr_gem->lock);
    return 0;
}

static int
drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset,
                         unsigned long size, const void *data)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    struct drm_i915_gem_pwrite pwrite;
    int ret;

    memset(&pwrite, 0, sizeof(pwrite));
    pwrite.handle = bo_gem->gem_handle;
    pwrite.offset = offset;
    pwrite.size = size;
    pwrite.data_ptr = (uint64_t)(uintptr_t)data;
    do {
        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
    } while (ret == -1 && errno == EINTR);
    if (ret != 0) {
        fprintf(stderr, "%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
                __FILE__, __LINE__,
                bo_gem->gem_handle, (int)offset, (int)size,
                strerror(errno));
    }
    return 0;
}
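
/*
 * Illustrative sketch (not part of this file): two equivalent ways a caller
 * can upload data into a bo.  "vertex_data" is a made-up name;
 * drm_intel_bo_subdata(), drm_intel_bo_map() and drm_intel_bo_unmap() are the
 * public wrappers that dispatch to the functions in this file.
 *
 *     // One-shot upload through the pwrite path:
 *     drm_intel_bo_subdata(bo, 0, sizeof(vertex_data), vertex_data);
 *
 *     // Or map the bo into the CPU domain, write, and unmap:
 *     if (drm_intel_bo_map(bo, 1) == 0) {
 *             memcpy(bo->virtual, vertex_data, sizeof(vertex_data));
 *             drm_intel_bo_unmap(bo);
 *     }
 */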

static int
drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
    struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id;
    int ret;

    get_pipe_from_crtc_id.crtc_id = crtc_id;
    ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID,
                &get_pipe_from_crtc_id);
    if (ret != 0) {
        /* We return -1 here to signal that we don't
         * know which pipe is associated with this crtc.
         * This lets the caller know that this information
         * isn't available; using the wrong pipe for
         * vblank waiting can cause the chipset to lock up
         */
        return -1;
    }

    return get_pipe_from_crtc_id.pipe;
}

static int
drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset,
                             unsigned long size, void *data)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    struct drm_i915_gem_pread pread;
    int ret;

    memset(&pread, 0, sizeof(pread));
    pread.handle = bo_gem->gem_handle;
    pread.offset = offset;
    pread.size = size;
    pread.data_ptr = (uint64_t)(uintptr_t)data;
    do {
        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_PREAD, &pread);
    } while (ret == -1 && errno == EINTR);
    if (ret != 0) {
        fprintf(stderr, "%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
                __FILE__, __LINE__,
                bo_gem->gem_handle, (int)offset, (int)size,
                strerror(errno));
    }
    return 0;
}

/** Waits for all GPU rendering to the object to have completed. */
static void
drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo)
{
    drm_intel_gem_bo_start_gtt_access(bo, 0);
}

/**
 * Sets the object to the GTT read and possibly write domain, used by the X
 * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt().
 *
 * In combination with drm_intel_gem_bo_pin() and manual fence management, we
 * can do tiled pixmaps this way.
 */
void
drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    struct drm_i915_gem_set_domain set_domain;
    int ret;

    set_domain.handle = bo_gem->gem_handle;
    set_domain.read_domains = I915_GEM_DOMAIN_GTT;
    set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0;
    do {
        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
    } while (ret == -1 && errno == EINTR);
    if (ret != 0) {
        fprintf(stderr, "%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
                __FILE__, __LINE__,
                bo_gem->gem_handle, set_domain.read_domains, set_domain.write_domain,
                strerror(errno));
    }
}
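
/*
 * Illustrative sketch (not part of this file): how a 2D driver without GTT
 * mapping support in the kernel might use the call above.  The pinned front
 * buffer, the driver's own aperture pointer and draw_with_cpu() are
 * hypothetical; the real entry points used are drm_intel_bo_pin() and
 * drm_intel_gem_bo_start_gtt_access().
 *
 *     // Pin the scanout/front buffer at a known GTT offset once.
 *     drm_intel_bo_pin(front_bo, 4096);
 *
 *     // Before each burst of CPU rendering through the driver's own
 *     // aperture mapping, flush GPU caches and mark the bo GTT-dirty.
 *     drm_intel_gem_bo_start_gtt_access(front_bo, 1);
 *     draw_with_cpu(aperture_virtual + front_bo->offset);  // hypothetical
 */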

static void
drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
    int i;

    free(bufmgr_gem->exec_objects);
    free(bufmgr_gem->exec_bos);

    pthread_mutex_destroy(&bufmgr_gem->lock);

    /* Free any cached buffer objects we were going to reuse */
    for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
        struct drm_intel_gem_bo_bucket *bucket = &bufmgr_gem->cache_bucket[i];
        drm_intel_bo_gem *bo_gem;

        while (!DRMLISTEMPTY(&bucket->head)) {
            bo_gem = DRMLISTENTRY(drm_intel_bo_gem, bucket->head.next, head);
            DRMLISTDEL(&bo_gem->head);

            drm_intel_gem_bo_free(&bo_gem->bo);
        }
    }

    free(bufmgr);
}

/**
 * Adds the target buffer to the validation list and adds the relocation
 * to the reloc_buffer's relocation list.
 *
 * The relocation entry at the given offset must already contain the
 * precomputed relocation value, because the kernel will optimize out
 * the relocation entry write when the buffer hasn't moved from the
 * last known offset in target_bo.
 */
static int
drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
                            drm_intel_bo *target_bo, uint32_t target_offset,
                            uint32_t read_domains, uint32_t write_domain)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *)target_bo;

    pthread_mutex_lock(&bufmgr_gem->lock);

    /* Create a new relocation list if needed */
    if (bo_gem->relocs == NULL)
        drm_intel_setup_reloc_list(bo);

    /* Check overflow */
    assert(bo_gem->reloc_count < bufmgr_gem->max_relocs);

    /* Check args */
    assert(offset <= bo->size - 4);
    assert((write_domain & (write_domain - 1)) == 0);

    /* Make sure that we're not adding a reloc to something whose size has
     * already been accounted for.
     */
    assert(!bo_gem->used_as_reloc_target);
    bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
    bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;

    /* Flag the target to disallow further relocations in it. */
    target_bo_gem->used_as_reloc_target = 1;

    bo_gem->relocs[bo_gem->reloc_count].offset = offset;
    bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
    bo_gem->relocs[bo_gem->reloc_count].target_handle =
        target_bo_gem->gem_handle;
    bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
    bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
    bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset;

    bo_gem->reloc_target_bo[bo_gem->reloc_count] = target_bo;
    drm_intel_gem_bo_reference_locked(target_bo);

    bo_gem->reloc_count++;

    pthread_mutex_unlock(&bufmgr_gem->lock);

    return 0;
}
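
/*
 * Illustrative sketch (not part of this file): what a caller does to satisfy
 * the "precomputed relocation value" requirement documented above.  The batch
 * dword index (reloc_dword), target_delta and batch_bo are hypothetical; the
 * pattern is the usual one for the public drm_intel_bo_emit_reloc().
 *
 *     uint32_t *batch = batch_bo->virtual;      // batch_bo mapped for write
 *
 *     // Write the presumed address of the target into the batch first...
 *     batch[reloc_dword] = target_bo->offset + target_delta;
 *
 *     // ...then record the relocation so the kernel can patch it if the
 *     // target has moved by execbuffer time.
 *     drm_intel_bo_emit_reloc(batch_bo, reloc_dword * 4,
 *                             target_bo, target_delta,
 *                             I915_GEM_DOMAIN_RENDER,
 *                             I915_GEM_DOMAIN_RENDER);
 */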

/**
 * Walk the tree of relocations rooted at BO and accumulate the list of
 * validations to be performed and update the relocation buffers with
 * index values into the validation list.
 */
static void
drm_intel_gem_bo_process_reloc(drm_intel_bo *bo)
{
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    int i;

    if (bo_gem->relocs == NULL)
        return;

    for (i = 0; i < bo_gem->reloc_count; i++) {
        drm_intel_bo *target_bo = bo_gem->reloc_target_bo[i];

        /* Continue walking the tree depth-first. */
        drm_intel_gem_bo_process_reloc(target_bo);

        /* Add the target to the validate list */
        drm_intel_add_validate_buffer(target_bo);
    }
}

static void
drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem)
{
    int i;

    for (i = 0; i < bufmgr_gem->exec_count; i++) {
        drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;

        /* Update the buffer offset */
        if (bufmgr_gem->exec_objects[i].offset != bo->offset) {
            DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
                bo_gem->gem_handle, bo_gem->name, bo->offset,
                (unsigned long long)bufmgr_gem->exec_objects[i].offset);
            bo->offset = bufmgr_gem->exec_objects[i].offset;
        }
    }
}

static int
drm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
                      drm_clip_rect_t *cliprects, int num_cliprects,
                      int DR4)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    struct drm_i915_gem_execbuffer execbuf;
    int ret, i;

    pthread_mutex_lock(&bufmgr_gem->lock);
    /* Update indices and set up the validate list. */
    drm_intel_gem_bo_process_reloc(bo);

    /* Add the batch buffer to the validation list. There are no relocations
     * pointing to it.
     */
    drm_intel_add_validate_buffer(bo);

    execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec_objects;
    execbuf.buffer_count = bufmgr_gem->exec_count;
    execbuf.batch_start_offset = 0;
    execbuf.batch_len = used;
    execbuf.cliprects_ptr = (uintptr_t)cliprects;
    execbuf.num_cliprects = num_cliprects;
    execbuf.DR1 = 0;
    execbuf.DR4 = DR4;

    do {
        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER, &execbuf);
    } while (ret != 0 && errno == EAGAIN);

    if (ret != 0 && errno == ENOMEM) {
        fprintf(stderr, "Execbuffer fails to pin. Estimate: %u. Actual: %u. Available: %u\n",
                drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
                                                   bufmgr_gem->exec_count),
                drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
                                                  bufmgr_gem->exec_count),
                (unsigned int)bufmgr_gem->gtt_size);
    }
    drm_intel_update_buffer_offsets(bufmgr_gem);

    if (bufmgr_gem->bufmgr.debug)
        drm_intel_gem_dump_validation_list(bufmgr_gem);

    for (i = 0; i < bufmgr_gem->exec_count; i++) {
        drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;

        /* Disconnect the buffer from the validate list */
        bo_gem->validate_index = -1;
        drm_intel_gem_bo_unreference_locked(bo);
        bufmgr_gem->exec_bos[i] = NULL;
    }
    bufmgr_gem->exec_count = 0;
    pthread_mutex_unlock(&bufmgr_gem->lock);

    return 0;
}

static int
drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    struct drm_i915_gem_pin pin;
    int ret;

    memset(&pin, 0, sizeof(pin));
    pin.handle = bo_gem->gem_handle;
    pin.alignment = alignment;

    do {
        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_PIN, &pin);
    } while (ret == -1 && errno == EINTR);

    if (ret != 0)
        return -errno;

    bo->offset = pin.offset;
    return 0;
}

static int
drm_intel_gem_bo_unpin(drm_intel_bo *bo)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    struct drm_i915_gem_unpin unpin;
    int ret;

    memset(&unpin, 0, sizeof(unpin));
    unpin.handle = bo_gem->gem_handle;

    ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin);
    if (ret != 0)
        return -errno;

    return 0;
}

static int
drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
                            uint32_t stride)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    struct drm_i915_gem_set_tiling set_tiling;
    int ret;

    if (bo_gem->global_name == 0 && *tiling_mode == bo_gem->tiling_mode)
        return 0;

    /* If we're going from non-tiling to tiling, bump fence count */
    if (bo_gem->tiling_mode == I915_TILING_NONE)
        bo_gem->reloc_tree_fences++;

    memset(&set_tiling, 0, sizeof(set_tiling));
    set_tiling.handle = bo_gem->gem_handle;
    set_tiling.tiling_mode = *tiling_mode;
    set_tiling.stride = stride;

    ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
    if (ret != 0) {
        *tiling_mode = bo_gem->tiling_mode;
        return -errno;
    }
    bo_gem->tiling_mode = set_tiling.tiling_mode;
    bo_gem->swizzle_mode = set_tiling.swizzle_mode;

    /* If we're going from tiling to non-tiling, drop fence count */
    if (bo_gem->tiling_mode == I915_TILING_NONE)
        bo_gem->reloc_tree_fences--;

    *tiling_mode = bo_gem->tiling_mode;
    return 0;
}

static int
drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
                            uint32_t *swizzle_mode)
{
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;

    *tiling_mode = bo_gem->tiling_mode;
    *swizzle_mode = bo_gem->swizzle_mode;
    return 0;
}
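
/*
 * Illustrative sketch (not part of this file): requesting X tiling on a
 * freshly allocated bo via the public drm_intel_bo_set_tiling() wrapper.
 * The stride value (pitch) is hypothetical; note that the kernel may refuse
 * the request, in which case *tiling_mode reports what is actually in effect.
 *
 *     uint32_t tiling = I915_TILING_X;
 *
 *     drm_intel_bo_set_tiling(bo, &tiling, pitch);
 *     if (tiling != I915_TILING_X) {
 *             // Fall back to a linear layout.
 *     }
 */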

static int
drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t *name)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    struct drm_gem_flink flink;
    int ret;

    if (!bo_gem->global_name) {
        memset(&flink, 0, sizeof(flink));
        flink.handle = bo_gem->gem_handle;

        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink);
        if (ret != 0)
            return -errno;
        bo_gem->global_name = flink.name;
        bo_gem->reusable = 0;
    }

    *name = bo_gem->global_name;
    return 0;
}

/**
 * Enables unlimited caching of buffer objects for reuse.
 *
 * This is potentially very memory expensive, as the cache at each bucket
 * size is only bounded by how many buffers of that size we've managed to have
 * in flight at once.
 */
void
drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;

    bufmgr_gem->bo_reuse = 1;
}

/**
 * Return the additional aperture space required by the tree of buffer objects
 * rooted at bo.
 */
static int
drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo)
{
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    int i;
    int total = 0;

    if (bo == NULL || bo_gem->included_in_check_aperture)
        return 0;

    total += bo->size;
    bo_gem->included_in_check_aperture = 1;

    for (i = 0; i < bo_gem->reloc_count; i++)
        total += drm_intel_gem_bo_get_aperture_space(bo_gem->reloc_target_bo[i]);

    return total;
}

/**
 * Count the number of buffers in this list that need a fence reg
 *
 * If the count is greater than the number of available regs, we'll have
 * to ask the caller to resubmit a batch with fewer tiled buffers.
 *
 * This function over-counts if the same buffer is used multiple times.
 */
static unsigned int
drm_intel_gem_total_fences(drm_intel_bo **bo_array, int count)
{
    int i;
    unsigned int total = 0;

    for (i = 0; i < count; i++) {
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo_array[i];

        if (bo_gem == NULL)
            continue;

        total += bo_gem->reloc_tree_fences;
    }
    return total;
}

/**
 * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready
 * for the next drm_intel_bufmgr_check_aperture_space() call.
 */
static void
drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo)
{
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    int i;

    if (bo == NULL || !bo_gem->included_in_check_aperture)
        return;

    bo_gem->included_in_check_aperture = 0;

    for (i = 0; i < bo_gem->reloc_count; i++)
        drm_intel_gem_bo_clear_aperture_space_flag(bo_gem->reloc_target_bo[i]);
}

/**
 * Return a conservative estimate for the amount of aperture required
 * for a collection of buffers. This may double-count some buffers.
 */
static unsigned int
drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count)
{
    int i;
    unsigned int total = 0;

    for (i = 0; i < count; i++) {
        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo_array[i];
        if (bo_gem != NULL)
            total += bo_gem->reloc_tree_size;
    }
    return total;
}

/**
 * Return the amount of aperture needed for a collection of buffers.
 * This avoids double counting any buffers, at the cost of looking
 * at every buffer in the set.
 */
static unsigned int
drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count)
{
    int i;
    unsigned int total = 0;

    for (i = 0; i < count; i++) {
        total += drm_intel_gem_bo_get_aperture_space(bo_array[i]);
        /* For the first buffer object in the array, we get an accurate count
         * back for its reloc_tree size (since nothing had been flagged as
         * being counted yet). We can save that value out as a more
         * conservative reloc_tree_size that avoids double-counting target
         * buffers. Since the first buffer happens to usually be the batch
         * buffer in our callers, this can pull us back from doing the tree
         * walk on every new batch emit.
         */
        if (i == 0) {
            drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo_array[i];
            bo_gem->reloc_tree_size = total;
        }
    }

    for (i = 0; i < count; i++)
        drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]);
    return total;
}

/**
 * Return -1 if the batchbuffer should be flushed before attempting to
 * emit rendering referencing the buffers pointed to by bo_array.
 *
 * This is required because if we try to emit a batchbuffer with relocations
 * to a tree of buffers that won't simultaneously fit in the aperture,
 * the rendering will return an error at a point where the software is not
 * prepared to recover from it.
 *
 * However, we also want to emit the batchbuffer significantly before we reach
 * the limit, as a series of batchbuffers each of which references buffers
 * covering almost all of the aperture means that at each emit we end up
 * waiting to evict a buffer from the last rendering, and we get synchronous
 * performance. By emitting smaller batchbuffers, we eat some CPU overhead to
 * get better parallelism.
 */
static int
drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo_array[0]->bufmgr;
    unsigned int total = 0;
    unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4;
    int total_fences;

    /* Check for fence reg constraints if necessary */
    if (bufmgr_gem->available_fences) {
        total_fences = drm_intel_gem_total_fences(bo_array, count);
        if (total_fences > bufmgr_gem->available_fences)
            return -1;
    }

    total = drm_intel_gem_estimate_batch_space(bo_array, count);

    if (total > threshold)
        total = drm_intel_gem_compute_batch_space(bo_array, count);

    if (total > threshold) {
        DBG("check_space: overflowed available aperture, %dkb vs %dkb\n",
            total / 1024, (int)bufmgr_gem->gtt_size / 1024);
        return -1;
    } else {
        DBG("drm_check_space: total %dkb vs bufmgr %dkb\n", total / 1024,
            (int)bufmgr_gem->gtt_size / 1024);
        return 0;
    }
}
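
/*
 * Illustrative sketch (not part of this file): the caller-side pattern the
 * comment above describes.  flush_batch(), emit_rendering() and the bo
 * variables are hypothetical stand-ins for a driver's batchbuffer code; the
 * real entry point is drm_intel_bufmgr_check_aperture_space().
 *
 *     drm_intel_bo *check[] = { batch_bo, dst_bo, src_bo };
 *
 *     if (drm_intel_bufmgr_check_aperture_space(check, 3) != 0) {
 *             // Won't fit alongside what's already queued: submit the
 *             // current batch first, then emit this rendering into a
 *             // fresh, empty batch.
 *             flush_batch();
 *     }
 *     emit_rendering(batch_bo, dst_bo, src_bo);
 */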

/*
 * Disable buffer reuse for objects which are shared with the kernel
 * as scanout buffers
 */
static int
drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo)
{
    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;

    bo_gem->reusable = 0;
    return 0;
}

/**
 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
 * and manage buffer objects.
 *
 * \param fd File descriptor of the opened DRM device.
 */
drm_intel_bufmgr *
drm_intel_bufmgr_gem_init(int fd, int batch_size)
{
    drm_intel_bufmgr_gem *bufmgr_gem;
    struct drm_i915_gem_get_aperture aperture;
    drm_i915_getparam_t gp;
    int ret, i;
    unsigned long size;

    bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
    bufmgr_gem->fd = fd;

    if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) {
        free(bufmgr_gem);
        return NULL;
    }

    ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);

    if (ret == 0)
        bufmgr_gem->gtt_size = aperture.aper_available_size;
    else {
        fprintf(stderr, "DRM_IOCTL_I915_GEM_GET_APERTURE failed: %s\n",
                strerror(errno));
        bufmgr_gem->gtt_size = 128 * 1024 * 1024;
        fprintf(stderr, "Assuming %dkB available aperture size.\n"
                "May lead to reduced performance or incorrect rendering.\n",
                (int)bufmgr_gem->gtt_size / 1024);
    }

    gp.param = I915_PARAM_CHIPSET_ID;
    gp.value = &bufmgr_gem->pci_device;
    ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
    if (ret) {
        fprintf(stderr, "get chip id failed: %d\n", ret);
        fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
    }

    if (!IS_I965G(bufmgr_gem)) {
        gp.param = I915_PARAM_NUM_FENCES_AVAIL;
        gp.value = &bufmgr_gem->available_fences;
        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
        if (ret) {
            fprintf(stderr, "get fences failed: %d\n", ret);
            fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
            bufmgr_gem->available_fences = 0;
        }
    }

    /* Let's go with one relocation per every 2 dwords (but round down a bit
     * since a power of two will mean an extra page allocation for the reloc
     * buffer).
     *
     * Every 4 was too few for the blender benchmark.
     */
    bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;

    bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
    bufmgr_gem->bufmgr.bo_alloc_for_render = drm_intel_gem_bo_alloc_for_render;
    bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
    bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference;
    bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map;
    bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap;
    bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata;
    bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata;
    bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
    bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
    bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
    bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
    bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
    bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling;
    bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink;
    bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec;
    bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy;
    bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_destroy;
    bufmgr_gem->bufmgr.debug = 0;
    bufmgr_gem->bufmgr.check_aperture_space = drm_intel_gem_check_aperture_space;
    bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse;
    bufmgr_gem->bufmgr.get_pipe_from_crtc_id = drm_intel_gem_get_pipe_from_crtc_id;

    /* Initialize the linked lists for BO reuse cache. */
    for (i = 0, size = 4096; i < DRM_INTEL_GEM_BO_BUCKETS; i++, size *= 2) {
        DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
        bufmgr_gem->cache_bucket[i].size = size;
    }

    return &bufmgr_gem->bufmgr;
}
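
/*
 * Illustrative sketch (not part of this file): typical bring-up of this
 * buffer manager by a driver.  The device path and the "batch" label are
 * examples only; all of the calls are public libdrm_intel API.
 *
 *     int fd = open("/dev/dri/card0", O_RDWR);          // example node
 *     drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 16 * 1024);
 *
 *     drm_intel_bufmgr_gem_enable_reuse(bufmgr);        // opt in to the BO cache
 *
 *     drm_intel_bo *batch = drm_intel_bo_alloc(bufmgr, "batch", 16 * 1024, 4096);
 *     // ... fill the batch, emit relocations, drm_intel_bo_exec(), ...
 *     drm_intel_bo_unreference(batch);
 *
 *     drm_intel_bufmgr_destroy(bufmgr);
 */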