      1 /**************************************************************************
      2  *
      3  * Copyright © 2007 Red Hat Inc.
      4  * Copyright © 2007-2012 Intel Corporation
      5  * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
      6  * All Rights Reserved.
      7  *
      8  * Permission is hereby granted, free of charge, to any person obtaining a
      9  * copy of this software and associated documentation files (the
     10  * "Software"), to deal in the Software without restriction, including
     11  * without limitation the rights to use, copy, modify, merge, publish,
     12  * distribute, sub license, and/or sell copies of the Software, and to
     13  * permit persons to whom the Software is furnished to do so, subject to
     14  * the following conditions:
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     19  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
     20  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     21  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     22  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     23  *
     24  * The above copyright notice and this permission notice (including the
     25  * next paragraph) shall be included in all copies or substantial portions
     26  * of the Software.
     27  *
     28  *
     29  **************************************************************************/
     30 /*
     31  * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
     32  *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
     33  *	    Eric Anholt <eric (at) anholt.net>
     34  *	    Dave Airlie <airlied (at) linux.ie>
     35  */
     36 
     37 #ifdef HAVE_CONFIG_H
     38 #include "config.h"
     39 #endif
     40 
     41 #include <xf86drm.h>
     42 #include <xf86atomic.h>
     43 #include <fcntl.h>
     44 #include <stdio.h>
     45 #include <stdlib.h>
     46 #include <string.h>
     47 #include <unistd.h>
     48 #include <assert.h>
     49 #include <pthread.h>
     50 #include <sys/ioctl.h>
     51 #include <sys/stat.h>
     52 #include <sys/types.h>
     53 #include <stdbool.h>
     54 
      55 #include <errno.h>
     56 #ifndef ETIME
     57 #define ETIME ETIMEDOUT
     58 #endif
     59 #include "libdrm.h"
     60 #include "libdrm_lists.h"
     61 #include "intel_bufmgr.h"
     62 #include "intel_bufmgr_priv.h"
     63 #include "intel_chipset.h"
     64 #include "intel_aub.h"
     66 
     67 #include "i915_drm.h"
     68 
     69 #ifdef HAVE_VALGRIND
     70 #include <valgrind.h>
     71 #include <memcheck.h>
     72 #define VG(x) x
     73 #else
     74 #define VG(x)
     75 #endif
     76 
     77 #define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s)))
     78 
     79 #define DBG(...) do {					\
     80 	if (bufmgr_gem->bufmgr.debug)			\
     81 		fprintf(stderr, __VA_ARGS__);		\
     82 } while (0)
     83 
     84 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
     85 
     86 typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
     87 
     88 struct drm_intel_gem_bo_bucket {
     89 	drmMMListHead head;
     90 	unsigned long size;
     91 };
     92 
     93 typedef struct _drm_intel_bufmgr_gem {
     94 	drm_intel_bufmgr bufmgr;
     95 
     96 	atomic_t refcount;
     97 
     98 	int fd;
     99 
    100 	int max_relocs;
    101 
    102 	pthread_mutex_t lock;
    103 
    104 	struct drm_i915_gem_exec_object *exec_objects;
    105 	struct drm_i915_gem_exec_object2 *exec2_objects;
    106 	drm_intel_bo **exec_bos;
    107 	int exec_size;
    108 	int exec_count;
    109 
    110 	/** Array of lists of cached gem objects of power-of-two sizes */
    111 	struct drm_intel_gem_bo_bucket cache_bucket[14 * 4];
    112 	int num_buckets;
    113 	time_t time;
    114 
    115 	drmMMListHead managers;
    116 
    117 	drmMMListHead named;
    118 	drmMMListHead vma_cache;
    119 	int vma_count, vma_open, vma_max;
    120 
    121 	uint64_t gtt_size;
    122 	int available_fences;
    123 	int pci_device;
    124 	int gen;
    125 	unsigned int has_bsd : 1;
    126 	unsigned int has_blt : 1;
    127 	unsigned int has_relaxed_fencing : 1;
    128 	unsigned int has_llc : 1;
    129 	unsigned int has_wait_timeout : 1;
    130 	unsigned int bo_reuse : 1;
    131 	unsigned int no_exec : 1;
    132 	unsigned int has_vebox : 1;
    133 	bool fenced_relocs;
    134 
    135 	char *aub_filename;
    136 	FILE *aub_file;
    137 	uint32_t aub_offset;
    138 } drm_intel_bufmgr_gem;
    139 
    140 #define DRM_INTEL_RELOC_FENCE (1<<0)
    141 
    142 typedef struct _drm_intel_reloc_target_info {
    143 	drm_intel_bo *bo;
    144 	int flags;
    145 } drm_intel_reloc_target;
    146 
    147 struct _drm_intel_bo_gem {
    148 	drm_intel_bo bo;
    149 
    150 	atomic_t refcount;
    151 	uint32_t gem_handle;
    152 	const char *name;
    153 
    154 	/**
     155 	 * Kernel-assigned global name for this object
     156 	 *
     157 	 * List contains both flink-named and prime-fd'd objects
    158 	 */
    159 	unsigned int global_name;
    160 	drmMMListHead name_list;
    161 
    162 	/**
    163 	 * Index of the buffer within the validation list while preparing a
    164 	 * batchbuffer execution.
    165 	 */
    166 	int validate_index;
    167 
    168 	/**
    169 	 * Current tiling mode
    170 	 */
    171 	uint32_t tiling_mode;
    172 	uint32_t swizzle_mode;
    173 	unsigned long stride;
    174 
    175 	time_t free_time;
    176 
    177 	/** Array passed to the DRM containing relocation information. */
    178 	struct drm_i915_gem_relocation_entry *relocs;
    179 	/**
    180 	 * Array of info structs corresponding to relocs[i].target_handle etc
    181 	 */
    182 	drm_intel_reloc_target *reloc_target_info;
    183 	/** Number of entries in relocs */
    184 	int reloc_count;
    185 	/** Mapped address for the buffer, saved across map/unmap cycles */
    186 	void *mem_virtual;
    187 	/** GTT virtual address for the buffer, saved across map/unmap cycles */
    188 	void *gtt_virtual;
    189 	/**
    190 	 * Virtual address of the buffer allocated by user, used for userptr
    191 	 * objects only.
    192 	 */
    193 	void *user_virtual;
    194 	int map_count;
    195 	drmMMListHead vma_list;
    196 
    197 	/** BO cache list */
    198 	drmMMListHead head;
    199 
    200 	/**
    201 	 * Boolean of whether this BO and its children have been included in
    202 	 * the current drm_intel_bufmgr_check_aperture_space() total.
    203 	 */
    204 	bool included_in_check_aperture;
    205 
    206 	/**
    207 	 * Boolean of whether this buffer has been used as a relocation
    208 	 * target and had its size accounted for, and thus can't have any
    209 	 * further relocations added to it.
    210 	 */
    211 	bool used_as_reloc_target;
    212 
    213 	/**
    214 	 * Boolean of whether we have encountered an error whilst building the relocation tree.
    215 	 */
    216 	bool has_error;
    217 
    218 	/**
    219 	 * Boolean of whether this buffer can be re-used
    220 	 */
    221 	bool reusable;
    222 
    223 	/**
    224 	 * Boolean of whether the GPU is definitely not accessing the buffer.
    225 	 *
    226 	 * This is only valid when reusable, since non-reusable
     227 	 * buffers are those that have been shared with other
    228 	 * processes, so we don't know their state.
    229 	 */
    230 	bool idle;
    231 
    232 	/**
    233 	 * Boolean of whether this buffer was allocated with userptr
    234 	 */
    235 	bool is_userptr;
    236 
    237 	/**
     238 	 * Size in bytes of this buffer and its relocation descendants.
    239 	 *
    240 	 * Used to avoid costly tree walking in
    241 	 * drm_intel_bufmgr_check_aperture in the common case.
    242 	 */
    243 	int reloc_tree_size;
    244 
    245 	/**
    246 	 * Number of potential fence registers required by this buffer and its
    247 	 * relocations.
    248 	 */
    249 	int reloc_tree_fences;
    250 
     251 	/** Whether we need to do the SW_FINISH ioctl on unmap. */
    252 	bool mapped_cpu_write;
    253 
    254 	uint32_t aub_offset;
    255 
    256 	drm_intel_aub_annotation *aub_annotations;
    257 	unsigned aub_annotation_count;
    258 };
    259 
    260 static unsigned int
    261 drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count);
    262 
    263 static unsigned int
    264 drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count);
    265 
    266 static int
    267 drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
    268 			    uint32_t * swizzle_mode);
    269 
    270 static int
    271 drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
    272 				     uint32_t tiling_mode,
    273 				     uint32_t stride);
    274 
    275 static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
    276 						      time_t time);
    277 
    278 static void drm_intel_gem_bo_unreference(drm_intel_bo *bo);
    279 
    280 static void drm_intel_gem_bo_free(drm_intel_bo *bo);
    281 
    282 static unsigned long
    283 drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size,
    284 			   uint32_t *tiling_mode)
    285 {
    286 	unsigned long min_size, max_size;
    287 	unsigned long i;
    288 
    289 	if (*tiling_mode == I915_TILING_NONE)
    290 		return size;
    291 
    292 	/* 965+ just need multiples of page size for tiling */
    293 	if (bufmgr_gem->gen >= 4)
    294 		return ROUND_UP_TO(size, 4096);
    295 
    296 	/* Older chips need powers of two, of at least 512k or 1M */
    297 	if (bufmgr_gem->gen == 3) {
    298 		min_size = 1024*1024;
    299 		max_size = 128*1024*1024;
    300 	} else {
    301 		min_size = 512*1024;
    302 		max_size = 64*1024*1024;
    303 	}
    304 
    305 	if (size > max_size) {
    306 		*tiling_mode = I915_TILING_NONE;
    307 		return size;
    308 	}
    309 
    310 	/* Do we need to allocate every page for the fence? */
    311 	if (bufmgr_gem->has_relaxed_fencing)
    312 		return ROUND_UP_TO(size, 4096);
    313 
    314 	for (i = min_size; i < size; i <<= 1)
    315 		;
    316 
    317 	return i;
    318 }
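
/*
 * Worked example (illustrative arithmetic only): on a gen3 part without
 * relaxed fencing, a 300 KiB X-tiled request is bumped to the 1 MiB
 * minimum fence size, and a 1.5 MiB request is rounded up to the next
 * power of two, 2 MiB.  On gen4+ the same requests are only rounded up
 * to a whole number of 4096-byte pages.
 */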
    319 
    320 /*
    321  * Round a given pitch up to the minimum required for X tiling on a
    322  * given chip.  We use 512 as the minimum to allow for a later tiling
    323  * change.
    324  */
    325 static unsigned long
    326 drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem,
    327 			    unsigned long pitch, uint32_t *tiling_mode)
    328 {
    329 	unsigned long tile_width;
    330 	unsigned long i;
    331 
    332 	/* If untiled, then just align it so that we can do rendering
    333 	 * to it with the 3D engine.
    334 	 */
    335 	if (*tiling_mode == I915_TILING_NONE)
    336 		return ALIGN(pitch, 64);
    337 
    338 	if (*tiling_mode == I915_TILING_X
    339 			|| (IS_915(bufmgr_gem->pci_device)
    340 			    && *tiling_mode == I915_TILING_Y))
    341 		tile_width = 512;
    342 	else
    343 		tile_width = 128;
    344 
    345 	/* 965 is flexible */
    346 	if (bufmgr_gem->gen >= 4)
    347 		return ROUND_UP_TO(pitch, tile_width);
    348 
    349 	/* The older hardware has a maximum pitch of 8192 with tiled
     350 	 * surfaces, so fall back to untiled if it's too large.
    351 	 */
    352 	if (pitch > 8192) {
    353 		*tiling_mode = I915_TILING_NONE;
    354 		return ALIGN(pitch, 64);
    355 	}
    356 
    357 	/* Pre-965 needs power of two tile width */
    358 	for (i = tile_width; i < pitch; i <<= 1)
    359 		;
    360 
    361 	return i;
    362 }
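
/*
 * Worked example (illustrative arithmetic only): an X-tiled surface
 * 1366 pixels wide at 4 bytes per pixel needs a 5464-byte pitch.  On
 * gen4+ this is rounded up to the next multiple of the 512-byte tile
 * width, 5632 bytes; on pre-965 hardware it is rounded up to the next
 * power of two, 8192 bytes.
 */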
    363 
    364 static struct drm_intel_gem_bo_bucket *
    365 drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
    366 				 unsigned long size)
    367 {
    368 	int i;
    369 
    370 	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
    371 		struct drm_intel_gem_bo_bucket *bucket =
    372 		    &bufmgr_gem->cache_bucket[i];
    373 		if (bucket->size >= size) {
    374 			return bucket;
    375 		}
    376 	}
    377 
    378 	return NULL;
    379 }
    380 
    381 static void
    382 drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
    383 {
    384 	int i, j;
    385 
    386 	for (i = 0; i < bufmgr_gem->exec_count; i++) {
    387 		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
    388 		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
    389 
    390 		if (bo_gem->relocs == NULL) {
    391 			DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle,
    392 			    bo_gem->name);
    393 			continue;
    394 		}
    395 
    396 		for (j = 0; j < bo_gem->reloc_count; j++) {
    397 			drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo;
    398 			drm_intel_bo_gem *target_gem =
    399 			    (drm_intel_bo_gem *) target_bo;
    400 
    401 			DBG("%2d: %d (%s)@0x%08llx -> "
    402 			    "%d (%s)@0x%08lx + 0x%08x\n",
    403 			    i,
    404 			    bo_gem->gem_handle, bo_gem->name,
    405 			    (unsigned long long)bo_gem->relocs[j].offset,
    406 			    target_gem->gem_handle,
    407 			    target_gem->name,
    408 			    target_bo->offset64,
    409 			    bo_gem->relocs[j].delta);
    410 		}
    411 	}
    412 }
    413 
    414 static inline void
    415 drm_intel_gem_bo_reference(drm_intel_bo *bo)
    416 {
    417 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
    418 
    419 	atomic_inc(&bo_gem->refcount);
    420 }
    421 
    422 /**
    423  * Adds the given buffer to the list of buffers to be validated (moved into the
    424  * appropriate memory type) with the next batch submission.
    425  *
    426  * If a buffer is validated multiple times in a batch submission, it ends up
    427  * with the intersection of the memory type flags and the union of the
    428  * access flags.
    429  */
    430 static void
    431 drm_intel_add_validate_buffer(drm_intel_bo *bo)
    432 {
    433 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
    434 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
    435 	int index;
    436 
    437 	if (bo_gem->validate_index != -1)
    438 		return;
    439 
    440 	/* Extend the array of validation entries as necessary. */
    441 	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
    442 		int new_size = bufmgr_gem->exec_size * 2;
    443 
    444 		if (new_size == 0)
    445 			new_size = 5;
    446 
    447 		bufmgr_gem->exec_objects =
    448 		    realloc(bufmgr_gem->exec_objects,
    449 			    sizeof(*bufmgr_gem->exec_objects) * new_size);
    450 		bufmgr_gem->exec_bos =
    451 		    realloc(bufmgr_gem->exec_bos,
    452 			    sizeof(*bufmgr_gem->exec_bos) * new_size);
    453 		bufmgr_gem->exec_size = new_size;
    454 	}
    455 
    456 	index = bufmgr_gem->exec_count;
    457 	bo_gem->validate_index = index;
    458 	/* Fill in array entry */
    459 	bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle;
    460 	bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count;
    461 	bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs;
    462 	bufmgr_gem->exec_objects[index].alignment = 0;
    463 	bufmgr_gem->exec_objects[index].offset = 0;
    464 	bufmgr_gem->exec_bos[index] = bo;
    465 	bufmgr_gem->exec_count++;
    466 }
    467 
    468 static void
    469 drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
    470 {
    471 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
    472 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
    473 	int index;
    474 
    475 	if (bo_gem->validate_index != -1) {
    476 		if (need_fence)
    477 			bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
    478 				EXEC_OBJECT_NEEDS_FENCE;
    479 		return;
    480 	}
    481 
    482 	/* Extend the array of validation entries as necessary. */
    483 	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
    484 		int new_size = bufmgr_gem->exec_size * 2;
    485 
    486 		if (new_size == 0)
    487 			new_size = 5;
    488 
    489 		bufmgr_gem->exec2_objects =
    490 			realloc(bufmgr_gem->exec2_objects,
    491 				sizeof(*bufmgr_gem->exec2_objects) * new_size);
    492 		bufmgr_gem->exec_bos =
    493 			realloc(bufmgr_gem->exec_bos,
    494 				sizeof(*bufmgr_gem->exec_bos) * new_size);
    495 		bufmgr_gem->exec_size = new_size;
    496 	}
    497 
    498 	index = bufmgr_gem->exec_count;
    499 	bo_gem->validate_index = index;
    500 	/* Fill in array entry */
    501 	bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle;
    502 	bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
    503 	bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
    504 	bufmgr_gem->exec2_objects[index].alignment = 0;
    505 	bufmgr_gem->exec2_objects[index].offset = 0;
    506 	bufmgr_gem->exec_bos[index] = bo;
    507 	bufmgr_gem->exec2_objects[index].flags = 0;
    508 	bufmgr_gem->exec2_objects[index].rsvd1 = 0;
    509 	bufmgr_gem->exec2_objects[index].rsvd2 = 0;
    510 	if (need_fence) {
    511 		bufmgr_gem->exec2_objects[index].flags |=
    512 			EXEC_OBJECT_NEEDS_FENCE;
    513 	}
    514 	bufmgr_gem->exec_count++;
    515 }
    516 
    517 #define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
    518 	sizeof(uint32_t))
    519 
    520 static void
    521 drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem,
    522 				      drm_intel_bo_gem *bo_gem)
    523 {
    524 	int size;
    525 
    526 	assert(!bo_gem->used_as_reloc_target);
    527 
     528 	/* The older chipsets are far less flexible in terms of tiling,
     529 	 * and require tiled buffers to be size-aligned in the aperture.
    530 	 * This means that in the worst possible case we will need a hole
    531 	 * twice as large as the object in order for it to fit into the
    532 	 * aperture. Optimal packing is for wimps.
    533 	 */
    534 	size = bo_gem->bo.size;
    535 	if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) {
    536 		int min_size;
    537 
    538 		if (bufmgr_gem->has_relaxed_fencing) {
    539 			if (bufmgr_gem->gen == 3)
    540 				min_size = 1024*1024;
    541 			else
    542 				min_size = 512*1024;
    543 
    544 			while (min_size < size)
    545 				min_size *= 2;
    546 		} else
    547 			min_size = size;
    548 
    549 		/* Account for worst-case alignment. */
    550 		size = 2 * min_size;
    551 	}
    552 
    553 	bo_gem->reloc_tree_size = size;
    554 }
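
/*
 * Worked example (illustrative arithmetic only): a 1.2 MiB X-tiled BO
 * on gen3 with relaxed fencing has min_size doubled from 1 MiB to 2 MiB
 * before the worst-case factor of two is applied, so its
 * reloc_tree_size is 4 MiB.  The same BO untiled, or on gen4+, would
 * simply account for its own 1.2 MiB.
 */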
    555 
    556 static int
    557 drm_intel_setup_reloc_list(drm_intel_bo *bo)
    558 {
    559 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
    560 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
    561 	unsigned int max_relocs = bufmgr_gem->max_relocs;
    562 
    563 	if (bo->size / 4 < max_relocs)
    564 		max_relocs = bo->size / 4;
    565 
    566 	bo_gem->relocs = malloc(max_relocs *
    567 				sizeof(struct drm_i915_gem_relocation_entry));
    568 	bo_gem->reloc_target_info = malloc(max_relocs *
    569 					   sizeof(drm_intel_reloc_target));
    570 	if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) {
    571 		bo_gem->has_error = true;
    572 
    573 		free (bo_gem->relocs);
    574 		bo_gem->relocs = NULL;
    575 
    576 		free (bo_gem->reloc_target_info);
    577 		bo_gem->reloc_target_info = NULL;
    578 
    579 		return 1;
    580 	}
    581 
    582 	return 0;
    583 }
    584 
    585 static int
    586 drm_intel_gem_bo_busy(drm_intel_bo *bo)
    587 {
    588 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
    589 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
    590 	struct drm_i915_gem_busy busy;
    591 	int ret;
    592 
    593 	if (bo_gem->reusable && bo_gem->idle)
    594 		return false;
    595 
    596 	VG_CLEAR(busy);
    597 	busy.handle = bo_gem->gem_handle;
    598 
    599 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
    600 	if (ret == 0) {
    601 		bo_gem->idle = !busy.busy;
    602 		return busy.busy;
    603 	} else {
    604 		return false;
    605 	}
    607 }
    608 
    609 static int
    610 drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem,
    611 				  drm_intel_bo_gem *bo_gem, int state)
    612 {
    613 	struct drm_i915_gem_madvise madv;
    614 
    615 	VG_CLEAR(madv);
    616 	madv.handle = bo_gem->gem_handle;
    617 	madv.madv = state;
    618 	madv.retained = 1;
    619 	drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
    620 
    621 	return madv.retained;
    622 }
    623 
    624 static int
    625 drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv)
    626 {
    627 	return drm_intel_gem_bo_madvise_internal
    628 		((drm_intel_bufmgr_gem *) bo->bufmgr,
    629 		 (drm_intel_bo_gem *) bo,
    630 		 madv);
    631 }
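
/*
 * Sketch of the purgeable-BO protocol that drm_intel_bo_madvise()
 * exposes (illustrative only; "bo" is assumed to be an idle buffer).
 * Callers mark idle buffers DONTNEED so the kernel may reclaim their
 * pages under memory pressure, and must check the retained result
 * before trusting the contents again:
 *
 *	drm_intel_bo_madvise(bo, I915_MADV_DONTNEED);
 *	... some time later ...
 *	if (!drm_intel_bo_madvise(bo, I915_MADV_WILLNEED)) {
 *		... pages were purged, contents are gone: reallocate
 *		    or repopulate the buffer ...
 *	}
 *
 * This is the same dance the BO cache in this file performs; see
 * drm_intel_gem_bo_unreference_final() and
 * drm_intel_gem_bo_alloc_internal().
 */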
    632 
    633 /* drop the oldest entries that have been purged by the kernel */
    634 static void
    635 drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem,
    636 				    struct drm_intel_gem_bo_bucket *bucket)
    637 {
    638 	while (!DRMLISTEMPTY(&bucket->head)) {
    639 		drm_intel_bo_gem *bo_gem;
    640 
    641 		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
    642 				      bucket->head.next, head);
    643 		if (drm_intel_gem_bo_madvise_internal
    644 		    (bufmgr_gem, bo_gem, I915_MADV_DONTNEED))
    645 			break;
    646 
    647 		DRMLISTDEL(&bo_gem->head);
    648 		drm_intel_gem_bo_free(&bo_gem->bo);
    649 	}
    650 }
    651 
    652 static drm_intel_bo *
    653 drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
    654 				const char *name,
    655 				unsigned long size,
    656 				unsigned long flags,
    657 				uint32_t tiling_mode,
    658 				unsigned long stride)
    659 {
    660 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
    661 	drm_intel_bo_gem *bo_gem;
    662 	unsigned int page_size = getpagesize();
    663 	int ret;
    664 	struct drm_intel_gem_bo_bucket *bucket;
    665 	bool alloc_from_cache;
    666 	unsigned long bo_size;
    667 	bool for_render = false;
    668 
    669 	if (flags & BO_ALLOC_FOR_RENDER)
    670 		for_render = true;
    671 
    672 	/* Round the allocated size up to a power of two number of pages. */
    673 	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);
    674 
    675 	/* If we don't have caching at this size, don't actually round the
    676 	 * allocation up.
    677 	 */
    678 	if (bucket == NULL) {
    679 		bo_size = size;
    680 		if (bo_size < page_size)
    681 			bo_size = page_size;
    682 	} else {
    683 		bo_size = bucket->size;
    684 	}
    685 
    686 	pthread_mutex_lock(&bufmgr_gem->lock);
    687 	/* Get a buffer out of the cache if available */
    688 retry:
    689 	alloc_from_cache = false;
    690 	if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) {
    691 		if (for_render) {
    692 			/* Allocate new render-target BOs from the tail (MRU)
    693 			 * of the list, as it will likely be hot in the GPU
    694 			 * cache and in the aperture for us.
    695 			 */
    696 			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
    697 					      bucket->head.prev, head);
    698 			DRMLISTDEL(&bo_gem->head);
    699 			alloc_from_cache = true;
    700 		} else {
    701 			/* For non-render-target BOs (where we're probably
    702 			 * going to map it first thing in order to fill it
    703 			 * with data), check if the last BO in the cache is
    704 			 * unbusy, and only reuse in that case. Otherwise,
    705 			 * allocating a new buffer is probably faster than
    706 			 * waiting for the GPU to finish.
    707 			 */
    708 			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
    709 					      bucket->head.next, head);
    710 			if (!drm_intel_gem_bo_busy(&bo_gem->bo)) {
    711 				alloc_from_cache = true;
    712 				DRMLISTDEL(&bo_gem->head);
    713 			}
    714 		}
    715 
    716 		if (alloc_from_cache) {
    717 			if (!drm_intel_gem_bo_madvise_internal
    718 			    (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
    719 				drm_intel_gem_bo_free(&bo_gem->bo);
    720 				drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem,
    721 								    bucket);
    722 				goto retry;
    723 			}
    724 
    725 			if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
    726 								 tiling_mode,
    727 								 stride)) {
    728 				drm_intel_gem_bo_free(&bo_gem->bo);
    729 				goto retry;
    730 			}
    731 		}
    732 	}
    733 	pthread_mutex_unlock(&bufmgr_gem->lock);
    734 
    735 	if (!alloc_from_cache) {
    736 		struct drm_i915_gem_create create;
    737 
    738 		bo_gem = calloc(1, sizeof(*bo_gem));
    739 		if (!bo_gem)
    740 			return NULL;
    741 
    742 		bo_gem->bo.size = bo_size;
    743 
    744 		VG_CLEAR(create);
    745 		create.size = bo_size;
    746 
    747 		ret = drmIoctl(bufmgr_gem->fd,
    748 			       DRM_IOCTL_I915_GEM_CREATE,
    749 			       &create);
    750 		bo_gem->gem_handle = create.handle;
    751 		bo_gem->bo.handle = bo_gem->gem_handle;
    752 		if (ret != 0) {
    753 			free(bo_gem);
    754 			return NULL;
    755 		}
    756 		bo_gem->bo.bufmgr = bufmgr;
    757 
    758 		bo_gem->tiling_mode = I915_TILING_NONE;
    759 		bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
    760 		bo_gem->stride = 0;
    761 
     762 		/* drm_intel_gem_bo_free() calls DRMLISTDEL() on the as-yet
     763 		   uninitialized vma_list, so initialize the list heads first. */
    764 		DRMINITLISTHEAD(&bo_gem->name_list);
    765 		DRMINITLISTHEAD(&bo_gem->vma_list);
    766 		if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
    767 							 tiling_mode,
    768 							 stride)) {
    769 		    drm_intel_gem_bo_free(&bo_gem->bo);
    770 		    return NULL;
    771 		}
    772 	}
    773 
    774 	bo_gem->name = name;
    775 	atomic_set(&bo_gem->refcount, 1);
    776 	bo_gem->validate_index = -1;
    777 	bo_gem->reloc_tree_fences = 0;
    778 	bo_gem->used_as_reloc_target = false;
    779 	bo_gem->has_error = false;
    780 	bo_gem->reusable = true;
    781 	bo_gem->aub_annotations = NULL;
    782 	bo_gem->aub_annotation_count = 0;
    783 
    784 	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
    785 
    786 	DBG("bo_create: buf %d (%s) %ldb\n",
    787 	    bo_gem->gem_handle, bo_gem->name, size);
    788 
    789 	return &bo_gem->bo;
    790 }
    791 
    792 static drm_intel_bo *
    793 drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
    794 				  const char *name,
    795 				  unsigned long size,
    796 				  unsigned int alignment)
    797 {
    798 	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
    799 					       BO_ALLOC_FOR_RENDER,
    800 					       I915_TILING_NONE, 0);
    801 }
    802 
    803 static drm_intel_bo *
    804 drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr,
    805 		       const char *name,
    806 		       unsigned long size,
    807 		       unsigned int alignment)
    808 {
    809 	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0,
    810 					       I915_TILING_NONE, 0);
    811 }
    812 
    813 static drm_intel_bo *
    814 drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
    815 			     int x, int y, int cpp, uint32_t *tiling_mode,
    816 			     unsigned long *pitch, unsigned long flags)
    817 {
    818 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
    819 	unsigned long size, stride;
    820 	uint32_t tiling;
    821 
    822 	do {
    823 		unsigned long aligned_y, height_alignment;
    824 
    825 		tiling = *tiling_mode;
    826 
    827 		/* If we're tiled, our allocations are in 8 or 32-row blocks,
    828 		 * so failure to align our height means that we won't allocate
    829 		 * enough pages.
    830 		 *
    831 		 * If we're untiled, we still have to align to 2 rows high
    832 		 * because the data port accesses 2x2 blocks even if the
    833 		 * bottom row isn't to be rendered, so failure to align means
    834 		 * we could walk off the end of the GTT and fault.  This is
    835 		 * documented on 965, and may be the case on older chipsets
    836 		 * too so we try to be careful.
    837 		 */
    838 		aligned_y = y;
    839 		height_alignment = 2;
    840 
    841 		if ((bufmgr_gem->gen == 2) && tiling != I915_TILING_NONE)
    842 			height_alignment = 16;
    843 		else if (tiling == I915_TILING_X
    844 			|| (IS_915(bufmgr_gem->pci_device)
    845 			    && tiling == I915_TILING_Y))
    846 			height_alignment = 8;
    847 		else if (tiling == I915_TILING_Y)
    848 			height_alignment = 32;
    849 		aligned_y = ALIGN(y, height_alignment);
    850 
    851 		stride = x * cpp;
    852 		stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode);
    853 		size = stride * aligned_y;
    854 		size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode);
    855 	} while (*tiling_mode != tiling);
    856 	*pitch = stride;
    857 
    858 	if (tiling == I915_TILING_NONE)
    859 		stride = 0;
    860 
    861 	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags,
    862 					       tiling, stride);
    863 }
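
/*
 * Minimal usage sketch (assumes an already-initialized bufmgr and a
 * 1024x768, 4-byte-per-pixel surface; not taken from a real driver):
 *
 *	uint32_t tiling = I915_TILING_X;
 *	unsigned long pitch;
 *	drm_intel_bo *bo;
 *
 *	bo = drm_intel_bo_alloc_tiled(bufmgr, "surface", 1024, 768, 4,
 *				      &tiling, &pitch, 0);
 *
 * On return, tiling may have been demoted to I915_TILING_NONE if the
 * request could not be tiled (for instance the pre-965 pitch limit
 * above), and pitch holds the stride that was actually allocated.
 */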
    864 
    865 static drm_intel_bo *
    866 drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
    867 				const char *name,
    868 				void *addr,
    869 				uint32_t tiling_mode,
    870 				uint32_t stride,
    871 				unsigned long size,
    872 				unsigned long flags)
    873 {
    874 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
    875 	drm_intel_bo_gem *bo_gem;
    876 	int ret;
    877 	struct drm_i915_gem_userptr userptr;
    878 
    879 	/* Tiling with userptr surfaces is not supported
     880 	 * on all hardware, so refuse it for the time being.
    881 	 */
    882 	if (tiling_mode != I915_TILING_NONE)
    883 		return NULL;
    884 
    885 	bo_gem = calloc(1, sizeof(*bo_gem));
    886 	if (!bo_gem)
    887 		return NULL;
    888 
    889 	bo_gem->bo.size = size;
    890 
    891 	VG_CLEAR(userptr);
    892 	userptr.user_ptr = (__u64)((unsigned long)addr);
    893 	userptr.user_size = size;
    894 	userptr.flags = flags;
    895 
    896 	ret = drmIoctl(bufmgr_gem->fd,
    897 			DRM_IOCTL_I915_GEM_USERPTR,
    898 			&userptr);
    899 	if (ret != 0) {
    900 		DBG("bo_create_userptr: "
    901 		    "ioctl failed with user ptr %p size 0x%lx, "
    902 		    "user flags 0x%lx\n", addr, size, flags);
    903 		free(bo_gem);
    904 		return NULL;
    905 	}
    906 
    907 	bo_gem->gem_handle = userptr.handle;
    908 	bo_gem->bo.handle = bo_gem->gem_handle;
    909 	bo_gem->bo.bufmgr    = bufmgr;
    910 	bo_gem->is_userptr   = true;
    911 	bo_gem->bo.virtual   = addr;
    912 	/* Save the address provided by user */
    913 	bo_gem->user_virtual = addr;
    914 	bo_gem->tiling_mode  = I915_TILING_NONE;
    915 	bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
    916 	bo_gem->stride       = 0;
    917 
    918 	DRMINITLISTHEAD(&bo_gem->name_list);
    919 	DRMINITLISTHEAD(&bo_gem->vma_list);
    920 
    921 	bo_gem->name = name;
    922 	atomic_set(&bo_gem->refcount, 1);
    923 	bo_gem->validate_index = -1;
    924 	bo_gem->reloc_tree_fences = 0;
    925 	bo_gem->used_as_reloc_target = false;
    926 	bo_gem->has_error = false;
    927 	bo_gem->reusable = false;
    928 
    929 	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
    930 
    931 	DBG("bo_create_userptr: "
    932 	    "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n",
    933 		addr, bo_gem->gem_handle, bo_gem->name,
    934 		size, stride, tiling_mode);
    935 
    936 	return &bo_gem->bo;
    937 }
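
/*
 * Hedged usage sketch for the userptr path (assumes the public
 * drm_intel_bo_alloc_userptr() wrapper and a page-aligned allocation;
 * the kernel rejects unaligned ranges):
 *
 *	size_t sz = 16 * 4096;
 *	void *ptr;
 *
 *	if (posix_memalign(&ptr, 4096, sz) == 0) {
 *		drm_intel_bo *bo = drm_intel_bo_alloc_userptr(bufmgr,
 *			"wrapped malloc", ptr, I915_TILING_NONE, 0, sz, 0);
 *		...
 *	}
 *
 * BOs created here are never placed in the reuse cache (reusable is
 * false), the GTT-map and pread/pwrite paths below reject them with
 * -EINVAL, and drm_intel_bo_map() simply hands back the user's pointer.
 */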
    938 
    939 /**
    940  * Returns a drm_intel_bo wrapping the given buffer object handle.
    941  *
    942  * This can be used when one application needs to pass a buffer object
    943  * to another.
    944  */
    945 drm_public drm_intel_bo *
    946 drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr,
    947 				  const char *name,
    948 				  unsigned int handle)
    949 {
    950 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
    951 	drm_intel_bo_gem *bo_gem;
    952 	int ret;
    953 	struct drm_gem_open open_arg;
    954 	struct drm_i915_gem_get_tiling get_tiling;
    955 	drmMMListHead *list;
    956 
     957 	/* At the moment most applications only have a few named bos.
     958 	 * For instance, in a DRI client only the render buffers passed
     959 	 * between X and the client are named, and since X returns the
     960 	 * alternating names for the front/back buffer, a linear search
     961 	 * provides a sufficiently fast match.
    962 	 */
    963 	pthread_mutex_lock(&bufmgr_gem->lock);
    964 	for (list = bufmgr_gem->named.next;
    965 	     list != &bufmgr_gem->named;
    966 	     list = list->next) {
    967 		bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list);
    968 		if (bo_gem->global_name == handle) {
    969 			drm_intel_gem_bo_reference(&bo_gem->bo);
    970 			pthread_mutex_unlock(&bufmgr_gem->lock);
    971 			return &bo_gem->bo;
    972 		}
    973 	}
    974 
    975 	VG_CLEAR(open_arg);
    976 	open_arg.name = handle;
    977 	ret = drmIoctl(bufmgr_gem->fd,
    978 		       DRM_IOCTL_GEM_OPEN,
    979 		       &open_arg);
    980 	if (ret != 0) {
    981 		DBG("Couldn't reference %s handle 0x%08x: %s\n",
    982 		    name, handle, strerror(errno));
    983 		pthread_mutex_unlock(&bufmgr_gem->lock);
    984 		return NULL;
    985 	}
     986 	/* Now see if someone has used a prime handle to get this
     987 	 * object from the kernel before, by looking through the list
     988 	 * again for a matching gem_handle.
     989 	 */
    990 	for (list = bufmgr_gem->named.next;
    991 	     list != &bufmgr_gem->named;
    992 	     list = list->next) {
    993 		bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list);
    994 		if (bo_gem->gem_handle == open_arg.handle) {
    995 			drm_intel_gem_bo_reference(&bo_gem->bo);
    996 			pthread_mutex_unlock(&bufmgr_gem->lock);
    997 			return &bo_gem->bo;
    998 		}
    999 	}
   1000 
   1001 	bo_gem = calloc(1, sizeof(*bo_gem));
   1002 	if (!bo_gem) {
   1003 		pthread_mutex_unlock(&bufmgr_gem->lock);
   1004 		return NULL;
   1005 	}
   1006 
   1007 	bo_gem->bo.size = open_arg.size;
   1008 	bo_gem->bo.offset = 0;
   1009 	bo_gem->bo.offset64 = 0;
   1010 	bo_gem->bo.virtual = NULL;
   1011 	bo_gem->bo.bufmgr = bufmgr;
   1012 	bo_gem->name = name;
   1013 	atomic_set(&bo_gem->refcount, 1);
   1014 	bo_gem->validate_index = -1;
   1015 	bo_gem->gem_handle = open_arg.handle;
   1016 	bo_gem->bo.handle = open_arg.handle;
   1017 	bo_gem->global_name = handle;
   1018 	bo_gem->reusable = false;
   1019 
   1020 	VG_CLEAR(get_tiling);
   1021 	get_tiling.handle = bo_gem->gem_handle;
   1022 	ret = drmIoctl(bufmgr_gem->fd,
   1023 		       DRM_IOCTL_I915_GEM_GET_TILING,
   1024 		       &get_tiling);
   1025 	if (ret != 0) {
   1026 		drm_intel_gem_bo_unreference(&bo_gem->bo);
   1027 		pthread_mutex_unlock(&bufmgr_gem->lock);
   1028 		return NULL;
   1029 	}
   1030 	bo_gem->tiling_mode = get_tiling.tiling_mode;
   1031 	bo_gem->swizzle_mode = get_tiling.swizzle_mode;
   1032 	/* XXX stride is unknown */
   1033 	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
   1034 
   1035 	DRMINITLISTHEAD(&bo_gem->vma_list);
   1036 	DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
   1037 	pthread_mutex_unlock(&bufmgr_gem->lock);
   1038 	DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);
   1039 
   1040 	return &bo_gem->bo;
   1041 }
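
/*
 * Illustrative flink round trip (error handling elided; "bo" and
 * "bufmgr" are assumed).  Process A publishes a global name, process B
 * opens it:
 *
 *	uint32_t name;
 *	drm_intel_bo_flink(bo, &name);	// in A; pass "name" to B somehow
 *
 *	drm_intel_bo *shared =		// in B
 *		drm_intel_bo_gem_create_from_name(bufmgr, "shared", name);
 *
 * The named-BO list walked above ensures that opening the same name
 * twice in one process yields the same drm_intel_bo rather than two
 * userspace wrappers around one kernel object.
 */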
   1042 
   1043 static void
   1044 drm_intel_gem_bo_free(drm_intel_bo *bo)
   1045 {
   1046 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
   1047 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   1048 	struct drm_gem_close close;
   1049 	int ret;
   1050 
   1051 	DRMLISTDEL(&bo_gem->vma_list);
   1052 	if (bo_gem->mem_virtual) {
   1053 		VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0));
   1054 		drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
   1055 		bufmgr_gem->vma_count--;
   1056 	}
   1057 	if (bo_gem->gtt_virtual) {
   1058 		drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
   1059 		bufmgr_gem->vma_count--;
   1060 	}
   1061 
   1062 	/* Close this object */
   1063 	VG_CLEAR(close);
   1064 	close.handle = bo_gem->gem_handle;
   1065 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close);
   1066 	if (ret != 0) {
   1067 		DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
   1068 		    bo_gem->gem_handle, bo_gem->name, strerror(errno));
   1069 	}
   1070 	free(bo_gem->aub_annotations);
   1071 	free(bo);
   1072 }
   1073 
   1074 static void
   1075 drm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo)
   1076 {
   1077 #if HAVE_VALGRIND
   1078 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   1079 
   1080 	if (bo_gem->mem_virtual)
   1081 		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size);
   1082 
   1083 	if (bo_gem->gtt_virtual)
   1084 		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size);
   1085 #endif
   1086 }
   1087 
   1088 /** Frees all cached buffers significantly older than @time. */
   1089 static void
   1090 drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
   1091 {
   1092 	int i;
   1093 
   1094 	if (bufmgr_gem->time == time)
   1095 		return;
   1096 
   1097 	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
   1098 		struct drm_intel_gem_bo_bucket *bucket =
   1099 		    &bufmgr_gem->cache_bucket[i];
   1100 
   1101 		while (!DRMLISTEMPTY(&bucket->head)) {
   1102 			drm_intel_bo_gem *bo_gem;
   1103 
   1104 			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
   1105 					      bucket->head.next, head);
   1106 			if (time - bo_gem->free_time <= 1)
   1107 				break;
   1108 
   1109 			DRMLISTDEL(&bo_gem->head);
   1110 
   1111 			drm_intel_gem_bo_free(&bo_gem->bo);
   1112 		}
   1113 	}
   1114 
   1115 	bufmgr_gem->time = time;
   1116 }
   1117 
   1118 static void drm_intel_gem_bo_purge_vma_cache(drm_intel_bufmgr_gem *bufmgr_gem)
   1119 {
   1120 	int limit;
   1121 
   1122 	DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__,
   1123 	    bufmgr_gem->vma_count, bufmgr_gem->vma_open, bufmgr_gem->vma_max);
   1124 
   1125 	if (bufmgr_gem->vma_max < 0)
   1126 		return;
   1127 
   1128 	/* We may need to evict a few entries in order to create new mmaps */
   1129 	limit = bufmgr_gem->vma_max - 2*bufmgr_gem->vma_open;
   1130 	if (limit < 0)
   1131 		limit = 0;
   1132 
   1133 	while (bufmgr_gem->vma_count > limit) {
   1134 		drm_intel_bo_gem *bo_gem;
   1135 
   1136 		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
   1137 				      bufmgr_gem->vma_cache.next,
   1138 				      vma_list);
   1139 		assert(bo_gem->map_count == 0);
   1140 		DRMLISTDELINIT(&bo_gem->vma_list);
   1141 
   1142 		if (bo_gem->mem_virtual) {
   1143 			drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
   1144 			bo_gem->mem_virtual = NULL;
   1145 			bufmgr_gem->vma_count--;
   1146 		}
   1147 		if (bo_gem->gtt_virtual) {
   1148 			drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
   1149 			bo_gem->gtt_virtual = NULL;
   1150 			bufmgr_gem->vma_count--;
   1151 		}
   1152 	}
   1153 }
   1154 
   1155 static void drm_intel_gem_bo_close_vma(drm_intel_bufmgr_gem *bufmgr_gem,
   1156 				       drm_intel_bo_gem *bo_gem)
   1157 {
   1158 	bufmgr_gem->vma_open--;
   1159 	DRMLISTADDTAIL(&bo_gem->vma_list, &bufmgr_gem->vma_cache);
   1160 	if (bo_gem->mem_virtual)
   1161 		bufmgr_gem->vma_count++;
   1162 	if (bo_gem->gtt_virtual)
   1163 		bufmgr_gem->vma_count++;
   1164 	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
   1165 }
   1166 
   1167 static void drm_intel_gem_bo_open_vma(drm_intel_bufmgr_gem *bufmgr_gem,
   1168 				      drm_intel_bo_gem *bo_gem)
   1169 {
   1170 	bufmgr_gem->vma_open++;
   1171 	DRMLISTDEL(&bo_gem->vma_list);
   1172 	if (bo_gem->mem_virtual)
   1173 		bufmgr_gem->vma_count--;
   1174 	if (bo_gem->gtt_virtual)
   1175 		bufmgr_gem->vma_count--;
   1176 	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
   1177 }
   1178 
   1179 static void
   1180 drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
   1181 {
   1182 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
   1183 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   1184 	struct drm_intel_gem_bo_bucket *bucket;
   1185 	int i;
   1186 
   1187 	/* Unreference all the target buffers */
   1188 	for (i = 0; i < bo_gem->reloc_count; i++) {
   1189 		if (bo_gem->reloc_target_info[i].bo != bo) {
   1190 			drm_intel_gem_bo_unreference_locked_timed(bo_gem->
   1191 								  reloc_target_info[i].bo,
   1192 								  time);
   1193 		}
   1194 	}
   1195 	bo_gem->reloc_count = 0;
   1196 	bo_gem->used_as_reloc_target = false;
   1197 
   1198 	DBG("bo_unreference final: %d (%s)\n",
   1199 	    bo_gem->gem_handle, bo_gem->name);
   1200 
   1201 	/* release memory associated with this object */
   1202 	if (bo_gem->reloc_target_info) {
   1203 		free(bo_gem->reloc_target_info);
   1204 		bo_gem->reloc_target_info = NULL;
   1205 	}
   1206 	if (bo_gem->relocs) {
   1207 		free(bo_gem->relocs);
   1208 		bo_gem->relocs = NULL;
   1209 	}
   1210 
   1211 	/* Clear any left-over mappings */
   1212 	if (bo_gem->map_count) {
   1213 		DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count);
   1214 		bo_gem->map_count = 0;
   1215 		drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
   1216 		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
   1217 	}
   1218 
   1219 	DRMLISTDEL(&bo_gem->name_list);
   1220 
   1221 	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
   1222 	/* Put the buffer into our internal cache for reuse if we can. */
   1223 	if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
   1224 	    drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
   1225 					      I915_MADV_DONTNEED)) {
   1226 		bo_gem->free_time = time;
   1227 
   1228 		bo_gem->name = NULL;
   1229 		bo_gem->validate_index = -1;
   1230 
   1231 		DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
   1232 	} else {
   1233 		drm_intel_gem_bo_free(bo);
   1234 	}
   1235 }
   1236 
   1237 static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
   1238 						      time_t time)
   1239 {
   1240 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   1241 
   1242 	assert(atomic_read(&bo_gem->refcount) > 0);
   1243 	if (atomic_dec_and_test(&bo_gem->refcount))
   1244 		drm_intel_gem_bo_unreference_final(bo, time);
   1245 }
   1246 
   1247 static void drm_intel_gem_bo_unreference(drm_intel_bo *bo)
   1248 {
   1249 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   1250 
   1251 	assert(atomic_read(&bo_gem->refcount) > 0);
   1252 
   1253 	if (atomic_add_unless(&bo_gem->refcount, -1, 1)) {
   1254 		drm_intel_bufmgr_gem *bufmgr_gem =
   1255 		    (drm_intel_bufmgr_gem *) bo->bufmgr;
   1256 		struct timespec time;
   1257 
   1258 		clock_gettime(CLOCK_MONOTONIC, &time);
   1259 
   1260 		pthread_mutex_lock(&bufmgr_gem->lock);
   1261 
   1262 		if (atomic_dec_and_test(&bo_gem->refcount)) {
   1263 			drm_intel_gem_bo_unreference_final(bo, time.tv_sec);
   1264 			drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec);
   1265 		}
   1266 
   1267 		pthread_mutex_unlock(&bufmgr_gem->lock);
   1268 	}
   1269 }
   1270 
   1271 static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
   1272 {
   1273 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
   1274 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   1275 	struct drm_i915_gem_set_domain set_domain;
   1276 	int ret;
   1277 
   1278 	if (bo_gem->is_userptr) {
   1279 		/* Return the same user ptr */
   1280 		bo->virtual = bo_gem->user_virtual;
   1281 		return 0;
   1282 	}
   1283 
   1284 	pthread_mutex_lock(&bufmgr_gem->lock);
   1285 
   1286 	if (bo_gem->map_count++ == 0)
   1287 		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
   1288 
   1289 	if (!bo_gem->mem_virtual) {
   1290 		struct drm_i915_gem_mmap mmap_arg;
   1291 
   1292 		DBG("bo_map: %d (%s), map_count=%d\n",
   1293 		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
   1294 
   1295 		VG_CLEAR(mmap_arg);
   1296 		mmap_arg.handle = bo_gem->gem_handle;
   1297 		mmap_arg.offset = 0;
   1298 		mmap_arg.size = bo->size;
   1299 		ret = drmIoctl(bufmgr_gem->fd,
   1300 			       DRM_IOCTL_I915_GEM_MMAP,
   1301 			       &mmap_arg);
   1302 		if (ret != 0) {
   1303 			ret = -errno;
   1304 			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
   1305 			    __FILE__, __LINE__, bo_gem->gem_handle,
   1306 			    bo_gem->name, strerror(errno));
   1307 			if (--bo_gem->map_count == 0)
   1308 				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
   1309 			pthread_mutex_unlock(&bufmgr_gem->lock);
   1310 			return ret;
   1311 		}
   1312 		VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
   1313 		bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
   1314 	}
   1315 	DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
   1316 	    bo_gem->mem_virtual);
   1317 	bo->virtual = bo_gem->mem_virtual;
   1318 
   1319 	VG_CLEAR(set_domain);
   1320 	set_domain.handle = bo_gem->gem_handle;
   1321 	set_domain.read_domains = I915_GEM_DOMAIN_CPU;
   1322 	if (write_enable)
   1323 		set_domain.write_domain = I915_GEM_DOMAIN_CPU;
   1324 	else
   1325 		set_domain.write_domain = 0;
   1326 	ret = drmIoctl(bufmgr_gem->fd,
   1327 		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
   1328 		       &set_domain);
   1329 	if (ret != 0) {
   1330 		DBG("%s:%d: Error setting to CPU domain %d: %s\n",
   1331 		    __FILE__, __LINE__, bo_gem->gem_handle,
   1332 		    strerror(errno));
   1333 	}
   1334 
   1335 	if (write_enable)
   1336 		bo_gem->mapped_cpu_write = true;
   1337 
   1338 	drm_intel_gem_bo_mark_mmaps_incoherent(bo);
   1339 	VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size));
   1340 	pthread_mutex_unlock(&bufmgr_gem->lock);
   1341 
   1342 	return 0;
   1343 }
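
/*
 * Typical CPU-map cycle through the public wrappers that land here
 * (illustrative; "bo", "data" and "data_size" are placeholders):
 *
 *	if (drm_intel_bo_map(bo, 1) == 0) {	// 1 = write enable
 *		memcpy(bo->virtual, data, data_size);
 *		drm_intel_bo_unmap(bo);
 *	}
 *
 * The map moves the BO to the CPU domain, so it stalls until the GPU is
 * done with the buffer; for streaming writes into busy or tiled buffers
 * the GTT-map or pwrite paths below are usually the better choice.
 */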
   1344 
   1345 static int
   1346 map_gtt(drm_intel_bo *bo)
   1347 {
   1348 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
   1349 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   1350 	int ret;
   1351 
   1352 	if (bo_gem->is_userptr)
   1353 		return -EINVAL;
   1354 
   1355 	if (bo_gem->map_count++ == 0)
   1356 		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
   1357 
   1358 	/* Get a mapping of the buffer if we haven't before. */
   1359 	if (bo_gem->gtt_virtual == NULL) {
   1360 		struct drm_i915_gem_mmap_gtt mmap_arg;
   1361 
   1362 		DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
   1363 		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
   1364 
   1365 		VG_CLEAR(mmap_arg);
   1366 		mmap_arg.handle = bo_gem->gem_handle;
   1367 
   1368 		/* Get the fake offset back... */
   1369 		ret = drmIoctl(bufmgr_gem->fd,
   1370 			       DRM_IOCTL_I915_GEM_MMAP_GTT,
   1371 			       &mmap_arg);
   1372 		if (ret != 0) {
   1373 			ret = -errno;
   1374 			DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
   1375 			    __FILE__, __LINE__,
   1376 			    bo_gem->gem_handle, bo_gem->name,
   1377 			    strerror(errno));
   1378 			if (--bo_gem->map_count == 0)
   1379 				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
   1380 			return ret;
   1381 		}
   1382 
   1383 		/* and mmap it */
   1384 		bo_gem->gtt_virtual = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
   1385 					       MAP_SHARED, bufmgr_gem->fd,
   1386 					       mmap_arg.offset);
   1387 		if (bo_gem->gtt_virtual == MAP_FAILED) {
   1388 			bo_gem->gtt_virtual = NULL;
   1389 			ret = -errno;
   1390 			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
   1391 			    __FILE__, __LINE__,
   1392 			    bo_gem->gem_handle, bo_gem->name,
   1393 			    strerror(errno));
   1394 			if (--bo_gem->map_count == 0)
   1395 				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
   1396 			return ret;
   1397 		}
   1398 	}
   1399 
   1400 	bo->virtual = bo_gem->gtt_virtual;
   1401 
   1402 	DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
   1403 	    bo_gem->gtt_virtual);
   1404 
   1405 	return 0;
   1406 }
   1407 
   1408 drm_public int
   1409 drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
   1410 {
   1411 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
   1412 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   1413 	struct drm_i915_gem_set_domain set_domain;
   1414 	int ret;
   1415 
   1416 	pthread_mutex_lock(&bufmgr_gem->lock);
   1417 
   1418 	ret = map_gtt(bo);
   1419 	if (ret) {
   1420 		pthread_mutex_unlock(&bufmgr_gem->lock);
   1421 		return ret;
   1422 	}
   1423 
   1424 	/* Now move it to the GTT domain so that the GPU and CPU
   1425 	 * caches are flushed and the GPU isn't actively using the
   1426 	 * buffer.
   1427 	 *
   1428 	 * The pagefault handler does this domain change for us when
   1429 	 * it has unbound the BO from the GTT, but it's up to us to
   1430 	 * tell it when we're about to use things if we had done
   1431 	 * rendering and it still happens to be bound to the GTT.
   1432 	 */
   1433 	VG_CLEAR(set_domain);
   1434 	set_domain.handle = bo_gem->gem_handle;
   1435 	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
   1436 	set_domain.write_domain = I915_GEM_DOMAIN_GTT;
   1437 	ret = drmIoctl(bufmgr_gem->fd,
   1438 		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
   1439 		       &set_domain);
   1440 	if (ret != 0) {
   1441 		DBG("%s:%d: Error setting domain %d: %s\n",
   1442 		    __FILE__, __LINE__, bo_gem->gem_handle,
   1443 		    strerror(errno));
   1444 	}
   1445 
   1446 	drm_intel_gem_bo_mark_mmaps_incoherent(bo);
   1447 	VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
   1448 	pthread_mutex_unlock(&bufmgr_gem->lock);
   1449 
   1450 	return 0;
   1451 }
   1452 
   1453 /**
   1454  * Performs a mapping of the buffer object like the normal GTT
   1455  * mapping, but avoids waiting for the GPU to be done reading from or
   1456  * rendering to the buffer.
   1457  *
   1458  * This is used in the implementation of GL_ARB_map_buffer_range: The
   1459  * user asks to create a buffer, then does a mapping, fills some
   1460  * space, runs a drawing command, then asks to map it again without
   1461  * synchronizing because it guarantees that it won't write over the
   1462  * data that the GPU is busy using (or, more specifically, that if it
   1463  * does write over the data, it acknowledges that rendering is
   1464  * undefined).
   1465  */
   1466 
   1467 drm_public int
   1468 drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo)
   1469 {
   1470 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
   1471 #ifdef HAVE_VALGRIND
   1472 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   1473 #endif
   1474 	int ret;
   1475 
   1476 	/* If the CPU cache isn't coherent with the GTT, then use a
   1477 	 * regular synchronized mapping.  The problem is that we don't
   1478 	 * track where the buffer was last used on the CPU side in
   1479 	 * terms of drm_intel_bo_map vs drm_intel_gem_bo_map_gtt, so
   1480 	 * we would potentially corrupt the buffer even when the user
   1481 	 * does reasonable things.
   1482 	 */
   1483 	if (!bufmgr_gem->has_llc)
   1484 		return drm_intel_gem_bo_map_gtt(bo);
   1485 
   1486 	pthread_mutex_lock(&bufmgr_gem->lock);
   1487 
   1488 	ret = map_gtt(bo);
   1489 	if (ret == 0) {
   1490 		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
   1491 		VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
   1492 	}
   1493 
   1494 	pthread_mutex_unlock(&bufmgr_gem->lock);
   1495 
   1496 	return ret;
   1497 }
   1498 
   1499 static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
   1500 {
   1501 	drm_intel_bufmgr_gem *bufmgr_gem;
   1502 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   1503 	int ret = 0;
   1504 
   1505 	if (bo == NULL)
   1506 		return 0;
   1507 
   1508 	if (bo_gem->is_userptr)
   1509 		return 0;
   1510 
   1511 	bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
   1512 
   1513 	pthread_mutex_lock(&bufmgr_gem->lock);
   1514 
   1515 	if (bo_gem->map_count <= 0) {
   1516 		DBG("attempted to unmap an unmapped bo\n");
   1517 		pthread_mutex_unlock(&bufmgr_gem->lock);
   1518 		/* Preserve the old behaviour of just treating this as a
   1519 		 * no-op rather than reporting the error.
   1520 		 */
   1521 		return 0;
   1522 	}
   1523 
   1524 	if (bo_gem->mapped_cpu_write) {
   1525 		struct drm_i915_gem_sw_finish sw_finish;
   1526 
   1527 		/* Cause a flush to happen if the buffer's pinned for
   1528 		 * scanout, so the results show up in a timely manner.
   1529 		 * Unlike GTT set domains, this only does work if the
    1530 		 * buffer is actually scanout-related.
   1531 		 */
   1532 		VG_CLEAR(sw_finish);
   1533 		sw_finish.handle = bo_gem->gem_handle;
   1534 		ret = drmIoctl(bufmgr_gem->fd,
   1535 			       DRM_IOCTL_I915_GEM_SW_FINISH,
   1536 			       &sw_finish);
   1537 		ret = ret == -1 ? -errno : 0;
   1538 
   1539 		bo_gem->mapped_cpu_write = false;
   1540 	}
   1541 
    1542 	/* We need to unmap after every invocation, as we cannot keep
    1543 	 * an open vma for every bo: that would exhaust the system
    1544 	 * limits and cause later failures.
   1545 	 */
   1546 	if (--bo_gem->map_count == 0) {
   1547 		drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
   1548 		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
   1549 		bo->virtual = NULL;
   1550 	}
   1551 	pthread_mutex_unlock(&bufmgr_gem->lock);
   1552 
   1553 	return ret;
   1554 }
   1555 
   1556 drm_public int
   1557 drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo)
   1558 {
   1559 	return drm_intel_gem_bo_unmap(bo);
   1560 }
   1561 
   1562 static int
   1563 drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset,
   1564 			 unsigned long size, const void *data)
   1565 {
   1566 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
   1567 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   1568 	struct drm_i915_gem_pwrite pwrite;
   1569 	int ret;
   1570 
   1571 	if (bo_gem->is_userptr)
   1572 		return -EINVAL;
   1573 
   1574 	VG_CLEAR(pwrite);
   1575 	pwrite.handle = bo_gem->gem_handle;
   1576 	pwrite.offset = offset;
   1577 	pwrite.size = size;
   1578 	pwrite.data_ptr = (uint64_t) (uintptr_t) data;
   1579 	ret = drmIoctl(bufmgr_gem->fd,
   1580 		       DRM_IOCTL_I915_GEM_PWRITE,
   1581 		       &pwrite);
   1582 	if (ret != 0) {
   1583 		ret = -errno;
   1584 		DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
   1585 		    __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
   1586 		    (int)size, strerror(errno));
   1587 	}
   1588 
   1589 	return ret;
   1590 }
   1591 
   1592 static int
   1593 drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id)
   1594 {
   1595 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
   1596 	struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id;
   1597 	int ret;
   1598 
   1599 	VG_CLEAR(get_pipe_from_crtc_id);
   1600 	get_pipe_from_crtc_id.crtc_id = crtc_id;
   1601 	ret = drmIoctl(bufmgr_gem->fd,
   1602 		       DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID,
   1603 		       &get_pipe_from_crtc_id);
   1604 	if (ret != 0) {
   1605 		/* We return -1 here to signal that we don't
   1606 		 * know which pipe is associated with this crtc.
   1607 		 * This lets the caller know that this information
   1608 		 * isn't available; using the wrong pipe for
   1609 		 * vblank waiting can cause the chipset to lock up
   1610 		 */
   1611 		return -1;
   1612 	}
   1613 
   1614 	return get_pipe_from_crtc_id.pipe;
   1615 }
   1616 
   1617 static int
   1618 drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset,
   1619 			     unsigned long size, void *data)
   1620 {
   1621 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
   1622 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   1623 	struct drm_i915_gem_pread pread;
   1624 	int ret;
   1625 
   1626 	if (bo_gem->is_userptr)
   1627 		return -EINVAL;
   1628 
   1629 	VG_CLEAR(pread);
   1630 	pread.handle = bo_gem->gem_handle;
   1631 	pread.offset = offset;
   1632 	pread.size = size;
   1633 	pread.data_ptr = (uint64_t) (uintptr_t) data;
   1634 	ret = drmIoctl(bufmgr_gem->fd,
   1635 		       DRM_IOCTL_I915_GEM_PREAD,
   1636 		       &pread);
   1637 	if (ret != 0) {
   1638 		ret = -errno;
   1639 		DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
   1640 		    __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
   1641 		    (int)size, strerror(errno));
   1642 	}
   1643 
   1644 	return ret;
   1645 }
   1646 
   1647 /** Waits for all GPU rendering with the object to have completed. */
   1648 static void
   1649 drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo)
   1650 {
   1651 	drm_intel_gem_bo_start_gtt_access(bo, 1);
   1652 }
   1653 
   1654 /**
   1655  * Waits on a BO for the given amount of time.
   1656  *
   1657  * @bo: buffer object to wait for
   1658  * @timeout_ns: amount of time to wait in nanoseconds.
   1659  *   If value is less than 0, an infinite wait will occur.
   1660  *
    1661  * Returns 0 if the wait was successful, i.e. the last batch referencing the
    1662  * object has completed within the allotted time. Otherwise a negative return
    1663  * value describes the error. Of particular interest is -ETIME, returned when
    1664  * the wait has timed out before the object became idle.
    1665  *
    1666  * Similar to drm_intel_gem_bo_wait_rendering except a timeout parameter allows
    1667  * the operation to give up after a certain amount of time. Another subtle
    1668  * difference is in the internal locking semantics: this variant does not hold
    1669  * the lock for the duration of the wait, which makes the wait subject to a
    1670  * larger userspace race window.
    1671  *
    1672  * The implementation shall wait until the object is no longer actively
    1673  * referenced within a batch buffer at the time of the call. The wait does not
    1674  * guard against the buffer being re-issued via another thread or a flinked
    1675  * handle. Userspace must make sure this race does not occur if such precision
    1676  * is important.
   1677  */
   1678 drm_public int
   1679 drm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns)
   1680 {
   1681 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
   1682 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   1683 	struct drm_i915_gem_wait wait;
   1684 	int ret;
   1685 
   1686 	if (!bufmgr_gem->has_wait_timeout) {
   1687 		DBG("%s:%d: Timed wait is not supported. Falling back to "
   1688 		    "infinite wait\n", __FILE__, __LINE__);
   1689 		if (timeout_ns) {
   1690 			drm_intel_gem_bo_wait_rendering(bo);
   1691 			return 0;
   1692 		} else {
   1693 			return drm_intel_gem_bo_busy(bo) ? -ETIME : 0;
   1694 		}
   1695 	}
   1696 
   1697 	wait.bo_handle = bo_gem->gem_handle;
   1698 	wait.timeout_ns = timeout_ns;
   1699 	wait.flags = 0;
   1700 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
   1701 	if (ret == -1)
   1702 		return -errno;
   1703 
   1704 	return ret;
   1705 }
   1706 
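/*
 * Illustrative usage sketch for the timed wait above (not part of the
 * original file; assumes the caller already owns a valid drm_intel_bo
 * named "bo" that was referenced by a previously flushed batch):
 *
 *	int err = drm_intel_gem_bo_wait(bo, 2000000000LL); // up to 2 seconds
 *	if (err == -ETIME)
 *		fprintf(stderr, "object still busy after 2s\n");
 *	else if (err < 0)
 *		fprintf(stderr, "wait failed: %s\n", strerror(-err));
 */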
   1707 /**
   1708  * Sets the object to the GTT read and possibly write domain, used by the X
   1709  * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt().
   1710  *
   1711  * In combination with drm_intel_gem_bo_pin() and manual fence management, we
   1712  * can do tiled pixmaps this way.
   1713  */
   1714 drm_public void
   1715 drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
   1716 {
   1717 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
   1718 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   1719 	struct drm_i915_gem_set_domain set_domain;
   1720 	int ret;
   1721 
   1722 	VG_CLEAR(set_domain);
   1723 	set_domain.handle = bo_gem->gem_handle;
   1724 	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
   1725 	set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0;
   1726 	ret = drmIoctl(bufmgr_gem->fd,
   1727 		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
   1728 		       &set_domain);
   1729 	if (ret != 0) {
   1730 		DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
   1731 		    __FILE__, __LINE__, bo_gem->gem_handle,
   1732 		    set_domain.read_domains, set_domain.write_domain,
   1733 		    strerror(errno));
   1734 	}
   1735 }
   1736 
   1737 static void
   1738 drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
   1739 {
   1740 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
   1741 	int i;
   1742 
   1743 	free(bufmgr_gem->exec2_objects);
   1744 	free(bufmgr_gem->exec_objects);
   1745 	free(bufmgr_gem->exec_bos);
   1746 	free(bufmgr_gem->aub_filename);
   1747 
   1748 	pthread_mutex_destroy(&bufmgr_gem->lock);
   1749 
   1750 	/* Free any cached buffer objects we were going to reuse */
   1751 	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
   1752 		struct drm_intel_gem_bo_bucket *bucket =
   1753 		    &bufmgr_gem->cache_bucket[i];
   1754 		drm_intel_bo_gem *bo_gem;
   1755 
   1756 		while (!DRMLISTEMPTY(&bucket->head)) {
   1757 			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
   1758 					      bucket->head.next, head);
   1759 			DRMLISTDEL(&bo_gem->head);
   1760 
   1761 			drm_intel_gem_bo_free(&bo_gem->bo);
   1762 		}
   1763 	}
   1764 
   1765 	free(bufmgr);
   1766 }
   1767 
   1768 /**
   1769  * Adds the target buffer to the validation list and adds the relocation
   1770  * to the reloc_buffer's relocation list.
   1771  *
   1772  * The relocation entry at the given offset must already contain the
   1773  * precomputed relocation value, because the kernel will optimize out
   1774  * the relocation entry write when the buffer hasn't moved from the
   1775  * last known offset in target_bo.
   1776  */
   1777 static int
   1778 do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
   1779 		 drm_intel_bo *target_bo, uint32_t target_offset,
   1780 		 uint32_t read_domains, uint32_t write_domain,
   1781 		 bool need_fence)
   1782 {
   1783 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
   1784 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   1785 	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
   1786 	bool fenced_command;
   1787 
   1788 	if (bo_gem->has_error)
   1789 		return -ENOMEM;
   1790 
   1791 	if (target_bo_gem->has_error) {
   1792 		bo_gem->has_error = true;
   1793 		return -ENOMEM;
   1794 	}
   1795 
   1796 	/* We never use HW fences for rendering on 965+ */
   1797 	if (bufmgr_gem->gen >= 4)
   1798 		need_fence = false;
   1799 
   1800 	fenced_command = need_fence;
   1801 	if (target_bo_gem->tiling_mode == I915_TILING_NONE)
   1802 		need_fence = false;
   1803 
   1804 	/* Create a new relocation list if needed */
   1805 	if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo))
   1806 		return -ENOMEM;
   1807 
   1808 	/* Check overflow */
   1809 	assert(bo_gem->reloc_count < bufmgr_gem->max_relocs);
   1810 
   1811 	/* Check args */
   1812 	assert(offset <= bo->size - 4);
   1813 	assert((write_domain & (write_domain - 1)) == 0);
   1814 
   1815 	/* An object needing a fence is a tiled buffer, so it won't have
   1816 	 * relocs to other buffers.
   1817 	 */
   1818 	if (need_fence) {
   1819 		assert(target_bo_gem->reloc_count == 0);
   1820 		target_bo_gem->reloc_tree_fences = 1;
   1821 	}
   1822 
   1823 	/* Make sure that we're not adding a reloc to something whose size has
   1824 	 * already been accounted for.
   1825 	 */
   1826 	assert(!bo_gem->used_as_reloc_target);
   1827 	if (target_bo_gem != bo_gem) {
   1828 		target_bo_gem->used_as_reloc_target = true;
   1829 		bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
   1830 		bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;
   1831 	}
   1832 
   1833 	bo_gem->relocs[bo_gem->reloc_count].offset = offset;
   1834 	bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
   1835 	bo_gem->relocs[bo_gem->reloc_count].target_handle =
   1836 	    target_bo_gem->gem_handle;
   1837 	bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
   1838 	bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
   1839 	bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64;
   1840 
   1841 	bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo;
   1842 	if (target_bo != bo)
   1843 		drm_intel_gem_bo_reference(target_bo);
   1844 	if (fenced_command)
   1845 		bo_gem->reloc_target_info[bo_gem->reloc_count].flags =
   1846 			DRM_INTEL_RELOC_FENCE;
   1847 	else
   1848 		bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;
   1849 
   1850 	bo_gem->reloc_count++;
   1851 
   1852 	return 0;
   1853 }
   1854 
   1855 static int
   1856 drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
   1857 			    drm_intel_bo *target_bo, uint32_t target_offset,
   1858 			    uint32_t read_domains, uint32_t write_domain)
   1859 {
   1860 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
   1861 
   1862 	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
   1863 				read_domains, write_domain,
   1864 				!bufmgr_gem->fenced_relocs);
   1865 }
   1866 
   1867 static int
   1868 drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
   1869 				  drm_intel_bo *target_bo,
   1870 				  uint32_t target_offset,
   1871 				  uint32_t read_domains, uint32_t write_domain)
   1872 {
   1873 	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
   1874 				read_domains, write_domain, true);
   1875 }
   1876 
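/*
 * Hedged example (not part of the original file) of how a caller typically
 * reaches do_bo_emit_reloc() above through the generic intel_bufmgr entry
 * point; the buffers, the 4-byte-aligned command offset and the domains are
 * assumptions for illustration only:
 *
 *	// batch_bo contains a command at byte offset 16 that points at tex_bo
 *	int err = drm_intel_bo_emit_reloc(batch_bo, 16,
 *					  tex_bo, 0,
 *					  I915_GEM_DOMAIN_SAMPLER, 0);
 *	if (err)
 *		fprintf(stderr, "emit_reloc failed: %s\n", strerror(-err));
 */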
   1877 drm_public int
   1878 drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo)
   1879 {
   1880 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   1881 
   1882 	return bo_gem->reloc_count;
   1883 }
   1884 
   1885 /**
   1886  * Removes existing relocation entries in the BO after "start".
   1887  *
   1888  * This allows a user to avoid a two-step process for state setup with
   1889  * counting up all the buffer objects and doing a
   1890  * drm_intel_bufmgr_check_aperture_space() before emitting any of the
   1891  * relocations for the state setup.  Instead, save the state of the
    1892  * batchbuffer including drm_intel_gem_bo_get_reloc_count(), emit all the
   1893  * state, and then check if it still fits in the aperture.
   1894  *
   1895  * Any further drm_intel_bufmgr_check_aperture_space() queries
   1896  * involving this buffer in the tree are undefined after this call.
   1897  */
   1898 drm_public void
   1899 drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start)
   1900 {
   1901 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
   1902 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   1903 	int i;
   1904 	struct timespec time;
   1905 
   1906 	clock_gettime(CLOCK_MONOTONIC, &time);
   1907 
   1908 	assert(bo_gem->reloc_count >= start);
   1909 
   1910 	/* Unreference the cleared target buffers */
   1911 	pthread_mutex_lock(&bufmgr_gem->lock);
   1912 
   1913 	for (i = start; i < bo_gem->reloc_count; i++) {
   1914 		drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->reloc_target_info[i].bo;
   1915 		if (&target_bo_gem->bo != bo) {
   1916 			bo_gem->reloc_tree_fences -= target_bo_gem->reloc_tree_fences;
   1917 			drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo,
   1918 								  time.tv_sec);
   1919 		}
   1920 	}
   1921 	bo_gem->reloc_count = start;
   1922 
   1923 	pthread_mutex_unlock(&bufmgr_gem->lock);
   1924 
   1925 }
   1926 
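/*
 * Minimal sketch (not from the original file) of the save/emit/check/rollback
 * pattern described above; "batch_bo" and the state-emission helper are
 * hypothetical:
 *
 *	int saved = drm_intel_gem_bo_get_reloc_count(batch_bo);
 *	emit_state(batch_bo);				// adds relocs
 *	if (drm_intel_bufmgr_check_aperture_space(&batch_bo, 1) != 0) {
 *		drm_intel_gem_bo_clear_relocs(batch_bo, saved);
 *		// flush the batch and retry the state emission
 *	}
 */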
   1927 /**
   1928  * Walk the tree of relocations rooted at BO and accumulate the list of
   1929  * validations to be performed and update the relocation buffers with
   1930  * index values into the validation list.
   1931  */
   1932 static void
   1933 drm_intel_gem_bo_process_reloc(drm_intel_bo *bo)
   1934 {
   1935 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   1936 	int i;
   1937 
   1938 	if (bo_gem->relocs == NULL)
   1939 		return;
   1940 
   1941 	for (i = 0; i < bo_gem->reloc_count; i++) {
   1942 		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
   1943 
   1944 		if (target_bo == bo)
   1945 			continue;
   1946 
   1947 		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
   1948 
   1949 		/* Continue walking the tree depth-first. */
   1950 		drm_intel_gem_bo_process_reloc(target_bo);
   1951 
   1952 		/* Add the target to the validate list */
   1953 		drm_intel_add_validate_buffer(target_bo);
   1954 	}
   1955 }
   1956 
   1957 static void
   1958 drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
   1959 {
   1960 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
   1961 	int i;
   1962 
   1963 	if (bo_gem->relocs == NULL)
   1964 		return;
   1965 
   1966 	for (i = 0; i < bo_gem->reloc_count; i++) {
   1967 		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
   1968 		int need_fence;
   1969 
   1970 		if (target_bo == bo)
   1971 			continue;
   1972 
   1973 		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
   1974 
   1975 		/* Continue walking the tree depth-first. */
   1976 		drm_intel_gem_bo_process_reloc2(target_bo);
   1977 
   1978 		need_fence = (bo_gem->reloc_target_info[i].flags &
   1979 			      DRM_INTEL_RELOC_FENCE);
   1980 
   1981 		/* Add the target to the validate list */
   1982 		drm_intel_add_validate_buffer2(target_bo, need_fence);
   1983 	}
   1984 }
   1985 
   1986 
   1987 static void
   1988 drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem)
   1989 {
   1990 	int i;
   1991 
   1992 	for (i = 0; i < bufmgr_gem->exec_count; i++) {
   1993 		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
   1994 		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   1995 
   1996 		/* Update the buffer offset */
   1997 		if (bufmgr_gem->exec_objects[i].offset != bo->offset64) {
   1998 			DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
   1999 			    bo_gem->gem_handle, bo_gem->name, bo->offset64,
   2000 			    (unsigned long long)bufmgr_gem->exec_objects[i].
   2001 			    offset);
   2002 			bo->offset64 = bufmgr_gem->exec_objects[i].offset;
   2003 			bo->offset = bufmgr_gem->exec_objects[i].offset;
   2004 		}
   2005 	}
   2006 }
   2007 
   2008 static void
   2009 drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem)
   2010 {
   2011 	int i;
   2012 
   2013 	for (i = 0; i < bufmgr_gem->exec_count; i++) {
   2014 		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
   2015 		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
   2016 
   2017 		/* Update the buffer offset */
   2018 		if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) {
   2019 			DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
   2020 			    bo_gem->gem_handle, bo_gem->name, bo->offset64,
   2021 			    (unsigned long long)bufmgr_gem->exec2_objects[i].offset);
   2022 			bo->offset64 = bufmgr_gem->exec2_objects[i].offset;
   2023 			bo->offset = bufmgr_gem->exec2_objects[i].offset;
   2024 		}
   2025 	}
   2026 }
   2027 
   2028 static void
   2029 aub_out(drm_intel_bufmgr_gem *bufmgr_gem, uint32_t data)
   2030 {
   2031 	fwrite(&data, 1, 4, bufmgr_gem->aub_file);
   2032 }
   2033 
   2034 static void
   2035 aub_out_data(drm_intel_bufmgr_gem *bufmgr_gem, void *data, size_t size)
   2036 {
   2037 	fwrite(data, 1, size, bufmgr_gem->aub_file);
   2038 }
   2039 
   2040 static void
   2041 aub_write_bo_data(drm_intel_bo *bo, uint32_t offset, uint32_t size)
   2042 {
   2043 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
   2044 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   2045 	uint32_t *data;
   2046 	unsigned int i;
   2047 
   2048 	data = malloc(bo->size);
   2049 	drm_intel_bo_get_subdata(bo, offset, size, data);
   2050 
   2051 	/* Easy mode: write out bo with no relocations */
   2052 	if (!bo_gem->reloc_count) {
   2053 		aub_out_data(bufmgr_gem, data, size);
   2054 		free(data);
   2055 		return;
   2056 	}
   2057 
   2058 	/* Otherwise, handle the relocations while writing. */
   2059 	for (i = 0; i < size / 4; i++) {
   2060 		int r;
   2061 		for (r = 0; r < bo_gem->reloc_count; r++) {
   2062 			struct drm_i915_gem_relocation_entry *reloc;
   2063 			drm_intel_reloc_target *info;
   2064 
   2065 			reloc = &bo_gem->relocs[r];
   2066 			info = &bo_gem->reloc_target_info[r];
   2067 
   2068 			if (reloc->offset == offset + i * 4) {
   2069 				drm_intel_bo_gem *target_gem;
   2070 				uint32_t val;
   2071 
   2072 				target_gem = (drm_intel_bo_gem *)info->bo;
   2073 
   2074 				val = reloc->delta;
   2075 				val += target_gem->aub_offset;
   2076 
   2077 				aub_out(bufmgr_gem, val);
   2078 				data[i] = val;
   2079 				break;
   2080 			}
   2081 		}
   2082 		if (r == bo_gem->reloc_count) {
   2083 			/* no relocation, just the data */
   2084 			aub_out(bufmgr_gem, data[i]);
   2085 		}
   2086 	}
   2087 
   2088 	free(data);
   2089 }
   2090 
   2091 static void
   2092 aub_bo_get_address(drm_intel_bo *bo)
   2093 {
   2094 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
   2095 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   2096 
   2097 	/* Give the object a graphics address in the AUB file.  We
   2098 	 * don't just use the GEM object address because we do AUB
   2099 	 * dumping before execution -- we want to successfully log
    2100 	 * when the hardware might hang, and we might even want to AUB-
    2101 	 * capture for a driver trying to execute on a different
   2102 	 * generation of hardware by disabling the actual kernel exec
   2103 	 * call.
   2104 	 */
   2105 	bo_gem->aub_offset = bufmgr_gem->aub_offset;
   2106 	bufmgr_gem->aub_offset += bo->size;
   2107 	/* XXX: Handle aperture overflow. */
   2108 	assert(bufmgr_gem->aub_offset < 256 * 1024 * 1024);
   2109 }
   2110 
   2111 static void
   2112 aub_write_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype,
   2113 		      uint32_t offset, uint32_t size)
   2114 {
   2115 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
   2116 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   2117 
   2118 	aub_out(bufmgr_gem,
   2119 		CMD_AUB_TRACE_HEADER_BLOCK |
   2120 		((bufmgr_gem->gen >= 8 ? 6 : 5) - 2));
   2121 	aub_out(bufmgr_gem,
   2122 		AUB_TRACE_MEMTYPE_GTT | type | AUB_TRACE_OP_DATA_WRITE);
   2123 	aub_out(bufmgr_gem, subtype);
   2124 	aub_out(bufmgr_gem, bo_gem->aub_offset + offset);
   2125 	aub_out(bufmgr_gem, size);
   2126 	if (bufmgr_gem->gen >= 8)
   2127 		aub_out(bufmgr_gem, 0);
   2128 	aub_write_bo_data(bo, offset, size);
   2129 }
   2130 
   2131 /**
    2132  * Break up large objects into multiple writes.  Otherwise a 128KB VBO
    2133  * would overflow the 16-bit size field in the packet header and
   2134  * everything goes badly after that.
   2135  */
   2136 static void
   2137 aub_write_large_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype,
   2138 			    uint32_t offset, uint32_t size)
   2139 {
   2140 	uint32_t block_size;
   2141 	uint32_t sub_offset;
   2142 
   2143 	for (sub_offset = 0; sub_offset < size; sub_offset += block_size) {
   2144 		block_size = size - sub_offset;
   2145 
   2146 		if (block_size > 8 * 4096)
   2147 			block_size = 8 * 4096;
   2148 
   2149 		aub_write_trace_block(bo, type, subtype, offset + sub_offset,
   2150 				      block_size);
   2151 	}
   2152 }
   2153 
   2154 static void
   2155 aub_write_bo(drm_intel_bo *bo)
   2156 {
   2157 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   2158 	uint32_t offset = 0;
   2159 	unsigned i;
   2160 
   2161 	aub_bo_get_address(bo);
   2162 
   2163 	/* Write out each annotated section separately. */
   2164 	for (i = 0; i < bo_gem->aub_annotation_count; ++i) {
   2165 		drm_intel_aub_annotation *annotation =
   2166 			&bo_gem->aub_annotations[i];
   2167 		uint32_t ending_offset = annotation->ending_offset;
   2168 		if (ending_offset > bo->size)
   2169 			ending_offset = bo->size;
   2170 		if (ending_offset > offset) {
   2171 			aub_write_large_trace_block(bo, annotation->type,
   2172 						    annotation->subtype,
   2173 						    offset,
   2174 						    ending_offset - offset);
   2175 			offset = ending_offset;
   2176 		}
   2177 	}
   2178 
   2179 	/* Write out any remaining unannotated data */
   2180 	if (offset < bo->size) {
   2181 		aub_write_large_trace_block(bo, AUB_TRACE_TYPE_NOTYPE, 0,
   2182 					    offset, bo->size - offset);
   2183 	}
   2184 }
   2185 
   2186 /*
    2187  * Make a ring buffer on the fly and dump it
   2188  */
   2189 static void
   2190 aub_build_dump_ringbuffer(drm_intel_bufmgr_gem *bufmgr_gem,
   2191 			  uint32_t batch_buffer, int ring_flag)
   2192 {
   2193 	uint32_t ringbuffer[4096];
   2194 	int ring = AUB_TRACE_TYPE_RING_PRB0; /* The default ring */
   2195 	int ring_count = 0;
   2196 
   2197 	if (ring_flag == I915_EXEC_BSD)
   2198 		ring = AUB_TRACE_TYPE_RING_PRB1;
   2199 	else if (ring_flag == I915_EXEC_BLT)
   2200 		ring = AUB_TRACE_TYPE_RING_PRB2;
   2201 
   2202 	/* Make a ring buffer to execute our batchbuffer. */
   2203 	memset(ringbuffer, 0, sizeof(ringbuffer));
   2204 	if (bufmgr_gem->gen >= 8) {
   2205 		ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START | (3 - 2);
   2206 		ringbuffer[ring_count++] = batch_buffer;
   2207 		ringbuffer[ring_count++] = 0;
   2208 	} else {
   2209 		ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START;
   2210 		ringbuffer[ring_count++] = batch_buffer;
   2211 	}
   2212 
   2213 	/* Write out the ring.  This appears to trigger execution of
   2214 	 * the ring in the simulator.
   2215 	 */
   2216 	aub_out(bufmgr_gem,
   2217 		CMD_AUB_TRACE_HEADER_BLOCK |
   2218 		((bufmgr_gem->gen >= 8 ? 6 : 5) - 2));
   2219 	aub_out(bufmgr_gem,
   2220 		AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE);
   2221 	aub_out(bufmgr_gem, 0); /* general/surface subtype */
   2222 	aub_out(bufmgr_gem, bufmgr_gem->aub_offset);
   2223 	aub_out(bufmgr_gem, ring_count * 4);
   2224 	if (bufmgr_gem->gen >= 8)
   2225 		aub_out(bufmgr_gem, 0);
   2226 
   2227 	/* FIXME: Need some flush operations here? */
   2228 	aub_out_data(bufmgr_gem, ringbuffer, ring_count * 4);
   2229 
   2230 	/* Update offset pointer */
   2231 	bufmgr_gem->aub_offset += 4096;
   2232 }
   2233 
   2234 drm_public void
   2235 drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo,
   2236 			      int x1, int y1, int width, int height,
   2237 			      enum aub_dump_bmp_format format,
   2238 			      int pitch, int offset)
   2239 {
   2240 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
   2241 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
   2242 	uint32_t cpp;
   2243 
   2244 	switch (format) {
   2245 	case AUB_DUMP_BMP_FORMAT_8BIT:
   2246 		cpp = 1;
   2247 		break;
   2248 	case AUB_DUMP_BMP_FORMAT_ARGB_4444:
   2249 		cpp = 2;
   2250 		break;
   2251 	case AUB_DUMP_BMP_FORMAT_ARGB_0888:
   2252 	case AUB_DUMP_BMP_FORMAT_ARGB_8888:
   2253 		cpp = 4;
   2254 		break;
   2255 	default:
   2256 		printf("Unknown AUB dump format %d\n", format);
   2257 		return;
   2258 	}
   2259 
   2260 	if (!bufmgr_gem->aub_file)
   2261 		return;
   2262 
   2263 	aub_out(bufmgr_gem, CMD_AUB_DUMP_BMP | 4);
   2264 	aub_out(bufmgr_gem, (y1 << 16) | x1);
   2265 	aub_out(bufmgr_gem,
   2266 		(format << 24) |
   2267 		(cpp << 19) |
   2268 		pitch / 4);
   2269 	aub_out(bufmgr_gem, (height << 16) | width);
   2270 	aub_out(bufmgr_gem, bo_gem->aub_offset + offset);
   2271 	aub_out(bufmgr_gem,
   2272 		((bo_gem->tiling_mode != I915_TILING_NONE) ? (1 << 2) : 0) |
   2273 		((bo_gem->tiling_mode == I915_TILING_Y) ? (1 << 3) : 0));
   2274 }
   2275 
   2276 static void
   2277 aub_exec(drm_intel_bo *bo, int ring_flag, int used)
   2278 {
   2279 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
   2280 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   2281 	int i;
   2282 	bool batch_buffer_needs_annotations;
   2283 
   2284 	if (!bufmgr_gem->aub_file)
   2285 		return;
   2286 
   2287 	/* If batch buffer is not annotated, annotate it the best we
   2288 	 * can.
   2289 	 */
   2290 	batch_buffer_needs_annotations = bo_gem->aub_annotation_count == 0;
   2291 	if (batch_buffer_needs_annotations) {
   2292 		drm_intel_aub_annotation annotations[2] = {
   2293 			{ AUB_TRACE_TYPE_BATCH, 0, used },
   2294 			{ AUB_TRACE_TYPE_NOTYPE, 0, bo->size }
   2295 		};
   2296 		drm_intel_bufmgr_gem_set_aub_annotations(bo, annotations, 2);
   2297 	}
   2298 
   2299 	/* Write out all buffers to AUB memory */
   2300 	for (i = 0; i < bufmgr_gem->exec_count; i++) {
   2301 		aub_write_bo(bufmgr_gem->exec_bos[i]);
   2302 	}
   2303 
   2304 	/* Remove any annotations we added */
   2305 	if (batch_buffer_needs_annotations)
   2306 		drm_intel_bufmgr_gem_set_aub_annotations(bo, NULL, 0);
   2307 
   2308 	/* Dump ring buffer */
   2309 	aub_build_dump_ringbuffer(bufmgr_gem, bo_gem->aub_offset, ring_flag);
   2310 
   2311 	fflush(bufmgr_gem->aub_file);
   2312 
   2313 	/*
   2314 	 * One frame has been dumped. So reset the aub_offset for the next frame.
   2315 	 *
   2316 	 * FIXME: Can we do this?
   2317 	 */
   2318 	bufmgr_gem->aub_offset = 0x10000;
   2319 }
   2320 
   2321 static int
   2322 drm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
   2323 		      drm_clip_rect_t * cliprects, int num_cliprects, int DR4)
   2324 {
   2325 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
   2326 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   2327 	struct drm_i915_gem_execbuffer execbuf;
   2328 	int ret, i;
   2329 
   2330 	if (bo_gem->has_error)
   2331 		return -ENOMEM;
   2332 
   2333 	pthread_mutex_lock(&bufmgr_gem->lock);
   2334 	/* Update indices and set up the validate list. */
   2335 	drm_intel_gem_bo_process_reloc(bo);
   2336 
   2337 	/* Add the batch buffer to the validation list.  There are no
   2338 	 * relocations pointing to it.
   2339 	 */
   2340 	drm_intel_add_validate_buffer(bo);
   2341 
   2342 	VG_CLEAR(execbuf);
   2343 	execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects;
   2344 	execbuf.buffer_count = bufmgr_gem->exec_count;
   2345 	execbuf.batch_start_offset = 0;
   2346 	execbuf.batch_len = used;
   2347 	execbuf.cliprects_ptr = (uintptr_t) cliprects;
   2348 	execbuf.num_cliprects = num_cliprects;
   2349 	execbuf.DR1 = 0;
   2350 	execbuf.DR4 = DR4;
   2351 
   2352 	ret = drmIoctl(bufmgr_gem->fd,
   2353 		       DRM_IOCTL_I915_GEM_EXECBUFFER,
   2354 		       &execbuf);
   2355 	if (ret != 0) {
   2356 		ret = -errno;
   2357 		if (errno == ENOSPC) {
   2358 			DBG("Execbuffer fails to pin. "
   2359 			    "Estimate: %u. Actual: %u. Available: %u\n",
   2360 			    drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
   2361 							       bufmgr_gem->
   2362 							       exec_count),
   2363 			    drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
   2364 							      bufmgr_gem->
   2365 							      exec_count),
   2366 			    (unsigned int)bufmgr_gem->gtt_size);
   2367 		}
   2368 	}
   2369 	drm_intel_update_buffer_offsets(bufmgr_gem);
   2370 
   2371 	if (bufmgr_gem->bufmgr.debug)
   2372 		drm_intel_gem_dump_validation_list(bufmgr_gem);
   2373 
   2374 	for (i = 0; i < bufmgr_gem->exec_count; i++) {
   2375 		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
   2376 		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   2377 
   2378 		bo_gem->idle = false;
   2379 
   2380 		/* Disconnect the buffer from the validate list */
   2381 		bo_gem->validate_index = -1;
   2382 		bufmgr_gem->exec_bos[i] = NULL;
   2383 	}
   2384 	bufmgr_gem->exec_count = 0;
   2385 	pthread_mutex_unlock(&bufmgr_gem->lock);
   2386 
   2387 	return ret;
   2388 }
   2389 
   2390 static int
   2391 do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx,
   2392 	 drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
   2393 	 unsigned int flags)
   2394 {
   2395 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
   2396 	struct drm_i915_gem_execbuffer2 execbuf;
   2397 	int ret = 0;
   2398 	int i;
   2399 
   2400 	switch (flags & 0x7) {
   2401 	default:
   2402 		return -EINVAL;
   2403 	case I915_EXEC_BLT:
   2404 		if (!bufmgr_gem->has_blt)
   2405 			return -EINVAL;
   2406 		break;
   2407 	case I915_EXEC_BSD:
   2408 		if (!bufmgr_gem->has_bsd)
   2409 			return -EINVAL;
   2410 		break;
   2411 	case I915_EXEC_VEBOX:
   2412 		if (!bufmgr_gem->has_vebox)
   2413 			return -EINVAL;
   2414 		break;
   2415 	case I915_EXEC_RENDER:
   2416 	case I915_EXEC_DEFAULT:
   2417 		break;
   2418 	}
   2419 
   2420 	pthread_mutex_lock(&bufmgr_gem->lock);
   2421 	/* Update indices and set up the validate list. */
   2422 	drm_intel_gem_bo_process_reloc2(bo);
   2423 
   2424 	/* Add the batch buffer to the validation list.  There are no relocations
   2425 	 * pointing to it.
   2426 	 */
   2427 	drm_intel_add_validate_buffer2(bo, 0);
   2428 
   2429 	VG_CLEAR(execbuf);
   2430 	execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
   2431 	execbuf.buffer_count = bufmgr_gem->exec_count;
   2432 	execbuf.batch_start_offset = 0;
   2433 	execbuf.batch_len = used;
   2434 	execbuf.cliprects_ptr = (uintptr_t)cliprects;
   2435 	execbuf.num_cliprects = num_cliprects;
   2436 	execbuf.DR1 = 0;
   2437 	execbuf.DR4 = DR4;
   2438 	execbuf.flags = flags;
   2439 	if (ctx == NULL)
   2440 		i915_execbuffer2_set_context_id(execbuf, 0);
   2441 	else
   2442 		i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id);
   2443 	execbuf.rsvd2 = 0;
   2444 
   2445 	aub_exec(bo, flags, used);
   2446 
   2447 	if (bufmgr_gem->no_exec)
   2448 		goto skip_execution;
   2449 
   2450 	ret = drmIoctl(bufmgr_gem->fd,
   2451 		       DRM_IOCTL_I915_GEM_EXECBUFFER2,
   2452 		       &execbuf);
   2453 	if (ret != 0) {
   2454 		ret = -errno;
   2455 		if (ret == -ENOSPC) {
   2456 			DBG("Execbuffer fails to pin. "
   2457 			    "Estimate: %u. Actual: %u. Available: %u\n",
   2458 			    drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
   2459 							       bufmgr_gem->exec_count),
   2460 			    drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
   2461 							      bufmgr_gem->exec_count),
   2462 			    (unsigned int) bufmgr_gem->gtt_size);
   2463 		}
   2464 	}
   2465 	drm_intel_update_buffer_offsets2(bufmgr_gem);
   2466 
   2467 skip_execution:
   2468 	if (bufmgr_gem->bufmgr.debug)
   2469 		drm_intel_gem_dump_validation_list(bufmgr_gem);
   2470 
   2471 	for (i = 0; i < bufmgr_gem->exec_count; i++) {
   2472 		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
   2473 		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
   2474 
   2475 		bo_gem->idle = false;
   2476 
   2477 		/* Disconnect the buffer from the validate list */
   2478 		bo_gem->validate_index = -1;
   2479 		bufmgr_gem->exec_bos[i] = NULL;
   2480 	}
   2481 	bufmgr_gem->exec_count = 0;
   2482 	pthread_mutex_unlock(&bufmgr_gem->lock);
   2483 
   2484 	return ret;
   2485 }
   2486 
   2487 static int
   2488 drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used,
   2489 		       drm_clip_rect_t *cliprects, int num_cliprects,
   2490 		       int DR4)
   2491 {
   2492 	return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
   2493 			I915_EXEC_RENDER);
   2494 }
   2495 
   2496 static int
   2497 drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used,
   2498 			drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
   2499 			unsigned int flags)
   2500 {
   2501 	return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
   2502 			flags);
   2503 }
   2504 
   2505 drm_public int
   2506 drm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx,
   2507 			      int used, unsigned int flags)
   2508 {
   2509 	return do_exec2(bo, used, ctx, NULL, 0, 0, flags);
   2510 }
   2511 
   2512 static int
   2513 drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment)
   2514 {
   2515 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
   2516 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   2517 	struct drm_i915_gem_pin pin;
   2518 	int ret;
   2519 
   2520 	VG_CLEAR(pin);
   2521 	pin.handle = bo_gem->gem_handle;
   2522 	pin.alignment = alignment;
   2523 
   2524 	ret = drmIoctl(bufmgr_gem->fd,
   2525 		       DRM_IOCTL_I915_GEM_PIN,
   2526 		       &pin);
   2527 	if (ret != 0)
   2528 		return -errno;
   2529 
   2530 	bo->offset64 = pin.offset;
   2531 	bo->offset = pin.offset;
   2532 	return 0;
   2533 }
   2534 
   2535 static int
   2536 drm_intel_gem_bo_unpin(drm_intel_bo *bo)
   2537 {
   2538 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
   2539 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   2540 	struct drm_i915_gem_unpin unpin;
   2541 	int ret;
   2542 
   2543 	VG_CLEAR(unpin);
   2544 	unpin.handle = bo_gem->gem_handle;
   2545 
   2546 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin);
   2547 	if (ret != 0)
   2548 		return -errno;
   2549 
   2550 	return 0;
   2551 }
   2552 
   2553 static int
   2554 drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
   2555 				     uint32_t tiling_mode,
   2556 				     uint32_t stride)
   2557 {
   2558 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
   2559 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   2560 	struct drm_i915_gem_set_tiling set_tiling;
   2561 	int ret;
   2562 
   2563 	if (bo_gem->global_name == 0 &&
   2564 	    tiling_mode == bo_gem->tiling_mode &&
   2565 	    stride == bo_gem->stride)
   2566 		return 0;
   2567 
   2568 	memset(&set_tiling, 0, sizeof(set_tiling));
   2569 	do {
   2570 		/* set_tiling is slightly broken and overwrites the
   2571 		 * input on the error path, so we have to open code
    2572 		 * drmIoctl.
   2573 		 */
   2574 		set_tiling.handle = bo_gem->gem_handle;
   2575 		set_tiling.tiling_mode = tiling_mode;
   2576 		set_tiling.stride = stride;
   2577 
   2578 		ret = ioctl(bufmgr_gem->fd,
   2579 			    DRM_IOCTL_I915_GEM_SET_TILING,
   2580 			    &set_tiling);
   2581 	} while (ret == -1 && (errno == EINTR || errno == EAGAIN));
   2582 	if (ret == -1)
   2583 		return -errno;
   2584 
   2585 	bo_gem->tiling_mode = set_tiling.tiling_mode;
   2586 	bo_gem->swizzle_mode = set_tiling.swizzle_mode;
   2587 	bo_gem->stride = set_tiling.stride;
   2588 	return 0;
   2589 }
   2590 
   2591 static int
   2592 drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
   2593 			    uint32_t stride)
   2594 {
   2595 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
   2596 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   2597 	int ret;
   2598 
   2599 	/* Tiling with userptr surfaces is not supported
    2600 	 * on all hardware so refuse it for the time being.
   2601 	 */
   2602 	if (bo_gem->is_userptr)
   2603 		return -EINVAL;
   2604 
   2605 	/* Linear buffers have no stride. By ensuring that we only ever use
   2606 	 * stride 0 with linear buffers, we simplify our code.
   2607 	 */
   2608 	if (*tiling_mode == I915_TILING_NONE)
   2609 		stride = 0;
   2610 
   2611 	ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride);
   2612 	if (ret == 0)
   2613 		drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
   2614 
   2615 	*tiling_mode = bo_gem->tiling_mode;
   2616 	return ret;
   2617 }
   2618 
   2619 static int
   2620 drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
   2621 			    uint32_t * swizzle_mode)
   2622 {
   2623 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   2624 
   2625 	*tiling_mode = bo_gem->tiling_mode;
   2626 	*swizzle_mode = bo_gem->swizzle_mode;
   2627 	return 0;
   2628 }
   2629 
   2630 drm_public drm_intel_bo *
   2631 drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int size)
   2632 {
   2633 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
   2634 	int ret;
   2635 	uint32_t handle;
   2636 	drm_intel_bo_gem *bo_gem;
   2637 	struct drm_i915_gem_get_tiling get_tiling;
   2638 	drmMMListHead *list;
   2639 
   2640 	ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle);
   2641 
   2642 	/*
   2643 	 * See if the kernel has already returned this buffer to us. Just as
   2644 	 * for named buffers, we must not create two bo's pointing at the same
    2645 	 * kernel object.
   2646 	 */
   2647 	pthread_mutex_lock(&bufmgr_gem->lock);
   2648 	for (list = bufmgr_gem->named.next;
   2649 	     list != &bufmgr_gem->named;
   2650 	     list = list->next) {
   2651 		bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list);
   2652 		if (bo_gem->gem_handle == handle) {
   2653 			drm_intel_gem_bo_reference(&bo_gem->bo);
   2654 			pthread_mutex_unlock(&bufmgr_gem->lock);
   2655 			return &bo_gem->bo;
   2656 		}
   2657 	}
   2658 
    2659 	if (ret) {
    2660 		fprintf(stderr, "drmPrimeFDToHandle failed: %d %d\n", ret, errno);
    2661 		pthread_mutex_unlock(&bufmgr_gem->lock);
    2662 		return NULL;
    2663 	}
   2664 
   2665 	bo_gem = calloc(1, sizeof(*bo_gem));
   2666 	if (!bo_gem) {
   2667 		pthread_mutex_unlock(&bufmgr_gem->lock);
   2668 		return NULL;
   2669 	}
   2670 	/* Determine size of bo.  The fd-to-handle ioctl really should
   2671 	 * return the size, but it doesn't.  If we have kernel 3.12 or
   2672 	 * later, we can lseek on the prime fd to get the size.  Older
   2673 	 * kernels will just fail, in which case we fall back to the
    2674 	 * provided (estimated or guessed) size. */
   2675 	ret = lseek(prime_fd, 0, SEEK_END);
   2676 	if (ret != -1)
   2677 		bo_gem->bo.size = ret;
   2678 	else
   2679 		bo_gem->bo.size = size;
   2680 
   2681 	bo_gem->bo.handle = handle;
   2682 	bo_gem->bo.bufmgr = bufmgr;
   2683 
   2684 	bo_gem->gem_handle = handle;
   2685 
   2686 	atomic_set(&bo_gem->refcount, 1);
   2687 
   2688 	bo_gem->name = "prime";
   2689 	bo_gem->validate_index = -1;
   2690 	bo_gem->reloc_tree_fences = 0;
   2691 	bo_gem->used_as_reloc_target = false;
   2692 	bo_gem->has_error = false;
   2693 	bo_gem->reusable = false;
   2694 
   2695 	DRMINITLISTHEAD(&bo_gem->vma_list);
   2696 	DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
   2697 	pthread_mutex_unlock(&bufmgr_gem->lock);
   2698 
   2699 	VG_CLEAR(get_tiling);
   2700 	get_tiling.handle = bo_gem->gem_handle;
   2701 	ret = drmIoctl(bufmgr_gem->fd,
   2702 		       DRM_IOCTL_I915_GEM_GET_TILING,
   2703 		       &get_tiling);
   2704 	if (ret != 0) {
   2705 		drm_intel_gem_bo_unreference(&bo_gem->bo);
   2706 		return NULL;
   2707 	}
   2708 	bo_gem->tiling_mode = get_tiling.tiling_mode;
   2709 	bo_gem->swizzle_mode = get_tiling.swizzle_mode;
   2710 	/* XXX stride is unknown */
   2711 	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
   2712 
   2713 	return &bo_gem->bo;
   2714 }
   2715 
   2716 drm_public int
   2717 drm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd)
   2718 {
   2719 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
   2720 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   2721 
   2722 	pthread_mutex_lock(&bufmgr_gem->lock);
    2723 	if (DRMLISTEMPTY(&bo_gem->name_list))
    2724 		DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
   2725 	pthread_mutex_unlock(&bufmgr_gem->lock);
   2726 
   2727 	if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle,
   2728 			       DRM_CLOEXEC, prime_fd) != 0)
   2729 		return -errno;
   2730 
   2731 	bo_gem->reusable = false;
   2732 
   2733 	return 0;
   2734 }
   2735 
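/*
 * Hedged sketch (not part of the original file) of sharing a buffer through
 * PRIME with the two helpers above; error handling is abbreviated and the
 * second bufmgr belongs to a hypothetical importing process:
 *
 *	int prime_fd;
 *	if (drm_intel_bo_gem_export_to_prime(bo, &prime_fd) == 0) {
 *		// ...pass prime_fd over a socket to the importer...
 *		drm_intel_bo *imported =
 *			drm_intel_bo_gem_create_from_prime(other_bufmgr,
 *							   prime_fd,
 *							   (int) bo->size);
 *		// imported == NULL on failure
 *	}
 */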
   2736 static int
   2737 drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name)
   2738 {
   2739 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
   2740 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   2741 	int ret;
   2742 
   2743 	if (!bo_gem->global_name) {
   2744 		struct drm_gem_flink flink;
   2745 
   2746 		VG_CLEAR(flink);
   2747 		flink.handle = bo_gem->gem_handle;
   2748 
   2749 		pthread_mutex_lock(&bufmgr_gem->lock);
   2750 
   2751 		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink);
   2752 		if (ret != 0) {
   2753 			pthread_mutex_unlock(&bufmgr_gem->lock);
   2754 			return -errno;
   2755 		}
   2756 
   2757 		bo_gem->global_name = flink.name;
   2758 		bo_gem->reusable = false;
   2759 
    2760 		if (DRMLISTEMPTY(&bo_gem->name_list))
    2761 			DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
   2762 		pthread_mutex_unlock(&bufmgr_gem->lock);
   2763 	}
   2764 
   2765 	*name = bo_gem->global_name;
   2766 	return 0;
   2767 }
   2768 
   2769 /**
   2770  * Enables unlimited caching of buffer objects for reuse.
   2771  *
   2772  * This is potentially very memory expensive, as the cache at each bucket
   2773  * size is only bounded by how many buffers of that size we've managed to have
   2774  * in flight at once.
   2775  */
   2776 drm_public void
   2777 drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr)
   2778 {
   2779 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
   2780 
   2781 	bufmgr_gem->bo_reuse = true;
   2782 }
   2783 
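/*
 * Typical initialization sequence (a sketch, not from the original file);
 * "fd" is assumed to be an open DRM device file descriptor and the batch
 * size is illustrative:
 *
 *	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
 *	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 */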
   2784 /**
   2785  * Enable use of fenced reloc type.
   2786  *
   2787  * New code should enable this to avoid unnecessary fence register
    2788  * allocation.  If this option is not enabled, all relocs will have a fence
    2789  * register allocated.
   2790  */
   2791 drm_public void
   2792 drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr)
   2793 {
   2794 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
   2795 
   2796 	if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2)
   2797 		bufmgr_gem->fenced_relocs = true;
   2798 }
   2799 
   2800 /**
   2801  * Return the additional aperture space required by the tree of buffer objects
   2802  * rooted at bo.
   2803  */
   2804 static int
   2805 drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo)
   2806 {
   2807 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   2808 	int i;
   2809 	int total = 0;
   2810 
   2811 	if (bo == NULL || bo_gem->included_in_check_aperture)
   2812 		return 0;
   2813 
   2814 	total += bo->size;
   2815 	bo_gem->included_in_check_aperture = true;
   2816 
   2817 	for (i = 0; i < bo_gem->reloc_count; i++)
   2818 		total +=
   2819 		    drm_intel_gem_bo_get_aperture_space(bo_gem->
   2820 							reloc_target_info[i].bo);
   2821 
   2822 	return total;
   2823 }
   2824 
   2825 /**
   2826  * Count the number of buffers in this list that need a fence reg
   2827  *
   2828  * If the count is greater than the number of available regs, we'll have
   2829  * to ask the caller to resubmit a batch with fewer tiled buffers.
   2830  *
   2831  * This function over-counts if the same buffer is used multiple times.
   2832  */
   2833 static unsigned int
   2834 drm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count)
   2835 {
   2836 	int i;
   2837 	unsigned int total = 0;
   2838 
   2839 	for (i = 0; i < count; i++) {
   2840 		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
   2841 
   2842 		if (bo_gem == NULL)
   2843 			continue;
   2844 
   2845 		total += bo_gem->reloc_tree_fences;
   2846 	}
   2847 	return total;
   2848 }
   2849 
   2850 /**
   2851  * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready
   2852  * for the next drm_intel_bufmgr_check_aperture_space() call.
   2853  */
   2854 static void
   2855 drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo)
   2856 {
   2857 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   2858 	int i;
   2859 
   2860 	if (bo == NULL || !bo_gem->included_in_check_aperture)
   2861 		return;
   2862 
   2863 	bo_gem->included_in_check_aperture = false;
   2864 
   2865 	for (i = 0; i < bo_gem->reloc_count; i++)
   2866 		drm_intel_gem_bo_clear_aperture_space_flag(bo_gem->
   2867 							   reloc_target_info[i].bo);
   2868 }
   2869 
   2870 /**
   2871  * Return a conservative estimate for the amount of aperture required
   2872  * for a collection of buffers. This may double-count some buffers.
   2873  */
   2874 static unsigned int
   2875 drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count)
   2876 {
   2877 	int i;
   2878 	unsigned int total = 0;
   2879 
   2880 	for (i = 0; i < count; i++) {
   2881 		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
   2882 		if (bo_gem != NULL)
   2883 			total += bo_gem->reloc_tree_size;
   2884 	}
   2885 	return total;
   2886 }
   2887 
   2888 /**
   2889  * Return the amount of aperture needed for a collection of buffers.
   2890  * This avoids double counting any buffers, at the cost of looking
   2891  * at every buffer in the set.
   2892  */
   2893 static unsigned int
   2894 drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count)
   2895 {
   2896 	int i;
   2897 	unsigned int total = 0;
   2898 
   2899 	for (i = 0; i < count; i++) {
   2900 		total += drm_intel_gem_bo_get_aperture_space(bo_array[i]);
   2901 		/* For the first buffer object in the array, we get an
   2902 		 * accurate count back for its reloc_tree size (since nothing
   2903 		 * had been flagged as being counted yet).  We can save that
   2904 		 * value out as a more conservative reloc_tree_size that
   2905 		 * avoids double-counting target buffers.  Since the first
   2906 		 * buffer happens to usually be the batch buffer in our
   2907 		 * callers, this can pull us back from doing the tree
   2908 		 * walk on every new batch emit.
   2909 		 */
   2910 		if (i == 0) {
   2911 			drm_intel_bo_gem *bo_gem =
   2912 			    (drm_intel_bo_gem *) bo_array[i];
   2913 			bo_gem->reloc_tree_size = total;
   2914 		}
   2915 	}
   2916 
   2917 	for (i = 0; i < count; i++)
   2918 		drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]);
   2919 	return total;
   2920 }
   2921 
   2922 /**
   2923  * Return -1 if the batchbuffer should be flushed before attempting to
   2924  * emit rendering referencing the buffers pointed to by bo_array.
   2925  *
   2926  * This is required because if we try to emit a batchbuffer with relocations
   2927  * to a tree of buffers that won't simultaneously fit in the aperture,
   2928  * the rendering will return an error at a point where the software is not
   2929  * prepared to recover from it.
   2930  *
   2931  * However, we also want to emit the batchbuffer significantly before we reach
   2932  * the limit, as a series of batchbuffers each of which references buffers
   2933  * covering almost all of the aperture means that at each emit we end up
   2934  * waiting to evict a buffer from the last rendering, and we get synchronous
   2935  * performance.  By emitting smaller batchbuffers, we eat some CPU overhead to
   2936  * get better parallelism.
   2937  */
   2938 static int
   2939 drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count)
   2940 {
   2941 	drm_intel_bufmgr_gem *bufmgr_gem =
   2942 	    (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr;
   2943 	unsigned int total = 0;
   2944 	unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4;
   2945 	int total_fences;
   2946 
   2947 	/* Check for fence reg constraints if necessary */
   2948 	if (bufmgr_gem->available_fences) {
   2949 		total_fences = drm_intel_gem_total_fences(bo_array, count);
   2950 		if (total_fences > bufmgr_gem->available_fences)
   2951 			return -ENOSPC;
   2952 	}
   2953 
   2954 	total = drm_intel_gem_estimate_batch_space(bo_array, count);
   2955 
   2956 	if (total > threshold)
   2957 		total = drm_intel_gem_compute_batch_space(bo_array, count);
   2958 
   2959 	if (total > threshold) {
   2960 		DBG("check_space: overflowed available aperture, "
   2961 		    "%dkb vs %dkb\n",
   2962 		    total / 1024, (int)bufmgr_gem->gtt_size / 1024);
   2963 		return -ENOSPC;
   2964 	} else {
   2965 		DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024,
   2966 		    (int)bufmgr_gem->gtt_size / 1024);
   2967 		return 0;
   2968 	}
   2969 }
   2970 
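/*
 * Hedged example (not in the original file) of the intended calling pattern:
 * the driver collects the buffers a batch will reference and asks, through
 * the generic drm_intel_bufmgr_check_aperture_space() entry point that lands
 * in the check above, whether they are likely to fit before emitting.  The
 * buffer names are illustrative:
 *
 *	drm_intel_bo *bos[] = { batch_bo, dst_bo, src_bo };
 *	if (drm_intel_bufmgr_check_aperture_space(bos, 3) != 0) {
 *		// flush the current batch first, then re-emit the rendering
 *	}
 */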
   2971 /*
   2972  * Disable buffer reuse for objects which are shared with the kernel
   2973  * as scanout buffers
   2974  */
   2975 static int
   2976 drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo)
   2977 {
   2978 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   2979 
   2980 	bo_gem->reusable = false;
   2981 	return 0;
   2982 }
   2983 
   2984 static int
   2985 drm_intel_gem_bo_is_reusable(drm_intel_bo *bo)
   2986 {
   2987 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   2988 
   2989 	return bo_gem->reusable;
   2990 }
   2991 
   2992 static int
   2993 _drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
   2994 {
   2995 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   2996 	int i;
   2997 
   2998 	for (i = 0; i < bo_gem->reloc_count; i++) {
   2999 		if (bo_gem->reloc_target_info[i].bo == target_bo)
   3000 			return 1;
   3001 		if (bo == bo_gem->reloc_target_info[i].bo)
   3002 			continue;
   3003 		if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo,
   3004 						target_bo))
   3005 			return 1;
   3006 	}
   3007 
   3008 	return 0;
   3009 }
   3010 
   3011 /** Return true if target_bo is referenced by bo's relocation tree. */
   3012 static int
   3013 drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
   3014 {
   3015 	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
   3016 
   3017 	if (bo == NULL || target_bo == NULL)
   3018 		return 0;
   3019 	if (target_bo_gem->used_as_reloc_target)
   3020 		return _drm_intel_gem_bo_references(bo, target_bo);
   3021 	return 0;
   3022 }
   3023 
   3024 static void
   3025 add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size)
   3026 {
   3027 	unsigned int i = bufmgr_gem->num_buckets;
   3028 
   3029 	assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket));
   3030 
   3031 	DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
   3032 	bufmgr_gem->cache_bucket[i].size = size;
   3033 	bufmgr_gem->num_buckets++;
   3034 }
   3035 
   3036 static void
   3037 init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem)
   3038 {
   3039 	unsigned long size, cache_max_size = 64 * 1024 * 1024;
   3040 
   3041 	/* OK, so power of two buckets was too wasteful of memory.
   3042 	 * Give 3 other sizes between each power of two, to hopefully
   3043 	 * cover things accurately enough.  (The alternative is
   3044 	 * probably to just go for exact matching of sizes, and assume
   3045 	 * that for things like composited window resize the tiled
   3046 	 * width/height alignment and rounding of sizes to pages will
   3047 	 * get us useful cache hit rates anyway)
   3048 	 */
   3049 	add_bucket(bufmgr_gem, 4096);
   3050 	add_bucket(bufmgr_gem, 4096 * 2);
   3051 	add_bucket(bufmgr_gem, 4096 * 3);
   3052 
   3053 	/* Initialize the linked lists for BO reuse cache. */
   3054 	for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
   3055 		add_bucket(bufmgr_gem, size);
   3056 
   3057 		add_bucket(bufmgr_gem, size + size * 1 / 4);
   3058 		add_bucket(bufmgr_gem, size + size * 2 / 4);
   3059 		add_bucket(bufmgr_gem, size + size * 3 / 4);
   3060 	}
   3061 }
   3062 
   3063 drm_public void
   3064 drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit)
   3065 {
   3066 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
   3067 
   3068 	bufmgr_gem->vma_max = limit;
   3069 
   3070 	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
   3071 }
   3072 
   3073 /**
   3074  * Get the PCI ID for the device.  This can be overridden by setting the
   3075  * INTEL_DEVID_OVERRIDE environment variable to the desired ID.
   3076  */
   3077 static int
   3078 get_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem)
   3079 {
   3080 	char *devid_override;
   3081 	int devid;
   3082 	int ret;
   3083 	drm_i915_getparam_t gp;
   3084 
   3085 	if (geteuid() == getuid()) {
   3086 		devid_override = getenv("INTEL_DEVID_OVERRIDE");
   3087 		if (devid_override) {
   3088 			bufmgr_gem->no_exec = true;
   3089 			return strtod(devid_override, NULL);
   3090 		}
   3091 	}
   3092 
   3093 	VG_CLEAR(devid);
   3094 	VG_CLEAR(gp);
   3095 	gp.param = I915_PARAM_CHIPSET_ID;
   3096 	gp.value = &devid;
   3097 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
   3098 	if (ret) {
   3099 		fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno);
   3100 		fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
   3101 	}
   3102 	return devid;
   3103 }
   3104 
   3105 drm_public int
   3106 drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr)
   3107 {
   3108 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
   3109 
   3110 	return bufmgr_gem->pci_device;
   3111 }
   3112 
   3113 /**
   3114  * Sets the AUB filename.
   3115  *
   3116  * This function has to be called before drm_intel_bufmgr_gem_set_aub_dump()
   3117  * for it to have any effect.
   3118  */
   3119 drm_public void
   3120 drm_intel_bufmgr_gem_set_aub_filename(drm_intel_bufmgr *bufmgr,
   3121 				      const char *filename)
   3122 {
   3123 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
   3124 
   3125 	free(bufmgr_gem->aub_filename);
   3126 	if (filename)
   3127 		bufmgr_gem->aub_filename = strdup(filename);
   3128 }
   3129 
   3130 /**
   3131  * Sets up AUB dumping.
   3132  *
   3133  * This is a trace file format that can be used with the simulator.
   3134  * Packets are emitted in a format somewhat like GPU command packets.
   3135  * You can set up a GTT and upload your objects into the referenced
   3136  * space, then send off batchbuffers and get BMPs out the other end.
   3137  */
   3138 drm_public void
   3139 drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable)
   3140 {
   3141 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
   3142 	int entry = 0x200003;
   3143 	int i;
   3144 	int gtt_size = 0x10000;
   3145 	const char *filename;
   3146 
   3147 	if (!enable) {
   3148 		if (bufmgr_gem->aub_file) {
   3149 			fclose(bufmgr_gem->aub_file);
   3150 			bufmgr_gem->aub_file = NULL;
   3151 		}
   3152 		return;
   3153 	}
   3154 
   3155 	if (geteuid() != getuid())
   3156 		return;
   3157 
   3158 	if (bufmgr_gem->aub_filename)
   3159 		filename = bufmgr_gem->aub_filename;
   3160 	else
   3161 		filename = "intel.aub";
   3162 	bufmgr_gem->aub_file = fopen(filename, "w+");
   3163 	if (!bufmgr_gem->aub_file)
   3164 		return;
   3165 
   3166 	/* Start allocating objects from just after the GTT. */
   3167 	bufmgr_gem->aub_offset = gtt_size;
   3168 
   3169 	/* Start with a (required) version packet. */
   3170 	aub_out(bufmgr_gem, CMD_AUB_HEADER | (13 - 2));
   3171 	aub_out(bufmgr_gem,
   3172 		(4 << AUB_HEADER_MAJOR_SHIFT) |
   3173 		(0 << AUB_HEADER_MINOR_SHIFT));
   3174 	for (i = 0; i < 8; i++) {
   3175 		aub_out(bufmgr_gem, 0); /* app name */
   3176 	}
   3177 	aub_out(bufmgr_gem, 0); /* timestamp */
   3178 	aub_out(bufmgr_gem, 0); /* timestamp */
   3179 	aub_out(bufmgr_gem, 0); /* comment len */
   3180 
   3181 	/* Set up the GTT. The max we can handle is 256M */
   3182 	aub_out(bufmgr_gem, CMD_AUB_TRACE_HEADER_BLOCK | ((bufmgr_gem->gen >= 8 ? 6 : 5) - 2));
   3183 	/* Need to use GTT_ENTRY type for recent emulator */
   3184 	aub_out(bufmgr_gem, AUB_TRACE_MEMTYPE_GTT_ENTRY | 0 | AUB_TRACE_OP_DATA_WRITE);
   3185 	aub_out(bufmgr_gem, 0); /* subtype */
   3186 	aub_out(bufmgr_gem, 0); /* offset */
   3187 	aub_out(bufmgr_gem, gtt_size); /* size */
   3188 	if (bufmgr_gem->gen >= 8)
   3189 		aub_out(bufmgr_gem, 0);
   3190 	for (i = 0x000; i < gtt_size; i += 4, entry += 0x1000) {
   3191 		aub_out(bufmgr_gem, entry);
   3192 	}
   3193 }
   3194 
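/*
 * Hedged usage sketch (not part of the original file): enable AUB capture
 * for a frame and turn it off again.  The filename is arbitrary and, per the
 * note on drm_intel_bufmgr_gem_set_aub_filename(), must be set before the
 * dump is enabled:
 *
 *	drm_intel_bufmgr_gem_set_aub_filename(bufmgr, "frame0001.aub");
 *	drm_intel_bufmgr_gem_set_aub_dump(bufmgr, 1);
 *	// ...submit the batches to capture...
 *	drm_intel_bufmgr_gem_set_aub_dump(bufmgr, 0);
 */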
   3195 drm_public drm_intel_context *
   3196 drm_intel_gem_context_create(drm_intel_bufmgr *bufmgr)
   3197 {
   3198 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
   3199 	struct drm_i915_gem_context_create create;
   3200 	drm_intel_context *context = NULL;
   3201 	int ret;
   3202 
   3203 	context = calloc(1, sizeof(*context));
   3204 	if (!context)
   3205 		return NULL;
   3206 
   3207 	VG_CLEAR(create);
   3208 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
   3209 	if (ret != 0) {
   3210 		DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n",
   3211 		    strerror(errno));
   3212 		free(context);
   3213 		return NULL;
   3214 	}
   3215 
   3216 	context->ctx_id = create.ctx_id;
   3217 	context->bufmgr = bufmgr;
   3218 
   3219 	return context;
   3220 }
   3221 
   3222 drm_public void
   3223 drm_intel_gem_context_destroy(drm_intel_context *ctx)
   3224 {
   3225 	drm_intel_bufmgr_gem *bufmgr_gem;
   3226 	struct drm_i915_gem_context_destroy destroy;
   3227 	int ret;
   3228 
   3229 	if (ctx == NULL)
   3230 		return;
   3231 
   3232 	VG_CLEAR(destroy);
   3233 
   3234 	bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr;
   3235 	destroy.ctx_id = ctx->ctx_id;
   3236 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY,
   3237 		       &destroy);
   3238 	if (ret != 0)
   3239 		fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
   3240 			strerror(errno));
   3241 
   3242 	free(ctx);
   3243 }
   3244 
   3245 drm_public int
   3246 drm_intel_get_reset_stats(drm_intel_context *ctx,
   3247 			  uint32_t *reset_count,
   3248 			  uint32_t *active,
   3249 			  uint32_t *pending)
   3250 {
   3251 	drm_intel_bufmgr_gem *bufmgr_gem;
   3252 	struct drm_i915_reset_stats stats;
   3253 	int ret;
   3254 
   3255 	if (ctx == NULL)
   3256 		return -EINVAL;
   3257 
   3258 	memset(&stats, 0, sizeof(stats));
   3259 
   3260 	bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr;
   3261 	stats.ctx_id = ctx->ctx_id;
   3262 	ret = drmIoctl(bufmgr_gem->fd,
   3263 		       DRM_IOCTL_I915_GET_RESET_STATS,
   3264 		       &stats);
   3265 	if (ret == 0) {
   3266 		if (reset_count != NULL)
   3267 			*reset_count = stats.reset_count;
   3268 
   3269 		if (active != NULL)
   3270 			*active = stats.batch_active;
   3271 
   3272 		if (pending != NULL)
   3273 			*pending = stats.batch_pending;
   3274 	}
   3275 
   3276 	return ret;
   3277 }
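
/*
 * Illustrative sketch (added for exposition; not part of the original file):
 * creating a hardware context and querying its reset statistics.  Assumes
 * "bufmgr" came from drm_intel_bufmgr_gem_init(); the example_* name is
 * hypothetical.
 */
static int
example_check_resets(drm_intel_bufmgr *bufmgr)
{
	drm_intel_context *ctx;
	uint32_t reset_count, active, pending;
	int ret;

	ctx = drm_intel_gem_context_create(bufmgr);
	if (ctx == NULL)
		return -ENOMEM;

	/* Work would be submitted against ctx here before asking how it fared. */
	ret = drm_intel_get_reset_stats(ctx, &reset_count, &active, &pending);
	if (ret == 0)
		fprintf(stderr, "resets %u, active %u, pending %u\n",
			reset_count, active, pending);

	drm_intel_gem_context_destroy(ctx);
	return ret;
}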
   3278 
   3279 drm_public int
   3280 drm_intel_reg_read(drm_intel_bufmgr *bufmgr,
   3281 		   uint32_t offset,
   3282 		   uint64_t *result)
   3283 {
   3284 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
   3285 	struct drm_i915_reg_read reg_read;
   3286 	int ret;
   3287 
   3288 	VG_CLEAR(reg_read);
   3289 	reg_read.offset = offset;
   3290 
   3291 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_REG_READ, &reg_read);
   3292 
   3293 	*result = reg_read.val;
   3294 	return ret;
   3295 }
   3296 
   3297 
   3298 /**
   3299  * Annotate the given bo for use in aub dumping.
   3300  *
   3301  * \param annotations is an array of drm_intel_aub_annotation objects
   3302  * describing the type of data in various sections of the bo.  Each
   3303  * element of the array specifies the type and subtype of a section of
   3304  * the bo, and the past-the-end offset of that section.  The elements
   3305  * of \c annotations must be sorted so that ending_offset is
   3306  * increasing.
   3307  *
   3308  * \param count is the number of elements in the \c annotations array.
   3309  * If \c count is zero, then \c annotations will not be dereferenced.
   3310  *
   3311  * Annotations are copied into a private data structure, so the caller may
   3312  * re-use the memory pointed to by \c annotations after the call
   3313  * returns.
   3314  *
   3315  * Annotations are stored for the lifetime of the bo; to reset to the
   3316  * default state (no annotations), call this function with a \c count
   3317  * of zero.
   3318  */
   3319 drm_public void
   3320 drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo,
   3321 					 drm_intel_aub_annotation *annotations,
   3322 					 unsigned count)
   3323 {
   3324 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
   3325 	unsigned size = sizeof(*annotations) * count;
   3326 	drm_intel_aub_annotation *new_annotations =
   3327 		count > 0 ? realloc(bo_gem->aub_annotations, size) : NULL;
   3328 	if (new_annotations == NULL) {
   3329 		free(bo_gem->aub_annotations);
   3330 		bo_gem->aub_annotations = NULL;
   3331 		bo_gem->aub_annotation_count = 0;
   3332 		return;
   3333 	}
   3334 	memcpy(new_annotations, annotations, size);
   3335 	bo_gem->aub_annotations = new_annotations;
   3336 	bo_gem->aub_annotation_count = count;
   3337 }
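
/*
 * Illustrative sketch (added for exposition; not part of the original file):
 * labelling the two halves of a batch buffer for the AUB dumper.  The split
 * offsets and the function name are hypothetical, and the AUB_TRACE_TYPE_*
 * values are assumed to come from intel_aub.h; entries must be listed with
 * increasing ending_offset, as described above.
 */
static void
example_annotate_batch(drm_intel_bo *batch_bo, uint32_t commands_end,
		       uint32_t total_size)
{
	drm_intel_aub_annotation annotations[2];

	/* Bytes [0, commands_end) hold command packets... */
	annotations[0].type = AUB_TRACE_TYPE_BATCH;
	annotations[0].subtype = 0;
	annotations[0].ending_offset = commands_end;

	/* ...and the remainder is left untyped. */
	annotations[1].type = AUB_TRACE_TYPE_NOTYPE;
	annotations[1].subtype = 0;
	annotations[1].ending_offset = total_size;

	drm_intel_bufmgr_gem_set_aub_annotations(batch_bo, annotations, 2);
}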
   3338 
   3339 static pthread_mutex_t bufmgr_list_mutex = PTHREAD_MUTEX_INITIALIZER;
   3340 static drmMMListHead bufmgr_list = { &bufmgr_list, &bufmgr_list };
   3341 
   3342 static drm_intel_bufmgr_gem *
   3343 drm_intel_bufmgr_gem_find(int fd)
   3344 {
   3345 	drm_intel_bufmgr_gem *bufmgr_gem;
   3346 
   3347 	DRMLISTFOREACHENTRY(bufmgr_gem, &bufmgr_list, managers) {
   3348 		if (bufmgr_gem->fd == fd) {
   3349 			atomic_inc(&bufmgr_gem->refcount);
   3350 			return bufmgr_gem;
   3351 		}
   3352 	}
   3353 
   3354 	return NULL;
   3355 }
   3356 
   3357 static void
   3358 drm_intel_bufmgr_gem_unref(drm_intel_bufmgr *bufmgr)
   3359 {
   3360 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
   3361 
   3362 	if (atomic_add_unless(&bufmgr_gem->refcount, -1, 1)) {
   3363 		pthread_mutex_lock(&bufmgr_list_mutex);
   3364 
   3365 		if (atomic_dec_and_test(&bufmgr_gem->refcount)) {
   3366 			DRMLISTDEL(&bufmgr_gem->managers);
   3367 			drm_intel_bufmgr_gem_destroy(bufmgr);
   3368 		}
   3369 
   3370 		pthread_mutex_unlock(&bufmgr_list_mutex);
   3371 	}
   3372 }
   3373 
   3374 static bool
   3375 has_userptr(drm_intel_bufmgr_gem *bufmgr_gem)
   3376 {
   3377 	int ret;
   3378 	void *ptr;
   3379 	long pgsz;
   3380 	struct drm_i915_gem_userptr userptr;
   3381 	struct drm_gem_close close_bo;
   3382 
   3383 	pgsz = sysconf(_SC_PAGESIZE);
   3384 	assert(pgsz > 0);
   3385 
   3386 	ret = posix_memalign(&ptr, pgsz, pgsz);
   3387 	if (ret) {
   3388 		DBG("Failed to get a page (%ld) for userptr detection!\n",
   3389 			pgsz);
   3390 		return false;
   3391 	}
   3392 
   3393 	memset(&userptr, 0, sizeof(userptr));
   3394 	userptr.user_ptr = (__u64)(unsigned long)ptr;
   3395 	userptr.user_size = pgsz;
   3396 
   3397 retry:
   3398 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr);
   3399 	if (ret) {
   3400 		if (errno == ENODEV && userptr.flags == 0) {
   3401 			userptr.flags = I915_USERPTR_UNSYNCHRONIZED;
   3402 			goto retry;
   3403 		}
   3404 		free(ptr);
   3405 		return false;
   3406 	}
   3407 
   3408 	close_bo.handle = userptr.handle;
   3409 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close_bo);
   3410 	free(ptr);
   3411 	if (ret) {
   3412 		fprintf(stderr, "Failed to release test userptr object! (%d) "
   3413 				"i915 kernel driver may not be sane!\n", errno);
   3414 		return false;
   3415 	}
   3416 
   3417 	return true;
   3418 }
   3419 
   3420 /**
   3421  * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
   3422  * and manage buffer objects.
   3423  *
   3424  * \param fd File descriptor of the opened DRM device.
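 * \param batch_size Size in bytes of the batchbuffers the caller intends to
 *        use; within this function it only scales the per-batch relocation
 *        limit (see max_relocs below).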
   3425  */
   3426 drm_public drm_intel_bufmgr *
   3427 drm_intel_bufmgr_gem_init(int fd, int batch_size)
   3428 {
   3429 	drm_intel_bufmgr_gem *bufmgr_gem;
   3430 	struct drm_i915_gem_get_aperture aperture;
   3431 	drm_i915_getparam_t gp;
   3432 	int ret, tmp;
   3433 	bool exec2 = false;
   3434 
   3435 	pthread_mutex_lock(&bufmgr_list_mutex);
   3436 
   3437 	bufmgr_gem = drm_intel_bufmgr_gem_find(fd);
   3438 	if (bufmgr_gem)
   3439 		goto exit;
   3440 
   3441 	bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
   3442 	if (bufmgr_gem == NULL)
   3443 		goto exit;
   3444 
   3445 	bufmgr_gem->fd = fd;
   3446 	atomic_set(&bufmgr_gem->refcount, 1);
   3447 
   3448 	if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) {
   3449 		free(bufmgr_gem);
   3450 		bufmgr_gem = NULL;
   3451 		goto exit;
   3452 	}
   3453 
   3454 	ret = drmIoctl(bufmgr_gem->fd,
   3455 		       DRM_IOCTL_I915_GEM_GET_APERTURE,
   3456 		       &aperture);
   3457 
   3458 	if (ret == 0)
   3459 		bufmgr_gem->gtt_size = aperture.aper_available_size;
   3460 	else {
   3461 		fprintf(stderr, "DRM_IOCTL_I915_GEM_GET_APERTURE failed: %s\n",
   3462 			strerror(errno));
   3463 		bufmgr_gem->gtt_size = 128 * 1024 * 1024;
   3464 		fprintf(stderr, "Assuming %dkB available aperture size.\n"
   3465 			"May lead to reduced performance or incorrect "
   3466 			"rendering.\n",
   3467 			(int)bufmgr_gem->gtt_size / 1024);
   3468 	}
   3469 
   3470 	bufmgr_gem->pci_device = get_pci_device_id(bufmgr_gem);
   3471 
   3472 	if (IS_GEN2(bufmgr_gem->pci_device))
   3473 		bufmgr_gem->gen = 2;
   3474 	else if (IS_GEN3(bufmgr_gem->pci_device))
   3475 		bufmgr_gem->gen = 3;
   3476 	else if (IS_GEN4(bufmgr_gem->pci_device))
   3477 		bufmgr_gem->gen = 4;
   3478 	else if (IS_GEN5(bufmgr_gem->pci_device))
   3479 		bufmgr_gem->gen = 5;
   3480 	else if (IS_GEN6(bufmgr_gem->pci_device))
   3481 		bufmgr_gem->gen = 6;
   3482 	else if (IS_GEN7(bufmgr_gem->pci_device))
   3483 		bufmgr_gem->gen = 7;
   3484 	else if (IS_GEN8(bufmgr_gem->pci_device))
   3485 		bufmgr_gem->gen = 8;
   3486 	else if (IS_GEN9(bufmgr_gem->pci_device))
   3487 		bufmgr_gem->gen = 9;
   3488 	else {
   3489 		free(bufmgr_gem);
   3490 		bufmgr_gem = NULL;
   3491 		goto exit;
   3492 	}
   3493 
   3494 	if (IS_GEN3(bufmgr_gem->pci_device) &&
   3495 	    bufmgr_gem->gtt_size > 256*1024*1024) {
   3496 		/* The unmappable part of gtt on gen 3 (i.e. above 256MB) can't
   3497 		 * be used for tiled blits. To simplify the accounting, just
   3498 		 * subtract the unmappable part (fixed to 256MB on all known
   3499 		 * gen3 devices) if the kernel advertises it. */
   3500 		bufmgr_gem->gtt_size -= 256*1024*1024;
   3501 	}
   3502 
   3503 	VG_CLEAR(gp);
   3504 	gp.value = &tmp;
   3505 
   3506 	gp.param = I915_PARAM_HAS_EXECBUF2;
   3507 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
   3508 	if (!ret)
   3509 		exec2 = true;
   3510 
   3511 	gp.param = I915_PARAM_HAS_BSD;
   3512 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
   3513 	bufmgr_gem->has_bsd = ret == 0;
   3514 
   3515 	gp.param = I915_PARAM_HAS_BLT;
   3516 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
   3517 	bufmgr_gem->has_blt = ret == 0;
   3518 
   3519 	gp.param = I915_PARAM_HAS_RELAXED_FENCING;
   3520 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
   3521 	bufmgr_gem->has_relaxed_fencing = ret == 0;
   3522 
   3523 	if (has_userptr(bufmgr_gem))
   3524 		bufmgr_gem->bufmgr.bo_alloc_userptr =
   3525 			drm_intel_gem_bo_alloc_userptr;
   3526 
   3527 	gp.param = I915_PARAM_HAS_WAIT_TIMEOUT;
   3528 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
   3529 	bufmgr_gem->has_wait_timeout = ret == 0;
   3530 
   3531 	gp.param = I915_PARAM_HAS_LLC;
   3532 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
   3533 	if (ret != 0) {
   3534 		/* Kernel does not support the HAS_LLC query; fall back to GPU
   3535 		 * generation detection and assume that we have LLC on GEN6/7.
   3536 		 */
   3537 		bufmgr_gem->has_llc = (IS_GEN6(bufmgr_gem->pci_device) ||
   3538 				IS_GEN7(bufmgr_gem->pci_device));
   3539 	} else
   3540 		bufmgr_gem->has_llc = *gp.value;
   3541 
   3542 	gp.param = I915_PARAM_HAS_VEBOX;
   3543 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
   3544 		bufmgr_gem->has_vebox = (ret == 0) && (*gp.value > 0);
   3545 
   3546 	if (bufmgr_gem->gen < 4) {
   3547 		gp.param = I915_PARAM_NUM_FENCES_AVAIL;
   3548 		gp.value = &bufmgr_gem->available_fences;
   3549 		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
   3550 		if (ret) {
   3551 			fprintf(stderr, "get fences failed: %d [%d]\n", ret,
   3552 				errno);
   3553 			fprintf(stderr, "param: %d, val: %d\n", gp.param,
   3554 				*gp.value);
   3555 			bufmgr_gem->available_fences = 0;
   3556 		} else {
   3557 			/* XXX The kernel reports the total number of fences,
   3558 			 * including any that may be pinned.
   3559 			 *
   3560 			 * We presume that there will be at least one pinned
   3561 			 * fence for the scanout buffer, but there may be more
   3562 			 * than one scanout and the user may be manually
   3563 			 * pinning buffers. Let's move to execbuffer2 and
   3564 			 * thereby forget the insanity of using fences...
   3565 			 */
   3566 			bufmgr_gem->available_fences -= 2;
   3567 			if (bufmgr_gem->available_fences < 0)
   3568 				bufmgr_gem->available_fences = 0;
   3569 		}
   3570 	}
   3571 
   3572 	/* Let's go with one relocation for every 2 dwords (but round down a bit
   3573 	 * since a power of two will mean an extra page allocation for the reloc
   3574 	 * buffer).
   3575 	 *
   3576 	 * Every 4 was too few for the blender benchmark.
   3577 	 */
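	/* For example, a 16 KiB batch works out to 16384 / 4 / 2 - 2 = 2046
	 * relocation entries. */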
   3578 	bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
   3579 
   3580 	bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
   3581 	bufmgr_gem->bufmgr.bo_alloc_for_render =
   3582 	    drm_intel_gem_bo_alloc_for_render;
   3583 	bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled;
   3584 	bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
   3585 	bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference;
   3586 	bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map;
   3587 	bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap;
   3588 	bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata;
   3589 	bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata;
   3590 	bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
   3591 	bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
   3592 	bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence;
   3593 	bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
   3594 	bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
   3595 	bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
   3596 	bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling;
   3597 	bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink;
   3598 	/* Use the new one if available */
   3599 	if (exec2) {
   3600 		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2;
   3601 		bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2;
   3602 	} else
   3603 		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec;
   3604 	bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy;
   3605 	bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise;
   3606 	bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_unref;
   3607 	bufmgr_gem->bufmgr.debug = 0;
   3608 	bufmgr_gem->bufmgr.check_aperture_space =
   3609 	    drm_intel_gem_check_aperture_space;
   3610 	bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse;
   3611 	bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable;
   3612 	bufmgr_gem->bufmgr.get_pipe_from_crtc_id =
   3613 	    drm_intel_gem_get_pipe_from_crtc_id;
   3614 	bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references;
   3615 
   3616 	DRMINITLISTHEAD(&bufmgr_gem->named);
   3617 	init_cache_buckets(bufmgr_gem);
   3618 
   3619 	DRMINITLISTHEAD(&bufmgr_gem->vma_cache);
   3620 	bufmgr_gem->vma_max = -1; /* unlimited by default */
   3621 
   3622 	DRMLISTADD(&bufmgr_gem->managers, &bufmgr_list);
   3623 
   3624 exit:
   3625 	pthread_mutex_unlock(&bufmgr_list_mutex);
   3626 
   3627 	return bufmgr_gem != NULL ? &bufmgr_gem->bufmgr : NULL;
   3628 }
   3629
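
/*
 * Illustrative sketch (added for exposition; not part of the original file):
 * typical bring-up of this buffer manager by a client.  The device path,
 * batch size and function name are placeholders; the caller keeps ownership
 * of the fd, which the buffer manager does not close.
 */
static drm_intel_bufmgr *
example_open_bufmgr(const char *device_path)
{
	drm_intel_bufmgr *bufmgr;
	int fd;

	fd = open(device_path, O_RDWR);
	if (fd < 0)
		return NULL;

	/* batch_size scales max_relocs, as computed above. */
	bufmgr = drm_intel_bufmgr_gem_init(fd, 16 * 1024);
	if (bufmgr == NULL) {
		close(fd);
		return NULL;
	}

	/* Allow freed buffers to be recycled through the cache buckets. */
	drm_intel_bufmgr_gem_enable_reuse(bufmgr);

	return bufmgr;
}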