/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <pthread.h>
#include <sched.h>
#include <time.h>
#include <sys/ioctl.h>
#ifdef HAVE_ALLOCA_H
# include <alloca.h>
#endif

#include "xf86drm.h"
#include "amdgpu_drm.h"
#include "amdgpu_internal.h"

/**
 * Create command submission context
 *
 * \param   dev - \c [in] amdgpu device handle
 * \param   context - \c [out] amdgpu context handle
 *
 * \return  0 on success, otherwise a negative POSIX error code
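 *
 * Illustrative usage sketch (assumes \c dev was obtained from
 * amdgpu_device_initialize()):
 * \code
 *	amdgpu_context_handle context;
 *	int r;
 *
 *	r = amdgpu_cs_ctx_create(dev, &context);
 *	if (r)
 *		return r;
 *	...
 *	r = amdgpu_cs_ctx_free(context);
 * \endcode
 *
 * \sa amdgpu_cs_ctx_free()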
*/
int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
			 amdgpu_context_handle *context)
{
	struct amdgpu_context *gpu_context;
	union drm_amdgpu_ctx args;
	int r;

	if (NULL == dev)
		return -EINVAL;
	if (NULL == context)
		return -EINVAL;

	gpu_context = calloc(1, sizeof(struct amdgpu_context));
	if (NULL == gpu_context)
		return -ENOMEM;

	gpu_context->dev = dev;

	/* Create the context */
	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
	r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CTX, &args, sizeof(args));
	if (r)
		goto error;

	gpu_context->id = args.out.alloc.ctx_id;
	*context = (amdgpu_context_handle)gpu_context;

	return 0;

error:
	free(gpu_context);
	return r;
}

/**
 * Release command submission context
 *
 * \param   context - \c [in] amdgpu context handle
 *
 * \return  0 on success, otherwise a negative POSIX error code
 *
 * \sa amdgpu_cs_ctx_create()
*/
int amdgpu_cs_ctx_free(amdgpu_context_handle context)
{
	union drm_amdgpu_ctx args;
	int r;

	if (NULL == context)
		return -EINVAL;

	/* now deal with kernel side */
	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_FREE_CTX;
	args.in.ctx_id = context->id;
	r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
				&args, sizeof(args));

	free(context);

	return r;
}

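/**
 * Query the GPU reset state of a context
 *
 * \param   context - \c [in] amdgpu context handle
 * \param   state - \c [out] reset status, as reported by the
 *          AMDGPU_CTX_OP_QUERY_STATE ioctl below
 * \param   hangs - \c [out] hang count, as reported by the same ioctl
 *
 * \return  0 on success, otherwise a negative POSIX error code
*/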
int amdgpu_cs_query_reset_state(amdgpu_context_handle context,
				uint32_t *state, uint32_t *hangs)
{
	union drm_amdgpu_ctx args;
	int r;

	if (!context || !state || !hangs)
		return -EINVAL;

	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_QUERY_STATE;
	args.in.ctx_id = context->id;
	r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
				&args, sizeof(args));
	if (!r) {
		*state = args.out.state.reset_status;
		*hangs = args.out.state.hangs;
	}
	return r;
}

/**
 * Submit one command submission request to the kernel DRM
 *
 * \param   context - \c [in] GPU Context
 * \param   ibs_request - \c [in/out] Pointer to the submission request;
 *          on success its \c seq_no field receives the fence sequence
 *          number for this submission
 *
 * \return  0 on success, otherwise a negative POSIX error code
 * \sa amdgpu_cs_submit()
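 *
 * Note: the request is translated into one AMDGPU_CHUNK_ID_IB chunk per
 * IB, plus an optional AMDGPU_CHUNK_ID_FENCE chunk and an optional
 * AMDGPU_CHUNK_ID_DEPENDENCIES chunk, all passed to the DRM_AMDGPU_CS
 * ioctl in a single chunk array.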
*/
static int amdgpu_cs_submit_one(amdgpu_context_handle context,
				struct amdgpu_cs_request *ibs_request)
{
	union drm_amdgpu_cs cs;
	uint64_t *chunk_array;
	struct drm_amdgpu_cs_chunk *chunks;
	struct drm_amdgpu_cs_chunk_data *chunk_data;
	struct drm_amdgpu_cs_chunk_dep *dependencies = NULL;
	uint32_t i, size;
	bool user_fence;
	int r = 0;

	if (ibs_request->ip_type >= AMDGPU_HW_IP_NUM)
		return -EINVAL;
	if (ibs_request->ring >= AMDGPU_CS_MAX_RINGS)
		return -EINVAL;
	if (ibs_request->number_of_ibs > AMDGPU_CS_MAX_IBS_PER_SUBMIT)
		return -EINVAL;
	user_fence = (ibs_request->fence_info.handle != NULL);

	/* One chunk per IB, plus one slot reserved for the optional
	 * dependencies chunk and, with a user fence, one more for the
	 * fence chunk. */
	size = ibs_request->number_of_ibs + (user_fence ? 2 : 1);

	chunk_array = alloca(sizeof(uint64_t) * size);
	chunks = alloca(sizeof(struct drm_amdgpu_cs_chunk) * size);

	/* Chunk data is only needed for the IB and fence chunks; the
	 * dependencies chunk points at its own array below. */
	size = ibs_request->number_of_ibs + (user_fence ? 1 : 0);

	chunk_data = alloca(sizeof(struct drm_amdgpu_cs_chunk_data) * size);

	memset(&cs, 0, sizeof(cs));
	cs.in.chunks = (uint64_t)(uintptr_t)chunk_array;
	cs.in.ctx_id = context->id;
	if (ibs_request->resources)
		cs.in.bo_list_handle = ibs_request->resources->handle;
	cs.in.num_chunks = ibs_request->number_of_ibs;
	/* IB chunks */
	for (i = 0; i < ibs_request->number_of_ibs; i++) {
		struct amdgpu_cs_ib_info *ib;
		chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
		chunks[i].chunk_id = AMDGPU_CHUNK_ID_IB;
		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
		chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];

		ib = &ibs_request->ibs[i];

		chunk_data[i].ib_data._pad = 0;
		chunk_data[i].ib_data.va_start = ib->ib_mc_address;
		chunk_data[i].ib_data.ib_bytes = ib->size * 4;
		chunk_data[i].ib_data.ip_type = ibs_request->ip_type;
		chunk_data[i].ib_data.ip_instance = ibs_request->ip_instance;
		chunk_data[i].ib_data.ring = ibs_request->ring;
		chunk_data[i].ib_data.flags = ib->flags;
	}

	if (user_fence) {
		i = cs.in.num_chunks++;

		/* fence chunk */
		chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
		chunks[i].chunk_id = AMDGPU_CHUNK_ID_FENCE;
		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_fence) / 4;
		chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];

		/* fence bo handle */
		chunk_data[i].fence_data.handle = ibs_request->fence_info.handle->handle;
		/* byte offset into the fence bo (fence_info.offset is in
		 * units of uint64_t) */
		chunk_data[i].fence_data.offset =
			ibs_request->fence_info.offset * sizeof(uint64_t);
	}

	if (ibs_request->number_of_dependencies) {
		dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_dep) *
			ibs_request->number_of_dependencies);
		if (!dependencies) {
			r = -ENOMEM;
			goto error;
		}

		for (i = 0; i < ibs_request->number_of_dependencies; ++i) {
			struct amdgpu_cs_fence *info = &ibs_request->dependencies[i];
			struct drm_amdgpu_cs_chunk_dep *dep = &dependencies[i];
			dep->ip_type = info->ip_type;
			dep->ip_instance = info->ip_instance;
			dep->ring = info->ring;
			dep->ctx_id = info->context->id;
			dep->handle = info->fence;
		}

		i = cs.in.num_chunks++;

		/* dependencies chunk */
		chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
		chunks[i].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4
			* ibs_request->number_of_dependencies;
		chunks[i].chunk_data = (uint64_t)(uintptr_t)dependencies;
	}

	r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CS,
				&cs, sizeof(cs));
	if (r)
		goto error;

	ibs_request->seq_no = cs.out.handle;

error:
	free(dependencies);
	return r;
}

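/**
 * Submit one or more command submission requests
 *
 * \param   context - \c [in] GPU Context
 * \param   flags - \c [in] global submission flags
 * \param   ibs_request - \c [in/out] array of submission requests; on
 *          success each request's \c seq_no field receives its fence
 *          sequence number
 * \param   number_of_requests - \c [in] number of requests in the array
 *
 * \return  0 on success, otherwise a negative POSIX error code
 *
 * Illustrative usage sketch (\c ib_mc_address, \c ib_size_dw and
 * \c bo_list are placeholders for an IB's GPU address, its size in
 * dwords and an amdgpu_bo_list_handle obtained elsewhere):
 * \code
 *	struct amdgpu_cs_ib_info ib_info = {0};
 *	struct amdgpu_cs_request request = {0};
 *	int r;
 *
 *	ib_info.ib_mc_address = ib_mc_address;
 *	ib_info.size = ib_size_dw;
 *
 *	request.ip_type = AMDGPU_HW_IP_GFX;
 *	request.ring = 0;
 *	request.number_of_ibs = 1;
 *	request.ibs = &ib_info;
 *	request.resources = bo_list;
 *
 *	r = amdgpu_cs_submit(context, 0, &request, 1);
 * \endcode
 *
 * \sa amdgpu_cs_query_fence_status()
*/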
int amdgpu_cs_submit(amdgpu_context_handle context,
		     uint64_t flags,
		     struct amdgpu_cs_request *ibs_request,
		     uint32_t number_of_requests)
{
	uint32_t i;
	int r;

	if (NULL == context)
		return -EINVAL;
	if (NULL == ibs_request)
		return -EINVAL;

	r = 0;
	for (i = 0; i < number_of_requests; i++) {
		r = amdgpu_cs_submit_one(context, ibs_request);
		if (r)
			break;
		ibs_request++;
	}

	return r;
}

/**
 * Calculate absolute timeout.
 *
 * Converts a timeout relative to "now" into an absolute timeout on the
 * CLOCK_MONOTONIC clock, clamping to AMDGPU_TIMEOUT_INFINITE on overflow.
 *
 * \param   timeout - \c [in] relative timeout in nanoseconds.
 *
 * \return  absolute timeout in nanoseconds
*/
drm_private uint64_t amdgpu_cs_calculate_timeout(uint64_t timeout)
{
	int r;

	if (timeout != AMDGPU_TIMEOUT_INFINITE) {
		struct timespec current;
		uint64_t current_ns;
		r = clock_gettime(CLOCK_MONOTONIC, &current);
		if (r) {
			fprintf(stderr, "clock_gettime() returned error (%d)!\n", errno);
			return AMDGPU_TIMEOUT_INFINITE;
		}

		current_ns = ((uint64_t)current.tv_sec) * 1000000000ull;
		current_ns += current.tv_nsec;
		timeout += current_ns;
		if (timeout < current_ns)
			timeout = AMDGPU_TIMEOUT_INFINITE;
	}
	return timeout;
}

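/**
 * Wait for a command submission via DRM_IOCTL_AMDGPU_WAIT_CS
 * (internal helper; parameter meanings are inferred from the ioctl
 * arguments this function fills in)
 *
 * \param   context - \c [in] GPU Context
 * \param   ip - \c [in] hardware IP block type
 * \param   ip_instance - \c [in] index of the IP block of the same type
 * \param   ring - \c [in] ring index
 * \param   handle - \c [in] fence sequence number to wait on
 * \param   timeout_ns - \c [in] timeout in nanoseconds
 * \param   flags - \c [in] AMDGPU_QUERY_FENCE_* flags
 * \param   busy - \c [out] kernel wait status; zero means the fence
 *          signaled before the timeout
 *
 * \return  0 on success, otherwise a negative POSIX error code
*/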
static int amdgpu_ioctl_wait_cs(amdgpu_context_handle context,
				unsigned ip,
				unsigned ip_instance,
				uint32_t ring,
				uint64_t handle,
				uint64_t timeout_ns,
				uint64_t flags,
				bool *busy)
{
	amdgpu_device_handle dev = context->dev;
	union drm_amdgpu_wait_cs args;
	int r;

	memset(&args, 0, sizeof(args));
	args.in.handle = handle;
	args.in.ip_type = ip;
	args.in.ip_instance = ip_instance;
	args.in.ring = ring;
	args.in.ctx_id = context->id;

	if (flags & AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE)
		args.in.timeout = timeout_ns;
	else
		args.in.timeout = amdgpu_cs_calculate_timeout(timeout_ns);

	r = drmIoctl(dev->fd, DRM_IOCTL_AMDGPU_WAIT_CS, &args);
	if (r)
		return -errno;

	*busy = args.out.status;
	return 0;
}

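/**
 * Query or wait for the status of a command submission fence
 *
 * \param   fence - \c [in] fence to query, identifying the context,
 *          IP block, ring and sequence number of the submission
 * \param   timeout_ns - \c [in] timeout in nanoseconds
 * \param   flags - \c [in] AMDGPU_QUERY_FENCE_* flags
 * \param   expired - \c [out] set to true if the fence has signaled
 *
 * \return  0 on success, otherwise a negative POSIX error code
 *
 * Illustrative usage sketch (\c context and \c request are placeholders
 * for a context handle and a previously submitted request):
 * \code
 *	struct amdgpu_cs_fence fence = {0};
 *	uint32_t expired;
 *	int r;
 *
 *	fence.context = context;
 *	fence.ip_type = AMDGPU_HW_IP_GFX;
 *	fence.ring = 0;
 *	fence.fence = request.seq_no;
 *
 *	r = amdgpu_cs_query_fence_status(&fence, AMDGPU_TIMEOUT_INFINITE,
 *					 0, &expired);
 * \endcode
 *
 * \sa amdgpu_cs_submit()
*/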
int amdgpu_cs_query_fence_status(struct amdgpu_cs_fence *fence,
				 uint64_t timeout_ns,
				 uint64_t flags,
				 uint32_t *expired)
{
	bool busy = true;
	int r;

	if (NULL == fence)
		return -EINVAL;
	if (NULL == expired)
		return -EINVAL;
	if (NULL == fence->context)
		return -EINVAL;
	if (fence->ip_type >= AMDGPU_HW_IP_NUM)
		return -EINVAL;
	if (fence->ring >= AMDGPU_CS_MAX_RINGS)
		return -EINVAL;

	*expired = false;

	r = amdgpu_ioctl_wait_cs(fence->context, fence->ip_type,
				 fence->ip_instance, fence->ring,
				 fence->fence, timeout_ns, flags, &busy);

	if (!r && !busy)
		*expired = true;

	return r;
}