/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <pthread.h>
#include <sched.h>
#include <time.h>
#include <sys/ioctl.h>
#ifdef HAVE_ALLOCA_H
# include <alloca.h>
#endif

#include "xf86drm.h"
#include "amdgpu_drm.h"
#include "amdgpu_internal.h"

static int amdgpu_cs_unreference_sem(amdgpu_semaphore_handle sem);
static int amdgpu_cs_reset_sem(amdgpu_semaphore_handle sem);

/**
 * Create command submission context
 *
 * \param   dev - \c [in] amdgpu device handle
 * \param   context - \c [out] amdgpu context handle
 *
 * \return  0 on success, otherwise a negative POSIX error code
 */
int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
			 amdgpu_context_handle *context)
{
	struct amdgpu_context *gpu_context;
	union drm_amdgpu_ctx args;
	int i, j, k;
	int r;

	if (NULL == dev)
		return -EINVAL;
	if (NULL == context)
		return -EINVAL;

	gpu_context = calloc(1, sizeof(struct amdgpu_context));
	if (NULL == gpu_context)
		return -ENOMEM;

	gpu_context->dev = dev;

	r = pthread_mutex_init(&gpu_context->sequence_mutex, NULL);
	if (r)
		goto error_free;

	/* Create the context */
	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
	r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CTX, &args, sizeof(args));
	if (r)
		goto error_mutex;

	gpu_context->id = args.out.alloc.ctx_id;
	for (i = 0; i < AMDGPU_HW_IP_NUM; i++)
		for (j = 0; j < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; j++)
			for (k = 0; k < AMDGPU_CS_MAX_RINGS; k++)
				list_inithead(&gpu_context->sem_list[i][j][k]);
	*context = (amdgpu_context_handle)gpu_context;

	return 0;

error_mutex:
	/* only destroy the mutex after it was successfully initialized */
	pthread_mutex_destroy(&gpu_context->sequence_mutex);
error_free:
	free(gpu_context);
	return r;
}

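/*
 * Example usage (a minimal sketch): create and release a context on an
 * initialized device.  The fd variable is assumed to be a DRM render node
 * file descriptor opened by the caller.
 *
 *	amdgpu_device_handle dev;
 *	amdgpu_context_handle ctx;
 *	uint32_t major, minor;
 *
 *	if (amdgpu_device_initialize(fd, &major, &minor, &dev))
 *		return;
 *	if (!amdgpu_cs_ctx_create(dev, &ctx)) {
 *		...submit work against ctx...
 *		amdgpu_cs_ctx_free(ctx);
 *	}
 *	amdgpu_device_deinitialize(dev);
 */
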
/**
 * Release command submission context
 *
 * \param   context - \c [in] amdgpu context handle
 *
 * \return  0 on success, otherwise a negative POSIX error code
 */
int amdgpu_cs_ctx_free(amdgpu_context_handle context)
{
	union drm_amdgpu_ctx args;
	int i, j, k;
	int r;

	if (NULL == context)
		return -EINVAL;

	pthread_mutex_destroy(&context->sequence_mutex);

	/* now deal with kernel side */
	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_FREE_CTX;
	args.in.ctx_id = context->id;
	r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
				&args, sizeof(args));
	for (i = 0; i < AMDGPU_HW_IP_NUM; i++) {
		for (j = 0; j < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; j++) {
			for (k = 0; k < AMDGPU_CS_MAX_RINGS; k++) {
				amdgpu_semaphore_handle sem, tmp;
				/* use the safe variant since entries are
				 * removed while iterating */
				LIST_FOR_EACH_ENTRY_SAFE(sem, tmp, &context->sem_list[i][j][k], list) {
					list_del(&sem->list);
					amdgpu_cs_reset_sem(sem);
					amdgpu_cs_unreference_sem(sem);
				}
			}
		}
	}
	free(context);

	return r;
}

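/**
 * Query GPU reset state of the context
 *
 * \param   context - \c [in] amdgpu context handle
 * \param   state - \c [out] reset status, one of the AMDGPU_CTX_*_RESET
 *          codes from amdgpu_drm.h
 * \param   hangs - \c [out] hang count reported by the kernel
 *
 * \return  0 on success, otherwise a negative POSIX error code
 */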
int amdgpu_cs_query_reset_state(amdgpu_context_handle context,
				uint32_t *state, uint32_t *hangs)
{
	union drm_amdgpu_ctx args;
	int r;

	if (!context)
		return -EINVAL;

	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_QUERY_STATE;
	args.in.ctx_id = context->id;
	r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
				&args, sizeof(args));
	if (!r) {
		*state = args.out.state.reset_status;
		*hangs = args.out.state.hangs;
	}
	return r;
}

/**
 * Submit one command submission request to the kernel DRM
 *
 * \param   context - \c [in]  GPU Context
 * \param   ibs_request - \c [in/out]  Pointer to the submission request;
 *          seq_no is set to the fence sequence number on success
 *
 * \return  0 on success, otherwise a negative POSIX error code
 * \sa amdgpu_cs_submit()
 */
static int amdgpu_cs_submit_one(amdgpu_context_handle context,
				struct amdgpu_cs_request *ibs_request)
{
	union drm_amdgpu_cs cs;
	uint64_t *chunk_array;
	struct drm_amdgpu_cs_chunk *chunks;
	struct drm_amdgpu_cs_chunk_data *chunk_data;
	struct drm_amdgpu_cs_chunk_dep *dependencies = NULL;
	struct drm_amdgpu_cs_chunk_dep *sem_dependencies = NULL;
	struct list_head *sem_list;
	amdgpu_semaphore_handle sem, tmp;
	uint32_t i, size, sem_count = 0;
	bool user_fence;
	int r = 0;

	if (ibs_request->ip_type >= AMDGPU_HW_IP_NUM)
		return -EINVAL;
	/* keep the sem_list index below in bounds */
	if (ibs_request->ip_instance >= AMDGPU_HW_IP_INSTANCE_MAX_COUNT)
		return -EINVAL;
	if (ibs_request->ring >= AMDGPU_CS_MAX_RINGS)
		return -EINVAL;
	if (ibs_request->number_of_ibs > AMDGPU_CS_MAX_IBS_PER_SUBMIT)
		return -EINVAL;
	if (ibs_request->number_of_ibs == 0) {
		ibs_request->seq_no = AMDGPU_NULL_SUBMIT_SEQ;
		return 0;
	}
	user_fence = (ibs_request->fence_info.handle != NULL);

	/* Worst case: one chunk per IB, an optional user-fence chunk, and up
	 * to two dependency chunks (explicit dependencies and semaphores). */
	size = ibs_request->number_of_ibs + (user_fence ? 2 : 1) + 1;

	chunk_array = alloca(sizeof(uint64_t) * size);
	chunks = alloca(sizeof(struct drm_amdgpu_cs_chunk) * size);

	size = ibs_request->number_of_ibs + (user_fence ? 1 : 0);

	chunk_data = alloca(sizeof(struct drm_amdgpu_cs_chunk_data) * size);

	memset(&cs, 0, sizeof(cs));
	cs.in.chunks = (uint64_t)(uintptr_t)chunk_array;
	cs.in.ctx_id = context->id;
	if (ibs_request->resources)
		cs.in.bo_list_handle = ibs_request->resources->handle;
	cs.in.num_chunks = ibs_request->number_of_ibs;
	/* IB chunks */
	for (i = 0; i < ibs_request->number_of_ibs; i++) {
		struct amdgpu_cs_ib_info *ib;
		chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
		chunks[i].chunk_id = AMDGPU_CHUNK_ID_IB;
		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
		chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];

		ib = &ibs_request->ibs[i];

		chunk_data[i].ib_data._pad = 0;
		chunk_data[i].ib_data.va_start = ib->ib_mc_address;
		chunk_data[i].ib_data.ib_bytes = ib->size * 4;
		chunk_data[i].ib_data.ip_type = ibs_request->ip_type;
		chunk_data[i].ib_data.ip_instance = ibs_request->ip_instance;
		chunk_data[i].ib_data.ring = ibs_request->ring;
		chunk_data[i].ib_data.flags = ib->flags;
	}

	pthread_mutex_lock(&context->sequence_mutex);

	if (user_fence) {
		i = cs.in.num_chunks++;

		/* fence chunk */
		chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
		chunks[i].chunk_id = AMDGPU_CHUNK_ID_FENCE;
		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_fence) / 4;
		chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];

		/* fence bo handle */
		chunk_data[i].fence_data.handle = ibs_request->fence_info.handle->handle;
		/* offset */
		chunk_data[i].fence_data.offset =
			ibs_request->fence_info.offset * sizeof(uint64_t);
	}

	if (ibs_request->number_of_dependencies) {
		dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_dep) *
			ibs_request->number_of_dependencies);
		if (!dependencies) {
			r = -ENOMEM;
			goto error_unlock;
		}

		for (i = 0; i < ibs_request->number_of_dependencies; ++i) {
			struct amdgpu_cs_fence *info = &ibs_request->dependencies[i];
			struct drm_amdgpu_cs_chunk_dep *dep = &dependencies[i];
			dep->ip_type = info->ip_type;
			dep->ip_instance = info->ip_instance;
			dep->ring = info->ring;
			dep->ctx_id = info->context->id;
			dep->handle = info->fence;
		}

		i = cs.in.num_chunks++;

		/* dependencies chunk */
		chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
		chunks[i].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4
			* ibs_request->number_of_dependencies;
		chunks[i].chunk_data = (uint64_t)(uintptr_t)dependencies;
	}

	sem_list = &context->sem_list[ibs_request->ip_type][ibs_request->ip_instance][ibs_request->ring];
	LIST_FOR_EACH_ENTRY(sem, sem_list, list)
		sem_count++;
	if (sem_count) {
		sem_dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_dep) * sem_count);
		if (!sem_dependencies) {
			r = -ENOMEM;
			goto error_unlock;
		}
		sem_count = 0;
		LIST_FOR_EACH_ENTRY_SAFE(sem, tmp, sem_list, list) {
			struct amdgpu_cs_fence *info = &sem->signal_fence;
			struct drm_amdgpu_cs_chunk_dep *dep = &sem_dependencies[sem_count++];
			dep->ip_type = info->ip_type;
			dep->ip_instance = info->ip_instance;
			dep->ring = info->ring;
			dep->ctx_id = info->context->id;
			dep->handle = info->fence;

			list_del(&sem->list);
			amdgpu_cs_reset_sem(sem);
			amdgpu_cs_unreference_sem(sem);
		}
		i = cs.in.num_chunks++;

		/* semaphore dependencies chunk */
		chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
		chunks[i].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4 * sem_count;
		chunks[i].chunk_data = (uint64_t)(uintptr_t)sem_dependencies;
	}

	r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CS,
				&cs, sizeof(cs));
	if (r)
		goto error_unlock;

	ibs_request->seq_no = cs.out.handle;
	context->last_seq[ibs_request->ip_type][ibs_request->ip_instance][ibs_request->ring] = ibs_request->seq_no;
error_unlock:
	pthread_mutex_unlock(&context->sequence_mutex);
	free(dependencies);
	free(sem_dependencies);
	return r;
}

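/**
 * Submit an array of command submission requests
 *
 * \param   context - \c [in] amdgpu context handle
 * \param   flags - \c [in] global submission flags (not used here)
 * \param   ibs_request - \c [in/out] array of requests; seq_no is filled
 *          in for each request that is submitted
 * \param   number_of_requests - \c [in] number of entries in ibs_request
 *
 * \return  0 on success, otherwise a negative POSIX error code; submission
 *          stops at the first request that fails
 */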
int amdgpu_cs_submit(amdgpu_context_handle context,
		     uint64_t flags,
		     struct amdgpu_cs_request *ibs_request,
		     uint32_t number_of_requests)
{
	uint32_t i;
	int r;

	if (NULL == context)
		return -EINVAL;
	if (NULL == ibs_request)
		return -EINVAL;

	r = 0;
	for (i = 0; i < number_of_requests; i++) {
		r = amdgpu_cs_submit_one(context, ibs_request);
		if (r)
			break;
		ibs_request++;
	}

	return r;
}

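/*
 * Example usage (a minimal sketch): submit a single IB on the GFX ring.
 * The ib_va and ib_size_dw values and the bo_list handle are hypothetical;
 * the command stream is assumed to have been written to a GPU-mapped
 * buffer beforehand, with bo_list coming from amdgpu_bo_list_create().
 *
 *	struct amdgpu_cs_ib_info ib = { 0 };
 *	struct amdgpu_cs_request req = { 0 };
 *
 *	ib.ib_mc_address = ib_va;	// GPU VA of the command buffer
 *	ib.size = ib_size_dw;		// IB size in dwords
 *
 *	req.ip_type = AMDGPU_HW_IP_GFX;
 *	req.ring = 0;
 *	req.resources = bo_list;
 *	req.number_of_ibs = 1;
 *	req.ibs = &ib;
 *
 *	if (!amdgpu_cs_submit(ctx, 0, &req, 1))
 *		...req.seq_no now identifies this submission...
 */
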
/**
 * Calculate absolute timeout.
 *
 * \param   timeout - \c [in] relative timeout in nanoseconds.
 *
 * \return  absolute timeout in nanoseconds; AMDGPU_TIMEOUT_INFINITE if the
 *          input was infinite, the current time could not be read, or the
 *          addition overflowed
 */
drm_private uint64_t amdgpu_cs_calculate_timeout(uint64_t timeout)
{
	int r;

	if (timeout != AMDGPU_TIMEOUT_INFINITE) {
		struct timespec current;
		uint64_t current_ns;
		r = clock_gettime(CLOCK_MONOTONIC, &current);
		if (r) {
			fprintf(stderr, "clock_gettime() returned error (%d)!\n", errno);
			return AMDGPU_TIMEOUT_INFINITE;
		}

		current_ns = ((uint64_t)current.tv_sec) * 1000000000ull;
		current_ns += current.tv_nsec;
		timeout += current_ns;
		if (timeout < current_ns)
			timeout = AMDGPU_TIMEOUT_INFINITE;
	}
	return timeout;
}

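/**
 * Wait for a command submission via the DRM_IOCTL_AMDGPU_WAIT_CS ioctl
 *
 * \param   context - \c [in] context the submission belongs to
 * \param   ip - \c [in] hardware IP block type
 * \param   ip_instance - \c [in] index of the IP block instance
 * \param   ring - \c [in] ring index
 * \param   handle - \c [in] sequence number of the submission
 * \param   timeout_ns - \c [in] timeout, treated as absolute if
 *          AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE is set in flags
 * \param   flags - \c [in] AMDGPU_QUERY_FENCE_* flags
 * \param   busy - \c [out] true if the submission has not yet completed
 *
 * \return  0 on success, otherwise -errno from the ioctl
 */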
static int amdgpu_ioctl_wait_cs(amdgpu_context_handle context,
				unsigned ip,
				unsigned ip_instance,
				uint32_t ring,
				uint64_t handle,
				uint64_t timeout_ns,
				uint64_t flags,
				bool *busy)
{
	amdgpu_device_handle dev = context->dev;
	union drm_amdgpu_wait_cs args;
	int r;

	memset(&args, 0, sizeof(args));
	args.in.handle = handle;
	args.in.ip_type = ip;
	args.in.ip_instance = ip_instance;
	args.in.ring = ring;
	args.in.ctx_id = context->id;

	if (flags & AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE)
		args.in.timeout = timeout_ns;
	else
		args.in.timeout = amdgpu_cs_calculate_timeout(timeout_ns);

	r = drmIoctl(dev->fd, DRM_IOCTL_AMDGPU_WAIT_CS, &args);
	if (r)
		return -errno;

	*busy = args.out.status;
	return 0;
}

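/**
 * Query or wait for the status of a command submission fence
 *
 * \param   fence - \c [in] fence to wait for, identified by context,
 *          IP block, ring and sequence number
 * \param   timeout_ns - \c [in] how long to wait before giving up
 * \param   flags - \c [in] AMDGPU_QUERY_FENCE_* flags
 * \param   expired - \c [out] set to true if the fence has signaled
 *
 * \return  0 on success, otherwise a negative POSIX error code
 */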
int amdgpu_cs_query_fence_status(struct amdgpu_cs_fence *fence,
				 uint64_t timeout_ns,
				 uint64_t flags,
				 uint32_t *expired)
{
	bool busy = true;
	int r;

	if (NULL == fence)
		return -EINVAL;
	if (NULL == expired)
		return -EINVAL;
	if (NULL == fence->context)
		return -EINVAL;
	if (fence->ip_type >= AMDGPU_HW_IP_NUM)
		return -EINVAL;
	if (fence->ring >= AMDGPU_CS_MAX_RINGS)
		return -EINVAL;
	if (fence->fence == AMDGPU_NULL_SUBMIT_SEQ) {
		*expired = true;
		return 0;
	}

	*expired = false;

	r = amdgpu_ioctl_wait_cs(fence->context, fence->ip_type,
				 fence->ip_instance, fence->ring,
				 fence->fence, timeout_ns, flags, &busy);

	if (!r && !busy)
		*expired = true;

	return r;
}

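/*
 * Example usage (a minimal sketch): wait up to one second for a previous
 * submission to retire.  The ctx and req variables are assumed to come
 * from an earlier amdgpu_cs_submit() call such as the example above.
 *
 *	struct amdgpu_cs_fence fence = { 0 };
 *	uint32_t expired = 0;
 *
 *	fence.context = ctx;
 *	fence.ip_type = req.ip_type;
 *	fence.ip_instance = req.ip_instance;
 *	fence.ring = req.ring;
 *	fence.fence = req.seq_no;
 *
 *	if (!amdgpu_cs_query_fence_status(&fence, 1000000000ull, 0, &expired) &&
 *	    expired)
 *		...the GPU has finished executing the submission...
 */

/**
 * Create a semaphore for cross-ring synchronization
 *
 * The new semaphore is unsignaled and holds a single reference.
 *
 * \param   sem - \c [out] new semaphore handle
 *
 * \return  0 on success, otherwise a negative POSIX error code
 */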
int amdgpu_cs_create_semaphore(amdgpu_semaphore_handle *sem)
{
	struct amdgpu_semaphore *gpu_semaphore;

	if (NULL == sem)
		return -EINVAL;

	gpu_semaphore = calloc(1, sizeof(struct amdgpu_semaphore));
	if (NULL == gpu_semaphore)
		return -ENOMEM;

	atomic_set(&gpu_semaphore->refcount, 1);
	*sem = gpu_semaphore;

	return 0;
}

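/**
 * Signal a semaphore from the given ring
 *
 * Binds the semaphore to the sequence number of the last submission made
 * on (ip_type, ip_instance, ring) of this context and takes an extra
 * reference.  A semaphore can only be signaled once before it is reset.
 *
 * \return  0 on success, otherwise a negative POSIX error code
 */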
int amdgpu_cs_signal_semaphore(amdgpu_context_handle ctx,
			       uint32_t ip_type,
			       uint32_t ip_instance,
			       uint32_t ring,
			       amdgpu_semaphore_handle sem)
{
	if (NULL == ctx)
		return -EINVAL;
	if (ip_type >= AMDGPU_HW_IP_NUM)
		return -EINVAL;
	if (ring >= AMDGPU_CS_MAX_RINGS)
		return -EINVAL;
	if (NULL == sem)
		return -EINVAL;
	/* sem has been signaled */
	if (sem->signal_fence.context)
		return -EINVAL;
	pthread_mutex_lock(&ctx->sequence_mutex);
	sem->signal_fence.context = ctx;
	sem->signal_fence.ip_type = ip_type;
	sem->signal_fence.ip_instance = ip_instance;
	sem->signal_fence.ring = ring;
	sem->signal_fence.fence = ctx->last_seq[ip_type][ip_instance][ring];
	update_references(NULL, &sem->refcount);
	pthread_mutex_unlock(&ctx->sequence_mutex);
	return 0;
}

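/**
 * Make a ring wait for a signaled semaphore
 *
 * Queues the semaphore on (ip_type, ip_instance, ring) of this context;
 * amdgpu_cs_submit_one() turns every queued semaphore into a fence
 * dependency of the next submission on that ring, then resets and
 * unreferences it.
 *
 * \return  0 on success, otherwise a negative POSIX error code
 */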
int amdgpu_cs_wait_semaphore(amdgpu_context_handle ctx,
			     uint32_t ip_type,
			     uint32_t ip_instance,
			     uint32_t ring,
			     amdgpu_semaphore_handle sem)
{
	if (NULL == ctx)
		return -EINVAL;
	if (ip_type >= AMDGPU_HW_IP_NUM)
		return -EINVAL;
	if (ring >= AMDGPU_CS_MAX_RINGS)
		return -EINVAL;
	if (NULL == sem)
		return -EINVAL;
	/* must signal first */
	if (NULL == sem->signal_fence.context)
		return -EINVAL;

	pthread_mutex_lock(&ctx->sequence_mutex);
	list_add(&sem->list, &ctx->sem_list[ip_type][ip_instance][ring]);
	pthread_mutex_unlock(&ctx->sequence_mutex);
	return 0;
}

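/*
 * Example usage (a minimal sketch): order ring 1 after ring 0 within one
 * context.  The ctx variable and the submissions themselves are assumed
 * to exist elsewhere; both rings here are GFX rings.
 *
 *	amdgpu_semaphore_handle sem;
 *
 *	if (amdgpu_cs_create_semaphore(&sem))
 *		return;
 *	...submit work on ring 0...
 *	amdgpu_cs_signal_semaphore(ctx, AMDGPU_HW_IP_GFX, 0, 0, sem);
 *	amdgpu_cs_wait_semaphore(ctx, AMDGPU_HW_IP_GFX, 0, 1, sem);
 *	...the next submission on ring 1 now depends on the ring 0 work...
 *	amdgpu_cs_destroy_semaphore(sem);
 */

/**
 * Return a semaphore to the unsignaled state so it can be signaled again
 */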
static int amdgpu_cs_reset_sem(amdgpu_semaphore_handle sem)
{
	if (NULL == sem)
		return -EINVAL;
	if (NULL == sem->signal_fence.context)
		return -EINVAL;

	sem->signal_fence.context = NULL;
	sem->signal_fence.ip_type = 0;
	sem->signal_fence.ip_instance = 0;
	sem->signal_fence.ring = 0;
	sem->signal_fence.fence = 0;

	return 0;
}

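/**
 * Drop one reference to a semaphore and free it on the last one
 */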
static int amdgpu_cs_unreference_sem(amdgpu_semaphore_handle sem)
{
	if (NULL == sem)
		return -EINVAL;

	if (update_references(&sem->refcount, NULL))
		free(sem);
	return 0;
}

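/**
 * Destroy a semaphore created with amdgpu_cs_create_semaphore()
 *
 * Drops the caller's reference; the semaphore is freed once any ring
 * still waiting on it has dropped its reference too.
 *
 * \return  0 on success, otherwise a negative POSIX error code
 */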
int amdgpu_cs_destroy_semaphore(amdgpu_semaphore_handle sem)
{
	return amdgpu_cs_unreference_sem(sem);
}