/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <pthread.h>
#include <sched.h>
#include <time.h>	/* clock_gettime(), struct timespec */
#include <sys/ioctl.h>
#ifdef HAVE_ALLOCA_H
# include <alloca.h>
#endif

#include "xf86drm.h"
#include "amdgpu_drm.h"
#include "amdgpu_internal.h"

static int amdgpu_cs_unreference_sem(amdgpu_semaphore_handle sem);
static int amdgpu_cs_reset_sem(amdgpu_semaphore_handle sem);

/**
 * Create command submission context
 *
 * \param dev - \c [in] amdgpu device handle
 * \param context - \c [out] amdgpu context handle
 *
 * \return 0 on success otherwise POSIX Error code
 */
int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
			 amdgpu_context_handle *context)
{
	struct amdgpu_context *gpu_context;
	union drm_amdgpu_ctx args;
	int i, j, k;
	int r;

	if (NULL == dev)
		return -EINVAL;
	if (NULL == context)
		return -EINVAL;

	gpu_context = calloc(1, sizeof(struct amdgpu_context));
	if (NULL == gpu_context)
		return -ENOMEM;

	gpu_context->dev = dev;

	r = pthread_mutex_init(&gpu_context->sequence_mutex, NULL);
	if (r)
		goto error;

	/* Create the context */
	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
	r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CTX, &args, sizeof(args));
	if (r)
		goto error;

	gpu_context->id = args.out.alloc.ctx_id;
	for (i = 0; i < AMDGPU_HW_IP_NUM; i++)
		for (j = 0; j < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; j++)
			for (k = 0; k < AMDGPU_CS_MAX_RINGS; k++)
				list_inithead(&gpu_context->sem_list[i][j][k]);
	*context = (amdgpu_context_handle)gpu_context;

	return 0;

error:
	pthread_mutex_destroy(&gpu_context->sequence_mutex);
	free(gpu_context);
	return r;
}
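/*
 * A minimal usage sketch (illustration only, not part of the library):
 * creating and releasing a context. The device handle is assumed to come
 * from amdgpu_device_initialize() on an opened DRM fd; error handling is
 * abbreviated.
 *
 * \code
 * amdgpu_context_handle ctx;
 * int r = amdgpu_cs_ctx_create(dev, &ctx);
 * if (r)
 *	return r;
 * // ... build and submit work with amdgpu_cs_submit() ...
 * r = amdgpu_cs_ctx_free(ctx);
 * \endcode
 */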
/**
 * Release command submission context
 *
 * \param context - \c [in] amdgpu context handle
 *
 * \return 0 on success otherwise POSIX Error code
 */
int amdgpu_cs_ctx_free(amdgpu_context_handle context)
{
	union drm_amdgpu_ctx args;
	amdgpu_semaphore_handle sem, tmp;
	int i, j, k;
	int r;

	if (NULL == context)
		return -EINVAL;

	pthread_mutex_destroy(&context->sequence_mutex);

	/* now deal with kernel side */
	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_FREE_CTX;
	args.in.ctx_id = context->id;
	r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
				&args, sizeof(args));
	for (i = 0; i < AMDGPU_HW_IP_NUM; i++) {
		for (j = 0; j < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; j++) {
			for (k = 0; k < AMDGPU_CS_MAX_RINGS; k++) {
				/* use the _SAFE variant: entries are freed
				 * while walking the list */
				LIST_FOR_EACH_ENTRY_SAFE(sem, tmp,
							 &context->sem_list[i][j][k],
							 list) {
					list_del(&sem->list);
					amdgpu_cs_reset_sem(sem);
					amdgpu_cs_unreference_sem(sem);
				}
			}
		}
	}
	free(context);

	return r;
}

int amdgpu_cs_query_reset_state(amdgpu_context_handle context,
				uint32_t *state, uint32_t *hangs)
{
	union drm_amdgpu_ctx args;
	int r;

	if (!context)
		return -EINVAL;

	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_QUERY_STATE;
	args.in.ctx_id = context->id;
	r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
				&args, sizeof(args));
	if (!r) {
		*state = args.out.state.reset_status;
		*hangs = args.out.state.hangs;
	}
	return r;
}
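/*
 * Sketch (illustration only): checking whether a context was affected by a
 * GPU reset. The returned state is one of the AMDGPU_CTX_*_RESET codes from
 * amdgpu_drm.h; the recovery policy is up to the caller.
 *
 * \code
 * uint32_t state, hangs;
 *
 * if (!amdgpu_cs_query_reset_state(ctx, &state, &hangs) &&
 *     state != AMDGPU_CTX_NO_RESET) {
 *	// context was lost: recreate it and resubmit
 * }
 * \endcode
 */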
/**
 * Submit command to kernel DRM
 *
 * \param context - \c [in] GPU Context
 * \param ibs_request - \c [in] Pointer to submission requests
 *
 * \return 0 on success otherwise POSIX Error code
 * \sa amdgpu_cs_submit()
 */
static int amdgpu_cs_submit_one(amdgpu_context_handle context,
				struct amdgpu_cs_request *ibs_request)
{
	union drm_amdgpu_cs cs;
	uint64_t *chunk_array;
	struct drm_amdgpu_cs_chunk *chunks;
	struct drm_amdgpu_cs_chunk_data *chunk_data;
	struct drm_amdgpu_cs_chunk_dep *dependencies = NULL;
	struct drm_amdgpu_cs_chunk_dep *sem_dependencies = NULL;
	struct list_head *sem_list;
	amdgpu_semaphore_handle sem, tmp;
	uint32_t i, size, sem_count = 0;
	bool user_fence;
	int r = 0;

	if (ibs_request->ip_type >= AMDGPU_HW_IP_NUM)
		return -EINVAL;
	if (ibs_request->ring >= AMDGPU_CS_MAX_RINGS)
		return -EINVAL;
	if (ibs_request->number_of_ibs > AMDGPU_CS_MAX_IBS_PER_SUBMIT)
		return -EINVAL;
	if (ibs_request->number_of_ibs == 0) {
		ibs_request->seq_no = AMDGPU_NULL_SUBMIT_SEQ;
		return 0;
	}
	user_fence = (ibs_request->fence_info.handle != NULL);

	/* worst case: IB chunks + optional user-fence chunk + dependency
	 * chunk + semaphore-dependency chunk */
	size = ibs_request->number_of_ibs + (user_fence ? 2 : 1) + 1;

	chunk_array = alloca(sizeof(uint64_t) * size);
	chunks = alloca(sizeof(struct drm_amdgpu_cs_chunk) * size);

	size = ibs_request->number_of_ibs + (user_fence ? 1 : 0);

	chunk_data = alloca(sizeof(struct drm_amdgpu_cs_chunk_data) * size);

	memset(&cs, 0, sizeof(cs));
	cs.in.chunks = (uint64_t)(uintptr_t)chunk_array;
	cs.in.ctx_id = context->id;
	if (ibs_request->resources)
		cs.in.bo_list_handle = ibs_request->resources->handle;
	cs.in.num_chunks = ibs_request->number_of_ibs;
	/* IB chunks */
	for (i = 0; i < ibs_request->number_of_ibs; i++) {
		struct amdgpu_cs_ib_info *ib;
		chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
		chunks[i].chunk_id = AMDGPU_CHUNK_ID_IB;
		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
		chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];

		ib = &ibs_request->ibs[i];

		chunk_data[i].ib_data._pad = 0;
		chunk_data[i].ib_data.va_start = ib->ib_mc_address;
		chunk_data[i].ib_data.ib_bytes = ib->size * 4;
		chunk_data[i].ib_data.ip_type = ibs_request->ip_type;
		chunk_data[i].ib_data.ip_instance = ibs_request->ip_instance;
		chunk_data[i].ib_data.ring = ibs_request->ring;
		chunk_data[i].ib_data.flags = ib->flags;
	}

	pthread_mutex_lock(&context->sequence_mutex);

	if (user_fence) {
		i = cs.in.num_chunks++;

		/* fence chunk */
		chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
		chunks[i].chunk_id = AMDGPU_CHUNK_ID_FENCE;
		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_fence) / 4;
		chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];

		/* fence bo handle */
		chunk_data[i].fence_data.handle = ibs_request->fence_info.handle->handle;
		/* offset */
		chunk_data[i].fence_data.offset =
			ibs_request->fence_info.offset * sizeof(uint64_t);
	}

	if (ibs_request->number_of_dependencies) {
		dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_dep) *
				      ibs_request->number_of_dependencies);
		if (!dependencies) {
			r = -ENOMEM;
			goto error_unlock;
		}

		for (i = 0; i < ibs_request->number_of_dependencies; ++i) {
			struct amdgpu_cs_fence *info = &ibs_request->dependencies[i];
			struct drm_amdgpu_cs_chunk_dep *dep = &dependencies[i];
			dep->ip_type = info->ip_type;
			dep->ip_instance = info->ip_instance;
			dep->ring = info->ring;
			dep->ctx_id = info->context->id;
			dep->handle = info->fence;
		}

		i = cs.in.num_chunks++;

		/* dependencies chunk */
		chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
		chunks[i].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4
			* ibs_request->number_of_dependencies;
		chunks[i].chunk_data = (uint64_t)(uintptr_t)dependencies;
	}

	sem_list = &context->sem_list[ibs_request->ip_type][ibs_request->ip_instance][ibs_request->ring];
	LIST_FOR_EACH_ENTRY(sem, sem_list, list)
		sem_count++;
	if (sem_count) {
		sem_dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_dep) * sem_count);
		if (!sem_dependencies) {
			r = -ENOMEM;
			goto error_unlock;
		}
		sem_count = 0;
		LIST_FOR_EACH_ENTRY_SAFE(sem, tmp, sem_list, list) {
			struct amdgpu_cs_fence *info = &sem->signal_fence;
			struct drm_amdgpu_cs_chunk_dep *dep = &sem_dependencies[sem_count++];
			dep->ip_type = info->ip_type;
			dep->ip_instance = info->ip_instance;
			dep->ring = info->ring;
			dep->ctx_id = info->context->id;
			dep->handle = info->fence;

			list_del(&sem->list);
			amdgpu_cs_reset_sem(sem);
			amdgpu_cs_unreference_sem(sem);
		}
		i = cs.in.num_chunks++;

		/* dependencies chunk */
		chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
		chunks[i].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4 * sem_count;
		chunks[i].chunk_data = (uint64_t)(uintptr_t)sem_dependencies;
	}

	r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CS,
				&cs, sizeof(cs));
	if (r)
		goto error_unlock;

	ibs_request->seq_no = cs.out.handle;
	context->last_seq[ibs_request->ip_type][ibs_request->ip_instance][ibs_request->ring] = ibs_request->seq_no;
error_unlock:
	pthread_mutex_unlock(&context->sequence_mutex);
	free(dependencies);
	free(sem_dependencies);
	return r;
}

int amdgpu_cs_submit(amdgpu_context_handle context,
		     uint64_t flags,
		     struct amdgpu_cs_request *ibs_request,
		     uint32_t number_of_requests)
{
	uint32_t i;
	int r;

	if (NULL == context)
		return -EINVAL;
	if (NULL == ibs_request)
		return -EINVAL;

	r = 0;
	for (i = 0; i < number_of_requests; i++) {
		r = amdgpu_cs_submit_one(context, ibs_request);
		if (r)
			break;
		ibs_request++;
	}

	return r;
}
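/*
 * Sketch of a single-IB submission through amdgpu_cs_submit() (illustration
 * only). ib_va, num_dwords, bo_list and ctx are assumed to exist: the IB is
 * resident at GPU VA ib_va, and bo_list comes from amdgpu_bo_list_create().
 *
 * \code
 * struct amdgpu_cs_ib_info ib = { 0 };
 * struct amdgpu_cs_request request = { 0 };
 *
 * ib.ib_mc_address = ib_va;	// GPU VA of the IB buffer
 * ib.size = num_dwords;	// IB length in dwords
 *
 * request.ip_type = AMDGPU_HW_IP_GFX;
 * request.ring = 0;
 * request.number_of_ibs = 1;
 * request.ibs = &ib;
 * request.resources = bo_list;
 *
 * r = amdgpu_cs_submit(ctx, 0, &request, 1);
 * // on success, request.seq_no identifies this submission
 * \endcode
 */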
/**
 * Calculate absolute timeout.
 *
 * \param timeout - \c [in] timeout in nanoseconds.
 *
 * \return absolute timeout in nanoseconds
 */
drm_private uint64_t amdgpu_cs_calculate_timeout(uint64_t timeout)
{
	int r;

	if (timeout != AMDGPU_TIMEOUT_INFINITE) {
		struct timespec current;
		uint64_t current_ns;
		r = clock_gettime(CLOCK_MONOTONIC, &current);
		if (r) {
			fprintf(stderr, "clock_gettime() returned error (%d)!\n",
				errno);
			return AMDGPU_TIMEOUT_INFINITE;
		}

		current_ns = ((uint64_t)current.tv_sec) * 1000000000ull;
		current_ns += current.tv_nsec;
		timeout += current_ns;
		/* on overflow, fall back to an infinite wait */
		if (timeout < current_ns)
			timeout = AMDGPU_TIMEOUT_INFINITE;
	}
	return timeout;
}

static int amdgpu_ioctl_wait_cs(amdgpu_context_handle context,
				unsigned ip,
				unsigned ip_instance,
				uint32_t ring,
				uint64_t handle,
				uint64_t timeout_ns,
				uint64_t flags,
				bool *busy)
{
	amdgpu_device_handle dev = context->dev;
	union drm_amdgpu_wait_cs args;
	int r;

	memset(&args, 0, sizeof(args));
	args.in.handle = handle;
	args.in.ip_type = ip;
	args.in.ip_instance = ip_instance;
	args.in.ring = ring;
	args.in.ctx_id = context->id;

	if (flags & AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE)
		args.in.timeout = timeout_ns;
	else
		args.in.timeout = amdgpu_cs_calculate_timeout(timeout_ns);

	r = drmIoctl(dev->fd, DRM_IOCTL_AMDGPU_WAIT_CS, &args);
	if (r)
		return -errno;

	*busy = args.out.status;
	return 0;
}

int amdgpu_cs_query_fence_status(struct amdgpu_cs_fence *fence,
				 uint64_t timeout_ns,
				 uint64_t flags,
				 uint32_t *expired)
{
	bool busy = true;
	int r;

	if (NULL == fence)
		return -EINVAL;
	if (NULL == expired)
		return -EINVAL;
	if (NULL == fence->context)
		return -EINVAL;
	if (fence->ip_type >= AMDGPU_HW_IP_NUM)
		return -EINVAL;
	if (fence->ring >= AMDGPU_CS_MAX_RINGS)
		return -EINVAL;
	if (fence->fence == AMDGPU_NULL_SUBMIT_SEQ) {
		*expired = true;
		return 0;
	}

	*expired = false;

	r = amdgpu_ioctl_wait_cs(fence->context, fence->ip_type,
				 fence->ip_instance, fence->ring,
				 fence->fence, timeout_ns, flags, &busy);

	if (!r && !busy)
		*expired = true;

	return r;
}
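/*
 * Sketch (illustration only): waiting on the sequence number returned by a
 * submission. A zero timeout makes this a poll; AMDGPU_TIMEOUT_INFINITE
 * blocks until the fence signals. ctx and request are assumed to come from
 * the submission example above.
 *
 * \code
 * struct amdgpu_cs_fence fence = { 0 };
 * uint32_t expired;
 *
 * fence.context = ctx;
 * fence.ip_type = request.ip_type;
 * fence.ip_instance = request.ip_instance;
 * fence.ring = request.ring;
 * fence.fence = request.seq_no;
 *
 * r = amdgpu_cs_query_fence_status(&fence, AMDGPU_TIMEOUT_INFINITE,
 *				    0, &expired);
 * // r == 0 && expired != 0 => the submission has completed
 * \endcode
 */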
int amdgpu_cs_create_semaphore(amdgpu_semaphore_handle *sem)
{
	struct amdgpu_semaphore *gpu_semaphore;

	if (NULL == sem)
		return -EINVAL;

	gpu_semaphore = calloc(1, sizeof(struct amdgpu_semaphore));
	if (NULL == gpu_semaphore)
		return -ENOMEM;

	atomic_set(&gpu_semaphore->refcount, 1);
	*sem = gpu_semaphore;

	return 0;
}

int amdgpu_cs_signal_semaphore(amdgpu_context_handle ctx,
			       uint32_t ip_type,
			       uint32_t ip_instance,
			       uint32_t ring,
			       amdgpu_semaphore_handle sem)
{
	if (NULL == ctx)
		return -EINVAL;
	if (ip_type >= AMDGPU_HW_IP_NUM)
		return -EINVAL;
	if (ring >= AMDGPU_CS_MAX_RINGS)
		return -EINVAL;
	if (NULL == sem)
		return -EINVAL;
	/* sem has already been signaled */
	if (sem->signal_fence.context)
		return -EINVAL;
	pthread_mutex_lock(&ctx->sequence_mutex);
	sem->signal_fence.context = ctx;
	sem->signal_fence.ip_type = ip_type;
	sem->signal_fence.ip_instance = ip_instance;
	sem->signal_fence.ring = ring;
	sem->signal_fence.fence = ctx->last_seq[ip_type][ip_instance][ring];
	update_references(NULL, &sem->refcount);
	pthread_mutex_unlock(&ctx->sequence_mutex);
	return 0;
}

int amdgpu_cs_wait_semaphore(amdgpu_context_handle ctx,
			     uint32_t ip_type,
			     uint32_t ip_instance,
			     uint32_t ring,
			     amdgpu_semaphore_handle sem)
{
	if (NULL == ctx)
		return -EINVAL;
	if (ip_type >= AMDGPU_HW_IP_NUM)
		return -EINVAL;
	if (ring >= AMDGPU_CS_MAX_RINGS)
		return -EINVAL;
	if (NULL == sem)
		return -EINVAL;
	/* must signal first */
	if (NULL == sem->signal_fence.context)
		return -EINVAL;

	pthread_mutex_lock(&ctx->sequence_mutex);
	list_add(&sem->list, &ctx->sem_list[ip_type][ip_instance][ring]);
	pthread_mutex_unlock(&ctx->sequence_mutex);
	return 0;
}

static int amdgpu_cs_reset_sem(amdgpu_semaphore_handle sem)
{
	if (NULL == sem)
		return -EINVAL;
	if (NULL == sem->signal_fence.context)
		return -EINVAL;

	sem->signal_fence.context = NULL;
	sem->signal_fence.ip_type = 0;
	sem->signal_fence.ip_instance = 0;
	sem->signal_fence.ring = 0;
	sem->signal_fence.fence = 0;

	return 0;
}

static int amdgpu_cs_unreference_sem(amdgpu_semaphore_handle sem)
{
	if (NULL == sem)
		return -EINVAL;

	if (update_references(&sem->refcount, NULL))
		free(sem);
	return 0;
}

int amdgpu_cs_destroy_semaphore(amdgpu_semaphore_handle sem)
{
	return amdgpu_cs_unreference_sem(sem);
}
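/*
 * Sketch (illustration only): cross-ring ordering with the semaphore
 * helpers above. The signal records the last sequence number submitted on
 * the signaling ring; the wait queues the semaphore as a dependency for the
 * next submission on the waiting ring. ctx is assumed to be a valid
 * context handle.
 *
 * \code
 * amdgpu_semaphore_handle sem;
 *
 * r = amdgpu_cs_create_semaphore(&sem);
 * // after submitting on (GFX, instance 0, ring 0):
 * r = amdgpu_cs_signal_semaphore(ctx, AMDGPU_HW_IP_GFX, 0, 0, sem);
 * // before submitting on (DMA, instance 0, ring 0):
 * r = amdgpu_cs_wait_semaphore(ctx, AMDGPU_HW_IP_DMA, 0, 0, sem);
 * r = amdgpu_cs_destroy_semaphore(sem);
 * \endcode
 */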