1 /* 2 * Copyright (c) 2011 Intel Corporation. All Rights Reserved. 3 * Copyright (c) Imagination Technologies Limited, UK 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sub license, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the 14 * next paragraph) shall be included in all copies or substantial portions 15 * of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 20 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR 21 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 * 25 * Authors: 26 * Zeng Li <zeng.li (at) intel.com> 27 * Shengquan Yuan <shengquan.yuan (at) intel.com> 28 * Binglin Chen <binglin.chen (at) intel.com> 29 * 30 */ 31 32 #include "lnc_cmdbuf.h" 33 34 #include <unistd.h> 35 #include <stdio.h> 36 #include <stdlib.h> 37 #include <errno.h> 38 #include <string.h> 39 #include <wsbm/wsbm_manager.h> 40 41 #include "psb_def.h" 42 #include "psb_drv_debug.h" 43 #include "lnc_hostcode.h" 44 #include "psb_ws_driver.h" 45 46 /* 47 * Buffer layout: 48 * cmd_base <= cmd_idx < CMD_END() == reloc_base 49 * reloc_base <= reloc_idx < RELOC_END() == (reloc_size) 50 */ 51 52 #define RELOC_END(cmdbuf) (cmdbuf->cmd_base + cmdbuf->size) 53 54 #define CMD_END(cmdbuf) (cmdbuf->reloc_base) 55 56 #define RELOC_SIZE (0x3000) 57 58 #define CMD_SIZE (0x3000) 59 60 #define RELOC_MARGIN (0x0800) 61 62 #define CMD_MARGIN (0x0400) 63 64 65 #define MAX_CMD_COUNT 12 66 67 #define MTX_SEG_SIZE (0x0800) 68 69 /* 70 * Create command buffer 71 */ 72 VAStatus lnc_cmdbuf_create( 73 object_context_p obj_context, 74 psb_driver_data_p driver_data, 75 lnc_cmdbuf_p cmdbuf) 76 { 77 context_ENC_p ctx = (context_ENC_p) obj_context->format_data; 78 VAStatus vaStatus = VA_STATUS_SUCCESS; 79 unsigned int size = CMD_SIZE + RELOC_SIZE; 80 81 cmdbuf->size = 0; 82 cmdbuf->cmd_base = NULL; 83 cmdbuf->cmd_idx = NULL; 84 cmdbuf->reloc_base = NULL; 85 cmdbuf->reloc_idx = NULL; 86 cmdbuf->buffer_refs_count = 0; 87 cmdbuf->buffer_refs_allocated = 10; 88 cmdbuf->buffer_refs = (psb_buffer_p *) calloc(1, sizeof(psb_buffer_p) * cmdbuf->buffer_refs_allocated); 89 if (NULL == cmdbuf->buffer_refs) { 90 cmdbuf->buffer_refs_allocated = 0; 91 vaStatus = VA_STATUS_ERROR_ALLOCATION_FAILED; 92 } 93 if (VA_STATUS_SUCCESS == vaStatus) { 94 vaStatus = psb_buffer_create(driver_data, size, psb_bt_cpu_only, &cmdbuf->buf); 95 cmdbuf->size = size; 96 } 97 98 if (VA_STATUS_SUCCESS != vaStatus) { 99 free(cmdbuf->buffer_refs); 100 cmdbuf->buffer_refs = NULL; 101 cmdbuf->buffer_refs_allocated = 0; 102 return vaStatus; 103 } 104 105 /* create topaz parameter buffer */ 106 vaStatus = psb_buffer_create(driver_data, ctx->pic_params_size, psb_bt_cpu_vpu, &cmdbuf->pic_params); 107 if (VA_STATUS_SUCCESS != vaStatus) 108 goto error_out3; 109 110 /* create header buffer */ 111 vaStatus = psb_buffer_create(driver_data, ctx->header_buffer_size, psb_bt_cpu_vpu, &cmdbuf->header_mem); 112 if (VA_STATUS_SUCCESS != vaStatus) 113 goto error_out2; 114 115 /* create slice parameter buffer */ 116 vaStatus = psb_buffer_create(driver_data, ctx->sliceparam_buffer_size, psb_bt_cpu_vpu, &cmdbuf->slice_params); 117 if (VA_STATUS_SUCCESS != vaStatus) 118 goto error_out1; 119 120 /* all cmdbuf share one MTX_CURRENT_IN_PARAMS since every MB has a MTX_CURRENT_IN_PARAMS structure 121 * and filling this structure for all MB is very time-consuming 122 */ 123 cmdbuf->topaz_in_params_I = &ctx->topaz_in_params_I; 124 cmdbuf->topaz_in_params_P = &ctx->topaz_in_params_P; 125 cmdbuf->topaz_above_bellow_params = &ctx->topaz_above_bellow_params; 126 127 return vaStatus; 128 129 error_out1: 130 psb_buffer_destroy(&cmdbuf->header_mem); 131 error_out2: 132 psb_buffer_destroy(&cmdbuf->pic_params); 133 error_out3: 134 lnc_cmdbuf_destroy(cmdbuf); 135 136 return vaStatus; 137 } 138 139 /* 140 * Destroy buffer 141 */ 142 void lnc_cmdbuf_destroy(lnc_cmdbuf_p cmdbuf) 143 { 144 if (cmdbuf->size) { 145 psb_buffer_destroy(&cmdbuf->buf); 146 cmdbuf->size = 0; 147 } 148 if (cmdbuf->buffer_refs_allocated) { 149 free(cmdbuf->buffer_refs); 150 cmdbuf->buffer_refs = NULL; 151 cmdbuf->buffer_refs_allocated = 0; 152 } 153 154 psb_buffer_destroy(&cmdbuf->pic_params); 155 psb_buffer_destroy(&cmdbuf->header_mem); 156 psb_buffer_destroy(&cmdbuf->slice_params); 157 158 } 159 160 /* 161 * Reset buffer & map 162 * 163 * Returns 0 on success 164 */ 165 int lnc_cmdbuf_reset(lnc_cmdbuf_p cmdbuf) 166 { 167 int ret; 168 169 cmdbuf->cmd_base = NULL; 170 cmdbuf->cmd_idx = NULL; 171 cmdbuf->reloc_base = NULL; 172 cmdbuf->reloc_idx = NULL; 173 174 cmdbuf->buffer_refs_count = 0; 175 cmdbuf->cmd_count = 0; 176 177 ret = psb_buffer_map(&cmdbuf->buf, &cmdbuf->cmd_base); 178 if (ret) { 179 return ret; 180 } 181 182 cmdbuf->cmd_start = cmdbuf->cmd_base; 183 cmdbuf->cmd_idx = (uint32_t *) cmdbuf->cmd_base; 184 185 cmdbuf->reloc_base = cmdbuf->cmd_base + CMD_SIZE; 186 cmdbuf->reloc_idx = (struct drm_psb_reloc *) cmdbuf->reloc_base; 187 188 /* Add ourselves to the buffer list */ 189 lnc_cmdbuf_buffer_ref(cmdbuf, &cmdbuf->buf); /* cmd buf == 0 */ 190 return ret; 191 } 192 193 /* 194 * Unmap buffer 195 * 196 * Returns 0 on success 197 */ 198 int lnc_cmdbuf_unmap(lnc_cmdbuf_p cmdbuf) 199 { 200 cmdbuf->cmd_base = NULL; 201 cmdbuf->cmd_start = NULL; 202 cmdbuf->cmd_idx = NULL; 203 cmdbuf->reloc_base = NULL; 204 cmdbuf->reloc_idx = NULL; 205 cmdbuf->cmd_count = 0; 206 psb_buffer_unmap(&cmdbuf->buf); 207 return 0; 208 } 209 210 211 /* 212 * Reference an addtional buffer "buf" in the command stream 213 * Returns a reference index that can be used to refer to "buf" in 214 * relocation records, -1 on error 215 */ 216 int lnc_cmdbuf_buffer_ref(lnc_cmdbuf_p cmdbuf, psb_buffer_p buf) 217 { 218 int item_loc = 0; 219 220 while ((item_loc < cmdbuf->buffer_refs_count) && (cmdbuf->buffer_refs[item_loc] != buf)) { 221 item_loc++; 222 } 223 if (item_loc == cmdbuf->buffer_refs_count) { 224 /* Add new entry */ 225 if (item_loc >= cmdbuf->buffer_refs_allocated) { 226 /* Allocate more entries */ 227 int new_size = cmdbuf->buffer_refs_allocated + 10; 228 psb_buffer_p *new_array; 229 new_array = (psb_buffer_p *) calloc(1, sizeof(psb_buffer_p) * new_size); 230 if (NULL == new_array) { 231 return -1; /* Allocation failure */ 232 } 233 memcpy(new_array, cmdbuf->buffer_refs, sizeof(psb_buffer_p) * cmdbuf->buffer_refs_allocated); 234 free(cmdbuf->buffer_refs); 235 cmdbuf->buffer_refs_allocated = new_size; 236 cmdbuf->buffer_refs = new_array; 237 } 238 cmdbuf->buffer_refs[item_loc] = buf; 239 cmdbuf->buffer_refs_count++; 240 buf->status = psb_bs_queued; 241 } 242 return item_loc; 243 } 244 245 /* Creates a relocation record for a DWORD in the mapped "cmdbuf" at address 246 * "addr_in_cmdbuf" 247 * The relocation is based on the device virtual address of "ref_buffer" 248 * "buf_offset" is be added to the device virtual address, and the sum is then 249 * right shifted with "align_shift". 250 * "mask" determines which bits of the target DWORD will be updated with the so 251 * constructed address. The remaining bits will be filled with bits from "background". 252 */ 253 void lnc_cmdbuf_add_relocation(lnc_cmdbuf_p cmdbuf, 254 uint32_t *addr_in_dst_buffer,/*addr of dst_buffer for the DWORD*/ 255 psb_buffer_p ref_buffer, 256 uint32_t buf_offset, 257 uint32_t mask, 258 uint32_t background, 259 uint32_t align_shift, 260 uint32_t dst_buffer, 261 uint32_t *start_of_dst_buffer) /*Index of the list refered by cmdbuf->buffer_refs */ 262 { 263 struct drm_psb_reloc *reloc = cmdbuf->reloc_idx; 264 uint64_t presumed_offset = wsbmBOOffsetHint(ref_buffer->drm_buf); 265 266 reloc->where = addr_in_dst_buffer - start_of_dst_buffer; /* Offset in DWORDs */ 267 268 reloc->buffer = lnc_cmdbuf_buffer_ref(cmdbuf, ref_buffer); 269 ASSERT(reloc->buffer != -1); 270 271 reloc->reloc_op = PSB_RELOC_OP_OFFSET; 272 #ifndef VA_EMULATOR 273 if (presumed_offset) { 274 uint32_t new_val = presumed_offset + buf_offset; 275 276 new_val = ((new_val >> align_shift) << (align_shift << PSB_RELOC_ALSHIFT_SHIFT)); 277 new_val = (background & ~mask) | (new_val & mask); 278 *addr_in_dst_buffer = new_val; 279 } else { 280 *addr_in_dst_buffer = PSB_RELOC_MAGIC; 281 } 282 #else 283 /* indicate subscript of relocation buffer */ 284 *addr_in_dst_buffer = reloc - (struct drm_psb_reloc *)cmdbuf->reloc_base; 285 #endif 286 reloc->mask = mask; 287 reloc->shift = align_shift << PSB_RELOC_ALSHIFT_SHIFT; 288 reloc->pre_add = buf_offset; 289 reloc->background = background; 290 reloc->dst_buffer = dst_buffer; 291 cmdbuf->reloc_idx++; 292 293 ASSERT(((unsigned char *)(cmdbuf->reloc_idx)) < RELOC_END(cmdbuf)); 294 } 295 296 /* 297 * Advances "obj_context" to the next cmdbuf 298 * 299 * Returns 0 on success 300 */ 301 int lnc_context_get_next_cmdbuf(object_context_p obj_context) 302 { 303 lnc_cmdbuf_p cmdbuf; 304 int ret; 305 306 if (obj_context->lnc_cmdbuf) { 307 return 0; 308 } 309 310 obj_context->cmdbuf_current++; 311 if (obj_context->cmdbuf_current >= LNC_MAX_CMDBUFS_ENCODE) { 312 obj_context->cmdbuf_current = 0; 313 } 314 315 cmdbuf = obj_context->lnc_cmdbuf_list[obj_context->cmdbuf_current]; 316 ret = lnc_cmdbuf_reset(cmdbuf); 317 if (!ret) { 318 /* Success */ 319 obj_context->lnc_cmdbuf = cmdbuf; 320 } 321 322 /* added pic_params/slice_params into ref, so the index is 1/2 */ 323 lnc_cmdbuf_buffer_ref(cmdbuf, &cmdbuf->pic_params); 324 lnc_cmdbuf_buffer_ref(cmdbuf, &cmdbuf->slice_params); 325 326 return ret; 327 } 328 329 /* 330 * This is the user-space do-it-all interface to the drm cmdbuf ioctl. 331 * It allows different buffers as command- and reloc buffer. A list of 332 * cliprects to apply and whether to copy the clipRect content to all 333 * scanout buffers (damage = 1). 334 */ 335 /* 336 * Don't add debug statements in this function, it gets called with the 337 * DRM lock held and output to an X terminal can cause X to deadlock 338 */ 339 static int 340 lncDRMCmdBuf(int fd, int ioctl_offset, psb_buffer_p *buffer_list, int buffer_count, unsigned cmdBufHandle, 341 unsigned cmdBufOffset, unsigned cmdBufSize, 342 unsigned relocBufHandle, unsigned relocBufOffset, 343 unsigned numRelocs, int damage, 344 unsigned engine, unsigned fence_flags, struct psb_ttm_fence_rep *fence_rep) 345 { 346 drm_psb_cmdbuf_arg_t ca; 347 struct psb_validate_arg *arg_list; 348 int i; 349 int ret; 350 uint64_t mask = PSB_GPU_ACCESS_MASK; 351 352 arg_list = (struct psb_validate_arg *) calloc(1, sizeof(struct psb_validate_arg) * buffer_count); 353 if (arg_list == NULL) { 354 drv_debug_msg(VIDEO_DEBUG_ERROR, "Allocate memory failed\n"); 355 return -ENOMEM; 356 } 357 358 for (i = 0; i < buffer_count; i++) { 359 struct psb_validate_arg *arg = &(arg_list[i]); 360 struct psb_validate_req *req = &arg->d.req; 361 362 req->next = (unsigned long) & (arg_list[i+1]); 363 364 req->buffer_handle = wsbmKBufHandle(wsbmKBuf(buffer_list[i]->drm_buf)); 365 req->group = 0; 366 req->set_flags = (PSB_GPU_ACCESS_READ | PSB_GPU_ACCESS_WRITE) & mask; 367 req->clear_flags = (~(PSB_GPU_ACCESS_READ | PSB_GPU_ACCESS_WRITE)) & mask; 368 #if 1 369 req->presumed_gpu_offset = (uint64_t)wsbmBOOffsetHint(buffer_list[i]->drm_buf); 370 req->presumed_flags = PSB_USE_PRESUMED; 371 if ((req->presumed_gpu_offset >> 28) & 0x1) { 372 drv_debug_msg(VIDEO_DEBUG_ERROR, "buffer is at the address topaz can not access\n"); 373 ret = -1; 374 goto out; 375 } 376 #else 377 req->presumed_flags = 0; 378 #endif 379 req->pad64 = (uint32_t)buffer_list[i]->pl_flags; 380 } 381 arg_list[buffer_count-1].d.req.next = 0; 382 383 ca.buffer_list = (uint64_t)((unsigned long)arg_list); 384 ca.cmdbuf_handle = cmdBufHandle; 385 ca.cmdbuf_offset = cmdBufOffset; 386 ca.cmdbuf_size = cmdBufSize; 387 ca.reloc_handle = relocBufHandle; 388 ca.reloc_offset = relocBufOffset; 389 ca.num_relocs = numRelocs; 390 ca.engine = engine; 391 ca.fence_flags = fence_flags; 392 ca.fence_arg = (uint64_t)((unsigned long)fence_rep); 393 394 do { 395 ret = drmCommandWrite(fd, ioctl_offset, &ca, sizeof(ca)); 396 } while (ret == EAGAIN); 397 398 if (ret) 399 goto out; 400 401 for (i = 0; i < buffer_count; i++) { 402 struct psb_validate_arg *arg = &(arg_list[i]); 403 struct psb_validate_rep *rep = &arg->d.rep; 404 405 if (!arg->handled) { 406 ret = -EFAULT; 407 goto out; 408 } 409 if (arg->ret != 0) { 410 ret = arg->ret; 411 goto out; 412 } 413 wsbmUpdateKBuf(wsbmKBuf(buffer_list[i]->drm_buf), 414 rep->gpu_offset, rep->placement, rep->fence_type_mask); 415 } 416 out: 417 free(arg_list); 418 for (i = 0; i < buffer_count; i++) { 419 /* 420 * Buffer no longer queued in userspace 421 */ 422 switch (buffer_list[i]->status) { 423 case psb_bs_queued: 424 buffer_list[i]->status = psb_bs_ready; 425 break; 426 427 case psb_bs_abandoned: 428 psb_buffer_destroy(buffer_list[i]); 429 free(buffer_list[i]); 430 break; 431 432 default: 433 /* Not supposed to happen */ 434 ASSERT(0); 435 } 436 } 437 438 return ret; 439 } 440 441 #if 0 442 static struct _WsbmFenceObject * 443 lnc_fence_wait(psb_driver_data_p driver_data, 444 struct psb_ttm_fence_rep *fence_rep, int *status) 445 446 { 447 struct _WsbmFenceObject *fence = NULL; 448 int ret = -1; 449 450 /* copy fence information */ 451 if (fence_rep->error != 0) { 452 drv_debug_msg(VIDEO_DEBUG_ERROR, "drm failed to create a fence" 453 " and has idled the HW\n"); 454 DEBUG_FAILURE_RET; 455 return NULL; 456 } 457 458 fence = wsbmFenceCreate(driver_data->fence_mgr, fence_rep->fence_class, 459 fence_rep->fence_type, 460 (unsigned char *)fence_rep->handle, 461 0); 462 if (fence) 463 *status = wsbmFenceFinish(fence, fence_rep->fence_type, 0); 464 465 return fence; 466 } 467 #endif 468 469 /* 470 * Submits the current cmdbuf 471 * 472 * Returns 0 on success 473 */ 474 int lnc_context_submit_cmdbuf(object_context_p obj_context) 475 { 476 477 return 0; 478 } 479 480 /* 481 * FrameSkip is only meaningful for RC enabled mode 482 * Topaz raises this flag after surface N encoding is finished (vaSyncSurface gets back) 483 * then for the next encode surface N+1 (ctx->src_surface) frameskip flag is cleared in vaBeginPicuture 484 * and is always set in vaEndPicture:lnc_PatchRCMode 485 * vaQuerySurfaceStatus is supposed only to be called after vaEndPicture/vaSyncSurface, 486 * The caller should ensure the surface pertains to an encode context 487 */ 488 int lnc_surface_get_frameskip(psb_driver_data_p driver_data, psb_surface_p surface, int *frame_skip) 489 { 490 struct drm_lnc_video_getparam_arg arg; 491 unsigned long temp; 492 int ret = 0; 493 494 /* bit31 indicate if frameskip is already settled, it is used to record the frame skip flag for old surfaces 495 * because current FRAMESKIP in hardware can't be applied to the old surfaces 496 * bit31 is cleared when the surface is used as encode render target or reference/reconstrucure target 497 */ 498 if (GET_SURFACE_INFO_skipped_flag(surface) & SURFACE_INFO_SKIP_FLAG_SETTLED) { 499 *frame_skip = GET_SURFACE_INFO_skipped_flag(surface) & 1; 500 return 0; 501 } 502 503 /* not settled, we get it from current HW FRAMESKIP flag */ 504 arg.key = LNC_VIDEO_FRAME_SKIP; 505 arg.value = (uint64_t)((unsigned long) & temp); 506 ret = drmCommandWriteRead(driver_data->drm_fd, driver_data->getParamIoctlOffset, 507 &arg, sizeof(arg)); 508 if (ret == 0) { 509 SET_SURFACE_INFO_skipped_flag(surface, temp); 510 *frame_skip = temp; 511 if (temp == 1) 512 drv_debug_msg(VIDEO_DEBUG_GENERAL, "Detected a skipped frame for encode\n"); 513 } 514 515 return ret; 516 } 517 518 519 /* 520 * Flushes all cmdbufs 521 */ 522 int lnc_context_flush_cmdbuf(object_context_p obj_context) 523 { 524 lnc_cmdbuf_p cmdbuf = obj_context->lnc_cmdbuf; 525 psb_driver_data_p driver_data = obj_context->driver_data; 526 unsigned int fence_flags; 527 struct psb_ttm_fence_rep fence_rep; 528 unsigned int reloc_offset; 529 unsigned int num_relocs; 530 int ret; 531 unsigned int cmdbuffer_size = (unsigned char *) cmdbuf->cmd_idx - cmdbuf->cmd_start; /* In bytes */ 532 533 ASSERT(cmdbuffer_size < CMD_SIZE); 534 ASSERT((unsigned char *) cmdbuf->cmd_idx < CMD_END(cmdbuf)); 535 /* LOCK */ 536 ret = LOCK_HARDWARE(driver_data); 537 if (ret) { 538 UNLOCK_HARDWARE(driver_data); 539 DEBUG_FAILURE_RET; 540 return ret; 541 } 542 543 /* Now calculate the total number of relocations */ 544 reloc_offset = cmdbuf->reloc_base - cmdbuf->cmd_base; 545 num_relocs = (((unsigned char *) cmdbuf->reloc_idx) - cmdbuf->reloc_base) / sizeof(struct drm_psb_reloc); 546 547 lnc_cmdbuf_unmap(cmdbuf); 548 549 ASSERT(NULL == cmdbuf->reloc_base); 550 551 if (psb_video_trace_fp) 552 fence_flags = 0; 553 else 554 fence_flags = DRM_PSB_FENCE_NO_USER; 555 556 #ifndef LNC_ENGINE_ENCODE 557 #define LNC_ENGINE_ENCODE 5 558 #endif 559 560 wsbmWriteLockKernelBO(); 561 ret = lncDRMCmdBuf(driver_data->drm_fd, driver_data->execIoctlOffset, 562 cmdbuf->buffer_refs, cmdbuf->buffer_refs_count, wsbmKBufHandle(wsbmKBuf(cmdbuf->buf.drm_buf)), 563 0, cmdbuffer_size,/*unsigned cmdBufSize*/ 564 wsbmKBufHandle(wsbmKBuf(cmdbuf->buf.drm_buf)), reloc_offset, num_relocs, 565 0, LNC_ENGINE_ENCODE, fence_flags, &fence_rep); 566 wsbmWriteUnlockKernelBO(); 567 UNLOCK_HARDWARE(driver_data); 568 569 if (ret) { 570 obj_context->lnc_cmdbuf = NULL; 571 572 DEBUG_FAILURE_RET; 573 return ret; 574 } 575 576 #if 0 577 int status = -1; 578 struct _WsbmFenceObject *fence = NULL; 579 580 fence = lnc_fence_wait(driver_data, &fence_rep, &status); 581 drv_debug_msg(VIDEO_DEBUG_GENERAL, "psb_fence_wait returns: %d (fence=0x%08x)\n", status, fence); 582 583 if (fence) 584 wsbmFenceUnreference(fence); 585 #endif 586 587 obj_context->lnc_cmdbuf = NULL; 588 589 return 0; 590 } 591 592