/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdbool.h>
#include <unistd.h>
#ifdef HAVE_ALLOCA_H
# include <alloca.h>
#endif
#include <sys/wait.h>

#include "CUnit/Basic.h"

#include "amdgpu_test.h"
#include "amdgpu_drm.h"

static amdgpu_device_handle device_handle;
static uint32_t major_version;
static uint32_t minor_version;
static uint32_t family_id;

static void amdgpu_query_info_test(void);
static void amdgpu_command_submission_gfx(void);
static void amdgpu_command_submission_compute(void);
static void amdgpu_command_submission_multi_fence(void);
static void amdgpu_command_submission_sdma(void);
static void amdgpu_userptr_test(void);
static void amdgpu_semaphore_test(void);
static void amdgpu_sync_dependency_test(void);
static void amdgpu_bo_eviction_test(void);

static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       unsigned ip_type,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request *ibs_request);

CU_TestInfo basic_tests[] = {
	{ "Query Info Test", amdgpu_query_info_test },
	{ "Userptr Test", amdgpu_userptr_test },
	{ "bo eviction Test", amdgpu_bo_eviction_test },
	{ "Command submission Test (GFX)", amdgpu_command_submission_gfx },
	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
	{ "SW semaphore Test", amdgpu_semaphore_test },
	{ "Sync dependency Test", amdgpu_sync_dependency_test },
	CU_TEST_INFO_NULL,
};

#define BUFFER_SIZE (8 * 1024)
#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask   0x000000FF
#define SDMA_PKT_HEADER_op_shift  0
#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
#define SDMA_OPCODE_CONSTANT_FILL  11
#	define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)  ((x) << 14)
	/* 0 = byte fill
	 * 2 = DW fill
	 */
#define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
					(((sub_op) & 0xFF) << 8) |	\
					(((op) & 0xFF) << 0))
#define SDMA_OPCODE_WRITE  2
#	define SDMA_WRITE_SUB_OPCODE_LINEAR  0
#	define SDMA_WRITE_SUB_OPCODE_TILED   1

#define SDMA_OPCODE_COPY  1
#	define SDMA_COPY_SUB_OPCODE_LINEAR  0

#define GFX_COMPUTE_NOP  0xffff1000
#define SDMA_NOP  0x0

/* PM4 */
#define PACKET_TYPE0	0
#define PACKET_TYPE1	1
#define PACKET_TYPE2	2
#define PACKET_TYPE3	3

#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
#define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |	\
			 ((reg) & 0xFFFF) |	\
			 ((n) & 0x3FFF) << 16)
#define CP_PACKET2	0x80000000
#define	PACKET2_PAD_SHIFT	0
#define	PACKET2_PAD_MASK	(0x3fffffff << 0)

#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

#define PACKET3(op, n)	((PACKET_TYPE3 << 30) |	\
			 (((op) & 0xFF) << 8) |	\
			 ((n) & 0x3FFF) << 16)

/* Packet 3 types */
#define PACKET3_NOP		0x10

#define PACKET3_WRITE_DATA	0x37
#define	WRITE_DATA_DST_SEL(x)	((x) << 8)
		/* 0 - register
		 * 1 - memory (sync - via GRBM)
		 * 2 - gl2
		 * 3 - gds
		 * 4 - reserved
		 * 5 - memory (async - direct)
		 */
#define	WR_ONE_ADDR	(1 << 16)
#define	WR_CONFIRM	(1 << 20)
#define	WRITE_DATA_CACHE_POLICY(x)	((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 */
#define	WRITE_DATA_ENGINE_SEL(x)	((x) << 30)
		/* 0 - me
		 * 1 - pfp
		 * 2 - ce
		 */
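
/* For reference, a few headers the helpers below build expand, directly from
 * the macros above, to:
 *
 *   PACKET3(PACKET3_WRITE_DATA, 2 + n) == 0xC0003700 | ((2 + n) << 16)
 *	(type-3 header, opcode 0x37; the body carries one control DWORD,
 *	 two address DWORDs and n data DWORDs)
 *   GFX_COMPUTE_NOP (0xffff1000)       == PACKET3(PACKET3_NOP, 0x3fff)
 *   SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0) == 0x00000002
 */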

#define PACKET3_DMA_DATA	0x50
/* 1. header
 * 2. CONTROL
 * 3. SRC_ADDR_LO or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [7:0]
 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
 */
/* CONTROL */
#	define PACKET3_DMA_DATA_ENGINE(x)	((x) << 0)
		/* 0 - ME
		 * 1 - PFP
		 */
#	define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x)	((x) << 13)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#	define PACKET3_DMA_DATA_SRC_VOLATILE	(1 << 15)
#	define PACKET3_DMA_DATA_DST_SEL(x)	((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#	define PACKET3_DMA_DATA_DST_CACHE_POLICY(x)	((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#	define PACKET3_DMA_DATA_DST_VOLATILE	(1 << 27)
#	define PACKET3_DMA_DATA_SRC_SEL(x)	((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
#	define PACKET3_DMA_DATA_CP_SYNC	(1 << 31)
/* COMMAND */
#	define PACKET3_DMA_DATA_DIS_WC	(1 << 21)
#	define PACKET3_DMA_DATA_CMD_SRC_SWAP(x)	((x) << 22)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#	define PACKET3_DMA_DATA_CMD_DST_SWAP(x)	((x) << 24)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#	define PACKET3_DMA_DATA_CMD_SAS	(1 << 26)
		/* 0 - memory
		 * 1 - register
		 */
#	define PACKET3_DMA_DATA_CMD_DAS	(1 << 27)
		/* 0 - memory
		 * 1 - register
		 */
#	define PACKET3_DMA_DATA_CMD_SAIC	(1 << 28)
#	define PACKET3_DMA_DATA_CMD_DAIC	(1 << 29)
#	define PACKET3_DMA_DATA_CMD_RAW_WAIT	(1 << 30)

#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((op) & 0xF) << 28) |	\
						(((b) & 0x1) << 26) |	\
						(((t) & 0x1) << 23) |	\
						(((s) & 0x1) << 22) |	\
						(((cnt) & 0xFFFFF) << 0))
#define SDMA_OPCODE_COPY_SI		3
#define SDMA_OPCODE_CONSTANT_FILL_SI	13
#define SDMA_NOP_SI		0xf
#define GFX_COMPUTE_NOP_SI	0x80000000
#define PACKET3_DMA_DATA_SI	0x41
#	define PACKET3_DMA_DATA_SI_ENGINE(x)	((x) << 27)
		/* 0 - ME
		 * 1 - PFP
		 */
#	define PACKET3_DMA_DATA_SI_DST_SEL(x)	((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#	define PACKET3_DMA_DATA_SI_SRC_SEL(x)	((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
#	define PACKET3_DMA_DATA_SI_CP_SYNC	(1 << 31)


#define PKT3_CONTEXT_CONTROL	0x28
#define	CONTEXT_CONTROL_LOAD_ENABLE(x)		(((unsigned)(x) & 0x1) << 31)
#define	CONTEXT_CONTROL_LOAD_CE_RAM(x)		(((unsigned)(x) & 0x1) << 28)
#define	CONTEXT_CONTROL_SHADOW_ENABLE(x)	(((unsigned)(x) & 0x1) << 31)

#define PKT3_CLEAR_STATE	0x12

#define PKT3_SET_SH_REG		0x76
#define	PACKET3_SET_SH_REG_START	0x00002c00

#define PACKET3_DISPATCH_DIRECT	0x15


/* gfx 8 */
#define mmCOMPUTE_PGM_LO		0x2e0c
#define mmCOMPUTE_PGM_RSRC1		0x2e12
#define mmCOMPUTE_TMPRING_SIZE		0x2e18
#define mmCOMPUTE_USER_DATA_0		0x2e40
#define mmCOMPUTE_USER_DATA_1		0x2e41
#define mmCOMPUTE_RESOURCE_LIMITS	0x2e15
#define mmCOMPUTE_NUM_THREAD_X		0x2e07
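
/* The mmCOMPUTE_* values above are GFX8 register word offsets; PKT3_SET_SH_REG
 * takes them relative to PACKET3_SET_SH_REG_START (0x2c00), so the dispatch
 * code below programs e.g. mmCOMPUTE_PGM_LO as
 * (mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START) == 0x020c.
 */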

#define SWAP_32(num)	(((num & 0xff000000) >> 24) | \
			 ((num & 0x0000ff00) << 8) | \
			 ((num & 0x00ff0000) >> 8) | \
			 ((num & 0x000000ff) << 24))


/* Shader code
 * void main()
 * {
 *	float x = some_input;
 *	for (unsigned i = 0; i < 1000000; i++)
 *		x = sin(x);
 *
 *	u[0] = 42u;
 * }
 */

static uint32_t shader_bin[] = {
	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
};

#define CODE_OFFSET 512
#define DATA_OFFSET 1024
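
/* CODE_OFFSET and DATA_OFFSET are DWORD offsets into the 8 KiB IB buffer
 * allocated by amdgpu_sync_dependency_test(): the shader binary is copied to
 * byte offset 2048 and the DWORD written by the shader and by the dependent
 * WRITE_DATA packet lives at byte offset 4096.
 */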

int suite_basic_tests_init(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	int r;

	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
				     &minor_version, &device_handle);

	if (r) {
		if ((r == -EACCES) && (errno == EACCES))
			printf("\n\nError:%s. "
			       "Hint:Try to run this test program as root.",
			       strerror(errno));
		return CUE_SINIT_FAILED;
	}

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	if (r)
		return CUE_SINIT_FAILED;

	family_id = gpu_info.family_id;

	return CUE_SUCCESS;
}

int suite_basic_tests_clean(void)
{
	int r = amdgpu_device_deinitialize(device_handle);

	if (r == 0)
		return CUE_SUCCESS;
	else
		return CUE_SCLEAN_FAILED;
}

static void amdgpu_query_info_test(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	uint32_t version, feature;
	int r;

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
					  0, &version, &feature);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_separate_ibs(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_shared_ib(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	ptr = (uint32_t *)ib_result_cpu + 4;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
}
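
/* Besides the two small test BOs, the submission's BO list also contains
 * max_allocation-sized VRAM and GTT allocations, so validating the list is
 * expected to force buffer evictions while the SDMA copy from bo1 to bo2 is
 * checked for correctness under each GTT caching flag.
 */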

static void amdgpu_bo_eviction_test(void)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, j, r, loop1, loop2;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	struct amdgpu_heap_info vram_info, gtt_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(4, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
				   0, &vram_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
				   0, &gtt_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	loop1 = loop2 = 0;
	/* run the nested loops to cover all four GTT mapping combinations */
	while(loop1 < 2) {
		while(loop2 < 2) {
			/* allocate UC bo1 for SDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop1], &bo1,
						    (void**)&bo1_cpu, &bo1_mc,
						    &bo1_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* set bo1 */
			memset((void*)bo1_cpu, 0xaa, sdma_write_length);

			/* allocate UC bo2 for SDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop2], &bo2,
						    (void**)&bo2_cpu, &bo2_mc,
						    &bo2_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo2 */
			memset((void*)bo2_cpu, 0, sdma_write_length);

			resources[0] = bo1;
			resources[1] = bo2;
			resources[2] = vram_max[loop2];
			resources[3] = gtt_max[loop2];

			/* fill in PM4: test DMA copy linear */
			i = j = 0;
			if (family_id == AMDGPU_FAMILY_SI) {
				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
							  sdma_write_length);
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
			} else {
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else
					pm4[i++] = sdma_write_length;
				pm4[i++] = 0;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   AMDGPU_HW_IP_DMA, 0,
						   i, pm4,
						   4, resources,
						   ib_info, ibs_request);

			/* verify the SDMA copy produced the expected pattern */
			i = 0;
			while(i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
			}
			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop2++;
		}
		loop2 = 0;
		loop1++;
	}
	amdgpu_bo_free(vram_max[0]);
	amdgpu_bo_free(vram_max[1]);
	amdgpu_bo_free(gtt_max[0]);
	amdgpu_bo_free(gtt_max[1]);
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx(void)
{
	/* write data using the CP */
	amdgpu_command_submission_gfx_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_gfx_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_gfx_cp_copy_data();
	/* separate IB buffers for multi-IB submission */
	amdgpu_command_submission_gfx_separate_ibs();
	/* shared IB buffer for multi-IB submission */
	amdgpu_command_submission_gfx_shared_ib();
}
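
/* Exercises amdgpu_cs_signal_semaphore()/amdgpu_cs_wait_semaphore() in two
 * configurations: first signal on SDMA and wait on GFX within one context,
 * then signal and wait on the GFX ring across two contexts. Each half
 * submits a single NOP IB and checks that the dependent fence expires.
 */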

static void amdgpu_semaphore_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_semaphore_handle sem;
	amdgpu_bo_handle ib_result_handle[2];
	void *ib_result_cpu[2];
	uint64_t ib_result_mc_address[2];
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2] = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	uint32_t sdma_nop, gfx_nop;
	amdgpu_bo_list_handle bo_list[2];
	amdgpu_va_handle va_handle[2];
	int r, i;

	if (family_id == AMDGPU_FAMILY_SI) {
		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
		gfx_nop = GFX_COMPUTE_NOP_SI;
	} else {
		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
		gfx_nop = GFX_COMPUTE_NOP;
	}

	r = amdgpu_cs_create_semaphore(&sem);
	CU_ASSERT_EQUAL(r, 0);
	for (i = 0; i < 2; i++) {
		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle[i], &ib_result_cpu[i],
					    &ib_result_mc_address[i], &va_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
				       NULL, &bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	/* 1. same context different engine */
	ptr = ib_result_cpu[0];
	ptr[0] = sdma_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[0];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* 2. same engine different context */
	ptr = ib_result_cpu[0];
	ptr[0] = gfx_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[1];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	for (i = 0; i < 2; i++) {
		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
					     ib_result_mc_address[i], 4096);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_cs_ctx_free(context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_destroy_semaphore(sem);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_nop(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t *ptr;
	uint32_t expired;
	int r, instance;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip info;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle, &ib_result_cpu,
					    &ib_result_mc_address, &va_handle);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
				       &bo_list);
		CU_ASSERT_EQUAL(r, 0);

		ptr = ib_result_cpu;
		memset(ptr, 0, 16);
		ptr[0] = PACKET3(PACKET3_NOP, 14);

		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
		ib_info.ib_mc_address = ib_result_mc_address;
		ib_info.size = 16;

		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
		ibs_request.ring = instance;
		ibs_request.number_of_ibs = 1;
		ibs_request.ibs = &ib_info;
		ibs_request.resources = bo_list;
		ibs_request.fence_info.handle = NULL;

		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
		r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
		CU_ASSERT_EQUAL(r, 0);

		fence_status.context = context_handle;
		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
		fence_status.ip_instance = 0;
		fence_status.ring = instance;
		fence_status.fence = ibs_request.seq_no;

		r = amdgpu_cs_query_fence_status(&fence_status,
						 AMDGPU_TIMEOUT_INFINITE,
						 0, &expired);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
					     ib_result_mc_address, 4096);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute(void)
{
	/* write data using the CP */
	amdgpu_command_submission_compute_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_compute_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_compute_cp_copy_data();
	/* nop test */
	amdgpu_command_submission_compute_nop();
}

/*
 * The caller must create and release pm4_src, resources, ib_info and
 * ibs_request.
 * Submits the command stream described in ibs_request and waits for the IB
 * to complete.
 */
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       unsigned ip_type,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request *ibs_request)
{
	int r;
	uint32_t expired;
	uint32_t *ring_ptr;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_fence fence_status = {0};
	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
	amdgpu_va_handle va_handle;

	/* prepare CS */
	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
	CU_ASSERT_NOT_EQUAL(resources, NULL);
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
	CU_ASSERT_TRUE(pm4_dw <= 1024);

	/* allocate IB */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* copy the caller's PM4 packet into the IB */
	ring_ptr = ib_result_cpu;
	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));

	ib_info->ib_mc_address = ib_result_mc_address;
	ib_info->size = pm4_dw;

	ibs_request->ip_type = ip_type;
	ibs_request->ring = instance;
	ibs_request->number_of_ibs = 1;
	ibs_request->ibs = ib_info;
	ibs_request->fence_info.handle = NULL;

	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
	all_res[res_cnt] = ib_result_handle;

	r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
				  NULL, &ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ibs_request->ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request->seq_no;

	/* wait for the IB to complete */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);
}
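
/* Writes a 0xdeadbeaf pattern into a GTT BO, either with the SDMA
 * WRITE_LINEAR packet or with the CP PACKET3_WRITE_DATA packet, and reads it
 * back through the CPU mapping. Note the count encoding: FAMILY_AI and newer
 * SDMA takes the DWORD count minus one, SI carries the count in the packet
 * header, and everything else takes the raw count.
 */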

static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
{
	const int sdma_write_length = 128;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, j, r, loop, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		while(loop < 2) {
			/* allocate UC bo for SDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length * sizeof(uint32_t),
						    4096, AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));

			resources[0] = bo;

			/* fill in PM4: test DMA write-linear */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI)
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
								  sdma_write_length);
				else
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
							       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
				pm4[i++] = 0xffffffff & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else if (family_id != AMDGPU_FAMILY_SI)
					pm4[i++] = sdma_write_length;
				while(j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				while(j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   ip_type, ring_id,
						   i, pm4,
						   1, resources,
						   ib_info, ibs_request);

			/* verify the result matches the expected pattern */
			i = 0;
			while(i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length * sizeof(uint32_t));
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_write_linear(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
}
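
/* Fills a 1 MiB GTT BO with a 0xdeadbeaf DWORD pattern using either the SDMA
 * CONSTANT_FILL packet (DW fill, see SDMA_CONSTANT_FILL_EXTRA_SIZE) or the CP
 * DMA_DATA packet with SRC_SEL = 2 (embedded DATA), then verifies the content
 * through the CPU mapping.
 */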

static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
{
	const int sdma_write_length = 1024 * 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, j, r, loop, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		while(loop < 2) {
			/* allocate UC bo for SDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length);

			resources[0] = bo;

			/* fill in PM4: test DMA const fill */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI) {
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
								  0, 0, 0,
								  sdma_write_length / 4);
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
				} else {
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
							       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = 0xdeadbeaf;
					if (family_id >= AMDGPU_FAMILY_AI)
						pm4[i++] = sdma_write_length - 1;
					else
						pm4[i++] = sdma_write_length;
				}
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				if (family_id == AMDGPU_FAMILY_SI) {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
						   PACKET3_DMA_DATA_SI_DST_SEL(0) |
						   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
						   PACKET3_DMA_DATA_SI_CP_SYNC;
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				} else {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
						   PACKET3_DMA_DATA_DST_SEL(0) |
						   PACKET3_DMA_DATA_SRC_SEL(2) |
						   PACKET3_DMA_DATA_CP_SYNC;
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = 0;
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				}
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   ip_type, ring_id,
						   i, pm4,
						   1, resources,
						   ib_info, ibs_request);

			/* verify the fill produced the expected pattern */
			i = 0;
			while(i < (sdma_write_length / 4)) {
				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
}
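
/* Fills bo1 with 0xaa bytes, copies it into bo2 with either the SDMA
 * COPY_LINEAR packet or the CP DMA_DATA packet, and verifies bo2. The two
 * nested loops iterate over the cached/USWC GTT flag combinations for the
 * source and destination BOs.
 */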

static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, j, r, loop1, loop2, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(2, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop1 = loop2 = 0;
		/* run the nested loops to test the GTT mapping combinations */
		while(loop1 < 2) {
			while(loop2 < 2) {
				/* allocate UC bo1 for SDMA use */
				r = amdgpu_bo_alloc_and_map(device_handle,
							    sdma_write_length, 4096,
							    AMDGPU_GEM_DOMAIN_GTT,
							    gtt_flags[loop1], &bo1,
							    (void**)&bo1_cpu, &bo1_mc,
							    &bo1_va_handle);
				CU_ASSERT_EQUAL(r, 0);

				/* set bo1 */
				memset((void*)bo1_cpu, 0xaa, sdma_write_length);

				/* allocate UC bo2 for SDMA use */
				r = amdgpu_bo_alloc_and_map(device_handle,
							    sdma_write_length, 4096,
							    AMDGPU_GEM_DOMAIN_GTT,
							    gtt_flags[loop2], &bo2,
							    (void**)&bo2_cpu, &bo2_mc,
							    &bo2_va_handle);
				CU_ASSERT_EQUAL(r, 0);

				/* clear bo2 */
				memset((void*)bo2_cpu, 0, sdma_write_length);

				resources[0] = bo1;
				resources[1] = bo2;

				/* fill in PM4: test DMA copy linear */
				i = j = 0;
				if (ip_type == AMDGPU_HW_IP_DMA) {
					if (family_id == AMDGPU_FAMILY_SI) {
						pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
									  0, 0, 0,
									  sdma_write_length);
						pm4[i++] = 0xffffffff & bo2_mc;
						pm4[i++] = 0xffffffff & bo1_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
					} else {
						pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
								       SDMA_COPY_SUB_OPCODE_LINEAR,
								       0);
						if (family_id >= AMDGPU_FAMILY_AI)
							pm4[i++] = sdma_write_length - 1;
						else
							pm4[i++] = sdma_write_length;
						pm4[i++] = 0;
						pm4[i++] = 0xffffffff & bo1_mc;
						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
						pm4[i++] = 0xffffffff & bo2_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
					}
				} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
					   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
					if (family_id == AMDGPU_FAMILY_SI) {
						pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
						pm4[i++] = 0xfffffffc & bo1_mc;
						pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
							   PACKET3_DMA_DATA_SI_DST_SEL(0) |
							   PACKET3_DMA_DATA_SI_SRC_SEL(0) |
							   PACKET3_DMA_DATA_SI_CP_SYNC |
							   (0xffff00000000 & bo1_mc) >> 32;
						pm4[i++] = 0xfffffffc & bo2_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
						pm4[i++] = sdma_write_length;
					} else {
						pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
						pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
							   PACKET3_DMA_DATA_DST_SEL(0) |
							   PACKET3_DMA_DATA_SRC_SEL(0) |
							   PACKET3_DMA_DATA_CP_SYNC;
						pm4[i++] = 0xfffffffc & bo1_mc;
						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
						pm4[i++] = 0xfffffffc & bo2_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
						pm4[i++] = sdma_write_length;
					}
				}

				amdgpu_test_exec_cs_helper(context_handle,
							   ip_type, ring_id,
							   i, pm4,
							   2, resources,
							   ib_info, ibs_request);

				/* verify the copy produced the expected pattern */
				i = 0;
				while(i < sdma_write_length) {
					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
				}
				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
							     sdma_write_length);
				CU_ASSERT_EQUAL(r, 0);
				r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
							     sdma_write_length);
				CU_ASSERT_EQUAL(r, 0);
				loop2++;
			}
			loop1++;
		}
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_copy_linear(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
}

static void amdgpu_command_submission_sdma(void)
{
	amdgpu_command_submission_sdma_write_linear();
	amdgpu_command_submission_sdma_const_fill();
	amdgpu_command_submission_sdma_copy_linear();
}
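
/* Submits the same two-IB GFX request twice in a single amdgpu_cs_submit()
 * call and then waits on both returned fences with amdgpu_cs_wait_fences(),
 * once with wait_all = true and once with wait_all = false.
 */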

static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status[2] = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r;
	int i = 0, ib_cs_num = 2;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	for (i = 0; i < ib_cs_num; i++) {
		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
		ibs_request[i].number_of_ibs = 2;
		ibs_request[i].ibs = ib_info;
		ibs_request[i].resources = bo_list;
		ibs_request[i].fence_info.handle = NULL;
	}

	r = amdgpu_cs_submit(context_handle, 0, ibs_request, ib_cs_num);
	CU_ASSERT_EQUAL(r, 0);

	for (i = 0; i < ib_cs_num; i++) {
		fence_status[i].context = context_handle;
		fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
		fence_status[i].fence = ibs_request[i].seq_no;
	}

	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
				  AMDGPU_TIMEOUT_INFINITE,
				  &expired, NULL);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_multi_fence(void)
{
	amdgpu_command_submission_multi_fence_wait_all(true);
	amdgpu_command_submission_multi_fence_wait_all(false);
}
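
/* Wraps an ordinary page-aligned malloc'd buffer as a BO with
 * amdgpu_create_bo_from_user_mem(), maps it into the GPU VA space and has
 * SDMA write 0xdeadbeaf into it, then checks the result through the CPU
 * pointer. The fork() before submission is presumably there to make sure
 * copy-on-write in the child does not disturb the pages already pinned for
 * the userptr BO.
 */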

static void amdgpu_userptr_test(void)
{
	int i, r, j;
	uint32_t *pm4 = NULL;
	uint64_t bo_mc;
	void *ptr = NULL;
	int pm4_dw = 256;
	int sdma_write_length = 4;
	amdgpu_bo_handle handle;
	amdgpu_context_handle context_handle;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle va_handle;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
	CU_ASSERT_NOT_EQUAL(ptr, NULL);
	memset(ptr, 0, BUFFER_SIZE);

	r = amdgpu_create_bo_from_user_mem(device_handle,
					   ptr, BUFFER_SIZE, &buf_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_va_range_alloc(device_handle,
				  amdgpu_gpu_va_range_general,
				  BUFFER_SIZE, 1, 0, &bo_mc,
				  &va_handle, 0);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
	CU_ASSERT_EQUAL(r, 0);

	handle = buf_handle;

	j = i = 0;

	if (family_id == AMDGPU_FAMILY_SI)
		pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
					  sdma_write_length);
	else
		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
				       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
	pm4[i++] = 0xffffffff & bo_mc;
	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
	if (family_id >= AMDGPU_FAMILY_AI)
		pm4[i++] = sdma_write_length - 1;
	else if (family_id != AMDGPU_FAMILY_SI)
		pm4[i++] = sdma_write_length;

	while (j++ < sdma_write_length)
		pm4[i++] = 0xdeadbeaf;

	if (!fork()) {
		pm4[0] = 0x0;
		exit(0);
	}

	amdgpu_test_exec_cs_helper(context_handle,
				   AMDGPU_HW_IP_DMA, 0,
				   i, pm4,
				   1, &handle,
				   ib_info, ibs_request);
	i = 0;
	while (i < sdma_write_length) {
		CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
	}
	free(ibs_request);
	free(ib_info);
	free(pm4);

	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_va_range_free(va_handle);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_free(buf_handle);
	CU_ASSERT_EQUAL(r, 0);
	free(ptr);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);

	wait(NULL);
}
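
/* Runs the long sin() loop shader (shader_bin) on context 1, then submits a
 * GFX WRITE_DATA of 99 on context 0 that declares a dependency on the first
 * submission's fence. If the dependency is honoured the shader's store of 42
 * happens first and the buffer must read back 99.
 */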

static void amdgpu_sync_dependency_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t expired;
	int i, j, r;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	static uint32_t *ptr;
	uint64_t seq_no;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ptr = ib_result_cpu;
	i = 0;

	memcpy(ptr + CODE_OFFSET, shader_bin, sizeof(shader_bin));

	/* Dispatch minimal init config and verify it's executed */
	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
	ptr[i++] = 0x80000000;
	ptr[i++] = 0x80000000;

	ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
	ptr[i++] = 0x80000000;

	/* Program compute regs */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
	ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
	ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
	/*
	 * 002c0040   COMPUTE_PGM_RSRC1 <- VGPRS = 0
	 *                                 SGPRS = 1
	 *                                 PRIORITY = 0
	 *                                 FLOAT_MODE = 192 (0xc0)
	 *                                 PRIV = 0
	 *                                 DX10_CLAMP = 1
	 *                                 DEBUG_MODE = 0
	 *                                 IEEE_MODE = 0
	 *                                 BULKY = 0
	 *                                 CDBG_USER = 0
	 */
	ptr[i++] = 0x002c0040;

	/*
	 * 00000010   COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
	 *                                 USER_SGPR = 8
	 *                                 TRAP_PRESENT = 0
	 *                                 TGID_X_EN = 0
	 *                                 TGID_Y_EN = 0
	 *                                 TGID_Z_EN = 0
	 *                                 TG_SIZE_EN = 0
	 *                                 TIDIG_COMP_CNT = 0
	 *                                 EXCP_EN_MSB = 0
	 *                                 LDS_SIZE = 0
	 *                                 EXCP_EN = 0
	 */
	ptr[i++] = 0x00000010;

	/*
	 * 00000100   COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
	 *                                    WAVESIZE = 0
	 */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
	ptr[i++] = 0x00000100;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
	ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
	ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
	ptr[i++] = 0;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
	ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
	ptr[i++] = 1;
	ptr[i++] = 1;
	ptr[i++] = 1;

	/* Dispatch */
	ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ptr[i++] = 1;
	ptr[i++] = 1;
	ptr[i++] = 1;
	ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */

	while (i & 7)
		ptr[i++] = 0xffff1000; /* type3 nop packet */

	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
	ib_info.ib_mc_address = ib_result_mc_address;
	ib_info.size = i;

	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = 0;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);
	seq_no = ibs_request.seq_no;

	/* Prepare second command with dependency on the first */
	j = i;
	ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
	ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ptr[i++] = 0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
	ptr[i++] = 99;

	while (i & 7)
		ptr[i++] = 0xffff1000; /* type3 nop packet */

	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
	ib_info.ib_mc_address = ib_result_mc_address + j * 4;
	ib_info.size = i - j;

	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = 0;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	ibs_request.number_of_dependencies = 1;

	ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
	ibs_request.dependencies[0].context = context_handle[1];
	ibs_request.dependencies[0].ip_instance = 0;
	ibs_request.dependencies[0].ring = 0;
	ibs_request.dependencies[0].fence = seq_no;

	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
	fence_status.context = context_handle[0];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	/* Expect the second command to wait for the shader to complete */
	CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_ctx_free(context_handle[1]);
	CU_ASSERT_EQUAL(r, 0);

	free(ibs_request.dependencies);
}