/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>	/* memset, memcpy */
#include <stdbool.h>	/* true, used with CU_ASSERT_EQUAL */
#include <unistd.h>
#ifdef HAVE_ALLOCA_H
# include <alloca.h>
#endif

#include "CUnit/Basic.h"

#include "amdgpu_test.h"
#include "amdgpu_drm.h"

static amdgpu_device_handle device_handle;
static uint32_t major_version;
static uint32_t minor_version;

static void amdgpu_query_info_test(void);
static void amdgpu_memory_alloc(void);
static void amdgpu_command_submission_gfx(void);
static void amdgpu_command_submission_compute(void);
static void amdgpu_command_submission_sdma(void);
static void amdgpu_userptr_test(void);

CU_TestInfo basic_tests[] = {
	{ "Query Info Test", amdgpu_query_info_test },
	{ "Memory alloc Test", amdgpu_memory_alloc },
	{ "Userptr Test", amdgpu_userptr_test },
	{ "Command submission Test (GFX)", amdgpu_command_submission_gfx },
	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
	CU_TEST_INFO_NULL,
};
#define BUFFER_SIZE (8 * 1024)
#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask   0x000000FF
#define SDMA_PKT_HEADER_op_shift  0
#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
#define SDMA_OPCODE_CONSTANT_FILL  11
#       define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)           ((x) << 14)
	/* 0 = byte fill
	 * 2 = DW fill
	 */
#define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
					(((sub_op) & 0xFF) << 8) |	\
					(((op) & 0xFF) << 0))
#define	SDMA_OPCODE_WRITE				  2
#       define SDMA_WRITE_SUB_OPCODE_LINEAR               0
#       define SDMA_WRITE_SUB_OPCODE_TILED                1

#define	SDMA_OPCODE_COPY				  1
#       define SDMA_COPY_SUB_OPCODE_LINEAR                0
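
/*
 * Worked example of the SDMA_PACKET() header encoding, derived from the
 * macros above (a sketch for reference only):
 *
 *   SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)
 *     = ((0 & 0xFFFF) << 16) | ((0 & 0xFF) << 8) | ((2 & 0xFF) << 0)
 *     = 0x00000002
 *
 *   SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, SDMA_CONSTANT_FILL_EXTRA_SIZE(2))
 *     = ((0x8000 & 0xFFFF) << 16) | ((0 & 0xFF) << 8) | ((11 & 0xFF) << 0)
 *     = 0x8000000b   (constant fill, DW fill size in the extra field)
 */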

int suite_basic_tests_init(void)
{
	int r;

	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
				     &minor_version, &device_handle);

	if (r == 0)
		return CUE_SUCCESS;
	else
		return CUE_SINIT_FAILED;
}

int suite_basic_tests_clean(void)
{
	int r = amdgpu_device_deinitialize(device_handle);

	if (r == 0)
		return CUE_SUCCESS;
	else
		return CUE_SCLEAN_FAILED;
}

static void amdgpu_query_info_test(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	uint32_t version, feature;
	int r;

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
					  0, &version, &feature);
	CU_ASSERT_EQUAL(r, 0);
}
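
/*
 * amdgpu_memory_alloc() exercises four allocation paths: CPU-visible VRAM,
 * CPU-invisible VRAM, cacheable GTT, and write-combined (USWC) GTT.
 * gpu_mem_alloc()/gpu_mem_free() are helpers declared in amdgpu_test.h.
 */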

static void amdgpu_memory_alloc(void)
{
	amdgpu_bo_handle bo;
	amdgpu_va_handle va_handle;
	uint64_t bo_mc;
	int r;

	/* Test visible VRAM */
	bo = gpu_mem_alloc(device_handle,
			4096, 4096,
			AMDGPU_GEM_DOMAIN_VRAM,
			AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
			&bo_mc, &va_handle);

	r = gpu_mem_free(bo, va_handle, bo_mc, 4096);
	CU_ASSERT_EQUAL(r, 0);

	/* Test invisible VRAM */
	bo = gpu_mem_alloc(device_handle,
			4096, 4096,
			AMDGPU_GEM_DOMAIN_VRAM,
			AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
			&bo_mc, &va_handle);

	r = gpu_mem_free(bo, va_handle, bo_mc, 4096);
	CU_ASSERT_EQUAL(r, 0);

	/* Test GART cacheable */
	bo = gpu_mem_alloc(device_handle,
			4096, 4096,
			AMDGPU_GEM_DOMAIN_GTT,
			0, &bo_mc, &va_handle);

	r = gpu_mem_free(bo, va_handle, bo_mc, 4096);
	CU_ASSERT_EQUAL(r, 0);

	/* Test GART USWC */
	bo = gpu_mem_alloc(device_handle,
			4096, 4096,
			AMDGPU_GEM_DOMAIN_GTT,
			AMDGPU_GEM_CREATE_CPU_GTT_USWC,
			&bo_mc, &va_handle);

	r = gpu_mem_free(bo, va_handle, bo_mc, 4096);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_separate_ibs(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
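
	/*
	 * The magic numbers below are PM4 type-3 packet headers:
	 * bits [31:30] = 3 (packet type), [29:16] = payload dword count - 1,
	 * [15:8] = opcode.  0xc0008900 is IT_SET_CE_DE_COUNTERS (opcode 0x89)
	 * with one payload dword; 0xc0008400 (opcode 0x84) is presumably
	 * IT_INCREMENT_CE_COUNTER; 0xc0008600 is IT_WAIT_ON_CE_COUNTER
	 * (opcode 0x86).
	 */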

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	ptr[0] = 0xc0008900;
	ptr[1] = 0;
	ptr[2] = 0xc0008400;
	ptr[3] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = 4;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_shared_ib(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	int r;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008900;
	ptr[1] = 0;
	ptr[2] = 0xc0008400;
	ptr[3] = 1;
	ib_info[0].ib_mc_address = ib_result_mc_address;
	ib_info[0].size = 4;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER, placed 4 dwords (16 bytes) into the same BO */
	ptr = (uint32_t *)ib_result_cpu + 4;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx(void)
{
	/* separate IB buffers for multi-IB submission */
	amdgpu_command_submission_gfx_separate_ibs();
	/* shared IB buffer for multi-IB submission */
	amdgpu_command_submission_gfx_shared_ib();
}
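
/*
 * Submit a trivial IB to each of the eight compute rings.  The IB is
 * filled with 0xffff1000, which decodes as a type-3 NOP (opcode 0x10)
 * with the 0x3fff "no payload" count field, a common single-dword NOP
 * encoding, so the CP simply skips through the IB.
 */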

static void amdgpu_command_submission_compute(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t *ptr;
	uint32_t expired;
	int i, r, instance;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	for (instance = 0; instance < 8; instance++) {
		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle, &ib_result_cpu,
					    &ib_result_mc_address, &va_handle);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
				       &bo_list);
		CU_ASSERT_EQUAL(r, 0);

		/* pad the IB with single-dword type-3 NOPs */
		ptr = ib_result_cpu;
		for (i = 0; i < 16; ++i)
			ptr[i] = 0xffff1000;

		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
		ib_info.ib_mc_address = ib_result_mc_address;
		ib_info.size = 16;

		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
		ibs_request.ring = instance;
		ibs_request.number_of_ibs = 1;
		ibs_request.ibs = &ib_info;
		ibs_request.resources = bo_list;
		ibs_request.fence_info.handle = NULL;

		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
		r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
		CU_ASSERT_EQUAL(r, 0);

		fence_status.context = context_handle;
		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
		fence_status.ring = instance;
		fence_status.fence = ibs_request.seq_no;

		r = amdgpu_cs_query_fence_status(&fence_status,
						 AMDGPU_TIMEOUT_INFINITE,
						 0, &expired);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
					     ib_result_mc_address, 4096);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

/*
 * The caller must create and release pm4_src, resources, ib_info and
 * ibs_request; this helper submits the command stream described in
 * ibs_request and waits for the IB to complete.
 */
static void amdgpu_sdma_test_exec_cs(amdgpu_context_handle context_handle,
				 int instance, int pm4_dw, uint32_t *pm4_src,
				 int res_cnt, amdgpu_bo_handle *resources,
				 struct amdgpu_cs_ib_info *ib_info,
				 struct amdgpu_cs_request *ibs_request)
{
	int r;
	uint32_t expired;
	uint32_t *ring_ptr;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_fence fence_status = {0};
	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
	amdgpu_va_handle va_handle;

	/* prepare CS */
	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
	CU_ASSERT_NOT_EQUAL(resources, NULL);
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
	CU_ASSERT_TRUE(pm4_dw <= 1024);

	/* allocate IB */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* copy the caller's PM4 packets into the IB */
	ring_ptr = ib_result_cpu;
	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));

	ib_info->ib_mc_address = ib_result_mc_address;
	ib_info->size = pm4_dw;

	ibs_request->ip_type = AMDGPU_HW_IP_DMA;
	ibs_request->ring = instance;
	ibs_request->number_of_ibs = 1;
	ibs_request->ibs = ib_info;
	ibs_request->fence_info.handle = NULL;

	/* the BO list is the caller's resources plus the IB itself */
	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
	all_res[res_cnt] = ib_result_handle;

	r = amdgpu_bo_list_create(device_handle, res_cnt + 1, all_res,
				  NULL, &ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_DMA;
	fence_status.ring = ibs_request->ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request->seq_no;

	/* wait for the IB to complete */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);
}
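
/*
 * SDMA write-linear packet layout, as assembled below:
 *   dw0:  header SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)
 *   dw1:  destination address bits 31:0
 *   dw2:  destination address bits 63:32
 *   dw3:  dword count
 *   dw4+: payload dwords
 */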

static void amdgpu_command_submission_sdma_write_linear(void)
{
	const int sdma_write_length = 128;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, j, r, loop;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	/* run once with cacheable GTT and once with USWC GTT */
	loop = 0;
	while (loop < 2) {
		/* allocate UC BO for SDMA use */
		r = amdgpu_bo_alloc_and_map(device_handle,
					    sdma_write_length * sizeof(uint32_t),
					    4096, AMDGPU_GEM_DOMAIN_GTT,
					    gtt_flags[loop], &bo, (void**)&bo_cpu,
					    &bo_mc, &va_handle);
		CU_ASSERT_EQUAL(r, 0);

		/* clear bo */
		memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));

		resources[0] = bo;

		/* fill PM4: test SDMA write-linear */
		i = j = 0;
		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
				SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
		pm4[i++] = 0xffffffff & bo_mc;
		pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
		pm4[i++] = sdma_write_length;
		while (j++ < sdma_write_length)
			pm4[i++] = 0xdeadbeaf;

		amdgpu_sdma_test_exec_cs(context_handle, 0,
					i, pm4,
					1, resources,
					ib_info, ibs_request);

		/* verify the SDMA result matches the expected pattern */
		i = 0;
		while (i < sdma_write_length)
			CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);

		r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
					     sdma_write_length * sizeof(uint32_t));
		CU_ASSERT_EQUAL(r, 0);
		loop++;
	}

	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
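
/*
 * SDMA constant-fill packet layout, as assembled below:
 *   dw0: header (opcode 11, DW fill size in the extra field)
 *   dw1: destination address bits 31:0
 *   dw2: destination address bits 63:32
 *   dw3: fill value
 *   dw4: byte count
 */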

static void amdgpu_command_submission_sdma_const_fill(void)
{
	const int sdma_write_length = 1024 * 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, r, loop;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	/* run once with cacheable GTT and once with USWC GTT */
	loop = 0;
	while (loop < 2) {
		/* allocate UC BO for SDMA use */
		r = amdgpu_bo_alloc_and_map(device_handle,
					    sdma_write_length, 4096,
					    AMDGPU_GEM_DOMAIN_GTT,
					    gtt_flags[loop], &bo, (void**)&bo_cpu,
					    &bo_mc, &va_handle);
		CU_ASSERT_EQUAL(r, 0);

		/* clear bo */
		memset((void*)bo_cpu, 0, sdma_write_length);

		resources[0] = bo;

		/* fill PM4: test SDMA constant fill */
		i = 0;
		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
				   SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
		pm4[i++] = 0xffffffff & bo_mc;
		pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
		pm4[i++] = 0xdeadbeaf;
		pm4[i++] = sdma_write_length;

		amdgpu_sdma_test_exec_cs(context_handle, 0,
					i, pm4,
					1, resources,
					ib_info, ibs_request);

		/* verify the SDMA result matches the expected pattern */
		i = 0;
		while (i < (sdma_write_length / 4))
			CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);

		r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
					     sdma_write_length);
		CU_ASSERT_EQUAL(r, 0);
		loop++;
	}

	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
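
/*
 * SDMA copy-linear packet layout, as assembled below:
 *   dw0: header SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0)
 *   dw1: byte count
 *   dw2: 0 (parameters)
 *   dw3: source address bits 31:0
 *   dw4: source address bits 63:32
 *   dw5: destination address bits 31:0
 *   dw6: destination address bits 63:32
 */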

static void amdgpu_command_submission_sdma_copy_linear(void)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, r, loop1, loop2;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(2, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	/* run all four source/destination mapping combinations */
	loop1 = 0;
	while (loop1 < 2) {
		loop2 = 0;
		while (loop2 < 2) {
			/* allocate UC BO 1 for SDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop1], &bo1,
						    (void**)&bo1_cpu, &bo1_mc,
						    &bo1_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* set bo1 */
			memset((void*)bo1_cpu, 0xaa, sdma_write_length);

			/* allocate UC BO 2 for SDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop2], &bo2,
						    (void**)&bo2_cpu, &bo2_mc,
						    &bo2_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo2 */
			memset((void*)bo2_cpu, 0, sdma_write_length);

			resources[0] = bo1;
			resources[1] = bo2;

			/* fill PM4: test SDMA copy linear */
			i = 0;
			pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
			pm4[i++] = sdma_write_length;
			pm4[i++] = 0;
			pm4[i++] = 0xffffffff & bo1_mc;
			pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
			pm4[i++] = 0xffffffff & bo2_mc;
			pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;

			amdgpu_sdma_test_exec_cs(context_handle, 0,
						i, pm4,
						2, resources,
						ib_info, ibs_request);

			/* verify the SDMA result matches the expected pattern */
			i = 0;
			while (i < sdma_write_length)
				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);

			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop2++;
		}
		loop1++;
	}

	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma(void)
{
	amdgpu_command_submission_sdma_write_linear();
	amdgpu_command_submission_sdma_const_fill();
	amdgpu_command_submission_sdma_copy_linear();
}
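
/*
 * Userptr flow: pin an ordinary page-aligned allocation as a BO
 * (amdgpu_create_bo_from_user_mem), map it into the GPU VA space, have
 * SDMA write a pattern through the GPU mapping, then verify the pattern
 * through the original CPU pointer.
 */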

static void amdgpu_userptr_test(void)
{
	int i, r, j;
	uint32_t *pm4 = NULL;
	uint64_t bo_mc;
	void *ptr = NULL;
	int pm4_dw = 256;
	int sdma_write_length = 4;
	amdgpu_bo_handle handle;
	amdgpu_context_handle context_handle;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle va_handle;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* allocate a page-aligned buffer to pin as a userptr BO */
	r = posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_NOT_EQUAL(ptr, NULL);
	memset(ptr, 0, BUFFER_SIZE);

	r = amdgpu_create_bo_from_user_mem(device_handle,
					   ptr, BUFFER_SIZE, &buf_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_va_range_alloc(device_handle,
				  amdgpu_gpu_va_range_general,
				  BUFFER_SIZE, 1, 0, &bo_mc,
				  &va_handle, 0);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
	CU_ASSERT_EQUAL(r, 0);

	handle = buf_handle;

	/* fill PM4: SDMA write-linear into the userptr BO */
	j = i = 0;
	pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
			       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
	pm4[i++] = 0xffffffff & bo_mc;
	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
	pm4[i++] = sdma_write_length;

	while (j++ < sdma_write_length)
		pm4[i++] = 0xdeadbeaf;

	amdgpu_sdma_test_exec_cs(context_handle, 0,
				 i, pm4,
				 1, &handle,
				 ib_info, ibs_request);

	/* verify the write through the original CPU pointer */
	i = 0;
	while (i < sdma_write_length)
		CU_ASSERT_EQUAL(((uint32_t *)ptr)[i++], 0xdeadbeaf);

	free(ibs_request);
	free(ib_info);
	free(pm4);

	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_va_range_free(va_handle);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_free(buf_handle);
	CU_ASSERT_EQUAL(r, 0);
	free(ptr);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}