/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>	/* memset()/memcpy()/strerror() */
#include <errno.h>	/* errno, EACCES */
#include <unistd.h>
#ifdef HAVE_ALLOCA_H
# include <alloca.h>
#endif

#include "CUnit/Basic.h"

#include "amdgpu_test.h"
#include "amdgpu_drm.h"

static amdgpu_device_handle device_handle;
static uint32_t major_version;
static uint32_t minor_version;

static void amdgpu_query_info_test(void);
static void amdgpu_memory_alloc(void);
static void amdgpu_command_submission_gfx(void);
static void amdgpu_command_submission_compute(void);
static void amdgpu_command_submission_sdma(void);
static void amdgpu_userptr_test(void);
static void amdgpu_semaphore_test(void);

static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);

CU_TestInfo basic_tests[] = {
	{ "Query Info Test", amdgpu_query_info_test },
	{ "Memory alloc Test", amdgpu_memory_alloc },
	{ "Userptr Test", amdgpu_userptr_test },
	{ "Command submission Test (GFX)", amdgpu_command_submission_gfx },
	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
	{ "SW semaphore Test", amdgpu_semaphore_test },
	CU_TEST_INFO_NULL,
};

#define BUFFER_SIZE (8 * 1024)
#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask   0x000000FF
#define SDMA_PKT_HEADER_op_shift  0
#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
#define SDMA_OPCODE_CONSTANT_FILL  11
#       define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)           ((x) << 14)
	/* 0 = byte fill
	 * 2 = DW fill
	 */
#define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
					(((sub_op) & 0xFF) << 8) |	\
					(((op) & 0xFF) << 0))
#define	SDMA_OPCODE_WRITE				  2
#       define SDMA_WRITE_SUB_OPCODE_LINEAR               0
#       define SDMA_WRITE_SUB_OPCODE_TILED                1

#define	SDMA_OPCODE_COPY				  1
#       define SDMA_COPY_SUB_OPCODE_LINEAR                0
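/* e.g. SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)
 * packs op into bits 7:0, sub_op into bits 15:8 and the extra field into
 * bits 31:16, yielding the header dword 0x00000002.
 */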

#define GFX_COMPUTE_NOP  0xffff1000
#define SDMA_NOP  0x0
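/* GFX_COMPUTE_NOP is equivalent to PACKET3(PACKET3_NOP, 0x3fff), i.e. a
 * type-3 NOP with the maximum count field (see the PM4 macros below).
 */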

/* PM4 */
#define	PACKET_TYPE0	0
#define	PACKET_TYPE1	1
#define	PACKET_TYPE2	2
#define	PACKET_TYPE3	3

#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
#define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |				\
			 ((reg) & 0xFFFF) |				\
			 ((n) & 0x3FFF) << 16)
#define CP_PACKET2			0x80000000
#define		PACKET2_PAD_SHIFT		0
#define		PACKET2_PAD_MASK		(0x3fffffff << 0)

#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

#define PACKET3(op, n)	((PACKET_TYPE3 << 30) |				\
			 (((op) & 0xFF) << 8) |				\
			 ((n) & 0x3FFF) << 16)
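/* e.g. PACKET3(PACKET3_WRITE_DATA, 4) evaluates to 0xc0043700: type 3 in
 * bits 31:30, the dword count in bits 29:16 and the opcode in bits 15:8.
 */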

/* Packet 3 types */
#define	PACKET3_NOP					0x10

#define	PACKET3_WRITE_DATA				0x37
#define		WRITE_DATA_DST_SEL(x)                   ((x) << 8)
		/* 0 - register
		 * 1 - memory (sync - via GRBM)
		 * 2 - gl2
		 * 3 - gds
		 * 4 - reserved
		 * 5 - memory (async - direct)
		 */
#define		WR_ONE_ADDR                             (1 << 16)
#define		WR_CONFIRM                              (1 << 20)
#define		WRITE_DATA_CACHE_POLICY(x)              ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 */
#define		WRITE_DATA_ENGINE_SEL(x)                ((x) << 30)
		/* 0 - me
		 * 1 - pfp
		 * 2 - ce
		 */
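/* The write-linear helper below combines these as
 *   PACKET3(PACKET3_WRITE_DATA, 2 + ndw)
 *   WRITE_DATA_DST_SEL(5) | WR_CONFIRM
 * i.e. a confirmed write to memory through the async/direct path.
 */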

#define	PACKET3_DMA_DATA				0x50
/* 1. header
 * 2. CONTROL
 * 3. SRC_ADDR_LO or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [7:0]
 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
 */
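/* The const-fill and copy-linear helpers below emit this packet as
 * PACKET3(PACKET3_DMA_DATA, 5) followed by the six payload dwords.
 */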
/* CONTROL */
#              define PACKET3_DMA_DATA_ENGINE(x)     ((x) << 0)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
#              define PACKET3_DMA_DATA_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
#              define PACKET3_DMA_DATA_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_CP_SYNC     (1u << 31) /* unsigned: avoid shifting into the sign bit */
/* COMMAND */
#              define PACKET3_DMA_DATA_DIS_WC      (1 << 21)
#              define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_SAS     (1 << 26)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_DAS     (1 << 27)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
#              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
#              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)

int suite_basic_tests_init(void)
{
	int r;

	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
				     &minor_version, &device_handle);

	if (r == 0)
		return CUE_SUCCESS;
	else {
		if ((r == -EACCES) && (errno == EACCES))
			printf("\n\nError: %s. "
			       "Hint: try running this test program as root.",
			       strerror(errno));
		return CUE_SINIT_FAILED;
	}
}

int suite_basic_tests_clean(void)
{
	int r = amdgpu_device_deinitialize(device_handle);

	if (r == 0)
		return CUE_SUCCESS;
	else
		return CUE_SCLEAN_FAILED;
}

static void amdgpu_query_info_test(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	uint32_t version, feature;
	int r;

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
					  0, &version, &feature);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_memory_alloc(void)
{
	amdgpu_bo_handle bo;
	amdgpu_va_handle va_handle;
	uint64_t bo_mc;
	int r;

	/* Test visible VRAM */
	bo = gpu_mem_alloc(device_handle,
			   4096, 4096,
			   AMDGPU_GEM_DOMAIN_VRAM,
			   AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
			   &bo_mc, &va_handle);

	r = gpu_mem_free(bo, va_handle, bo_mc, 4096);
	CU_ASSERT_EQUAL(r, 0);

	/* Test invisible VRAM */
	bo = gpu_mem_alloc(device_handle,
			   4096, 4096,
			   AMDGPU_GEM_DOMAIN_VRAM,
			   AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
			   &bo_mc, &va_handle);

	r = gpu_mem_free(bo, va_handle, bo_mc, 4096);
	CU_ASSERT_EQUAL(r, 0);

	/* Test GART cacheable */
	bo = gpu_mem_alloc(device_handle,
			   4096, 4096,
			   AMDGPU_GEM_DOMAIN_GTT,
			   0, &bo_mc, &va_handle);

	r = gpu_mem_free(bo, va_handle, bo_mc, 4096);
	CU_ASSERT_EQUAL(r, 0);

	/* Test GART USWC */
	bo = gpu_mem_alloc(device_handle,
			   4096, 4096,
			   AMDGPU_GEM_DOMAIN_GTT,
			   AMDGPU_GEM_CREATE_CPU_GTT_USWC,
			   &bo_mc, &va_handle);

	r = gpu_mem_free(bo, va_handle, bo_mc, 4096);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_separate_ibs(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	ptr[0] = 0xc0008900;
	ptr[1] = 0;
	ptr[2] = 0xc0008400;
	ptr[3] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = 4;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_shared_ib(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	int r;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008900;
	ptr[1] = 0;
	ptr[2] = 0xc0008400;
	ptr[3] = 1;
	ib_info[0].ib_mc_address = ib_result_mc_address;
	ib_info[0].size = 4;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

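	/* IT_WAIT_ON_CE_COUNTER */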
	ptr = (uint32_t *)ib_result_cpu + 4;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx(void)
{
	/* write data using the CP */
	amdgpu_command_submission_gfx_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_gfx_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_gfx_cp_copy_data();
	/* separate IB buffers for multi-IB submission */
	amdgpu_command_submission_gfx_separate_ibs();
	/* shared IB buffer for multi-IB submission */
	amdgpu_command_submission_gfx_shared_ib();
}

static void amdgpu_semaphore_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_semaphore_handle sem;
	amdgpu_bo_handle ib_result_handle[2];
	void *ib_result_cpu[2];
	uint64_t ib_result_mc_address[2];
	struct amdgpu_cs_request ibs_request[2] = {{0}};
	struct amdgpu_cs_ib_info ib_info[2] = {{0}};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list[2];
	amdgpu_va_handle va_handle[2];
	int r, i;

	r = amdgpu_cs_create_semaphore(&sem);
	CU_ASSERT_EQUAL(r, 0);
	for (i = 0; i < 2; i++) {
		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle[i], &ib_result_cpu[i],
					    &ib_result_mc_address[i], &va_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
				       NULL, &bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	/* 1. same context, different engine */
	ptr = ib_result_cpu[0];
	ptr[0] = SDMA_NOP;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = GFX_COMPUTE_NOP;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[0];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* 2. same engine, different context */
	ptr = ib_result_cpu[0];
	ptr[0] = GFX_COMPUTE_NOP;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = GFX_COMPUTE_NOP;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[1];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	for (i = 0; i < 2; i++) {
		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
					     ib_result_mc_address[i], 4096);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_cs_ctx_free(context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_destroy_semaphore(sem);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_nop(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t *ptr;
	uint32_t expired;
	int i, r, instance;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* submit a NOP IB on each of the eight compute rings */
	for (instance = 0; instance < 8; instance++) {
		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle, &ib_result_cpu,
					    &ib_result_mc_address, &va_handle);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
				       &bo_list);
		CU_ASSERT_EQUAL(r, 0);

		ptr = ib_result_cpu;
		for (i = 0; i < 16; ++i)
			ptr[i] = GFX_COMPUTE_NOP;

		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
		ib_info.ib_mc_address = ib_result_mc_address;
		ib_info.size = 16;

		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
		ibs_request.ring = instance;
		ibs_request.number_of_ibs = 1;
		ibs_request.ibs = &ib_info;
		ibs_request.resources = bo_list;
		ibs_request.fence_info.handle = NULL;

		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
		r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
		CU_ASSERT_EQUAL(r, 0);

		fence_status.context = context_handle;
		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
		fence_status.ip_instance = 0;
		fence_status.ring = instance;
		fence_status.fence = ibs_request.seq_no;

		r = amdgpu_cs_query_fence_status(&fence_status,
						 AMDGPU_TIMEOUT_INFINITE,
						 0, &expired);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
					     ib_result_mc_address, 4096);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute(void)
{
	/* write data using the CP */
	amdgpu_command_submission_compute_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_compute_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_compute_cp_copy_data();
	/* nop test */
	amdgpu_command_submission_compute_nop();
}

/*
 * The caller must create and release pm4_src, resources, ib_info and
 * ibs_request. This helper submits the command stream described in
 * ibs_request and waits for the resulting IB to complete.
 */
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       unsigned ip_type,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request *ibs_request)
{
	int r;
	uint32_t expired;
	uint32_t *ring_ptr;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_fence fence_status = {0};
	/* one extra slot so the IB's own bo can be appended to the list */
	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
	amdgpu_va_handle va_handle;

	/* prepare CS */
	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
	CU_ASSERT_NOT_EQUAL(resources, NULL);
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
	CU_ASSERT_TRUE(pm4_dw <= 1024);

	/* allocate IB */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* copy the caller's PM4 packet into the IB */
	ring_ptr = ib_result_cpu;
	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));

	ib_info->ib_mc_address = ib_result_mc_address;
	ib_info->size = pm4_dw;

	ibs_request->ip_type = ip_type;
	ibs_request->ring = instance;
	ibs_request->number_of_ibs = 1;
	ibs_request->ibs = ib_info;
	ibs_request->fence_info.handle = NULL;

	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
	all_res[res_cnt] = ib_result_handle;

	r = amdgpu_bo_list_create(device_handle, res_cnt + 1, all_res,
				  NULL, &ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ibs_request->ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request->seq_no;

	/* wait for the IB to complete */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
{
	const int sdma_write_length = 128;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, j, r, loop;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	/* run once with cached GTT and once with USWC GTT */
	loop = 0;
	while (loop < 2) {
		/* allocate UC bo for sDMA use */
		r = amdgpu_bo_alloc_and_map(device_handle,
					    sdma_write_length * sizeof(uint32_t),
					    4096, AMDGPU_GEM_DOMAIN_GTT,
					    gtt_flags[loop], &bo, (void**)&bo_cpu,
					    &bo_mc, &va_handle);
		CU_ASSERT_EQUAL(r, 0);

		/* clear bo */
		memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));

		resources[0] = bo;

		/* fill the PM4 buffer: test DMA write-linear */
		i = j = 0;
		if (ip_type == AMDGPU_HW_IP_DMA) {
			pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
					       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
			pm4[i++] = 0xffffffff & bo_mc;
			pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
			pm4[i++] = sdma_write_length;
			while (j++ < sdma_write_length)
				pm4[i++] = 0xdeadbeaf;
		} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
			   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
			pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
			pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
			pm4[i++] = 0xfffffffc & bo_mc;
			pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
			while (j++ < sdma_write_length)
				pm4[i++] = 0xdeadbeaf;
		}

		amdgpu_test_exec_cs_helper(context_handle,
					   ip_type, 0,
					   i, pm4,
					   1, resources,
					   ib_info, ibs_request);

		/* verify that the pattern was written as expected */
		i = 0;
		while (i < sdma_write_length) {
			CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
		}

		r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
					     sdma_write_length * sizeof(uint32_t));
		CU_ASSERT_EQUAL(r, 0);
		loop++;
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_write_linear(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
}

static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
{
	const int sdma_write_length = 1024 * 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, j, r, loop;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	/* run once with cached GTT and once with USWC GTT */
	loop = 0;
	while (loop < 2) {
		/* allocate UC bo for sDMA use */
		r = amdgpu_bo_alloc_and_map(device_handle,
					    sdma_write_length, 4096,
					    AMDGPU_GEM_DOMAIN_GTT,
					    gtt_flags[loop], &bo, (void**)&bo_cpu,
					    &bo_mc, &va_handle);
		CU_ASSERT_EQUAL(r, 0);

		/* clear bo */
		memset((void*)bo_cpu, 0, sdma_write_length);

		resources[0] = bo;

		/* fill the PM4 buffer: test DMA const fill */
		i = j = 0;
		if (ip_type == AMDGPU_HW_IP_DMA) {
			pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
					       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
			pm4[i++] = 0xffffffff & bo_mc;
			pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
			pm4[i++] = 0xdeadbeaf;
			pm4[i++] = sdma_write_length;
		} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
			   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
			pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
			pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
				PACKET3_DMA_DATA_DST_SEL(0) |
				PACKET3_DMA_DATA_SRC_SEL(2) |
				PACKET3_DMA_DATA_CP_SYNC;
			pm4[i++] = 0xdeadbeaf;
			pm4[i++] = 0;
			pm4[i++] = 0xfffffffc & bo_mc;
			pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
			pm4[i++] = sdma_write_length;
		}

		amdgpu_test_exec_cs_helper(context_handle,
					   ip_type, 0,
					   i, pm4,
					   1, resources,
					   ib_info, ibs_request);

		/* verify that the fill pattern landed in every dword */
		i = 0;
		while (i < (sdma_write_length / 4)) {
			CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
		}

		r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
					     sdma_write_length);
		CU_ASSERT_EQUAL(r, 0);
		loop++;
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
}

static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, j, r, loop1, loop2;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(2, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	/* run all four combinations of the two GTT mapping flags */
	loop1 = 0;
	while (loop1 < 2) {
		loop2 = 0;
		while (loop2 < 2) {
			/* allocate UC bo1 for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop1], &bo1,
						    (void**)&bo1_cpu, &bo1_mc,
						    &bo1_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* set bo1 */
			memset((void*)bo1_cpu, 0xaa, sdma_write_length);

			/* allocate UC bo2 for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop2], &bo2,
						    (void**)&bo2_cpu, &bo2_mc,
						    &bo2_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo2 */
			memset((void*)bo2_cpu, 0, sdma_write_length);

			resources[0] = bo1;
			resources[1] = bo2;

			/* fill the PM4 buffer: test DMA copy linear */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
				pm4[i++] = sdma_write_length;
				pm4[i++] = 0;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
				pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
					PACKET3_DMA_DATA_DST_SEL(0) |
					PACKET3_DMA_DATA_SRC_SEL(0) |
					PACKET3_DMA_DATA_CP_SYNC;
				pm4[i++] = 0xfffffffc & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
				pm4[i++] = 0xfffffffc & bo2_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
				pm4[i++] = sdma_write_length;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   ip_type, 0,
						   i, pm4,
						   2, resources,
						   ib_info, ibs_request);

			/* verify that bo1's pattern arrived in bo2 */
			i = 0;
			while (i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
			}
			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop2++;
		}
		loop1++;
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_copy_linear(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
}

static void amdgpu_command_submission_sdma(void)
{
	amdgpu_command_submission_sdma_write_linear();
	amdgpu_command_submission_sdma_const_fill();
	amdgpu_command_submission_sdma_copy_linear();
}

static void amdgpu_userptr_test(void)
{
	int i, r, j;
	uint32_t *pm4 = NULL;
	uint64_t bo_mc;
	void *ptr = NULL;
	int pm4_dw = 256;
	int sdma_write_length = 4;
	amdgpu_bo_handle handle;
	amdgpu_context_handle context_handle;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle va_handle;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* allocate a page-aligned user buffer and hand it to the GPU */
	r = posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_NOT_EQUAL(ptr, NULL);
	memset(ptr, 0, BUFFER_SIZE);

	r = amdgpu_create_bo_from_user_mem(device_handle,
					   ptr, BUFFER_SIZE, &buf_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_va_range_alloc(device_handle,
				  amdgpu_gpu_va_range_general,
				  BUFFER_SIZE, 1, 0, &bo_mc,
				  &va_handle, 0);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
	CU_ASSERT_EQUAL(r, 0);

	handle = buf_handle;

	j = i = 0;
	pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
			       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
	pm4[i++] = 0xffffffff & bo_mc;
	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
	pm4[i++] = sdma_write_length;

	while (j++ < sdma_write_length)
		pm4[i++] = 0xdeadbeaf;

	amdgpu_test_exec_cs_helper(context_handle,
				   AMDGPU_HW_IP_DMA, 0,
				   i, pm4,
				   1, &handle,
				   ib_info, ibs_request);

	/* verify that the SDMA write reached the user pages */
	i = 0;
	while (i < sdma_write_length) {
		CU_ASSERT_EQUAL(((uint32_t *)ptr)[i++], 0xdeadbeaf);
	}
	free(ibs_request);
	free(ib_info);
	free(pm4);

	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_va_range_free(va_handle);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_free(buf_handle);
	CU_ASSERT_EQUAL(r, 0);
	free(ptr);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}