/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#ifdef HAVE_ALLOCA_H
# include <alloca.h>
#endif
#include <sys/wait.h>

#include "CUnit/Basic.h"

#include "amdgpu_test.h"
#include "amdgpu_drm.h"

static amdgpu_device_handle device_handle;
static uint32_t major_version;
static uint32_t minor_version;
static uint32_t family_id;

static void amdgpu_query_info_test(void);
static void amdgpu_command_submission_gfx(void);
static void amdgpu_command_submission_compute(void);
static void amdgpu_command_submission_multi_fence(void);
static void amdgpu_command_submission_sdma(void);
static void amdgpu_userptr_test(void);
static void amdgpu_semaphore_test(void);
static void amdgpu_sync_dependency_test(void);
static void amdgpu_bo_eviction_test(void);

static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       unsigned ip_type,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request *ibs_request);

CU_TestInfo basic_tests[] = {
	{ "Query Info Test", amdgpu_query_info_test },
	{ "Userptr Test", amdgpu_userptr_test },
	{ "bo eviction Test", amdgpu_bo_eviction_test },
	{ "Command submission Test (GFX)", amdgpu_command_submission_gfx },
	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
	{ "SW semaphore Test", amdgpu_semaphore_test },
	{ "Sync dependency Test", amdgpu_sync_dependency_test },
	CU_TEST_INFO_NULL,
};

#define BUFFER_SIZE (8 * 1024)
#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask   0x000000FF
#define SDMA_PKT_HEADER_op_shift  0
#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
#define SDMA_OPCODE_CONSTANT_FILL  11
#       define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)           ((x) << 14)
	/* 0 = byte fill
	 * 2 = DW fill
	 */
#define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
					(((sub_op) & 0xFF) << 8) |	\
					(((op) & 0xFF) << 0))
#define	SDMA_OPCODE_WRITE				  2
#       define SDMA_WRITE_SUB_OPCODE_LINEAR               0
#       define SDMA_WRITE_SUB_OPCODE_TILED                1

#define	SDMA_OPCODE_COPY				  1
#       define SDMA_COPY_SUB_OPCODE_LINEAR                0
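
/*
 * Worked examples (illustrative, derived from the macros above): the
 * write-linear header used below, SDMA_PACKET(SDMA_OPCODE_WRITE,
 * SDMA_WRITE_SUB_OPCODE_LINEAR, 0), encodes as 0x00000002, and the
 * copy-linear header SDMA_PACKET(SDMA_OPCODE_COPY,
 * SDMA_COPY_SUB_OPCODE_LINEAR, 0) encodes as 0x00000001.
 */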

#define GFX_COMPUTE_NOP  0xffff1000
#define SDMA_NOP  0x0
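
/*
 * Note: GFX_COMPUTE_NOP is a type-3 NOP header with a saturated count
 * field, i.e. it equals PACKET3(PACKET3_NOP, 0x3fff) from the PM4
 * helpers below, while SDMA rings take a plain zero dword as their NOP.
 */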

/* PM4 */
#define	PACKET_TYPE0	0
#define	PACKET_TYPE1	1
#define	PACKET_TYPE2	2
#define	PACKET_TYPE3	3

#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
#define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |				\
			 ((reg) & 0xFFFF) |				\
			 ((n) & 0x3FFF) << 16)
#define CP_PACKET2			0x80000000
#define		PACKET2_PAD_SHIFT		0
#define		PACKET2_PAD_MASK		(0x3fffffff << 0)

#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

#define PACKET3(op, n)	((PACKET_TYPE3 << 30) |				\
			 (((op) & 0xFF) << 8) |				\
			 ((n) & 0x3FFF) << 16)
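
/*
 * Worked example (illustrative): PACKET3(PACKET3_NOP, 14) encodes as
 * (3 << 30) | (0x10 << 8) | (14 << 16) == 0xc00e1000. The count field
 * holds "payload dwords minus one", which is how the compute NOP test
 * below pads a 16-dword IB: one header plus 15 payload dwords.
 */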

/* Packet 3 types */
#define	PACKET3_NOP					0x10

#define	PACKET3_WRITE_DATA				0x37
#define		WRITE_DATA_DST_SEL(x)                   ((x) << 8)
		/* 0 - register
		 * 1 - memory (sync - via GRBM)
		 * 2 - gl2
		 * 3 - gds
		 * 4 - reserved
		 * 5 - memory (async - direct)
		 */
#define		WR_ONE_ADDR                             (1 << 16)
#define		WR_CONFIRM                              (1 << 20)
#define		WRITE_DATA_CACHE_POLICY(x)              ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 */
#define		WRITE_DATA_ENGINE_SEL(x)                ((x) << 30)
		/* 0 - me
		 * 1 - pfp
		 * 2 - ce
		 */

#define	PACKET3_DMA_DATA				0x50
/* 1. header
 * 2. CONTROL
 * 3. SRC_ADDR_LO or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [7:0]
 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
 */
/* CONTROL */
#              define PACKET3_DMA_DATA_ENGINE(x)     ((x) << 0)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
#              define PACKET3_DMA_DATA_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
#              define PACKET3_DMA_DATA_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_CP_SYNC     (1 << 31)
/* COMMAND */
#              define PACKET3_DMA_DATA_DIS_WC      (1 << 21)
#              define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_SAS     (1 << 26)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_DAS     (1 << 27)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
#              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
#              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)

#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((op) & 0xF) << 28) |	\
						(((b) & 0x1) << 26) |	\
						(((t) & 0x1) << 23) |	\
						(((s) & 0x1) << 22) |	\
						(((cnt) & 0xFFFFF) << 0))
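
/*
 * Worked example (illustrative): the SI copy packet used below,
 * SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0, count), encodes as
 * (3 << 28) | count, e.g. 0x30000400 for a 1024-byte copy.
 */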
#define	SDMA_OPCODE_COPY_SI	3
#define SDMA_OPCODE_CONSTANT_FILL_SI	13
#define SDMA_NOP_SI  0xf
#define GFX_COMPUTE_NOP_SI 0x80000000
#define	PACKET3_DMA_DATA_SI	0x41
#              define PACKET3_DMA_DATA_SI_ENGINE(x)     ((x) << 27)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SI_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_SI_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_SI_CP_SYNC     (1 << 31)

#define PKT3_CONTEXT_CONTROL                   0x28
#define     CONTEXT_CONTROL_LOAD_ENABLE(x)     (((unsigned)(x) & 0x1) << 31)
#define     CONTEXT_CONTROL_LOAD_CE_RAM(x)     (((unsigned)(x) & 0x1) << 28)
#define     CONTEXT_CONTROL_SHADOW_ENABLE(x)   (((unsigned)(x) & 0x1) << 31)

#define PKT3_CLEAR_STATE                       0x12

#define PKT3_SET_SH_REG                        0x76
#define		PACKET3_SET_SH_REG_START			0x00002c00

#define	PACKET3_DISPATCH_DIRECT				0x15

/* gfx 8 */
#define mmCOMPUTE_PGM_LO                                                        0x2e0c
#define mmCOMPUTE_PGM_RSRC1                                                     0x2e12
#define mmCOMPUTE_TMPRING_SIZE                                                  0x2e18
#define mmCOMPUTE_USER_DATA_0                                                   0x2e40
#define mmCOMPUTE_USER_DATA_1                                                   0x2e41
#define mmCOMPUTE_RESOURCE_LIMITS                                               0x2e15
#define mmCOMPUTE_NUM_THREAD_X                                                  0x2e07

#define SWAP_32(num) (((num & 0xff000000) >> 24) | \
		      ((num & 0x0000ff00) << 8) | \
		      ((num & 0x00ff0000) >> 8) | \
		      ((num & 0x000000ff) << 24))
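
/* Worked example (illustrative): SWAP_32(0x12345678) == 0x78563412. */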

/* Shader code:
 * void main()
 * {
 *	float x = some_input;
 *
 *	for (unsigned i = 0; i < 1000000; i++)
 *		x = sin(x);
 *
 *	u[0] = 42u;
 * }
 */

static uint32_t shader_bin[] = {
	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
};

#define CODE_OFFSET 512
#define DATA_OFFSET 1024
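
/*
 * CODE_OFFSET/DATA_OFFSET place the shader binary and its output word at
 * fixed offsets inside a single test BO (assumption: consumed by the
 * sync dependency test further down in this file).
 */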

int suite_basic_tests_init(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	int r;

	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
				     &minor_version, &device_handle);

	if (r) {
		if ((r == -EACCES) && (errno == EACCES))
			printf("\n\nError: %s. "
			       "Hint: try running this test program as root.",
			       strerror(errno));
		return CUE_SINIT_FAILED;
	}

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	if (r)
		return CUE_SINIT_FAILED;

	family_id = gpu_info.family_id;

	return CUE_SUCCESS;
}

int suite_basic_tests_clean(void)
{
	int r = amdgpu_device_deinitialize(device_handle);

	if (r == 0)
		return CUE_SUCCESS;
	else
		return CUE_SCLEAN_FAILED;
}

static void amdgpu_query_info_test(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	uint32_t version, feature;
	int r;

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
					  0, &version, &feature);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_separate_ibs(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
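	/* IT_INCREMENT_CE_COUNTER (type-3 opcode 0x84) */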
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_shared_ib(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
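	/* The second IB shares the same BO, 16 bytes (4 dwords) in. */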
	ptr = (uint32_t *)ib_result_cpu + 4;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_bo_eviction_test(void)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, j, r, loop1, loop2;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	struct amdgpu_heap_info vram_info, gtt_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resources */
	resources = calloc(4, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
				   0, &vram_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
				   0, &gtt_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	loop1 = loop2 = 0;
	/* run the copy four times to cover all four GTT-flag combinations */
	while (loop1 < 2) {
		while (loop2 < 2) {
			/* allocate UC bo1 for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop1], &bo1,
						    (void**)&bo1_cpu, &bo1_mc,
						    &bo1_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* set bo1 */
			memset((void*)bo1_cpu, 0xaa, sdma_write_length);

			/* allocate UC bo2 for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop2], &bo2,
						    (void**)&bo2_cpu, &bo2_mc,
						    &bo2_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo2 */
			memset((void*)bo2_cpu, 0, sdma_write_length);

			resources[0] = bo1;
			resources[1] = bo2;
			resources[2] = vram_max[loop2];
			resources[3] = gtt_max[loop2];

			/* fill in PM4: test DMA copy linear */
			i = j = 0;
			if (family_id == AMDGPU_FAMILY_SI) {
				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
							  sdma_write_length);
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
			} else {
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else
					pm4[i++] = sdma_write_length;
				pm4[i++] = 0;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   AMDGPU_HW_IP_DMA, 0,
						   i, pm4,
						   4, resources,
						   ib_info, ibs_request);

			/* verify that the SDMA copy produced the expected pattern */
			i = 0;
			while (i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
			}
			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop2++;
		}
		loop2 = 0;
		loop1++;
	}
	amdgpu_bo_free(vram_max[0]);
	amdgpu_bo_free(vram_max[1]);
	amdgpu_bo_free(gtt_max[0]);
	amdgpu_bo_free(gtt_max[1]);
	/* clean up resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx(void)
{
	/* write data using the CP */
	amdgpu_command_submission_gfx_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_gfx_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_gfx_cp_copy_data();
	/* separate IB buffers for multi-IB submission */
	amdgpu_command_submission_gfx_separate_ibs();
	/* shared IB buffer for multi-IB submission */
	amdgpu_command_submission_gfx_shared_ib();
}

static void amdgpu_semaphore_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_semaphore_handle sem;
	amdgpu_bo_handle ib_result_handle[2];
	void *ib_result_cpu[2];
	uint64_t ib_result_mc_address[2];
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2] = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	uint32_t sdma_nop, gfx_nop;
	amdgpu_bo_list_handle bo_list[2];
	amdgpu_va_handle va_handle[2];
	int r, i;

	if (family_id == AMDGPU_FAMILY_SI) {
		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
		gfx_nop = GFX_COMPUTE_NOP_SI;
	} else {
		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
		gfx_nop = GFX_COMPUTE_NOP;
	}

	r = amdgpu_cs_create_semaphore(&sem);
	CU_ASSERT_EQUAL(r, 0);
	for (i = 0; i < 2; i++) {
		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle[i], &ib_result_cpu[i],
					    &ib_result_mc_address[i], &va_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
				       NULL, &bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	/* 1. same context, different engines */
	ptr = ib_result_cpu[0];
	ptr[0] = sdma_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[0];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* 2. same engine, different contexts */
	ptr = ib_result_cpu[0];
	ptr[0] = gfx_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[1];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	for (i = 0; i < 2; i++) {
		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
					     ib_result_mc_address[i], 4096);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_cs_ctx_free(context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_destroy_semaphore(sem);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_nop(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t *ptr;
	uint32_t expired;
	int r, instance;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip info;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle, &ib_result_cpu,
					    &ib_result_mc_address, &va_handle);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
				       &bo_list);
		CU_ASSERT_EQUAL(r, 0);

		ptr = ib_result_cpu;
		memset(ptr, 0, 16 * sizeof(uint32_t));
		ptr[0] = PACKET3(PACKET3_NOP, 14);

		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
		ib_info.ib_mc_address = ib_result_mc_address;
		ib_info.size = 16;

		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
		ibs_request.ring = instance;
		ibs_request.number_of_ibs = 1;
		ibs_request.ibs = &ib_info;
		ibs_request.resources = bo_list;
		ibs_request.fence_info.handle = NULL;

		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
		r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
		CU_ASSERT_EQUAL(r, 0);

		fence_status.context = context_handle;
		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
		fence_status.ip_instance = 0;
		fence_status.ring = instance;
		fence_status.fence = ibs_request.seq_no;

		r = amdgpu_cs_query_fence_status(&fence_status,
						 AMDGPU_TIMEOUT_INFINITE,
						 0, &expired);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
					     ib_result_mc_address, 4096);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute(void)
{
	/* write data using the CP */
	amdgpu_command_submission_compute_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_compute_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_compute_cp_copy_data();
	/* nop test */
	amdgpu_command_submission_compute_nop();
}

/*
 * The caller must create and release pm4_src, resources, ib_info and
 * ibs_request. This helper submits the command stream described in
 * ibs_request and waits for the IB to complete.
 */
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       unsigned ip_type,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request *ibs_request)
{
	int r;
	uint32_t expired;
	uint32_t *ring_ptr;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_fence fence_status = {0};
	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
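	/* all_res holds the caller's resources plus the IB BO itself */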
	amdgpu_va_handle va_handle;

	/* prepare CS */
	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
	CU_ASSERT_NOT_EQUAL(resources, NULL);
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
	CU_ASSERT_TRUE(pm4_dw <= 1024);

	/* allocate IB */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* copy the caller's PM4 packets into the IB */
	ring_ptr = ib_result_cpu;
	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));

	ib_info->ib_mc_address = ib_result_mc_address;
	ib_info->size = pm4_dw;

	ibs_request->ip_type = ip_type;
	ibs_request->ring = instance;
	ibs_request->number_of_ibs = 1;
	ibs_request->ibs = ib_info;
	ibs_request->fence_info.handle = NULL;

	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
	all_res[res_cnt] = ib_result_handle;

	r = amdgpu_bo_list_create(device_handle, res_cnt + 1, all_res,
				  NULL, &ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ibs_request->ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request->seq_no;

	/* wait for the IB to complete */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
{
	const int sdma_write_length = 128;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, j, r, loop, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		while (loop < 2) {
			/* allocate UC bo for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length * sizeof(uint32_t),
						    4096, AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));

			resources[0] = bo;

			/* fill in PM4: test DMA write-linear */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI)
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
								  sdma_write_length);
				else
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
							       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
				pm4[i++] = 0xffffffff & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else if (family_id != AMDGPU_FAMILY_SI)
					pm4[i++] = sdma_write_length;
				while (j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
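				/* dst_sel 5: memory (async - direct), with write confirm */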
				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				while (j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   ip_type, ring_id,
						   i, pm4,
						   1, resources,
						   ib_info, ibs_request);

			/* verify that the write produced the expected pattern */
			i = 0;
			while (i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length * sizeof(uint32_t));
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean up resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_write_linear(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
}

static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
{
	const int sdma_write_length = 1024 * 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, j, r, loop, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		while (loop < 2) {
			/* allocate UC bo for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length);

			resources[0] = bo;

			/* fill in PM4: test DMA const fill */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI) {
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
								  0, 0, 0,
								  sdma_write_length / 4);
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
				} else {
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
							       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = 0xdeadbeaf;
					if (family_id >= AMDGPU_FAMILY_AI)
						pm4[i++] = sdma_write_length - 1;
					else
						pm4[i++] = sdma_write_length;
				}
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				if (family_id == AMDGPU_FAMILY_SI) {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
						   PACKET3_DMA_DATA_SI_DST_SEL(0) |
						   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
						   PACKET3_DMA_DATA_SI_CP_SYNC;
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				} else {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
						   PACKET3_DMA_DATA_DST_SEL(0) |
						   PACKET3_DMA_DATA_SRC_SEL(2) |
						   PACKET3_DMA_DATA_CP_SYNC;
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = 0;
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				}
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   ip_type, ring_id,
						   i, pm4,
						   1, resources,
						   ib_info, ibs_request);

			/* verify that the fill produced the expected pattern */
			i = 0;
			while (i < (sdma_write_length / 4)) {
				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean up resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
}

static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, j, r, loop1, loop2, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resources */
	resources = calloc(2, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop1 = loop2 = 0;
		/* run the copy four times to cover all four GTT-flag combinations */
		while (loop1 < 2) {
			while (loop2 < 2) {
				/* allocate UC bo1 for sDMA use */
				r = amdgpu_bo_alloc_and_map(device_handle,
							    sdma_write_length, 4096,
							    AMDGPU_GEM_DOMAIN_GTT,
							    gtt_flags[loop1], &bo1,
							    (void**)&bo1_cpu, &bo1_mc,
							    &bo1_va_handle);
				CU_ASSERT_EQUAL(r, 0);

				/* set bo1 */
				memset((void*)bo1_cpu, 0xaa, sdma_write_length);

				/* allocate UC bo2 for sDMA use */
				r = amdgpu_bo_alloc_and_map(device_handle,
							    sdma_write_length, 4096,
							    AMDGPU_GEM_DOMAIN_GTT,
							    gtt_flags[loop2], &bo2,
							    (void**)&bo2_cpu, &bo2_mc,
							    &bo2_va_handle);
				CU_ASSERT_EQUAL(r, 0);

				/* clear bo2 */
				memset((void*)bo2_cpu, 0, sdma_write_length);

				resources[0] = bo1;
				resources[1] = bo2;

				/* fill in PM4: test DMA copy linear */
				i = j = 0;
				if (ip_type == AMDGPU_HW_IP_DMA) {
					if (family_id == AMDGPU_FAMILY_SI) {
						pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
									  0, 0, 0,
									  sdma_write_length);
						pm4[i++] = 0xffffffff & bo2_mc;
						pm4[i++] = 0xffffffff & bo1_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
					} else {
						pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
								       SDMA_COPY_SUB_OPCODE_LINEAR,
								       0);
						if (family_id >= AMDGPU_FAMILY_AI)
							pm4[i++] = sdma_write_length - 1;
						else
							pm4[i++] = sdma_write_length;
						pm4[i++] = 0;
						pm4[i++] = 0xffffffff & bo1_mc;
						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
						pm4[i++] = 0xffffffff & bo2_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
					}
				} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
					   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
					if (family_id == AMDGPU_FAMILY_SI) {
						pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
						pm4[i++] = 0xfffffffc & bo1_mc;
						pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
							   PACKET3_DMA_DATA_SI_DST_SEL(0) |
							   PACKET3_DMA_DATA_SI_SRC_SEL(0) |
							   PACKET3_DMA_DATA_SI_CP_SYNC |
							   (0xffff00000000 & bo1_mc) >> 32;
						pm4[i++] = 0xfffffffc & bo2_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
						pm4[i++] = sdma_write_length;
					} else {
						pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
						pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
							   PACKET3_DMA_DATA_DST_SEL(0) |
							   PACKET3_DMA_DATA_SRC_SEL(0) |
							   PACKET3_DMA_DATA_CP_SYNC;
						pm4[i++] = 0xfffffffc & bo1_mc;
						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
						pm4[i++] = 0xfffffffc & bo2_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
						pm4[i++] = sdma_write_length;
					}
				}

				amdgpu_test_exec_cs_helper(context_handle,
							   ip_type, ring_id,
							   i, pm4,
							   2, resources,
							   ib_info, ibs_request);

				/* verify that the copy produced the expected pattern */
				i = 0;
				while (i < sdma_write_length) {
					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
				}
				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
							     sdma_write_length);
				CU_ASSERT_EQUAL(r, 0);
				r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
							     sdma_write_length);
				CU_ASSERT_EQUAL(r, 0);
				loop2++;
			}
			loop2 = 0;
			loop1++;
		}
	}
	/* clean up resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_copy_linear(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
}

static void amdgpu_command_submission_sdma(void)
{
	amdgpu_command_submission_sdma_write_linear();
	amdgpu_command_submission_sdma_const_fill();
	amdgpu_command_submission_sdma_copy_linear();
}

static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status[2] = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r;
	int i = 0, ib_cs_num = 2;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	for (i = 0; i < ib_cs_num; i++) {
		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
		ibs_request[i].number_of_ibs = 2;
		ibs_request[i].ibs = ib_info;
		ibs_request[i].resources = bo_list;
		ibs_request[i].fence_info.handle = NULL;
	}

	r = amdgpu_cs_submit(context_handle, 0, ibs_request, ib_cs_num);
	CU_ASSERT_EQUAL(r, 0);

	for (i = 0; i < ib_cs_num; i++) {
		fence_status[i].context = context_handle;
		fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
		fence_status[i].fence = ibs_request[i].seq_no;
	}

	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
				  AMDGPU_TIMEOUT_INFINITE,
				  &expired, NULL);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
   1517 
   1518 static void amdgpu_command_submission_multi_fence(void)
   1519 {
   1520 	amdgpu_command_submission_multi_fence_wait_all(true);
   1521 	amdgpu_command_submission_multi_fence_wait_all(false);
   1522 }
   1523 
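/*
 * Userptr test: wrap page-aligned malloc'ed memory in a BO via
 * amdgpu_create_bo_from_user_mem(), map it into the GPU VA space, have
 * SDMA write a test pattern into it, then verify the pattern through
 * the original CPU pointer.
 */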
   1524 static void amdgpu_userptr_test(void)
   1525 {
   1526 	int i, r, j;
   1527 	uint32_t *pm4 = NULL;
   1528 	uint64_t bo_mc;
   1529 	void *ptr = NULL;
   1530 	int pm4_dw = 256;
   1531 	int sdma_write_length = 4;
   1532 	amdgpu_bo_handle handle;
   1533 	amdgpu_context_handle context_handle;
   1534 	struct amdgpu_cs_ib_info *ib_info;
   1535 	struct amdgpu_cs_request *ibs_request;
   1536 	amdgpu_bo_handle buf_handle;
   1537 	amdgpu_va_handle va_handle;
   1538 
   1539 	pm4 = calloc(pm4_dw, sizeof(*pm4));
   1540 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
   1541 
   1542 	ib_info = calloc(1, sizeof(*ib_info));
   1543 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
   1544 
   1545 	ibs_request = calloc(1, sizeof(*ibs_request));
   1546 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
   1547 
   1548 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
   1549 	CU_ASSERT_EQUAL(r, 0);
   1550 
   1551 	r = posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
   1552 	CU_ASSERT_EQUAL(r, 0);
   1553 	memset(ptr, 0, BUFFER_SIZE);
   1554 
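	/* Import the malloc'ed pages as a BO, then carve out and map a GPU VA range */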
   1555 	r = amdgpu_create_bo_from_user_mem(device_handle,
   1556 					   ptr, BUFFER_SIZE, &buf_handle);
   1557 	CU_ASSERT_EQUAL(r, 0);
   1558 
   1559 	r = amdgpu_va_range_alloc(device_handle,
   1560 				  amdgpu_gpu_va_range_general,
   1561 				  BUFFER_SIZE, 1, 0, &bo_mc,
   1562 				  &va_handle, 0);
   1563 	CU_ASSERT_EQUAL(r, 0);
   1564 
   1565 	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
   1566 	CU_ASSERT_EQUAL(r, 0);
   1567 
   1568 	handle = buf_handle;
   1569 
   1570 	j = i = 0;
   1571 
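	/*
	 * Build one SDMA linear-write packet: header dword, destination
	 * address lo/hi, then the dword count (count - 1 on AI and newer,
	 * count on older non-SI parts, folded into the header on SI),
	 * followed by the payload dwords.
	 */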
   1572 	if (family_id == AMDGPU_FAMILY_SI)
   1573 		pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
   1574 				sdma_write_length);
   1575 	else
   1576 		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
   1577 				SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
   1578 	pm4[i++] = 0xffffffff & bo_mc;
   1579 	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
   1580 	if (family_id >= AMDGPU_FAMILY_AI)
   1581 		pm4[i++] = sdma_write_length - 1;
   1582 	else if (family_id != AMDGPU_FAMILY_SI)
   1583 		pm4[i++] = sdma_write_length;
   1584 
   1585 	while (j++ < sdma_write_length)
   1586 		pm4[i++] = 0xdeadbeaf;
   1587 
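	/*
	 * Fork a child that clobbers pm4[0] and exits.  fork() marks the
	 * pages copy-on-write, so the child's store must not leak into
	 * the parent's command stream submitted below.
	 */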
   1588 	if (!fork()) {
   1589 		pm4[0] = 0x0;
   1590 		exit(0);
   1591 	}
   1592 
   1593 	amdgpu_test_exec_cs_helper(context_handle,
   1594 				   AMDGPU_HW_IP_DMA, 0,
   1595 				   i, pm4,
   1596 				   1, &handle,
   1597 				   ib_info, ibs_request);
   1598 	i = 0;
   1599 	while (i < sdma_write_length) {
   1600 		CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
   1601 	}
   1602 	free(ibs_request);
   1603 	free(ib_info);
   1604 	free(pm4);
   1605 
   1606 	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
   1607 	CU_ASSERT_EQUAL(r, 0);
   1608 	r = amdgpu_va_range_free(va_handle);
   1609 	CU_ASSERT_EQUAL(r, 0);
   1610 	r = amdgpu_bo_free(buf_handle);
   1611 	CU_ASSERT_EQUAL(r, 0);
   1612 	free(ptr);
   1613 
   1614 	r = amdgpu_cs_ctx_free(context_handle);
   1615 	CU_ASSERT_EQUAL(r, 0);
   1616 
   1617 	wait(NULL);
   1618 }
   1619 
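/*
 * Sync dependency test: context_handle[1] dispatches a compute shader,
 * then context_handle[0] submits a WRITE_DATA of 99 to DATA_OFFSET with
 * the first submission in its dependency list.  The scheduler must hold
 * the second job until the first completes, so 99 should be the value
 * left at DATA_OFFSET.
 */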
   1620 static void amdgpu_sync_dependency_test(void)
   1621 {
   1622 	amdgpu_context_handle context_handle[2];
   1623 	amdgpu_bo_handle ib_result_handle;
   1624 	void *ib_result_cpu;
   1625 	uint64_t ib_result_mc_address;
   1626 	struct amdgpu_cs_request ibs_request;
   1627 	struct amdgpu_cs_ib_info ib_info;
   1628 	struct amdgpu_cs_fence fence_status;
   1629 	uint32_t expired;
   1630 	int i, j, r;
   1631 	amdgpu_bo_list_handle bo_list;
   1632 	amdgpu_va_handle va_handle;
   1633 	static uint32_t *ptr;
   1634 	uint64_t seq_no;
   1635 
   1636 	r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
   1637 	CU_ASSERT_EQUAL(r, 0);
   1638 	r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
   1639 	CU_ASSERT_EQUAL(r, 0);
   1640 
   1641 	r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
   1642 				    AMDGPU_GEM_DOMAIN_GTT, 0,
   1643 				    &ib_result_handle, &ib_result_cpu,
   1644 				    &ib_result_mc_address, &va_handle);
   1645 	CU_ASSERT_EQUAL(r, 0);
   1646 
   1647 	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
   1648 			       &bo_list);
   1649 	CU_ASSERT_EQUAL(r, 0);
   1650 
   1651 	ptr = ib_result_cpu;
   1652 	i = 0;
   1653 
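	/* Stash the compute shader CODE_OFFSET dwords into the IB buffer */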
   1654 	memcpy(ptr + CODE_OFFSET, shader_bin, sizeof(shader_bin));
   1655 
   1656 	/* Dispatch minimal init config and verify it's executed */
   1657 	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
   1658 	ptr[i++] = 0x80000000;
   1659 	ptr[i++] = 0x80000000;
   1660 
   1661 	ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
   1662 	ptr[i++] = 0x80000000;
   1663 
   1664 
   1665 	/* Program compute regs */
   1666 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
   1667 	ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
   1668 	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
   1669 	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;
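	/* COMPUTE_PGM_LO/HI hold the shader GPU address in 256-byte units */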
   1670 
   1671 
   1672 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
   1673 	ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
   1674 	/*
   1675 	 * 002c0040         COMPUTE_PGM_RSRC1 <- VGPRS = 0
   1676 	 *                                       SGPRS = 1
   1677 	 *                                       PRIORITY = 0
   1678 	 *                                       FLOAT_MODE = 192 (0xc0)
   1679 	 *                                       PRIV = 0
   1680 	 *                                       DX10_CLAMP = 1
   1681 	 *                                       DEBUG_MODE = 0
   1682 	 *                                       IEEE_MODE = 0
   1683 	 *                                       BULKY = 0
   1684 	 *                                       CDBG_USER = 0
   1685 	 *
   1686 	 */
   1687 	ptr[i++] = 0x002c0040;
   1688 
   1689 
   1690 	/*
   1691 	 * 00000010         COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
   1692 	 *                                       USER_SGPR = 8
   1693 	 *                                       TRAP_PRESENT = 0
   1694 	 *                                       TGID_X_EN = 0
   1695 	 *                                       TGID_Y_EN = 0
   1696 	 *                                       TGID_Z_EN = 0
   1697 	 *                                       TG_SIZE_EN = 0
   1698 	 *                                       TIDIG_COMP_CNT = 0
   1699 	 *                                       EXCP_EN_MSB = 0
   1700 	 *                                       LDS_SIZE = 0
   1701 	 *                                       EXCP_EN = 0
   1702 	 *
   1703 	 */
   1704 	ptr[i++] = 0x00000010;
   1705 
   1706 
   1707 /*
   1708  * 00000100         COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
   1709                                          WAVESIZE = 0
   1710  *
   1711  */
   1712 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
   1713 	ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
   1714 	ptr[i++] = 0x00000100;
   1715 
   1716 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
   1717 	ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
   1718 	ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
   1719 	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
   1720 
   1721 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
   1722 	ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
   1723 	ptr[i++] = 0;
   1724 
   1725 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
   1726 	ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
   1727 	ptr[i++] = 1;
   1728 	ptr[i++] = 1;
   1729 	ptr[i++] = 1;
   1730 
   1731 
   1732 	/* Dispatch */
   1733 	ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
   1734 	ptr[i++] = 1;
   1735 	ptr[i++] = 1;
   1736 	ptr[i++] = 1;
   1737 	ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */
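	/* Launch a single 1x1x1 workgroup; 0x45 is the dispatch initiator (bit 0 = COMPUTE_SHADER_EN) */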
   1738 
   1739 
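	/* Pad the IB to an 8-dword boundary */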
   1740 	while (i & 7)
   1741 		ptr[i++] = 0xffff1000; /* type3 nop packet */
   1742 
   1743 	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
   1744 	ib_info.ib_mc_address = ib_result_mc_address;
   1745 	ib_info.size = i;
   1746 
   1747 	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
   1748 	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
   1749 	ibs_request.ring = 0;
   1750 	ibs_request.number_of_ibs = 1;
   1751 	ibs_request.ibs = &ib_info;
   1752 	ibs_request.resources = bo_list;
   1753 	ibs_request.fence_info.handle = NULL;
   1754 
   1755 	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request, 1);
   1756 	CU_ASSERT_EQUAL(r, 0);
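	/* Remember this submission's fence; the second request will depend on it */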
   1757 	seq_no = ibs_request.seq_no;
   1758 
   1761 	/* Prepare second command with dependency on the first */
   1762 	j = i;
   1763 	ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
   1764 	ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
   1765 	ptr[i++] = 0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
   1766 	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
   1767 	ptr[i++] = 99;
   1768 
   1769 	while (i & 7)
   1770 		ptr[i++] = 0xffff1000; /* type3 nop packet */
   1771 
   1772 	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
   1773 	ib_info.ib_mc_address = ib_result_mc_address + j * 4;
   1774 	ib_info.size = i - j;
   1775 
   1776 	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
   1777 	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
   1778 	ibs_request.ring = 0;
   1779 	ibs_request.number_of_ibs = 1;
   1780 	ibs_request.ibs = &ib_info;
   1781 	ibs_request.resources = bo_list;
   1782 	ibs_request.fence_info.handle = NULL;
   1783 
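	/* Make this request wait on the compute job submitted to context_handle[1] */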
   1784 	ibs_request.number_of_dependencies = 1;
   1785 
   1786 	ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
   1787 	CU_ASSERT_NOT_EQUAL(ibs_request.dependencies, NULL);
   1788 	ibs_request.dependencies[0].context = context_handle[1];
   1789 	ibs_request.dependencies[0].ip_instance = 0;
   1790 	ibs_request.dependencies[0].ring = 0;
   1791 	ibs_request.dependencies[0].fence = seq_no;
   1792 
   1793 	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request, 1);
   1794 	CU_ASSERT_EQUAL(r, 0);
   1795 
   1796 
   1797 	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
   1798 	fence_status.context = context_handle[0];
   1799 	fence_status.ip_type = AMDGPU_HW_IP_GFX;
   1800 	fence_status.ip_instance = 0;
   1801 	fence_status.ring = 0;
   1802 	fence_status.fence = ibs_request.seq_no;
   1803 
   1804 	r = amdgpu_cs_query_fence_status(&fence_status,
   1805 					 AMDGPU_TIMEOUT_INFINITE, 0, &expired);
   1806 	CU_ASSERT_EQUAL(r, 0);
   1807 
   1808 	/* Expect the second command to wait for shader to complete */
   1809 	CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);
   1810 
   1811 	r = amdgpu_bo_list_destroy(bo_list);
   1812 	CU_ASSERT_EQUAL(r, 0);
   1813 
   1814 	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
   1815 				     ib_result_mc_address, 8192);
   1816 	CU_ASSERT_EQUAL(r, 0);
   1817 
   1818 	r = amdgpu_cs_ctx_free(context_handle[0]);
   1819 	CU_ASSERT_EQUAL(r, 0);
   1820 	r = amdgpu_cs_ctx_free(context_handle[1]);
   1821 	CU_ASSERT_EQUAL(r, 0);
   1822 
   1823 	free(ibs_request.dependencies);
   1824 }
   1825