1 /* 2 * Copyright 2016 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 */ 24 25 /* This file implements randomized SDMA texture blit tests. */ 26 27 #include "r600_pipe_common.h" 28 #include "util/u_surface.h" 29 30 static uint64_t seed_xorshift128plus[2]; 31 32 /* Super fast random number generator. 33 * 34 * This rand_xorshift128plus function by Sebastiano Vigna belongs 35 * to the public domain. 36 */ 37 static uint64_t rand_xorshift128plus(void) 38 { 39 uint64_t *s = seed_xorshift128plus; 40 41 uint64_t s1 = s[0]; 42 const uint64_t s0 = s[1]; 43 s[0] = s0; 44 s1 ^= s1 << 23; 45 s[1] = s1 ^ s0 ^ (s1 >> 18) ^ (s0 >> 5); 46 return s[1] + s0; 47 } 48 49 #define RAND_NUM_SIZE 8 50 51 /* The GPU blits are emulated on the CPU using these CPU textures. 
 */

/* CPU-side mirror of a GPU texture.  Every GPU blit is replayed on this
 * copy, and compare_textures() checks the two stay identical.
 */
struct cpu_texture {
	uint8_t *ptr;		/* malloc'd pixel storage */
	uint64_t size;		/* total allocation size in bytes */
	uint64_t layer_stride;	/* byte distance between array layers */
	unsigned stride;	/* bytes per row; multiple of RAND_NUM_SIZE */
};

/* Allocate the CPU mirror for a texture described by "templ" with "bpp"
 * bytes per pixel.  The row stride is rounded up to RAND_NUM_SIZE so that
 * pixel data can be generated 8 bytes at a time.  Allocation failure is
 * caught by assert() only (acceptable for test-only code).
 */
static void alloc_cpu_texture(struct cpu_texture *tex,
			      struct pipe_resource *templ, int bpp)
{
	tex->stride = align(templ->width0 * bpp, RAND_NUM_SIZE);
	tex->layer_stride = (uint64_t)tex->stride * templ->height0;
	tex->size = tex->layer_stride * templ->array_size;
	tex->ptr = malloc(tex->size);
	assert(tex->ptr);
}

/* Fill the GPU texture "tex" and its CPU mirror "cpu" with the same random
 * pixels, 64 bits at a time.  Whole CPU rows are written (including the
 * alignment padding up to cpu->stride), so the GPU row stride must also be
 * a multiple of RAND_NUM_SIZE — asserted below.
 */
static void set_random_pixels(struct pipe_context *ctx,
			      struct pipe_resource *tex,
			      struct cpu_texture *cpu)
{
	struct pipe_transfer *t;
	uint8_t *map;
	int x,y,z;

	map = pipe_transfer_map_3d(ctx, tex, 0, PIPE_TRANSFER_WRITE,
				   0, 0, 0, tex->width0, tex->height0,
				   tex->array_size, &t);
	assert(map);

	for (z = 0; z < tex->array_size; z++) {
		for (y = 0; y < tex->height0; y++) {
			/* Row start in the mapped GPU texture... */
			uint64_t *ptr = (uint64_t*)
				(map + t->layer_stride*z + t->stride*y);
			/* ...and the matching row in the CPU mirror. */
			uint64_t *ptr_cpu = (uint64_t*)
				(cpu->ptr + cpu->layer_stride*z + cpu->stride*y);
			unsigned size = cpu->stride / RAND_NUM_SIZE;

			assert(t->stride % RAND_NUM_SIZE == 0);
			assert(cpu->stride % RAND_NUM_SIZE == 0);

			/* One random uint64_t feeds both copies at once. */
			for (x = 0; x < size; x++)
				*ptr++ = *ptr_cpu++ = rand_xorshift128plus();
		}
	}

	pipe_transfer_unmap(ctx, t);
}

/* Return true if the GPU texture contents equal the CPU mirror.  Only the
 * first width0*bpp bytes of each row are compared; row padding is ignored.
 */
static bool compare_textures(struct pipe_context *ctx,
			     struct pipe_resource *tex,
			     struct cpu_texture *cpu, int bpp)
{
	struct pipe_transfer *t;
	uint8_t *map;
	int y,z;
	bool pass = true;

	map = pipe_transfer_map_3d(ctx, tex, 0, PIPE_TRANSFER_READ,
				   0, 0, 0, tex->width0, tex->height0,
				   tex->array_size, &t);
	assert(map);

	for (z = 0; z < tex->array_size; z++) {
		for (y = 0; y < tex->height0; y++) {
			uint8_t *ptr = map + t->layer_stride*z + t->stride*y;
			uint8_t *cpu_ptr = cpu->ptr +
				cpu->layer_stride*z + cpu->stride*y;

			if (memcmp(ptr, cpu_ptr, tex->width0 * bpp)) {
				pass = false;
				/* First mismatch decides the test; stop early. */
				goto done;
			}
		}
	}
done:
	pipe_transfer_unmap(ctx, t);
	return pass;
}

/* Map a bytes-per-pixel count (1..16, power of two) to an integer format
 * with exactly that pixel size.
 */
static enum pipe_format get_format_from_bpp(int bpp)
{
	switch (bpp) {
	case 1:
		return PIPE_FORMAT_R8_UINT;
	case 2:
		return PIPE_FORMAT_R16_UINT;
	case 4:
		return PIPE_FORMAT_R32_UINT;
	case 8:
		return PIPE_FORMAT_R32G32_UINT;
	case 16:
		return PIPE_FORMAT_R32G32B32A32_UINT;
	default:
		assert(0);
		return PIPE_FORMAT_NONE;
	}
}

/* Human-readable name of a radeon surface tiling mode, for the log lines. */
static const char *array_mode_to_string(unsigned mode)
{
	switch (mode) {
	case RADEON_SURF_MODE_LINEAR_ALIGNED:
		return "LINEAR_ALIGNED";
	case RADEON_SURF_MODE_1D:
		return "1D_TILED_THIN1";
	case RADEON_SURF_MODE_2D:
		return "2D_TILED_THIN1";
	default:
		assert(0);
		return " UNKNOWN";
	}
}

/* Randomly pick the upper bound for a texture dimension, biased so that
 * large sizes, 1D-tiling-sized surfaces, and common sizes all get hit.
 */
static unsigned generate_max_tex_side(unsigned max_tex_side)
{
	switch (rand() % 4) {
	case 0:
		/* Try to hit large sizes in 1/4 of the cases. */
		return max_tex_side;
	case 1:
		/* Try to hit 1D tiling in 1/4 of the cases. */
		return 128;
	default:
		/* Try to hit common sizes in 2/4 of the cases. */
		return 2048;
	}
}

/* Randomized SDMA blit test entry point: endlessly generates random
 * src/dst textures, blits between them on the GPU, replays each blit on
 * the CPU, and compares the results.  Never returns — it calls exit(0)
 * if the (effectively infinite) iteration count is ever exhausted.
 */
void r600_test_dma(struct r600_common_screen *rscreen)
{
	struct pipe_screen *screen = &rscreen->b;
	struct pipe_context *ctx = screen->context_create(screen, NULL, 0);
	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
	uint64_t max_alloc_size;
	unsigned i, iterations, num_partial_copies, max_levels, max_tex_side;
	unsigned num_pass = 0, num_fail = 0;

	max_levels = screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS);
	max_tex_side = 1 << (max_levels - 1);

	/* Max 128 MB allowed for both textures.
	 */
	max_alloc_size = 128 * 1024 * 1024;

	/* the seed for random test parameters */
	srand(0x9b47d95b);
	/* the seed for random pixel data */
	seed_xorshift128plus[0] = 0x3bffb83978e24f88;
	seed_xorshift128plus[1] = 0x9238d5d56c71cd35;

	iterations = 1000000000; /* just kill it when you are bored */
	num_partial_copies = 30;

	/* These parameters are randomly generated per test:
	 * - whether to do one whole-surface copy or N partial copies per test
	 * - which tiling modes to use (LINEAR_ALIGNED, 1D, 2D)
	 * - which texture dimensions to use
	 * - whether to use VRAM (all tiling modes) and GTT (staging, linear
	 *   only) allocations
	 * - random initial pixels in src
	 * - generate random subrectangle copies for partial blits
	 */
	for (i = 0; i < iterations; i++) {
		struct pipe_resource tsrc = {}, tdst = {}, *src, *dst;
		struct r600_texture *rdst;
		struct r600_texture *rsrc;
		struct cpu_texture src_cpu, dst_cpu;
		unsigned bpp, max_width, max_height, max_depth, j, num;
		unsigned gfx_blits = 0, dma_blits = 0, max_tex_side_gen;
		unsigned max_tex_layers;
		bool pass;
		bool do_partial_copies = rand() & 1;

		/* generate a random test case */
		tsrc.target = tdst.target = PIPE_TEXTURE_2D_ARRAY;
		tsrc.depth0 = tdst.depth0 = 1;

		/* bpp is 1, 2, 4, 8, or 16 bytes per pixel. */
		bpp = 1 << (rand() % 5);
		tsrc.format = tdst.format = get_format_from_bpp(bpp);

		max_tex_side_gen = generate_max_tex_side(max_tex_side);
		max_tex_layers = rand() % 4 ? 1 : 5;

		tsrc.width0 = (rand() % max_tex_side_gen) + 1;
		tsrc.height0 = (rand() % max_tex_side_gen) + 1;
		tsrc.array_size = (rand() % max_tex_layers) + 1;

		/* Have a 1/4 chance of getting power-of-two dimensions. */
		if (rand() % 4 == 0) {
			tsrc.width0 = util_next_power_of_two(tsrc.width0);
			tsrc.height0 = util_next_power_of_two(tsrc.height0);
		}

		if (!do_partial_copies) {
			/* whole-surface copies only, same dimensions */
			tdst = tsrc;
		} else {
			max_tex_side_gen = generate_max_tex_side(max_tex_side);
			max_tex_layers = rand() % 4 ? 1 : 5;

			/* many partial copies, dimensions can be different */
			tdst.width0 = (rand() % max_tex_side_gen) + 1;
			tdst.height0 = (rand() % max_tex_side_gen) + 1;
			tdst.array_size = (rand() % max_tex_layers) + 1;

			/* Have a 1/4 chance of getting power-of-two dimensions. */
			if (rand() % 4 == 0) {
				tdst.width0 = util_next_power_of_two(tdst.width0);
				tdst.height0 = util_next_power_of_two(tdst.height0);
			}
		}

		/* check texture sizes */
		if ((uint64_t)tsrc.width0 * tsrc.height0 * tsrc.array_size * bpp +
		    (uint64_t)tdst.width0 * tdst.height0 * tdst.array_size * bpp >
		    max_alloc_size) {
			/* too large, try again (i-- keeps the test count intact) */
			i--;
			continue;
		}

		/* VRAM + the tiling mode depends on dimensions (3/4 of cases),
		 * or GTT + linear only (1/4 of cases)
		 */
		tsrc.usage = rand() % 4 ? PIPE_USAGE_DEFAULT : PIPE_USAGE_STAGING;
		tdst.usage = rand() % 4 ? PIPE_USAGE_DEFAULT : PIPE_USAGE_STAGING;

		/* Allocate textures (both the GPU and CPU copies).
		 * The CPU will emulate what the GPU should be doing.
		 */
		src = screen->resource_create(screen, &tsrc);
		dst = screen->resource_create(screen, &tdst);
		assert(src);
		assert(dst);
		rdst = (struct r600_texture*)dst;
		rsrc = (struct r600_texture*)src;
		alloc_cpu_texture(&src_cpu, &tsrc, bpp);
		alloc_cpu_texture(&dst_cpu, &tdst, bpp);

		printf("%4u: dst = (%5u x %5u x %u, %s), "
		       " src = (%5u x %5u x %u, %s), bpp = %2u, ",
		       i, tdst.width0, tdst.height0, tdst.array_size,
		       array_mode_to_string(rdst->surface.level[0].mode),
		       tsrc.width0, tsrc.height0, tsrc.array_size,
		       array_mode_to_string(rsrc->surface.level[0].mode), bpp);
		fflush(stdout);

		/* set src pixels */
		set_random_pixels(ctx, src, &src_cpu);

		/* clear dst pixels (both the GPU surface and the CPU mirror) */
		rctx->clear_buffer(ctx, dst, 0, rdst->surface.surf_size, 0, true);
		memset(dst_cpu.ptr, 0, dst_cpu.layer_stride * tdst.array_size);

		/* preparation */
		max_width = MIN2(tsrc.width0, tdst.width0);
		max_height = MIN2(tsrc.height0, tdst.height0);
		max_depth = MIN2(tsrc.array_size, tdst.array_size);

		num = do_partial_copies ? num_partial_copies : 1;
		for (j = 0; j < num; j++) {
			int width, height, depth;
			int srcx, srcy, srcz, dstx, dsty, dstz;
			struct pipe_box box;
			/* Snapshot the counters so we can tell which engine
			 * handled this blit afterwards.
			 */
			unsigned old_num_draw_calls = rctx->num_draw_calls;
			unsigned old_num_dma_calls = rctx->num_dma_calls;

			if (!do_partial_copies) {
				/* copy whole src to dst */
				width = max_width;
				height = max_height;
				depth = max_depth;

				srcx = srcy = srcz = dstx = dsty = dstz = 0;
			} else {
				/* random sub-rectangle copies from src to dst */
				depth = (rand() % max_depth) + 1;
				srcz = rand() % (tsrc.array_size - depth + 1);
				dstz = rand() % (tdst.array_size - depth + 1);

				/* special code path to hit the tiled partial copies */
				if (!rsrc->surface.is_linear &&
				    !rdst->surface.is_linear &&
				    rand() & 1) {
					/* Too small for an 8-aligned rect;
					 * skip this copy.
					 */
					if (max_width < 8 || max_height < 8)
						continue;
					/* Sizes and offsets are multiples of 8.
					 * Note: % binds tighter than &, so
					 * "& ~0x7" aligns the whole random
					 * offset down.
					 */
					width = ((rand() % (max_width / 8)) + 1) * 8;
					height = ((rand() % (max_height / 8)) + 1) * 8;

					srcx = rand() % (tsrc.width0 - width + 1) & ~0x7;
					srcy = rand() % (tsrc.height0 - height + 1) & ~0x7;

					dstx = rand() % (tdst.width0 - width + 1) & ~0x7;
					dsty = rand() % (tdst.height0 - height + 1) & ~0x7;
				} else {
					/* just make sure that it doesn't divide by zero */
					assert(max_width > 0 && max_height > 0);

					width = (rand() % max_width) + 1;
					height = (rand() % max_height) + 1;

					srcx = rand() % (tsrc.width0 - width + 1);
					srcy = rand() % (tsrc.height0 - height + 1);

					dstx = rand() % (tdst.width0 - width + 1);
					dsty = rand() % (tdst.height0 - height + 1);
				}

				/* special code path to hit out-of-bounds reads in L2T */
				if (rsrc->surface.is_linear &&
				    !rdst->surface.is_linear &&
				    rand() % 4 == 0) {
					srcx = 0;
					srcy = 0;
					srcz = 0;
				}
			}

			/* GPU copy */
			u_box_3d(srcx, srcy, srcz, width, height, depth, &box);
			rctx->dma_copy(ctx, dst, 0, dstx, dsty, dstz, src, 0, &box);

			/* See which engine was used. */
			gfx_blits += rctx->num_draw_calls > old_num_draw_calls;
			dma_blits += rctx->num_dma_calls > old_num_dma_calls;

			/* CPU copy */
			util_copy_box(dst_cpu.ptr, tdst.format, dst_cpu.stride,
				      dst_cpu.layer_stride,
				      dstx, dsty, dstz, width, height, depth,
				      src_cpu.ptr, src_cpu.stride,
				      src_cpu.layer_stride,
				      srcx, srcy, srcz);
		}

		pass = compare_textures(ctx, dst, &dst_cpu, bpp);
		if (pass)
			num_pass++;
		else
			num_fail++;

		printf("BLITs: GFX = %2u, DMA = %2u, %s [%u/%u]\n",
		       gfx_blits, dma_blits, pass ? "pass" : "fail",
		       num_pass, num_pass+num_fail);

		/* cleanup */
		pipe_resource_reference(&src, NULL);
		pipe_resource_reference(&dst, NULL);
		free(src_cpu.ptr);
		free(dst_cpu.ptr);
	}

	ctx->destroy(ctx);
	/* Deliberate: this is a standalone test mode, never a normal return. */
	exit(0);
}