1 /* 2 * Copyright (C) 2011 Francisco Jerez. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining 6 * a copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sublicense, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the 14 * next paragraph) shall be included in all copies or substantial 15 * portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 * 25 */ 26 27 #include <fcntl.h> 28 #include <stdio.h> 29 #include <sys/stat.h> 30 #include <inttypes.h> 31 #include "pipe/p_state.h" 32 #include "pipe/p_context.h" 33 #include "pipe/p_screen.h" 34 #include "pipe/p_defines.h" 35 #include "pipe/p_shader_tokens.h" 36 #include "util/u_memory.h" 37 #include "util/u_inlines.h" 38 #include "util/u_sampler.h" 39 #include "util/u_format.h" 40 #include "tgsi/tgsi_text.h" 41 #include "pipe-loader/pipe_loader.h" 42 43 #define MAX_RESOURCES 4 44 45 struct context { 46 struct pipe_loader_device *dev; 47 struct pipe_screen *screen; 48 struct pipe_context *pipe; 49 void *hwcs; 50 void *hwsmp[MAX_RESOURCES]; 51 struct pipe_resource *tex[MAX_RESOURCES]; 52 bool tex_rw[MAX_RESOURCES]; 53 struct pipe_sampler_view *view[MAX_RESOURCES]; 54 struct pipe_surface *surf[MAX_RESOURCES]; 55 }; 56 57 #define DUMP_COMPUTE_PARAM(p, c) do { \ 58 uint64_t __v[4]; \ 59 int __i, __n; \ 60 \ 61 __n = ctx->screen->get_compute_param(ctx->screen, c, __v); \ 62 printf("%s: {", #c); \ 63 \ 64 for (__i = 0; __i < __n / sizeof(*__v); ++__i) \ 65 printf(" %"PRIu64, __v[__i]); \ 66 \ 67 printf(" }\n"); \ 68 } while (0) 69 70 static void init_ctx(struct context *ctx) 71 { 72 int ret; 73 74 ret = pipe_loader_probe(&ctx->dev, 1); 75 assert(ret); 76 77 ctx->screen = pipe_loader_create_screen(ctx->dev, PIPE_SEARCH_DIR); 78 assert(ctx->screen); 79 80 ctx->pipe = ctx->screen->context_create(ctx->screen, NULL); 81 assert(ctx->pipe); 82 83 DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_GRID_DIMENSION); 84 DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_MAX_GRID_SIZE); 85 DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE); 86 } 87 88 static void destroy_ctx(struct context *ctx) 89 { 90 ctx->pipe->destroy(ctx->pipe); 91 ctx->screen->destroy(ctx->screen); 92 pipe_loader_release(&ctx->dev, 1); 93 FREE(ctx); 94 } 95 96 static char * 97 preprocess_prog(struct context *ctx, const char *src, const char *defs) 98 { 99 const char header[] = 100 "#define RGLOBAL RES[32767]\n" 101 "#define RLOCAL RES[32766]\n" 102 "#define RPRIVATE RES[32765]\n" 103 "#define RINPUT RES[32764]\n"; 104 char cmd[512]; 105 char tmp[] = "/tmp/test-compute.tgsi-XXXXXX"; 106 char *buf; 107 int fd, ret; 108 struct stat st; 109 FILE *p; 110 111 /* Open a temporary file */ 112 fd = mkstemp(tmp); 113 assert(fd >= 0); 114 snprintf(cmd, sizeof(cmd), "cpp -P -nostdinc -undef %s > %s", 115 defs ? defs : "", tmp); 116 117 /* Preprocess */ 118 p = popen(cmd, "w"); 119 fwrite(header, strlen(header), 1, p); 120 fwrite(src, strlen(src), 1, p); 121 ret = pclose(p); 122 assert(!ret); 123 124 /* Read back */ 125 ret = fstat(fd, &st); 126 assert(!ret); 127 128 buf = malloc(st.st_size + 1); 129 ret = read(fd, buf, st.st_size); 130 assert(ret == st.st_size); 131 buf[ret] = 0; 132 133 /* Clean up */ 134 close(fd); 135 unlink(tmp); 136 137 return buf; 138 } 139 140 static void init_prog(struct context *ctx, unsigned local_sz, 141 unsigned private_sz, unsigned input_sz, 142 const char *src, const char *defs) 143 { 144 struct pipe_context *pipe = ctx->pipe; 145 struct tgsi_token prog[1024]; 146 struct pipe_compute_state cs = { 147 .prog = prog, 148 .req_local_mem = local_sz, 149 .req_private_mem = private_sz, 150 .req_input_mem = input_sz 151 }; 152 char *psrc = preprocess_prog(ctx, src, defs); 153 int ret; 154 155 ret = tgsi_text_translate(psrc, prog, Elements(prog)); 156 assert(ret); 157 free(psrc); 158 159 ctx->hwcs = pipe->create_compute_state(pipe, &cs); 160 assert(ctx->hwcs); 161 162 pipe->bind_compute_state(pipe, ctx->hwcs); 163 } 164 165 static void destroy_prog(struct context *ctx) 166 { 167 struct pipe_context *pipe = ctx->pipe; 168 169 pipe->delete_compute_state(pipe, ctx->hwcs); 170 ctx->hwcs = NULL; 171 } 172 173 static void init_tex(struct context *ctx, int slot, 174 enum pipe_texture_target target, bool rw, 175 enum pipe_format format, int w, int h, 176 void (*init)(void *, int, int, int)) 177 { 178 struct pipe_context *pipe = ctx->pipe; 179 struct pipe_resource **tex = &ctx->tex[slot]; 180 struct pipe_resource ttex = { 181 .target = target, 182 .format = format, 183 .width0 = w, 184 .height0 = h, 185 .depth0 = 1, 186 .array_size = 1, 187 .bind = (PIPE_BIND_SAMPLER_VIEW | 188 PIPE_BIND_COMPUTE_RESOURCE | 189 PIPE_BIND_GLOBAL) 190 }; 191 int dx = util_format_get_blocksize(format); 192 int dy = util_format_get_stride(format, w); 193 int nx = (target == PIPE_BUFFER ? (w / dx) : 194 util_format_get_nblocksx(format, w)); 195 int ny = (target == PIPE_BUFFER ? 1 : 196 util_format_get_nblocksy(format, h)); 197 struct pipe_transfer *xfer; 198 char *map; 199 int x, y; 200 201 *tex = ctx->screen->resource_create(ctx->screen, &ttex); 202 assert(*tex); 203 204 xfer = pipe->get_transfer(pipe, *tex, 0, PIPE_TRANSFER_WRITE, 205 &(struct pipe_box) { .width = w, 206 .height = h, 207 .depth = 1 }); 208 assert(xfer); 209 210 map = pipe->transfer_map(pipe, xfer); 211 assert(map); 212 213 for (y = 0; y < ny; ++y) { 214 for (x = 0; x < nx; ++x) { 215 init(map + y * dy + x * dx, slot, x, y); 216 } 217 } 218 219 pipe->transfer_unmap(pipe, xfer); 220 pipe->transfer_destroy(pipe, xfer); 221 222 ctx->tex_rw[slot] = rw; 223 } 224 225 static bool default_check(void *x, void *y, int sz) { 226 return !memcmp(x, y, sz); 227 } 228 229 static void check_tex(struct context *ctx, int slot, 230 void (*expect)(void *, int, int, int), 231 bool (*check)(void *, void *, int)) 232 { 233 struct pipe_context *pipe = ctx->pipe; 234 struct pipe_resource *tex = ctx->tex[slot]; 235 int dx = util_format_get_blocksize(tex->format); 236 int dy = util_format_get_stride(tex->format, tex->width0); 237 int nx = (tex->target == PIPE_BUFFER ? (tex->width0 / dx) : 238 util_format_get_nblocksx(tex->format, tex->width0)); 239 int ny = (tex->target == PIPE_BUFFER ? 1 : 240 util_format_get_nblocksy(tex->format, tex->height0)); 241 struct pipe_transfer *xfer; 242 char *map; 243 int x, y, i; 244 int err = 0; 245 246 if (!check) 247 check = default_check; 248 249 xfer = pipe->get_transfer(pipe, tex, 0, PIPE_TRANSFER_READ, 250 &(struct pipe_box) { .width = tex->width0, 251 .height = tex->height0, 252 .depth = 1 }); 253 assert(xfer); 254 255 map = pipe->transfer_map(pipe, xfer); 256 assert(map); 257 258 for (y = 0; y < ny; ++y) { 259 for (x = 0; x < nx; ++x) { 260 uint32_t exp[4]; 261 uint32_t *res = (uint32_t *)(map + y * dy + x * dx); 262 263 expect(exp, slot, x, y); 264 if (check(res, exp, dx) || (++err) > 20) 265 continue; 266 267 if (dx < 4) { 268 uint32_t u = 0, v = 0; 269 270 for (i = 0; i < dx; i++) { 271 u |= ((uint8_t *)exp)[i] << (8 * i); 272 v |= ((uint8_t *)res)[i] << (8 * i); 273 } 274 printf("(%d, %d): got 0x%x, expected 0x%x\n", 275 x, y, v, u); 276 } else { 277 for (i = 0; i < dx / 4; i++) { 278 printf("(%d, %d)[%d]: got 0x%x/%f," 279 " expected 0x%x/%f\n", x, y, i, 280 res[i], ((float *)res)[i], 281 exp[i], ((float *)exp)[i]); 282 } 283 } 284 } 285 } 286 287 pipe->transfer_unmap(pipe, xfer); 288 pipe->transfer_destroy(pipe, xfer); 289 290 if (err) 291 printf("(%d, %d): \x1b[31mFAIL\x1b[0m (%d)\n", x, y, err); 292 else 293 printf("(%d, %d): \x1b[32mOK\x1b[0m\n", x, y); 294 } 295 296 static void destroy_tex(struct context *ctx) 297 { 298 int i; 299 300 for (i = 0; i < MAX_RESOURCES; ++i) { 301 if (ctx->tex[i]) 302 pipe_resource_reference(&ctx->tex[i], NULL); 303 } 304 } 305 306 static void init_sampler_views(struct context *ctx, const int *slots) 307 { 308 struct pipe_context *pipe = ctx->pipe; 309 struct pipe_sampler_view tview; 310 int i; 311 312 for (i = 0; *slots >= 0; ++i, ++slots) { 313 u_sampler_view_default_template(&tview, ctx->tex[*slots], 314 ctx->tex[*slots]->format); 315 316 ctx->view[i] = pipe->create_sampler_view(pipe, ctx->tex[*slots], 317 &tview); 318 assert(ctx->view[i]); 319 } 320 321 pipe->set_compute_sampler_views(pipe, 0, i, ctx->view); 322 } 323 324 static void destroy_sampler_views(struct context *ctx) 325 { 326 struct pipe_context *pipe = ctx->pipe; 327 int i; 328 329 pipe->set_compute_sampler_views(pipe, 0, MAX_RESOURCES, NULL); 330 331 for (i = 0; i < MAX_RESOURCES; ++i) { 332 if (ctx->view[i]) { 333 pipe->sampler_view_destroy(pipe, ctx->view[i]); 334 ctx->view[i] = NULL; 335 } 336 } 337 } 338 339 static void init_compute_resources(struct context *ctx, const int *slots) 340 { 341 struct pipe_context *pipe = ctx->pipe; 342 int i; 343 344 for (i = 0; *slots >= 0; ++i, ++slots) { 345 struct pipe_surface tsurf = { 346 .format = ctx->tex[*slots]->format, 347 .usage = ctx->tex[*slots]->bind, 348 .writable = ctx->tex_rw[*slots] 349 }; 350 351 if (ctx->tex[*slots]->target == PIPE_BUFFER) 352 tsurf.u.buf.last_element = ctx->tex[*slots]->width0 - 1; 353 354 ctx->surf[i] = pipe->create_surface(pipe, ctx->tex[*slots], 355 &tsurf); 356 assert(ctx->surf[i]); 357 } 358 359 pipe->set_compute_resources(pipe, 0, i, ctx->surf); 360 } 361 362 static void destroy_compute_resources(struct context *ctx) 363 { 364 struct pipe_context *pipe = ctx->pipe; 365 int i; 366 367 pipe->set_compute_resources(pipe, 0, MAX_RESOURCES, NULL); 368 369 for (i = 0; i < MAX_RESOURCES; ++i) { 370 if (ctx->surf[i]) { 371 pipe->surface_destroy(pipe, ctx->surf[i]); 372 ctx->surf[i] = NULL; 373 } 374 } 375 } 376 377 static void init_sampler_states(struct context *ctx, int n) 378 { 379 struct pipe_context *pipe = ctx->pipe; 380 struct pipe_sampler_state smp = { 381 .normalized_coords = 1, 382 }; 383 int i; 384 385 for (i = 0; i < n; ++i) { 386 ctx->hwsmp[i] = pipe->create_sampler_state(pipe, &smp); 387 assert(ctx->hwsmp[i]); 388 } 389 390 pipe->bind_compute_sampler_states(pipe, 0, i, ctx->hwsmp); 391 } 392 393 static void destroy_sampler_states(struct context *ctx) 394 { 395 struct pipe_context *pipe = ctx->pipe; 396 int i; 397 398 pipe->bind_compute_sampler_states(pipe, 0, MAX_RESOURCES, NULL); 399 400 for (i = 0; i < MAX_RESOURCES; ++i) { 401 if (ctx->hwsmp[i]) { 402 pipe->delete_sampler_state(pipe, ctx->hwsmp[i]); 403 ctx->hwsmp[i] = NULL; 404 } 405 } 406 } 407 408 static void init_globals(struct context *ctx, const int *slots, 409 uint32_t **handles) 410 { 411 struct pipe_context *pipe = ctx->pipe; 412 struct pipe_resource *res[MAX_RESOURCES]; 413 int i; 414 415 for (i = 0; *slots >= 0; ++i, ++slots) 416 res[i] = ctx->tex[*slots]; 417 418 pipe->set_global_binding(pipe, 0, i, res, handles); 419 } 420 421 static void destroy_globals(struct context *ctx) 422 { 423 struct pipe_context *pipe = ctx->pipe; 424 425 pipe->set_global_binding(pipe, 0, MAX_RESOURCES, NULL, NULL); 426 } 427 428 static void launch_grid(struct context *ctx, const uint *block_layout, 429 const uint *grid_layout, uint32_t pc, 430 const void *input) 431 { 432 struct pipe_context *pipe = ctx->pipe; 433 434 pipe->launch_grid(pipe, block_layout, grid_layout, pc, input); 435 } 436 437 static void test_system_values(struct context *ctx) 438 { 439 const char *src = "COMP\n" 440 "DCL RES[0], BUFFER, RAW, WR\n" 441 "DCL SV[0], BLOCK_ID[0]\n" 442 "DCL SV[1], BLOCK_SIZE[0]\n" 443 "DCL SV[2], GRID_SIZE[0]\n" 444 "DCL SV[3], THREAD_ID[0]\n" 445 "DCL TEMP[0], LOCAL\n" 446 "DCL TEMP[1], LOCAL\n" 447 "IMM UINT32 { 64, 0, 0, 0 }\n" 448 "IMM UINT32 { 16, 0, 0, 0 }\n" 449 "IMM UINT32 { 0, 0, 0, 0 }\n" 450 "\n" 451 "BGNSUB" 452 " UMUL TEMP[0], SV[0], SV[1]\n" 453 " UADD TEMP[0], TEMP[0], SV[3]\n" 454 " UMUL TEMP[1], SV[1], SV[2]\n" 455 " UMUL TEMP[0].w, TEMP[0], TEMP[1].zzzz\n" 456 " UMUL TEMP[0].zw, TEMP[0], TEMP[1].yyyy\n" 457 " UMUL TEMP[0].yzw, TEMP[0], TEMP[1].xxxx\n" 458 " UADD TEMP[0].xy, TEMP[0].xyxy, TEMP[0].zwzw\n" 459 " UADD TEMP[0].x, TEMP[0].xxxx, TEMP[0].yyyy\n" 460 " UMUL TEMP[0].x, TEMP[0], IMM[0]\n" 461 " STORE RES[0].xyzw, TEMP[0], SV[0]\n" 462 " UADD TEMP[0].x, TEMP[0], IMM[1]\n" 463 " STORE RES[0].xyzw, TEMP[0], SV[1]\n" 464 " UADD TEMP[0].x, TEMP[0], IMM[1]\n" 465 " STORE RES[0].xyzw, TEMP[0], SV[2]\n" 466 " UADD TEMP[0].x, TEMP[0], IMM[1]\n" 467 " STORE RES[0].xyzw, TEMP[0], SV[3]\n" 468 " RET\n" 469 "ENDSUB\n"; 470 void init(void *p, int s, int x, int y) { 471 *(uint32_t *)p = 0xdeadbeef; 472 } 473 void expect(void *p, int s, int x, int y) { 474 int id = x / 16, sv = (x % 16) / 4, c = x % 4; 475 int tid[] = { id % 20, (id % 240) / 20, id / 240, 0 }; 476 int bsz[] = { 4, 3, 5, 1}; 477 int gsz[] = { 5, 4, 1, 1}; 478 479 switch (sv) { 480 case 0: 481 *(uint32_t *)p = tid[c] / bsz[c]; 482 break; 483 case 1: 484 *(uint32_t *)p = bsz[c]; 485 break; 486 case 2: 487 *(uint32_t *)p = gsz[c]; 488 break; 489 case 3: 490 *(uint32_t *)p = tid[c] % bsz[c]; 491 break; 492 } 493 } 494 495 printf("- %s\n", __func__); 496 497 init_prog(ctx, 0, 0, 0, src, NULL); 498 init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 499 76800, 0, init); 500 init_compute_resources(ctx, (int []) { 0, -1 }); 501 launch_grid(ctx, (uint []){4, 3, 5}, (uint []){5, 4, 1}, 0, NULL); 502 check_tex(ctx, 0, expect, NULL); 503 destroy_compute_resources(ctx); 504 destroy_tex(ctx); 505 destroy_prog(ctx); 506 } 507 508 static void test_resource_access(struct context *ctx) 509 { 510 const char *src = "COMP\n" 511 "DCL RES[0], BUFFER, RAW, WR\n" 512 "DCL RES[1], 2D, RAW, WR\n" 513 "DCL SV[0], BLOCK_ID[0]\n" 514 "DCL TEMP[0], LOCAL\n" 515 "DCL TEMP[1], LOCAL\n" 516 "IMM UINT32 { 15, 0, 0, 0 }\n" 517 "IMM UINT32 { 16, 1, 0, 0 }\n" 518 "\n" 519 " BGNSUB\n" 520 " UADD TEMP[0].x, SV[0].xxxx, SV[0].yyyy\n" 521 " AND TEMP[0].x, TEMP[0], IMM[0]\n" 522 " UMUL TEMP[0].x, TEMP[0], IMM[1]\n" 523 " LOAD TEMP[0].xyzw, RES[0], TEMP[0]\n" 524 " UMUL TEMP[1], SV[0], IMM[1]\n" 525 " STORE RES[1].xyzw, TEMP[1], TEMP[0]\n" 526 " RET\n" 527 " ENDSUB\n"; 528 void init0(void *p, int s, int x, int y) { 529 *(float *)p = 8.0 - (float)x; 530 } 531 void init1(void *p, int s, int x, int y) { 532 *(uint32_t *)p = 0xdeadbeef; 533 } 534 void expect(void *p, int s, int x, int y) { 535 *(float *)p = 8.0 - (float)((x + 4*y) & 0x3f); 536 } 537 538 printf("- %s\n", __func__); 539 540 init_prog(ctx, 0, 0, 0, src, NULL); 541 init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 542 256, 0, init0); 543 init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT, 544 60, 12, init1); 545 init_compute_resources(ctx, (int []) { 0, 1, -1 }); 546 launch_grid(ctx, (uint []){1, 1, 1}, (uint []){15, 12, 1}, 0, NULL); 547 check_tex(ctx, 1, expect, NULL); 548 destroy_compute_resources(ctx); 549 destroy_tex(ctx); 550 destroy_prog(ctx); 551 } 552 553 static void test_function_calls(struct context *ctx) 554 { 555 const char *src = "COMP\n" 556 "DCL RES[0], 2D, RAW, WR\n" 557 "DCL SV[0], BLOCK_ID[0]\n" 558 "DCL SV[1], BLOCK_SIZE[0]\n" 559 "DCL SV[2], GRID_SIZE[0]\n" 560 "DCL SV[3], THREAD_ID[0]\n" 561 "DCL TEMP[0]\n" 562 "DCL TEMP[1]\n" 563 "DCL TEMP[2], LOCAL\n" 564 "IMM UINT32 { 0, 11, 22, 33 }\n" 565 "IMM FLT32 { 11, 33, 55, 99 }\n" 566 "IMM UINT32 { 4, 1, 0, 0 }\n" 567 "IMM UINT32 { 12, 0, 0, 0 }\n" 568 "\n" 569 "00: BGNSUB\n" 570 "01: UMUL TEMP[0].x, TEMP[0], TEMP[0]\n" 571 "02: UADD TEMP[1].x, TEMP[1], IMM[2].yyyy\n" 572 "03: USLT TEMP[0].x, TEMP[0], IMM[0]\n" 573 "04: RET\n" 574 "05: ENDSUB\n" 575 "06: BGNSUB\n" 576 "07: UMUL TEMP[0].x, TEMP[0], TEMP[0]\n" 577 "08: UADD TEMP[1].x, TEMP[1], IMM[2].yyyy\n" 578 "09: USLT TEMP[0].x, TEMP[0], IMM[0].yyyy\n" 579 "10: IF TEMP[0].xxxx\n" 580 "11: CAL :0\n" 581 "12: ENDIF\n" 582 "13: RET\n" 583 "14: ENDSUB\n" 584 "15: BGNSUB\n" 585 "16: UMUL TEMP[2], SV[0], SV[1]\n" 586 "17: UADD TEMP[2], TEMP[2], SV[3]\n" 587 "18: UMUL TEMP[2], TEMP[2], IMM[2]\n" 588 "00: MOV TEMP[1].x, IMM[2].wwww\n" 589 "19: LOAD TEMP[0].x, RES[0].xxxx, TEMP[2]\n" 590 "20: CAL :6\n" 591 "21: STORE RES[0].x, TEMP[2], TEMP[1].xxxx\n" 592 "22: RET\n" 593 "23: ENDSUB\n"; 594 void init(void *p, int s, int x, int y) { 595 *(uint32_t *)p = 15 * y + x; 596 } 597 void expect(void *p, int s, int x, int y) { 598 *(uint32_t *)p = (15 * y + x) < 4 ? 2 : 1 ; 599 } 600 601 printf("- %s\n", __func__); 602 603 init_prog(ctx, 0, 0, 0, src, NULL); 604 init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT, 605 15, 12, init); 606 init_compute_resources(ctx, (int []) { 0, -1 }); 607 launch_grid(ctx, (uint []){3, 3, 3}, (uint []){5, 4, 1}, 15, NULL); 608 check_tex(ctx, 0, expect, NULL); 609 destroy_compute_resources(ctx); 610 destroy_tex(ctx); 611 destroy_prog(ctx); 612 } 613 614 static void test_input_global(struct context *ctx) 615 { 616 const char *src = "COMP\n" 617 "DCL SV[0], THREAD_ID[0]\n" 618 "DCL TEMP[0], LOCAL\n" 619 "DCL TEMP[1], LOCAL\n" 620 "IMM UINT32 { 8, 0, 0, 0 }\n" 621 "\n" 622 " BGNSUB\n" 623 " UMUL TEMP[0], SV[0], IMM[0]\n" 624 " LOAD TEMP[1].xy, RINPUT, TEMP[0]\n" 625 " LOAD TEMP[0].x, RGLOBAL, TEMP[1].yyyy\n" 626 " UADD TEMP[1].x, TEMP[0], -TEMP[1]\n" 627 " STORE RGLOBAL.x, TEMP[1].yyyy, TEMP[1]\n" 628 " RET\n" 629 " ENDSUB\n"; 630 void init(void *p, int s, int x, int y) { 631 *(uint32_t *)p = 0xdeadbeef; 632 } 633 void expect(void *p, int s, int x, int y) { 634 *(uint32_t *)p = 0xdeadbeef - (x == 0 ? 0x10001 + 2 * s : 0); 635 } 636 uint32_t input[8] = { 0x10001, 0x10002, 0x10003, 0x10004, 637 0x10005, 0x10006, 0x10007, 0x10008 }; 638 639 printf("- %s\n", __func__); 640 641 init_prog(ctx, 0, 0, 32, src, NULL); 642 init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init); 643 init_tex(ctx, 1, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init); 644 init_tex(ctx, 2, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init); 645 init_tex(ctx, 3, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init); 646 init_globals(ctx, (int []){ 0, 1, 2, 3, -1 }, 647 (uint32_t *[]){ &input[1], &input[3], 648 &input[5], &input[7] }); 649 launch_grid(ctx, (uint []){4, 1, 1}, (uint []){1, 1, 1}, 0, input); 650 check_tex(ctx, 0, expect, NULL); 651 check_tex(ctx, 1, expect, NULL); 652 check_tex(ctx, 2, expect, NULL); 653 check_tex(ctx, 3, expect, NULL); 654 destroy_globals(ctx); 655 destroy_tex(ctx); 656 destroy_prog(ctx); 657 } 658 659 static void test_private(struct context *ctx) 660 { 661 const char *src = "COMP\n" 662 "DCL RES[0], BUFFER, RAW, WR\n" 663 "DCL SV[0], BLOCK_ID[0]\n" 664 "DCL SV[1], BLOCK_SIZE[0]\n" 665 "DCL SV[2], THREAD_ID[0]\n" 666 "DCL TEMP[0], LOCAL\n" 667 "DCL TEMP[1], LOCAL\n" 668 "DCL TEMP[2], LOCAL\n" 669 "IMM UINT32 { 128, 0, 0, 0 }\n" 670 "IMM UINT32 { 4, 0, 0, 0 }\n" 671 "\n" 672 " BGNSUB\n" 673 " UMUL TEMP[0].x, SV[0], SV[1]\n" 674 " UADD TEMP[0].x, TEMP[0], SV[2]\n" 675 " MOV TEMP[1].x, IMM[0].wwww\n" 676 " BGNLOOP\n" 677 " USEQ TEMP[2].x, TEMP[1], IMM[0]\n" 678 " IF TEMP[2]\n" 679 " BRK\n" 680 " ENDIF\n" 681 " UDIV TEMP[2].x, TEMP[1], IMM[1]\n" 682 " UADD TEMP[2].x, TEMP[2], TEMP[0]\n" 683 " STORE RPRIVATE.x, TEMP[1], TEMP[2]\n" 684 " UADD TEMP[1].x, TEMP[1], IMM[1]\n" 685 " ENDLOOP\n" 686 " MOV TEMP[1].x, IMM[0].wwww\n" 687 " UMUL TEMP[0].x, TEMP[0], IMM[0]\n" 688 " BGNLOOP\n" 689 " USEQ TEMP[2].x, TEMP[1], IMM[0]\n" 690 " IF TEMP[2]\n" 691 " BRK\n" 692 " ENDIF\n" 693 " LOAD TEMP[2].x, RPRIVATE, TEMP[1]\n" 694 " STORE RES[0].x, TEMP[0], TEMP[2]\n" 695 " UADD TEMP[0].x, TEMP[0], IMM[1]\n" 696 " UADD TEMP[1].x, TEMP[1], IMM[1]\n" 697 " ENDLOOP\n" 698 " RET\n" 699 " ENDSUB\n"; 700 void init(void *p, int s, int x, int y) { 701 *(uint32_t *)p = 0xdeadbeef; 702 } 703 void expect(void *p, int s, int x, int y) { 704 *(uint32_t *)p = (x / 32) + x % 32; 705 } 706 707 printf("- %s\n", __func__); 708 709 init_prog(ctx, 0, 128, 0, src, NULL); 710 init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 711 32768, 0, init); 712 init_compute_resources(ctx, (int []) { 0, -1 }); 713 launch_grid(ctx, (uint []){16, 1, 1}, (uint []){16, 1, 1}, 0, NULL); 714 check_tex(ctx, 0, expect, NULL); 715 destroy_compute_resources(ctx); 716 destroy_tex(ctx); 717 destroy_prog(ctx); 718 } 719 720 static void test_local(struct context *ctx) 721 { 722 const char *src = "COMP\n" 723 "DCL RES[0], BUFFER, RAW, WR\n" 724 "DCL SV[0], BLOCK_ID[0]\n" 725 "DCL SV[1], BLOCK_SIZE[0]\n" 726 "DCL SV[2], THREAD_ID[0]\n" 727 "DCL TEMP[0], LOCAL\n" 728 "DCL TEMP[1], LOCAL\n" 729 "DCL TEMP[2], LOCAL\n" 730 "IMM UINT32 { 1, 0, 0, 0 }\n" 731 "IMM UINT32 { 2, 0, 0, 0 }\n" 732 "IMM UINT32 { 4, 0, 0, 0 }\n" 733 "IMM UINT32 { 32, 0, 0, 0 }\n" 734 "IMM UINT32 { 128, 0, 0, 0 }\n" 735 "\n" 736 " BGNSUB\n" 737 " UMUL TEMP[0].x, SV[2], IMM[2]\n" 738 " STORE RLOCAL.x, TEMP[0], IMM[0].wwww\n" 739 " MFENCE RLOCAL\n" 740 " USLT TEMP[1].x, SV[2], IMM[3]\n" 741 " IF TEMP[1]\n" 742 " UADD TEMP[1].x, TEMP[0], IMM[4]\n" 743 " BGNLOOP\n" 744 " LOAD TEMP[2].x, RLOCAL, TEMP[1]\n" 745 " USEQ TEMP[2].x, TEMP[2], IMM[0]\n" 746 " IF TEMP[2]\n" 747 " BRK\n" 748 " ENDIF\n" 749 " ENDLOOP\n" 750 " STORE RLOCAL.x, TEMP[0], IMM[0]\n" 751 " MFENCE RLOCAL\n" 752 " BGNLOOP\n" 753 " LOAD TEMP[2].x, RLOCAL, TEMP[1]\n" 754 " USEQ TEMP[2].x, TEMP[2], IMM[1]\n" 755 " IF TEMP[2]\n" 756 " BRK\n" 757 " ENDIF\n" 758 " ENDLOOP\n" 759 " ELSE\n" 760 " UADD TEMP[1].x, TEMP[0], -IMM[4]\n" 761 " BGNLOOP\n" 762 " LOAD TEMP[2].x, RLOCAL, TEMP[1]\n" 763 " USEQ TEMP[2].x, TEMP[2], IMM[0].wwww\n" 764 " IF TEMP[2]\n" 765 " BRK\n" 766 " ENDIF\n" 767 " ENDLOOP\n" 768 " STORE RLOCAL.x, TEMP[0], IMM[0]\n" 769 " MFENCE RLOCAL\n" 770 " BGNLOOP\n" 771 " LOAD TEMP[2].x, RLOCAL, TEMP[1]\n" 772 " USEQ TEMP[2].x, TEMP[2], IMM[0]\n" 773 " IF TEMP[2]\n" 774 " BRK\n" 775 " ENDIF\n" 776 " ENDLOOP\n" 777 " STORE RLOCAL.x, TEMP[0], IMM[1]\n" 778 " MFENCE RLOCAL\n" 779 " ENDIF\n" 780 " UMUL TEMP[1].x, SV[0], SV[1]\n" 781 " UMUL TEMP[1].x, TEMP[1], IMM[2]\n" 782 " UADD TEMP[1].x, TEMP[1], TEMP[0]\n" 783 " LOAD TEMP[0].x, RLOCAL, TEMP[0]\n" 784 " STORE RES[0].x, TEMP[1], TEMP[0]\n" 785 " RET\n" 786 " ENDSUB\n"; 787 void init(void *p, int s, int x, int y) { 788 *(uint32_t *)p = 0xdeadbeef; 789 } 790 void expect(void *p, int s, int x, int y) { 791 *(uint32_t *)p = x & 0x20 ? 2 : 1; 792 } 793 794 printf("- %s\n", __func__); 795 796 init_prog(ctx, 256, 0, 0, src, NULL); 797 init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 798 4096, 0, init); 799 init_compute_resources(ctx, (int []) { 0, -1 }); 800 launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL); 801 check_tex(ctx, 0, expect, NULL); 802 destroy_compute_resources(ctx); 803 destroy_tex(ctx); 804 destroy_prog(ctx); 805 } 806 807 static void test_sample(struct context *ctx) 808 { 809 const char *src = "COMP\n" 810 "DCL SVIEW[0], 2D, FLOAT\n" 811 "DCL RES[0], 2D, RAW, WR\n" 812 "DCL SAMP[0]\n" 813 "DCL SV[0], BLOCK_ID[0]\n" 814 "DCL TEMP[0], LOCAL\n" 815 "DCL TEMP[1], LOCAL\n" 816 "IMM UINT32 { 16, 1, 0, 0 }\n" 817 "IMM FLT32 { 128, 32, 0, 0 }\n" 818 "\n" 819 " BGNSUB\n" 820 " I2F TEMP[1], SV[0]\n" 821 " DIV TEMP[1], TEMP[1], IMM[1]\n" 822 " SAMPLE TEMP[1], TEMP[1], SVIEW[0], SAMP[0]\n" 823 " UMUL TEMP[0], SV[0], IMM[0]\n" 824 " STORE RES[0].xyzw, TEMP[0], TEMP[1]\n" 825 " RET\n" 826 " ENDSUB\n"; 827 void init(void *p, int s, int x, int y) { 828 *(float *)p = s ? 1 : x * y; 829 } 830 void expect(void *p, int s, int x, int y) { 831 switch (x % 4) { 832 case 0: 833 *(float *)p = x / 4 * y; 834 break; 835 case 1: 836 case 2: 837 *(float *)p = 0; 838 break; 839 case 3: 840 *(float *)p = 1; 841 break; 842 } 843 } 844 845 printf("- %s\n", __func__); 846 847 init_prog(ctx, 0, 0, 0, src, NULL); 848 init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT, 849 128, 32, init); 850 init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT, 851 512, 32, init); 852 init_compute_resources(ctx, (int []) { 1, -1 }); 853 init_sampler_views(ctx, (int []) { 0, -1 }); 854 init_sampler_states(ctx, 2); 855 launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0, NULL); 856 check_tex(ctx, 1, expect, NULL); 857 destroy_sampler_states(ctx); 858 destroy_sampler_views(ctx); 859 destroy_compute_resources(ctx); 860 destroy_tex(ctx); 861 destroy_prog(ctx); 862 } 863 864 static void test_many_kern(struct context *ctx) 865 { 866 const char *src = "COMP\n" 867 "DCL RES[0], BUFFER, RAW, WR\n" 868 "DCL TEMP[0], LOCAL\n" 869 "IMM UINT32 { 0, 1, 2, 3 }\n" 870 "IMM UINT32 { 4, 0, 0, 0 }\n" 871 "\n" 872 " BGNSUB\n" 873 " UMUL TEMP[0].x, IMM[0].xxxx, IMM[1].xxxx\n" 874 " STORE RES[0].x, TEMP[0], IMM[0].xxxx\n" 875 " RET\n" 876 " ENDSUB\n" 877 " BGNSUB\n" 878 " UMUL TEMP[0].x, IMM[0].yyyy, IMM[1].xxxx\n" 879 " STORE RES[0].x, TEMP[0], IMM[0].yyyy\n" 880 " RET\n" 881 " ENDSUB\n" 882 " BGNSUB\n" 883 " UMUL TEMP[0].x, IMM[0].zzzz, IMM[1].xxxx\n" 884 " STORE RES[0].x, TEMP[0], IMM[0].zzzz\n" 885 " RET\n" 886 " ENDSUB\n" 887 " BGNSUB\n" 888 " UMUL TEMP[0].x, IMM[0].wwww, IMM[1].xxxx\n" 889 " STORE RES[0].x, TEMP[0], IMM[0].wwww\n" 890 " RET\n" 891 " ENDSUB\n"; 892 void init(void *p, int s, int x, int y) { 893 *(uint32_t *)p = 0xdeadbeef; 894 } 895 void expect(void *p, int s, int x, int y) { 896 *(uint32_t *)p = x; 897 } 898 899 printf("- %s\n", __func__); 900 901 init_prog(ctx, 0, 0, 0, src, NULL); 902 init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 903 16, 0, init); 904 init_compute_resources(ctx, (int []) { 0, -1 }); 905 launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 0, NULL); 906 launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 5, NULL); 907 launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 10, NULL); 908 launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 15, NULL); 909 check_tex(ctx, 0, expect, NULL); 910 destroy_compute_resources(ctx); 911 destroy_tex(ctx); 912 destroy_prog(ctx); 913 } 914 915 static void test_constant(struct context *ctx) 916 { 917 const char *src = "COMP\n" 918 "DCL RES[0], BUFFER, RAW\n" 919 "DCL RES[1], BUFFER, RAW, WR\n" 920 "DCL SV[0], BLOCK_ID[0]\n" 921 "DCL TEMP[0], LOCAL\n" 922 "DCL TEMP[1], LOCAL\n" 923 "IMM UINT32 { 4, 0, 0, 0 }\n" 924 "\n" 925 " BGNSUB\n" 926 " UMUL TEMP[0].x, SV[0], IMM[0]\n" 927 " LOAD TEMP[1].x, RES[0], TEMP[0]\n" 928 " STORE RES[1].x, TEMP[0], TEMP[1]\n" 929 " RET\n" 930 " ENDSUB\n"; 931 void init(void *p, int s, int x, int y) { 932 *(float *)p = s ? 0xdeadbeef : 8.0 - (float)x; 933 } 934 void expect(void *p, int s, int x, int y) { 935 *(float *)p = 8.0 - (float)x; 936 } 937 938 printf("- %s\n", __func__); 939 940 init_prog(ctx, 0, 0, 0, src, NULL); 941 init_tex(ctx, 0, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT, 942 256, 0, init); 943 init_tex(ctx, 1, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 944 256, 0, init); 945 init_compute_resources(ctx, (int []) { 0, 1, -1 }); 946 launch_grid(ctx, (uint []){1, 1, 1}, (uint []){64, 1, 1}, 0, NULL); 947 check_tex(ctx, 1, expect, NULL); 948 destroy_compute_resources(ctx); 949 destroy_tex(ctx); 950 destroy_prog(ctx); 951 } 952 953 static void test_resource_indirect(struct context *ctx) 954 { 955 const char *src = "COMP\n" 956 "DCL RES[0], BUFFER, RAW, WR\n" 957 "DCL RES[1..3], BUFFER, RAW\n" 958 "DCL SV[0], BLOCK_ID[0]\n" 959 "DCL TEMP[0], LOCAL\n" 960 "DCL TEMP[1], LOCAL\n" 961 "IMM UINT32 { 4, 0, 0, 0 }\n" 962 "\n" 963 " BGNSUB\n" 964 " UMUL TEMP[0].x, SV[0], IMM[0]\n" 965 " LOAD TEMP[1].x, RES[1], TEMP[0]\n" 966 " LOAD TEMP[1].x, RES[TEMP[1].x+2], TEMP[0]\n" 967 " STORE RES[0].x, TEMP[0], TEMP[1]\n" 968 " RET\n" 969 " ENDSUB\n"; 970 void init(void *p, int s, int x, int y) { 971 *(uint32_t *)p = s == 0 ? 0xdeadbeef : 972 s == 1 ? x % 2 : 973 s == 2 ? 2 * x : 974 2 * x + 1; 975 } 976 void expect(void *p, int s, int x, int y) { 977 *(uint32_t *)p = 2 * x + (x % 2 ? 1 : 0); 978 } 979 980 printf("- %s\n", __func__); 981 982 init_prog(ctx, 0, 0, 0, src, NULL); 983 init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 984 256, 0, init); 985 init_tex(ctx, 1, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT, 986 256, 0, init); 987 init_tex(ctx, 2, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT, 988 256, 0, init); 989 init_tex(ctx, 3, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT, 990 256, 0, init); 991 init_compute_resources(ctx, (int []) { 0, 1, 2, 3, -1 }); 992 launch_grid(ctx, (uint []){1, 1, 1}, (uint []){64, 1, 1}, 0, NULL); 993 check_tex(ctx, 0, expect, NULL); 994 destroy_compute_resources(ctx); 995 destroy_tex(ctx); 996 destroy_prog(ctx); 997 } 998 999 enum pipe_format surface_fmts[] = { 1000 PIPE_FORMAT_B8G8R8A8_UNORM, 1001 PIPE_FORMAT_B8G8R8X8_UNORM, 1002 PIPE_FORMAT_A8R8G8B8_UNORM, 1003 PIPE_FORMAT_X8R8G8B8_UNORM, 1004 PIPE_FORMAT_X8R8G8B8_UNORM, 1005 PIPE_FORMAT_L8_UNORM, 1006 PIPE_FORMAT_A8_UNORM, 1007 PIPE_FORMAT_I8_UNORM, 1008 PIPE_FORMAT_L8A8_UNORM, 1009 PIPE_FORMAT_R32_FLOAT, 1010 PIPE_FORMAT_R32G32_FLOAT, 1011 PIPE_FORMAT_R32G32B32A32_FLOAT, 1012 PIPE_FORMAT_R32_UNORM, 1013 PIPE_FORMAT_R32G32_UNORM, 1014 PIPE_FORMAT_R32G32B32A32_UNORM, 1015 PIPE_FORMAT_R32_SNORM, 1016 PIPE_FORMAT_R32G32_SNORM, 1017 PIPE_FORMAT_R32G32B32A32_SNORM, 1018 PIPE_FORMAT_R8_UINT, 1019 PIPE_FORMAT_R8G8_UINT, 1020 PIPE_FORMAT_R8G8B8A8_UINT, 1021 PIPE_FORMAT_R8_SINT, 1022 PIPE_FORMAT_R8G8_SINT, 1023 PIPE_FORMAT_R8G8B8A8_SINT, 1024 PIPE_FORMAT_R32_UINT, 1025 PIPE_FORMAT_R32G32_UINT, 1026 PIPE_FORMAT_R32G32B32A32_UINT, 1027 PIPE_FORMAT_R32_SINT, 1028 PIPE_FORMAT_R32G32_SINT, 1029 PIPE_FORMAT_R32G32B32A32_SINT 1030 }; 1031 1032 static void test_surface_ld(struct context *ctx) 1033 { 1034 const char *src = "COMP\n" 1035 "DCL RES[0], 2D\n" 1036 "DCL RES[1], 2D, RAW, WR\n" 1037 "DCL SV[0], BLOCK_ID[0]\n" 1038 "DCL TEMP[0], LOCAL\n" 1039 "DCL TEMP[1], LOCAL\n" 1040 "IMM UINT32 { 16, 1, 0, 0 }\n" 1041 "\n" 1042 " BGNSUB\n" 1043 " LOAD TEMP[1], RES[0], SV[0]\n" 1044 " UMUL TEMP[0], SV[0], IMM[0]\n" 1045 " STORE RES[1].xyzw, TEMP[0], TEMP[1]\n" 1046 " RET\n" 1047 " ENDSUB\n"; 1048 int i = 0; 1049 void init0f(void *p, int s, int x, int y) { 1050 float v[] = { 1.0, -.75, .50, -.25 }; 1051 util_format_write_4f(surface_fmts[i], v, 0, 1052 p, 0, 0, 0, 1, 1); 1053 } 1054 void init0i(void *p, int s, int x, int y) { 1055 int v[] = { 0xffffffff, 0xffff, 0xff, 0xf }; 1056 util_format_write_4i(surface_fmts[i], v, 0, 1057 p, 0, 0, 0, 1, 1); 1058 } 1059 void init1(void *p, int s, int x, int y) { 1060 *(uint32_t *)p = 0xdeadbeef; 1061 } 1062 void expectf(void *p, int s, int x, int y) { 1063 float v[4], w[4]; 1064 init0f(v, s, x / 4, y); 1065 util_format_read_4f(surface_fmts[i], w, 0, 1066 v, 0, 0, 0, 1, 1); 1067 *(float *)p = w[x % 4]; 1068 } 1069 void expecti(void *p, int s, int x, int y) { 1070 int32_t v[4], w[4]; 1071 init0i(v, s, x / 4, y); 1072 util_format_read_4i(surface_fmts[i], w, 0, 1073 v, 0, 0, 0, 1, 1); 1074 *(uint32_t *)p = w[x % 4]; 1075 } 1076 1077 printf("- %s\n", __func__); 1078 1079 init_prog(ctx, 0, 0, 0, src, NULL); 1080 1081 for (i = 0; i < Elements(surface_fmts); i++) { 1082 bool is_int = util_format_is_pure_integer(surface_fmts[i]); 1083 1084 printf(" - %s\n", util_format_name(surface_fmts[i])); 1085 1086 init_tex(ctx, 0, PIPE_TEXTURE_2D, true, surface_fmts[i], 1087 128, 32, (is_int ? init0i : init0f)); 1088 init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT, 1089 512, 32, init1); 1090 init_compute_resources(ctx, (int []) { 0, 1, -1 }); 1091 init_sampler_states(ctx, 2); 1092 launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0, 1093 NULL); 1094 check_tex(ctx, 1, (is_int ? expecti : expectf), NULL); 1095 destroy_sampler_states(ctx); 1096 destroy_compute_resources(ctx); 1097 destroy_tex(ctx); 1098 } 1099 1100 destroy_prog(ctx); 1101 } 1102 1103 static void test_surface_st(struct context *ctx) 1104 { 1105 const char *src = "COMP\n" 1106 "DCL RES[0], 2D, RAW\n" 1107 "DCL RES[1], 2D, WR\n" 1108 "DCL SV[0], BLOCK_ID[0]\n" 1109 "DCL TEMP[0], LOCAL\n" 1110 "DCL TEMP[1], LOCAL\n" 1111 "IMM UINT32 { 16, 1, 0, 0 }\n" 1112 "\n" 1113 " BGNSUB\n" 1114 " UMUL TEMP[0], SV[0], IMM[0]\n" 1115 " LOAD TEMP[1], RES[0], TEMP[0]\n" 1116 " STORE RES[1], SV[0], TEMP[1]\n" 1117 " RET\n" 1118 " ENDSUB\n"; 1119 int i = 0; 1120 void init0f(void *p, int s, int x, int y) { 1121 float v[] = { 1.0, -.75, 0.5, -.25 }; 1122 *(float *)p = v[x % 4]; 1123 } 1124 void init0i(void *p, int s, int x, int y) { 1125 int v[] = { 0xffffffff, 0xffff, 0xff, 0xf }; 1126 *(int32_t *)p = v[x % 4]; 1127 } 1128 void init1(void *p, int s, int x, int y) { 1129 memset(p, 1, util_format_get_blocksize(surface_fmts[i])); 1130 } 1131 void expectf(void *p, int s, int x, int y) { 1132 float vf[4]; 1133 int j; 1134 1135 for (j = 0; j < 4; j++) 1136 init0f(&vf[j], s, 4 * x + j, y); 1137 util_format_write_4f(surface_fmts[i], vf, 0, 1138 p, 0, 0, 0, 1, 1); 1139 } 1140 void expects(void *p, int s, int x, int y) { 1141 int32_t v[4]; 1142 int j; 1143 1144 for (j = 0; j < 4; j++) 1145 init0i(&v[j], s, 4 * x + j, y); 1146 util_format_write_4i(surface_fmts[i], v, 0, 1147 p, 0, 0, 0, 1, 1); 1148 } 1149 void expectu(void *p, int s, int x, int y) { 1150 uint32_t v[4]; 1151 int j; 1152 1153 for (j = 0; j < 4; j++) 1154 init0i(&v[j], s, 4 * x + j, y); 1155 util_format_write_4ui(surface_fmts[i], v, 0, 1156 p, 0, 0, 0, 1, 1); 1157 } 1158 bool check(void *x, void *y, int sz) { 1159 int j; 1160 1161 if (util_format_is_float(surface_fmts[i])) { 1162 return fabs(*(float *)x - *(float *)y) < 3.92156863e-3; 1163 1164 } else if ((sz % 4) == 0) { 1165 for (j = 0; j < sz / 4; j++) 1166 if (abs(((uint32_t *)x)[j] - 1167 ((uint32_t *)y)[j]) > 1) 1168 return false; 1169 return true; 1170 } else { 1171 return !memcmp(x, y, sz); 1172 } 1173 } 1174 1175 printf("- %s\n", __func__); 1176 1177 init_prog(ctx, 0, 0, 0, src, NULL); 1178 1179 for (i = 0; i < Elements(surface_fmts); i++) { 1180 bool is_signed = (util_format_description(surface_fmts[i]) 1181 ->channel[0].type == UTIL_FORMAT_TYPE_SIGNED); 1182 bool is_int = util_format_is_pure_integer(surface_fmts[i]); 1183 1184 printf(" - %s\n", util_format_name(surface_fmts[i])); 1185 1186 init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT, 1187 512, 32, (is_int ? init0i : init0f)); 1188 init_tex(ctx, 1, PIPE_TEXTURE_2D, true, surface_fmts[i], 1189 128, 32, init1); 1190 init_compute_resources(ctx, (int []) { 0, 1, -1 }); 1191 init_sampler_states(ctx, 2); 1192 launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0, 1193 NULL); 1194 check_tex(ctx, 1, (is_int && is_signed ? expects : 1195 is_int && !is_signed ? expectu : 1196 expectf), check); 1197 destroy_sampler_states(ctx); 1198 destroy_compute_resources(ctx); 1199 destroy_tex(ctx); 1200 } 1201 1202 destroy_prog(ctx); 1203 } 1204 1205 static void test_barrier(struct context *ctx) 1206 { 1207 const char *src = "COMP\n" 1208 "DCL RES[0], BUFFER, RAW, WR\n" 1209 "DCL SV[0], BLOCK_ID[0]\n" 1210 "DCL SV[1], BLOCK_SIZE[0]\n" 1211 "DCL SV[2], THREAD_ID[0]\n" 1212 "DCL TEMP[0], LOCAL\n" 1213 "DCL TEMP[1], LOCAL\n" 1214 "DCL TEMP[2], LOCAL\n" 1215 "DCL TEMP[3], LOCAL\n" 1216 "IMM UINT32 { 1, 0, 0, 0 }\n" 1217 "IMM UINT32 { 4, 0, 0, 0 }\n" 1218 "IMM UINT32 { 32, 0, 0, 0 }\n" 1219 "\n" 1220 " BGNSUB\n" 1221 " UMUL TEMP[0].x, SV[2], IMM[1]\n" 1222 " MOV TEMP[1].x, IMM[0].wwww\n" 1223 " BGNLOOP\n" 1224 " BARRIER\n" 1225 " STORE RLOCAL.x, TEMP[0], TEMP[1]\n" 1226 " BARRIER\n" 1227 " MOV TEMP[2].x, IMM[0].wwww\n" 1228 " BGNLOOP\n" 1229 " UMUL TEMP[3].x, TEMP[2], IMM[1]\n" 1230 " LOAD TEMP[3].x, RLOCAL, TEMP[3]\n" 1231 " USNE TEMP[3].x, TEMP[3], TEMP[1]\n" 1232 " IF TEMP[3]\n" 1233 " END\n" 1234 " ENDIF\n" 1235 " UADD TEMP[2].x, TEMP[2], IMM[0]\n" 1236 " USEQ TEMP[3].x, TEMP[2], SV[1]\n" 1237 " IF TEMP[3]\n" 1238 " BRK\n" 1239 " ENDIF\n" 1240 " ENDLOOP\n" 1241 " UADD TEMP[1].x, TEMP[1], IMM[0]\n" 1242 " USEQ TEMP[2].x, TEMP[1], IMM[2]\n" 1243 " IF TEMP[2]\n" 1244 " BRK\n" 1245 " ENDIF\n" 1246 " ENDLOOP\n" 1247 " UMUL TEMP[1].x, SV[0], SV[1]\n" 1248 " UMUL TEMP[1].x, TEMP[1], IMM[1]\n" 1249 " UADD TEMP[1].x, TEMP[1], TEMP[0]\n" 1250 " LOAD TEMP[0].x, RLOCAL, TEMP[0]\n" 1251 " STORE RES[0].x, TEMP[1], TEMP[0]\n" 1252 " RET\n" 1253 " ENDSUB\n"; 1254 void init(void *p, int s, int x, int y) { 1255 *(uint32_t *)p = 0xdeadbeef; 1256 } 1257 void expect(void *p, int s, int x, int y) { 1258 *(uint32_t *)p = 31; 1259 } 1260 1261 printf("- %s\n", __func__); 1262 1263 init_prog(ctx, 256, 0, 0, src, NULL); 1264 init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 1265 4096, 0, init); 1266 init_compute_resources(ctx, (int []) { 0, -1 }); 1267 launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL); 1268 check_tex(ctx, 0, expect, NULL); 1269 destroy_compute_resources(ctx); 1270 destroy_tex(ctx); 1271 destroy_prog(ctx); 1272 } 1273 1274 static void test_atom_ops(struct context *ctx, bool global) 1275 { 1276 const char *src = "COMP\n" 1277 "#ifdef TARGET_GLOBAL\n" 1278 "#define target RES[0]\n" 1279 "#else\n" 1280 "#define target RLOCAL\n" 1281 "#endif\n" 1282 "" 1283 "DCL RES[0], BUFFER, RAW, WR\n" 1284 "#define threadid SV[0]\n" 1285 "DCL threadid, THREAD_ID[0]\n" 1286 "" 1287 "#define offset TEMP[0]\n" 1288 "DCL offset, LOCAL\n" 1289 "#define tmp TEMP[1]\n" 1290 "DCL tmp, LOCAL\n" 1291 "" 1292 "#define k0 IMM[0]\n" 1293 "IMM UINT32 { 0, 0, 0, 0 }\n" 1294 "#define k1 IMM[1]\n" 1295 "IMM UINT32 { 1, 0, 0, 0 }\n" 1296 "#define k2 IMM[2]\n" 1297 "IMM UINT32 { 2, 0, 0, 0 }\n" 1298 "#define k3 IMM[3]\n" 1299 "IMM UINT32 { 3, 0, 0, 0 }\n" 1300 "#define k4 IMM[4]\n" 1301 "IMM UINT32 { 4, 0, 0, 0 }\n" 1302 "#define k5 IMM[5]\n" 1303 "IMM UINT32 { 5, 0, 0, 0 }\n" 1304 "#define k6 IMM[6]\n" 1305 "IMM UINT32 { 6, 0, 0, 0 }\n" 1306 "#define k7 IMM[7]\n" 1307 "IMM UINT32 { 7, 0, 0, 0 }\n" 1308 "#define k8 IMM[8]\n" 1309 "IMM UINT32 { 8, 0, 0, 0 }\n" 1310 "#define k9 IMM[9]\n" 1311 "IMM UINT32 { 9, 0, 0, 0 }\n" 1312 "#define korig IMM[10].xxxx\n" 1313 "#define karg IMM[10].yyyy\n" 1314 "IMM UINT32 { 3735928559, 286331153, 0, 0 }\n" 1315 "\n" 1316 " BGNSUB\n" 1317 " UMUL offset.x, threadid, k4\n" 1318 " STORE target.x, offset, korig\n" 1319 " USEQ tmp.x, threadid, k0\n" 1320 " IF tmp\n" 1321 " ATOMUADD tmp.x, target, offset, karg\n" 1322 " ATOMUADD tmp.x, target, offset, tmp\n" 1323 " ENDIF\n" 1324 " USEQ tmp.x, threadid, k1\n" 1325 " IF tmp\n" 1326 " ATOMXCHG tmp.x, target, offset, karg\n" 1327 " ATOMXCHG tmp.x, target, offset, tmp\n" 1328 " ENDIF\n" 1329 " USEQ tmp.x, threadid, k2\n" 1330 " IF tmp\n" 1331 " ATOMCAS tmp.x, target, offset, korig, karg\n" 1332 " ATOMCAS tmp.x, target, offset, tmp, k0\n" 1333 " ENDIF\n" 1334 " USEQ tmp.x, threadid, k3\n" 1335 " IF tmp\n" 1336 " ATOMAND tmp.x, target, offset, karg\n" 1337 " ATOMAND tmp.x, target, offset, tmp\n" 1338 " ENDIF\n" 1339 " USEQ tmp.x, threadid, k4\n" 1340 " IF tmp\n" 1341 " ATOMOR tmp.x, target, offset, karg\n" 1342 " ATOMOR tmp.x, target, offset, tmp\n" 1343 " ENDIF\n" 1344 " USEQ tmp.x, threadid, k5\n" 1345 " IF tmp\n" 1346 " ATOMXOR tmp.x, target, offset, karg\n" 1347 " ATOMXOR tmp.x, target, offset, tmp\n" 1348 " ENDIF\n" 1349 " USEQ tmp.x, threadid, k6\n" 1350 " IF tmp\n" 1351 " ATOMUMIN tmp.x, target, offset, karg\n" 1352 " ATOMUMIN tmp.x, target, offset, tmp\n" 1353 " ENDIF\n" 1354 " USEQ tmp.x, threadid, k7\n" 1355 " IF tmp\n" 1356 " ATOMUMAX tmp.x, target, offset, karg\n" 1357 " ATOMUMAX tmp.x, target, offset, tmp\n" 1358 " ENDIF\n" 1359 " USEQ tmp.x, threadid, k8\n" 1360 " IF tmp\n" 1361 " ATOMIMIN tmp.x, target, offset, karg\n" 1362 " ATOMIMIN tmp.x, target, offset, tmp\n" 1363 " ENDIF\n" 1364 " USEQ tmp.x, threadid, k9\n" 1365 " IF tmp\n" 1366 " ATOMIMAX tmp.x, target, offset, karg\n" 1367 " ATOMIMAX tmp.x, target, offset, tmp\n" 1368 " ENDIF\n" 1369 "#ifdef TARGET_LOCAL\n" 1370 " LOAD tmp.x, RLOCAL, offset\n" 1371 " STORE RES[0].x, offset, tmp\n" 1372 "#endif\n" 1373 " RET\n" 1374 " ENDSUB\n"; 1375 1376 void init(void *p, int s, int x, int y) { 1377 *(uint32_t *)p = 0xbad; 1378 } 1379 void expect(void *p, int s, int x, int y) { 1380 switch (x) { 1381 case 0: 1382 *(uint32_t *)p = 0xce6c8eef; 1383 break; 1384 case 1: 1385 *(uint32_t *)p = 0xdeadbeef; 1386 break; 1387 case 2: 1388 *(uint32_t *)p = 0x11111111; 1389 break; 1390 case 3: 1391 *(uint32_t *)p = 0x10011001; 1392 break; 1393 case 4: 1394 *(uint32_t *)p = 0xdfbdbfff; 1395 break; 1396 case 5: 1397 *(uint32_t *)p = 0x11111111; 1398 break; 1399 case 6: 1400 *(uint32_t *)p = 0x11111111; 1401 break; 1402 case 7: 1403 *(uint32_t *)p = 0xdeadbeef; 1404 break; 1405 case 8: 1406 *(uint32_t *)p = 0xdeadbeef; 1407 break; 1408 case 9: 1409 *(uint32_t *)p = 0x11111111; 1410 break; 1411 } 1412 } 1413 1414 printf("- %s (%s)\n", __func__, global ? "global" : "local"); 1415 1416 init_prog(ctx, 40, 0, 0, src, 1417 (global ? "-DTARGET_GLOBAL" : "-DTARGET_LOCAL")); 1418 init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 1419 40, 0, init); 1420 init_compute_resources(ctx, (int []) { 0, -1 }); 1421 launch_grid(ctx, (uint []){10, 1, 1}, (uint []){1, 1, 1}, 0, NULL); 1422 check_tex(ctx, 0, expect, NULL); 1423 destroy_compute_resources(ctx); 1424 destroy_tex(ctx); 1425 destroy_prog(ctx); 1426 } 1427 1428 static void test_atom_race(struct context *ctx, bool global) 1429 { 1430 const char *src = "COMP\n" 1431 "#ifdef TARGET_GLOBAL\n" 1432 "#define target RES[0]\n" 1433 "#else\n" 1434 "#define target RLOCAL\n" 1435 "#endif\n" 1436 "" 1437 "DCL RES[0], BUFFER, RAW, WR\n" 1438 "" 1439 "#define blockid SV[0]\n" 1440 "DCL blockid, BLOCK_ID[0]\n" 1441 "#define blocksz SV[1]\n" 1442 "DCL blocksz, BLOCK_SIZE[0]\n" 1443 "#define threadid SV[2]\n" 1444 "DCL threadid, THREAD_ID[0]\n" 1445 "" 1446 "#define offset TEMP[0]\n" 1447 "DCL offset, LOCAL\n" 1448 "#define arg TEMP[1]\n" 1449 "DCL arg, LOCAL\n" 1450 "#define count TEMP[2]\n" 1451 "DCL count, LOCAL\n" 1452 "#define vlocal TEMP[3]\n" 1453 "DCL vlocal, LOCAL\n" 1454 "#define vshared TEMP[4]\n" 1455 "DCL vshared, LOCAL\n" 1456 "#define last TEMP[5]\n" 1457 "DCL last, LOCAL\n" 1458 "#define tmp0 TEMP[6]\n" 1459 "DCL tmp0, LOCAL\n" 1460 "#define tmp1 TEMP[7]\n" 1461 "DCL tmp1, LOCAL\n" 1462 "" 1463 "#define k0 IMM[0]\n" 1464 "IMM UINT32 { 0, 0, 0, 0 }\n" 1465 "#define k1 IMM[1]\n" 1466 "IMM UINT32 { 1, 0, 0, 0 }\n" 1467 "#define k4 IMM[2]\n" 1468 "IMM UINT32 { 4, 0, 0, 0 }\n" 1469 "#define k32 IMM[3]\n" 1470 "IMM UINT32 { 32, 0, 0, 0 }\n" 1471 "#define k128 IMM[4]\n" 1472 "IMM UINT32 { 128, 0, 0, 0 }\n" 1473 "#define kdeadcafe IMM[5]\n" 1474 "IMM UINT32 { 3735931646, 0, 0, 0 }\n" 1475 "#define kallowed_set IMM[6]\n" 1476 "IMM UINT32 { 559035650, 0, 0, 0 }\n" 1477 "#define k11111111 IMM[7]\n" 1478 "IMM UINT32 { 286331153, 0, 0, 0 }\n" 1479 "\n" 1480 " BGNSUB\n" 1481 " MOV offset.x, threadid\n" 1482 "#ifdef TARGET_GLOBAL\n" 1483 " UMUL tmp0.x, blockid, blocksz\n" 1484 " UADD offset.x, offset, tmp0\n" 1485 "#endif\n" 1486 " UMUL offset.x, offset, k4\n" 1487 " USLT tmp0.x, threadid, k32\n" 1488 " STORE target.x, offset, k0\n" 1489 " BARRIER\n" 1490 " IF tmp0\n" 1491 " MOV vlocal.x, k0\n" 1492 " MOV arg.x, kdeadcafe\n" 1493 " BGNLOOP\n" 1494 " INEG arg.x, arg\n" 1495 " ATOMUADD vshared.x, target, offset, arg\n" 1496 " SFENCE target\n" 1497 " USNE tmp0.x, vshared, vlocal\n" 1498 " IF tmp0\n" 1499 " BRK\n" 1500 " ENDIF\n" 1501 " UADD vlocal.x, vlocal, arg\n" 1502 " ENDLOOP\n" 1503 " UADD vlocal.x, vshared, arg\n" 1504 " LOAD vshared.x, target, offset\n" 1505 " USEQ tmp0.x, vshared, vlocal\n" 1506 " STORE target.x, offset, tmp0\n" 1507 " ELSE\n" 1508 " UADD offset.x, offset, -k128\n" 1509 " MOV count.x, k0\n" 1510 " MOV last.x, k0\n" 1511 " BGNLOOP\n" 1512 " LOAD vshared.x, target, offset\n" 1513 " USEQ tmp0.x, vshared, kallowed_set.xxxx\n" 1514 " USEQ tmp1.x, vshared, kallowed_set.yyyy\n" 1515 " OR tmp0.x, tmp0, tmp1\n" 1516 " IF tmp0\n" 1517 " USEQ tmp0.x, vshared, last\n" 1518 " IF tmp0\n" 1519 " CONT\n" 1520 " ENDIF\n" 1521 " MOV last.x, vshared\n" 1522 " ELSE\n" 1523 " END\n" 1524 " ENDIF\n" 1525 " UADD count.x, count, k1\n" 1526 " USEQ tmp0.x, count, k128\n" 1527 " IF tmp0\n" 1528 " BRK\n" 1529 " ENDIF\n" 1530 " ENDLOOP\n" 1531 " ATOMXCHG tmp0.x, target, offset, k11111111\n" 1532 " UADD offset.x, offset, k128\n" 1533 " ATOMXCHG tmp0.x, target, offset, k11111111\n" 1534 " SFENCE target\n" 1535 " ENDIF\n" 1536 "#ifdef TARGET_LOCAL\n" 1537 " LOAD tmp0.x, RLOCAL, offset\n" 1538 " UMUL tmp1.x, blockid, blocksz\n" 1539 " UMUL tmp1.x, tmp1, k4\n" 1540 " UADD offset.x, offset, tmp1\n" 1541 " STORE RES[0].x, offset, tmp0\n" 1542 "#endif\n" 1543 " RET\n" 1544 " ENDSUB\n"; 1545 1546 void init(void *p, int s, int x, int y) { 1547 *(uint32_t *)p = 0xdeadbeef; 1548 } 1549 void expect(void *p, int s, int x, int y) { 1550 *(uint32_t *)p = x & 0x20 ? 0x11111111 : 0xffffffff; 1551 } 1552 1553 printf("- %s (%s)\n", __func__, global ? "global" : "local"); 1554 1555 init_prog(ctx, 256, 0, 0, src, 1556 (global ? "-DTARGET_GLOBAL" : "-DTARGET_LOCAL")); 1557 init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 1558 4096, 0, init); 1559 init_compute_resources(ctx, (int []) { 0, -1 }); 1560 launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL); 1561 check_tex(ctx, 0, expect, NULL); 1562 destroy_compute_resources(ctx); 1563 destroy_tex(ctx); 1564 destroy_prog(ctx); 1565 } 1566 1567 int main(int argc, char *argv[]) 1568 { 1569 struct context *ctx = CALLOC_STRUCT(context); 1570 1571 init_ctx(ctx); 1572 test_system_values(ctx); 1573 test_resource_access(ctx); 1574 test_function_calls(ctx); 1575 test_input_global(ctx); 1576 test_private(ctx); 1577 test_local(ctx); 1578 test_sample(ctx); 1579 test_many_kern(ctx); 1580 test_constant(ctx); 1581 test_resource_indirect(ctx); 1582 test_surface_ld(ctx); 1583 test_surface_st(ctx); 1584 test_barrier(ctx); 1585 test_atom_ops(ctx, true); 1586 test_atom_race(ctx, true); 1587 test_atom_ops(ctx, false); 1588 test_atom_race(ctx, false); 1589 destroy_ctx(ctx); 1590 1591 return 0; 1592 } 1593