Home | History | Annotate | Download | only in trivial
      1 /*
      2  * Copyright (C) 2011 Francisco Jerez.
      3  * All Rights Reserved.
      4  *
      5  * Permission is hereby granted, free of charge, to any person obtaining
      6  * a copy of this software and associated documentation files (the
      7  * "Software"), to deal in the Software without restriction, including
      8  * without limitation the rights to use, copy, modify, merge, publish,
      9  * distribute, sublicense, and/or sell copies of the Software, and to
     10  * permit persons to whom the Software is furnished to do so, subject to
     11  * the following conditions:
     12  *
     13  * The above copyright notice and this permission notice (including the
     14  * next paragraph) shall be included in all copies or substantial
     15  * portions of the Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     19  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
     20  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
     21  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
     22  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
     23  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     24  *
     25  */
     26 
     27 #include <fcntl.h>
     28 #include <stdio.h>
     29 #include <sys/stat.h>
     30 #include <inttypes.h>
     31 #include "pipe/p_state.h"
     32 #include "pipe/p_context.h"
     33 #include "pipe/p_screen.h"
     34 #include "pipe/p_defines.h"
     35 #include "pipe/p_shader_tokens.h"
     36 #include "util/u_memory.h"
     37 #include "util/u_inlines.h"
     38 #include "util/u_sampler.h"
     39 #include "util/u_format.h"
     40 #include "tgsi/tgsi_text.h"
     41 #include "pipe-loader/pipe_loader.h"
     42 
     43 #define MAX_RESOURCES 4
     44 
     45 struct context {
     46         struct pipe_loader_device *dev;
     47         struct pipe_screen *screen;
     48         struct pipe_context *pipe;
     49         void *hwcs;
     50         void *hwsmp[MAX_RESOURCES];
     51         struct pipe_resource *tex[MAX_RESOURCES];
     52         bool tex_rw[MAX_RESOURCES];
     53         struct pipe_sampler_view *view[MAX_RESOURCES];
     54         struct pipe_surface *surf[MAX_RESOURCES];
     55 };
     56 
     57 #define DUMP_COMPUTE_PARAM(p, c) do {                                   \
     58                 uint64_t __v[4];                                        \
     59                 int __i, __n;                                           \
     60                                                                         \
     61                 __n = ctx->screen->get_compute_param(ctx->screen,       \
     62                                                      PIPE_SHADER_IR_TGSI, \
     63                                                      c, __v);           \
     64                 printf("%s: {", #c);                                    \
     65                                                                         \
     66                 for (__i = 0; __i < __n / sizeof(*__v); ++__i)          \
     67                         printf(" %"PRIu64, __v[__i]);                   \
     68                                                                         \
     69                 printf(" }\n");                                         \
     70         } while (0)
     71 
     72 static void init_ctx(struct context *ctx)
     73 {
     74         int ret;
     75 
     76         ret = pipe_loader_probe(&ctx->dev, 1);
     77         assert(ret);
     78 
     79         ctx->screen = pipe_loader_create_screen(ctx->dev);
     80         assert(ctx->screen);
     81 
     82         ctx->pipe = ctx->screen->context_create(ctx->screen, NULL, 0);
     83         assert(ctx->pipe);
     84 
     85         DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_GRID_DIMENSION);
     86         DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_MAX_GRID_SIZE);
     87         DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
     88 }
     89 
     90 static void destroy_ctx(struct context *ctx)
     91 {
     92         ctx->pipe->destroy(ctx->pipe);
     93         ctx->screen->destroy(ctx->screen);
     94         pipe_loader_release(&ctx->dev, 1);
     95         FREE(ctx);
     96 }
     97 
     98 static char *
     99 preprocess_prog(struct context *ctx, const char *src, const char *defs)
    100 {
    101         const char header[] =
    102                 "#define RGLOBAL        RES[32767]\n"
    103                 "#define RLOCAL         RES[32766]\n"
    104                 "#define RPRIVATE       RES[32765]\n"
    105                 "#define RINPUT         RES[32764]\n";
    106         char cmd[512];
    107         char tmp[] = "/tmp/test-compute.tgsi-XXXXXX";
    108         char *buf;
    109         int fd, ret;
    110         struct stat st;
    111         FILE *p;
    112 
    113         /* Open a temporary file */
    114         fd = mkstemp(tmp);
    115         assert(fd >= 0);
    116         snprintf(cmd, sizeof(cmd), "cpp -P -nostdinc -undef %s > %s",
    117                  defs ? defs : "", tmp);
    118 
    119         /* Preprocess */
    120         p = popen(cmd, "w");
    121         fwrite(header, strlen(header), 1, p);
    122         fwrite(src, strlen(src), 1, p);
    123         ret = pclose(p);
    124         assert(!ret);
    125 
    126         /* Read back */
    127         ret = fstat(fd, &st);
    128         assert(!ret);
    129 
    130         buf = malloc(st.st_size + 1);
    131         ret = read(fd, buf, st.st_size);
    132         assert(ret == st.st_size);
    133         buf[ret] = 0;
    134 
    135         /* Clean up */
    136         close(fd);
    137         unlink(tmp);
    138 
    139         return buf;
    140 }
    141 
    142 static void init_prog(struct context *ctx, unsigned local_sz,
    143                       unsigned private_sz, unsigned input_sz,
    144                       const char *src, const char *defs)
    145 {
    146         struct pipe_context *pipe = ctx->pipe;
    147         struct tgsi_token prog[1024];
    148         struct pipe_compute_state cs = {
    149                 .ir_type = PIPE_SHADER_IR_TGSI,
    150                 .prog = prog,
    151                 .req_local_mem = local_sz,
    152                 .req_private_mem = private_sz,
    153                 .req_input_mem = input_sz
    154         };
    155         char *psrc = preprocess_prog(ctx, src, defs);
    156         int ret;
    157 
    158         ret = tgsi_text_translate(psrc, prog, ARRAY_SIZE(prog));
    159         assert(ret);
    160         free(psrc);
    161 
    162         ctx->hwcs = pipe->create_compute_state(pipe, &cs);
    163         assert(ctx->hwcs);
    164 
    165         pipe->bind_compute_state(pipe, ctx->hwcs);
    166 }
    167 
    168 static void destroy_prog(struct context *ctx)
    169 {
    170         struct pipe_context *pipe = ctx->pipe;
    171 
    172         pipe->delete_compute_state(pipe, ctx->hwcs);
    173         ctx->hwcs = NULL;
    174 }
    175 
    176 static void init_tex(struct context *ctx, int slot,
    177                      enum pipe_texture_target target, bool rw,
    178                      enum pipe_format format, int w, int h,
    179                      void (*init)(void *, int, int, int))
    180 {
    181         struct pipe_context *pipe = ctx->pipe;
    182         struct pipe_resource **tex = &ctx->tex[slot];
    183         struct pipe_resource ttex = {
    184                 .target = target,
    185                 .format = format,
    186                 .width0 = w,
    187                 .height0 = h,
    188                 .depth0 = 1,
    189                 .array_size = 1,
    190                 .bind = (PIPE_BIND_SAMPLER_VIEW |
    191                          PIPE_BIND_COMPUTE_RESOURCE |
    192                          PIPE_BIND_GLOBAL)
    193         };
    194         int dx = util_format_get_blocksize(format);
    195         int dy = util_format_get_stride(format, w);
    196         int nx = (target == PIPE_BUFFER ? (w / dx) :
    197                   util_format_get_nblocksx(format, w));
    198         int ny = (target == PIPE_BUFFER ? 1 :
    199                   util_format_get_nblocksy(format, h));
    200         struct pipe_transfer *xfer;
    201         char *map;
    202         int x, y;
    203 
    204         *tex = ctx->screen->resource_create(ctx->screen, &ttex);
    205         assert(*tex);
    206 
    207         map = pipe->transfer_map(pipe, *tex, 0, PIPE_TRANSFER_WRITE,
    208                                   &(struct pipe_box) { .width = w,
    209                                                   .height = h,
    210                                                   .depth = 1 }, &xfer);
    211         assert(xfer);
    212         assert(map);
    213 
    214         for (y = 0; y < ny; ++y) {
    215                 for (x = 0; x < nx; ++x) {
    216                         init(map + y * dy + x * dx, slot, x, y);
    217                 }
    218         }
    219 
    220         pipe->transfer_unmap(pipe, xfer);
    221 
    222         ctx->tex_rw[slot] = rw;
    223 }
    224 
    225 static bool default_check(void *x, void *y, int sz) {
    226         return !memcmp(x, y, sz);
    227 }
    228 
    229 static void check_tex(struct context *ctx, int slot,
    230                       void (*expect)(void *, int, int, int),
    231                       bool (*check)(void *, void *, int))
    232 {
    233         struct pipe_context *pipe = ctx->pipe;
    234         struct pipe_resource *tex = ctx->tex[slot];
    235         int dx = util_format_get_blocksize(tex->format);
    236         int dy = util_format_get_stride(tex->format, tex->width0);
    237         int nx = (tex->target == PIPE_BUFFER ? (tex->width0 / dx) :
    238                   util_format_get_nblocksx(tex->format, tex->width0));
    239         int ny = (tex->target == PIPE_BUFFER ? 1 :
    240                   util_format_get_nblocksy(tex->format, tex->height0));
    241         struct pipe_transfer *xfer;
    242         char *map;
    243         int x, y, i;
    244         int err = 0;
    245 
    246         if (!check)
    247                 check = default_check;
    248 
    249         map = pipe->transfer_map(pipe, tex, 0, PIPE_TRANSFER_READ,
    250                                   &(struct pipe_box) { .width = tex->width0,
    251                                         .height = tex->height0,
    252                                         .depth = 1 }, &xfer);
    253         assert(xfer);
    254         assert(map);
    255 
    256         for (y = 0; y < ny; ++y) {
    257                 for (x = 0; x < nx; ++x) {
    258                         uint32_t exp[4];
    259                         uint32_t *res = (uint32_t *)(map + y * dy + x * dx);
    260 
    261                         expect(exp, slot, x, y);
    262                         if (check(res, exp, dx) || (++err) > 20)
    263                                 continue;
    264 
    265                         if (dx < 4) {
    266                                 uint32_t u = 0, v = 0;
    267 
    268                                 for (i = 0; i < dx; i++) {
    269                                         u |= ((uint8_t *)exp)[i] << (8 * i);
    270                                         v |= ((uint8_t *)res)[i] << (8 * i);
    271                                 }
    272                                 printf("(%d, %d): got 0x%x, expected 0x%x\n",
    273                                        x, y, v, u);
    274                         } else {
    275                                 for (i = 0; i < dx / 4; i++) {
    276                                         printf("(%d, %d)[%d]: got 0x%x/%f,"
    277                                                " expected 0x%x/%f\n", x, y, i,
    278                                                res[i], ((float *)res)[i],
    279                                                exp[i], ((float *)exp)[i]);
    280                                 }
    281                         }
    282                 }
    283         }
    284 
    285         pipe->transfer_unmap(pipe, xfer);
    286 
    287         if (err)
    288                 printf("(%d, %d): \x1b[31mFAIL\x1b[0m (%d)\n", x, y, err);
    289         else
    290                 printf("(%d, %d): \x1b[32mOK\x1b[0m\n", x, y);
    291 }
    292 
    293 static void destroy_tex(struct context *ctx)
    294 {
    295         int i;
    296 
    297         for (i = 0; i < MAX_RESOURCES; ++i) {
    298                 if (ctx->tex[i])
    299                         pipe_resource_reference(&ctx->tex[i], NULL);
    300         }
    301 }
    302 
    303 static void init_sampler_views(struct context *ctx, const int *slots)
    304 {
    305         struct pipe_context *pipe = ctx->pipe;
    306         struct pipe_sampler_view tview;
    307         int i;
    308 
    309         for (i = 0; *slots >= 0; ++i, ++slots) {
    310                 u_sampler_view_default_template(&tview, ctx->tex[*slots],
    311                                                 ctx->tex[*slots]->format);
    312 
    313                 ctx->view[i] = pipe->create_sampler_view(pipe, ctx->tex[*slots],
    314                                                          &tview);
    315                 assert(ctx->view[i]);
    316         }
    317 
    318         pipe->set_sampler_views(pipe, PIPE_SHADER_COMPUTE, 0, i, ctx->view);
    319 }
    320 
    321 static void destroy_sampler_views(struct context *ctx)
    322 {
    323         struct pipe_context *pipe = ctx->pipe;
    324         int i;
    325 
    326         pipe->set_sampler_views(pipe, PIPE_SHADER_COMPUTE, 0, MAX_RESOURCES, NULL);
    327 
    328         for (i = 0; i < MAX_RESOURCES; ++i) {
    329                 if (ctx->view[i]) {
    330                         pipe->sampler_view_destroy(pipe, ctx->view[i]);
    331                         ctx->view[i] = NULL;
    332                 }
    333         }
    334 }
    335 
    336 static void init_compute_resources(struct context *ctx, const int *slots)
    337 {
    338         struct pipe_context *pipe = ctx->pipe;
    339         int i;
    340 
    341         for (i = 0; *slots >= 0; ++i, ++slots) {
    342                 struct pipe_surface tsurf = {
    343                         .format = ctx->tex[*slots]->format,
    344                         .writable = ctx->tex_rw[*slots]
    345                 };
    346 
    347                 if (ctx->tex[*slots]->target == PIPE_BUFFER)
    348                         tsurf.u.buf.last_element = ctx->tex[*slots]->width0 - 1;
    349 
    350                 ctx->surf[i] = pipe->create_surface(pipe, ctx->tex[*slots],
    351                                                     &tsurf);
    352                 assert(ctx->surf[i]);
    353         }
    354 
    355         pipe->set_compute_resources(pipe, 0, i, ctx->surf);
    356 }
    357 
    358 static void destroy_compute_resources(struct context *ctx)
    359 {
    360         struct pipe_context *pipe = ctx->pipe;
    361         int i;
    362 
    363         pipe->set_compute_resources(pipe, 0, MAX_RESOURCES, NULL);
    364 
    365         for (i = 0; i < MAX_RESOURCES; ++i) {
    366                 if (ctx->surf[i]) {
    367                         pipe->surface_destroy(pipe, ctx->surf[i]);
    368                         ctx->surf[i] = NULL;
    369                 }
    370         }
    371 }
    372 
    373 static void init_sampler_states(struct context *ctx, int n)
    374 {
    375         struct pipe_context *pipe = ctx->pipe;
    376         struct pipe_sampler_state smp = {
    377                 .normalized_coords = 1,
    378         };
    379         int i;
    380 
    381         for (i = 0; i < n; ++i) {
    382                 ctx->hwsmp[i] = pipe->create_sampler_state(pipe, &smp);
    383                 assert(ctx->hwsmp[i]);
    384         }
    385 
    386         pipe->bind_sampler_states(pipe, PIPE_SHADER_COMPUTE, 0, i, ctx->hwsmp);
    387 }
    388 
    389 static void destroy_sampler_states(struct context *ctx)
    390 {
    391         struct pipe_context *pipe = ctx->pipe;
    392         int i;
    393 
    394         pipe->bind_sampler_states(pipe, PIPE_SHADER_COMPUTE,
    395 				  0, MAX_RESOURCES, NULL);
    396 
    397         for (i = 0; i < MAX_RESOURCES; ++i) {
    398                 if (ctx->hwsmp[i]) {
    399                         pipe->delete_sampler_state(pipe, ctx->hwsmp[i]);
    400                         ctx->hwsmp[i] = NULL;
    401                 }
    402         }
    403 }
    404 
    405 static void init_globals(struct context *ctx, const int *slots,
    406                          uint32_t **handles)
    407 {
    408         struct pipe_context *pipe = ctx->pipe;
    409         struct pipe_resource *res[MAX_RESOURCES];
    410         int i;
    411 
    412         for (i = 0; *slots >= 0; ++i, ++slots)
    413                 res[i] = ctx->tex[*slots];
    414 
    415         pipe->set_global_binding(pipe, 0, i, res, handles);
    416 }
    417 
    418 static void destroy_globals(struct context *ctx)
    419 {
    420         struct pipe_context *pipe = ctx->pipe;
    421 
    422         pipe->set_global_binding(pipe, 0, MAX_RESOURCES, NULL, NULL);
    423 }
    424 
    425 static void launch_grid(struct context *ctx, const uint *block_layout,
    426                         const uint *grid_layout, uint32_t pc,
    427                         void *input)
    428 {
    429         struct pipe_context *pipe = ctx->pipe;
    430         struct pipe_grid_info info;
    431         int i;
    432 
    433         for (i = 0; i < 3; i++) {
    434                 info.block[i] = block_layout[i];
    435                 info.grid[i] = grid_layout[i];
    436         }
    437         info.pc = pc;
    438         info.input = input;
    439 
    440         pipe->launch_grid(pipe, &info);
    441 }
    442 
    443 static void test_default_init(void *p, int s, int x, int y)
    444 {
    445         *(uint32_t *)p = 0xdeadbeef;
    446 }
    447 
    448 /* test_system_values */
    449 static void test_system_values_expect(void *p, int s, int x, int y)
    450 {
    451         int id = x / 16, sv = (x % 16) / 4, c = x % 4;
    452         int tid[] = { id % 20, (id % 240) / 20, id / 240, 0 };
    453         int bsz[] = { 4, 3, 5, 1};
    454         int gsz[] = { 5, 4, 1, 1};
    455 
    456         switch (sv) {
    457         case 0:
    458                 *(uint32_t *)p = tid[c] / bsz[c];
    459                 break;
    460         case 1:
    461                 *(uint32_t *)p = bsz[c];
    462                 break;
    463         case 2:
    464                 *(uint32_t *)p = gsz[c];
    465                 break;
    466         case 3:
    467                 *(uint32_t *)p = tid[c] % bsz[c];
    468                 break;
    469         }
    470 }
    471 
    472 static void test_system_values(struct context *ctx)
    473 {
    474         const char *src = "COMP\n"
    475                 "DCL RES[0], BUFFER, RAW, WR\n"
    476                 "DCL SV[0], BLOCK_ID[0]\n"
    477                 "DCL SV[1], BLOCK_SIZE[0]\n"
    478                 "DCL SV[2], GRID_SIZE[0]\n"
    479                 "DCL SV[3], THREAD_ID[0]\n"
    480                 "DCL TEMP[0], LOCAL\n"
    481                 "DCL TEMP[1], LOCAL\n"
    482                 "IMM UINT32 { 64, 0, 0, 0 }\n"
    483                 "IMM UINT32 { 16, 0, 0, 0 }\n"
    484                 "IMM UINT32 { 0, 0, 0, 0 }\n"
    485                 "\n"
    486                 "BGNSUB"
    487                 "  UMUL TEMP[0], SV[0], SV[1]\n"
    488                 "  UADD TEMP[0], TEMP[0], SV[3]\n"
    489                 "  UMUL TEMP[1], SV[1], SV[2]\n"
    490                 "  UMUL TEMP[0].w, TEMP[0], TEMP[1].zzzz\n"
    491                 "  UMUL TEMP[0].zw, TEMP[0], TEMP[1].yyyy\n"
    492                 "  UMUL TEMP[0].yzw, TEMP[0], TEMP[1].xxxx\n"
    493                 "  UADD TEMP[0].xy, TEMP[0].xyxy, TEMP[0].zwzw\n"
    494                 "  UADD TEMP[0].x, TEMP[0].xxxx, TEMP[0].yyyy\n"
    495                 "  UMUL TEMP[0].x, TEMP[0], IMM[0]\n"
    496                 "  STORE RES[0].xyzw, TEMP[0], SV[0]\n"
    497                 "  UADD TEMP[0].x, TEMP[0], IMM[1]\n"
    498                 "  STORE RES[0].xyzw, TEMP[0], SV[1]\n"
    499                 "  UADD TEMP[0].x, TEMP[0], IMM[1]\n"
    500                 "  STORE RES[0].xyzw, TEMP[0], SV[2]\n"
    501                 "  UADD TEMP[0].x, TEMP[0], IMM[1]\n"
    502                 "  STORE RES[0].xyzw, TEMP[0], SV[3]\n"
    503                 "  RET\n"
    504                 "ENDSUB\n";
    505 
    506         printf("- %s\n", __func__);
    507 
    508         init_prog(ctx, 0, 0, 0, src, NULL);
    509         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
    510                  76800, 0, test_default_init);
    511         init_compute_resources(ctx, (int []) { 0, -1 });
    512         launch_grid(ctx, (uint []){4, 3, 5}, (uint []){5, 4, 1}, 0, NULL);
    513         check_tex(ctx, 0, test_system_values_expect, NULL);
    514         destroy_compute_resources(ctx);
    515         destroy_tex(ctx);
    516         destroy_prog(ctx);
    517 }
    518 
    519 /* test_resource_access */
    520 static void test_resource_access_init0(void *p, int s, int x, int y)
    521 {
    522         *(float *)p = 8.0 - (float)x;
    523 }
    524 
    525 static void test_resource_access_expect(void *p, int s, int x, int y)
    526 {
    527         *(float *)p = 8.0 - (float)((x + 4 * y) & 0x3f);
    528 }
    529 
    530 static void test_resource_access(struct context *ctx)
    531 {
    532         const char *src = "COMP\n"
    533                 "DCL RES[0], BUFFER, RAW, WR\n"
    534                 "DCL RES[1], 2D, RAW, WR\n"
    535                 "DCL SV[0], BLOCK_ID[0]\n"
    536                 "DCL TEMP[0], LOCAL\n"
    537                 "DCL TEMP[1], LOCAL\n"
    538                 "IMM UINT32 { 15, 0, 0, 0 }\n"
    539                 "IMM UINT32 { 16, 1, 0, 0 }\n"
    540                 "\n"
    541                 "    BGNSUB\n"
    542                 "       UADD TEMP[0].x, SV[0].xxxx, SV[0].yyyy\n"
    543                 "       AND TEMP[0].x, TEMP[0], IMM[0]\n"
    544                 "       UMUL TEMP[0].x, TEMP[0], IMM[1]\n"
    545                 "       LOAD TEMP[0].xyzw, RES[0], TEMP[0]\n"
    546                 "       UMUL TEMP[1], SV[0], IMM[1]\n"
    547                 "       STORE RES[1].xyzw, TEMP[1], TEMP[0]\n"
    548                 "       RET\n"
    549                 "    ENDSUB\n";
    550 
    551         printf("- %s\n", __func__);
    552 
    553         init_prog(ctx, 0, 0, 0, src, NULL);
    554         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
    555                  256, 0, test_resource_access_init0);
    556         init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
    557                  60, 12, test_default_init);
    558         init_compute_resources(ctx, (int []) { 0, 1, -1 });
    559         launch_grid(ctx, (uint []){1, 1, 1}, (uint []){15, 12, 1}, 0, NULL);
    560         check_tex(ctx, 1, test_resource_access_expect, NULL);
    561         destroy_compute_resources(ctx);
    562         destroy_tex(ctx);
    563         destroy_prog(ctx);
    564 }
    565 
    566 /* test_function_calls */
    567 static void test_function_calls_init(void *p, int s, int x, int y)
    568 {
    569         *(uint32_t *)p = 15 * y + x;
    570 }
    571 
    572 static void test_function_calls_expect(void *p, int s, int x, int y)
    573 {
    574         *(uint32_t *)p = (15 * y + x) < 4 ? 2 : 1 ;
    575 }
    576 
    577 static void test_function_calls(struct context *ctx)
    578 {
    579         const char *src = "COMP\n"
    580                 "DCL RES[0], 2D, RAW, WR\n"
    581                 "DCL SV[0], BLOCK_ID[0]\n"
    582                 "DCL SV[1], BLOCK_SIZE[0]\n"
    583                 "DCL SV[2], GRID_SIZE[0]\n"
    584                 "DCL SV[3], THREAD_ID[0]\n"
    585                 "DCL TEMP[0]\n"
    586                 "DCL TEMP[1]\n"
    587                 "DCL TEMP[2], LOCAL\n"
    588                 "IMM UINT32 { 0, 11, 22, 33 }\n"
    589                 "IMM FLT32 { 11, 33, 55, 99 }\n"
    590                 "IMM UINT32 { 4, 1, 0, 0 }\n"
    591                 "IMM UINT32 { 12, 0, 0, 0 }\n"
    592                 "\n"
    593                 "00: BGNSUB\n"
    594                 "01:  UMUL TEMP[0].x, TEMP[0], TEMP[0]\n"
    595                 "02:  UADD TEMP[1].x, TEMP[1], IMM[2].yyyy\n"
    596                 "03:  USLT TEMP[0].x, TEMP[0], IMM[0]\n"
    597                 "04:  RET\n"
    598                 "05: ENDSUB\n"
    599                 "06: BGNSUB\n"
    600                 "07:  UMUL TEMP[0].x, TEMP[0], TEMP[0]\n"
    601                 "08:  UADD TEMP[1].x, TEMP[1], IMM[2].yyyy\n"
    602                 "09:  USLT TEMP[0].x, TEMP[0], IMM[0].yyyy\n"
    603                 "10:  IF TEMP[0].xxxx\n"
    604                 "11:   CAL :0\n"
    605                 "12:  ENDIF\n"
    606                 "13:  RET\n"
    607                 "14: ENDSUB\n"
    608                 "15: BGNSUB\n"
    609                 "16:  UMUL TEMP[2], SV[0], SV[1]\n"
    610                 "17:  UADD TEMP[2], TEMP[2], SV[3]\n"
    611                 "18:  UMUL TEMP[2], TEMP[2], IMM[2]\n"
    612                 "00:  MOV TEMP[1].x, IMM[2].wwww\n"
    613                 "19:  LOAD TEMP[0].x, RES[0].xxxx, TEMP[2]\n"
    614                 "20:  CAL :6\n"
    615                 "21:  STORE RES[0].x, TEMP[2], TEMP[1].xxxx\n"
    616                 "22:  RET\n"
    617                 "23: ENDSUB\n";
    618 
    619         printf("- %s\n", __func__);
    620 
    621         init_prog(ctx, 0, 0, 0, src, NULL);
    622         init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
    623                  15, 12, test_function_calls_init);
    624         init_compute_resources(ctx, (int []) { 0, -1 });
    625         launch_grid(ctx, (uint []){3, 3, 3}, (uint []){5, 4, 1}, 15, NULL);
    626         check_tex(ctx, 0, test_function_calls_expect, NULL);
    627         destroy_compute_resources(ctx);
    628         destroy_tex(ctx);
    629         destroy_prog(ctx);
    630 }
    631 
    632 /* test_input_global */
    633 static void test_input_global_expect(void *p, int s, int x, int y)
    634 {
    635         *(uint32_t *)p = 0xdeadbeef - (x == 0 ? 0x10001 + 2 * s : 0);
    636 }
    637 
    638 static void test_input_global(struct context *ctx)
    639 {
    640         const char *src = "COMP\n"
    641                 "DCL SV[0], THREAD_ID[0]\n"
    642                 "DCL TEMP[0], LOCAL\n"
    643                 "DCL TEMP[1], LOCAL\n"
    644                 "IMM UINT32 { 8, 0, 0, 0 }\n"
    645                 "\n"
    646                 "    BGNSUB\n"
    647                 "       UMUL TEMP[0], SV[0], IMM[0]\n"
    648                 "       LOAD TEMP[1].xy, RINPUT, TEMP[0]\n"
    649                 "       LOAD TEMP[0].x, RGLOBAL, TEMP[1].yyyy\n"
    650                 "       UADD TEMP[1].x, TEMP[0], -TEMP[1]\n"
    651                 "       STORE RGLOBAL.x, TEMP[1].yyyy, TEMP[1]\n"
    652                 "       RET\n"
    653                 "    ENDSUB\n";
    654         uint32_t input[8] = { 0x10001, 0x10002, 0x10003, 0x10004,
    655                               0x10005, 0x10006, 0x10007, 0x10008 };
    656 
    657         printf("- %s\n", __func__);
    658 
    659         init_prog(ctx, 0, 0, 32, src, NULL);
    660         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0,
    661                  test_default_init);
    662         init_tex(ctx, 1, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0,
    663                  test_default_init);
    664         init_tex(ctx, 2, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0,
    665                  test_default_init);
    666         init_tex(ctx, 3, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0,
    667                  test_default_init);
    668         init_globals(ctx, (int []){ 0, 1, 2, 3, -1 },
    669                      (uint32_t *[]){ &input[1], &input[3],
    670                                      &input[5], &input[7] });
    671         launch_grid(ctx, (uint []){4, 1, 1}, (uint []){1, 1, 1}, 0, input);
    672         check_tex(ctx, 0, test_input_global_expect, NULL);
    673         check_tex(ctx, 1, test_input_global_expect, NULL);
    674         check_tex(ctx, 2, test_input_global_expect, NULL);
    675         check_tex(ctx, 3, test_input_global_expect, NULL);
    676         destroy_globals(ctx);
    677         destroy_tex(ctx);
    678         destroy_prog(ctx);
    679 }
    680 
    681 /* test_private */
    682 static void test_private_expect(void *p, int s, int x, int y)
    683 {
    684         *(uint32_t *)p = (x / 32) + x % 32;
    685 }
    686 
    687 static void test_private(struct context *ctx)
    688 {
    689         const char *src = "COMP\n"
    690                 "DCL RES[0], BUFFER, RAW, WR\n"
    691                 "DCL SV[0], BLOCK_ID[0]\n"
    692                 "DCL SV[1], BLOCK_SIZE[0]\n"
    693                 "DCL SV[2], THREAD_ID[0]\n"
    694                 "DCL TEMP[0], LOCAL\n"
    695                 "DCL TEMP[1], LOCAL\n"
    696                 "DCL TEMP[2], LOCAL\n"
    697                 "IMM UINT32 { 128, 0, 0, 0 }\n"
    698                 "IMM UINT32 { 4, 0, 0, 0 }\n"
    699                 "\n"
    700                 "    BGNSUB\n"
    701                 "       UMUL TEMP[0].x, SV[0], SV[1]\n"
    702                 "       UADD TEMP[0].x, TEMP[0], SV[2]\n"
    703                 "       MOV TEMP[1].x, IMM[0].wwww\n"
    704                 "       BGNLOOP\n"
    705                 "               USEQ TEMP[2].x, TEMP[1], IMM[0]\n"
    706                 "               IF TEMP[2]\n"
    707                 "                       BRK\n"
    708                 "               ENDIF\n"
    709                 "               UDIV TEMP[2].x, TEMP[1], IMM[1]\n"
    710                 "               UADD TEMP[2].x, TEMP[2], TEMP[0]\n"
    711                 "               STORE RPRIVATE.x, TEMP[1], TEMP[2]\n"
    712                 "               UADD TEMP[1].x, TEMP[1], IMM[1]\n"
    713                 "       ENDLOOP\n"
    714                 "       MOV TEMP[1].x, IMM[0].wwww\n"
    715                 "       UMUL TEMP[0].x, TEMP[0], IMM[0]\n"
    716                 "       BGNLOOP\n"
    717                 "               USEQ TEMP[2].x, TEMP[1], IMM[0]\n"
    718                 "               IF TEMP[2]\n"
    719                 "                       BRK\n"
    720                 "               ENDIF\n"
    721                 "               LOAD TEMP[2].x, RPRIVATE, TEMP[1]\n"
    722                 "               STORE RES[0].x, TEMP[0], TEMP[2]\n"
    723                 "               UADD TEMP[0].x, TEMP[0], IMM[1]\n"
    724                 "               UADD TEMP[1].x, TEMP[1], IMM[1]\n"
    725                 "       ENDLOOP\n"
    726                 "       RET\n"
    727                 "    ENDSUB\n";
    728 
    729         printf("- %s\n", __func__);
    730 
    731         init_prog(ctx, 0, 128, 0, src, NULL);
    732         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
    733                  32768, 0, test_default_init);
    734         init_compute_resources(ctx, (int []) { 0, -1 });
    735         launch_grid(ctx, (uint []){16, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
    736         check_tex(ctx, 0, test_private_expect, NULL);
    737         destroy_compute_resources(ctx);
    738         destroy_tex(ctx);
    739         destroy_prog(ctx);
    740 }
    741 
    742 /* test_local */
    743 static void test_local_expect(void *p, int s, int x, int y)
    744 {
    745         *(uint32_t *)p = x & 0x20 ? 2 : 1;
    746 }
    747 
    748 static void test_local(struct context *ctx)
    749 {
    750         const char *src = "COMP\n"
    751                 "DCL RES[0], BUFFER, RAW, WR\n"
    752                 "DCL SV[0], BLOCK_ID[0]\n"
    753                 "DCL SV[1], BLOCK_SIZE[0]\n"
    754                 "DCL SV[2], THREAD_ID[0]\n"
    755                 "DCL TEMP[0], LOCAL\n"
    756                 "DCL TEMP[1], LOCAL\n"
    757                 "DCL TEMP[2], LOCAL\n"
    758                 "IMM UINT32 { 1, 0, 0, 0 }\n"
    759                 "IMM UINT32 { 2, 0, 0, 0 }\n"
    760                 "IMM UINT32 { 4, 0, 0, 0 }\n"
    761                 "IMM UINT32 { 32, 0, 0, 0 }\n"
    762                 "IMM UINT32 { 128, 0, 0, 0 }\n"
    763                 "\n"
    764                 "    BGNSUB\n"
    765                 "       UMUL TEMP[0].x, SV[2], IMM[2]\n"
    766                 "       STORE RLOCAL.x, TEMP[0], IMM[0].wwww\n"
    767                 "       MFENCE RLOCAL\n"
    768                 "       USLT TEMP[1].x, SV[2], IMM[3]\n"
    769                 "       IF TEMP[1]\n"
    770                 "               UADD TEMP[1].x, TEMP[0], IMM[4]\n"
    771                 "               BGNLOOP\n"
    772                 "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
    773                 "                       USEQ TEMP[2].x, TEMP[2], IMM[0]\n"
    774                 "                       IF TEMP[2]\n"
    775                 "                               BRK\n"
    776                 "                       ENDIF\n"
    777                 "               ENDLOOP\n"
    778                 "               STORE RLOCAL.x, TEMP[0], IMM[0]\n"
    779                 "               MFENCE RLOCAL\n"
    780                 "               BGNLOOP\n"
    781                 "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
    782                 "                       USEQ TEMP[2].x, TEMP[2], IMM[1]\n"
    783                 "                       IF TEMP[2]\n"
    784                 "                               BRK\n"
    785                 "                       ENDIF\n"
    786                 "               ENDLOOP\n"
    787                 "       ELSE\n"
    788                 "               UADD TEMP[1].x, TEMP[0], -IMM[4]\n"
    789                 "               BGNLOOP\n"
    790                 "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
    791                 "                       USEQ TEMP[2].x, TEMP[2], IMM[0].wwww\n"
    792                 "                       IF TEMP[2]\n"
    793                 "                               BRK\n"
    794                 "                       ENDIF\n"
    795                 "               ENDLOOP\n"
    796                 "               STORE RLOCAL.x, TEMP[0], IMM[0]\n"
    797                 "               MFENCE RLOCAL\n"
    798                 "               BGNLOOP\n"
    799                 "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
    800                 "                       USEQ TEMP[2].x, TEMP[2], IMM[0]\n"
    801                 "                       IF TEMP[2]\n"
    802                 "                               BRK\n"
    803                 "                       ENDIF\n"
    804                 "               ENDLOOP\n"
    805                 "               STORE RLOCAL.x, TEMP[0], IMM[1]\n"
    806                 "               MFENCE RLOCAL\n"
    807                 "       ENDIF\n"
    808                 "       UMUL TEMP[1].x, SV[0], SV[1]\n"
    809                 "       UMUL TEMP[1].x, TEMP[1], IMM[2]\n"
    810                 "       UADD TEMP[1].x, TEMP[1], TEMP[0]\n"
    811                 "       LOAD TEMP[0].x, RLOCAL, TEMP[0]\n"
    812                 "       STORE RES[0].x, TEMP[1], TEMP[0]\n"
    813                 "       RET\n"
    814                 "    ENDSUB\n";
    815 
    816         printf("- %s\n", __func__);
    817 
    818         init_prog(ctx, 256, 0, 0, src, NULL);
    819         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
    820                  4096, 0, test_default_init);
    821         init_compute_resources(ctx, (int []) { 0, -1 });
    822         launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
    823         check_tex(ctx, 0, test_local_expect, NULL);
    824         destroy_compute_resources(ctx);
    825         destroy_tex(ctx);
    826         destroy_prog(ctx);
    827 }
    828 
    829 /* test_sample */
    830 static void test_sample_init(void *p, int s, int x, int y)
    831 {
    832         *(float *)p = s ? 1 : x * y;
    833 }
    834 
    835 static void test_sample_expect(void *p, int s, int x, int y)
    836 {
    837         switch (x % 4) {
    838         case 0:
    839                 *(float *)p = x / 4 * y;
    840                 break;
    841         case 1:
    842         case 2:
    843                 *(float *)p = 0;
    844                 break;
    845         case 3:
    846                 *(float *)p = 1;
    847                 break;
    848         }
    849 }
    850 
    851 static void test_sample(struct context *ctx)
    852 {
    853         const char *src = "COMP\n"
    854                 "DCL SVIEW[0], 2D, FLOAT\n"
    855                 "DCL RES[0], 2D, RAW, WR\n"
    856                 "DCL SAMP[0]\n"
    857                 "DCL SV[0], BLOCK_ID[0]\n"
    858                 "DCL TEMP[0], LOCAL\n"
    859                 "DCL TEMP[1], LOCAL\n"
    860                 "IMM UINT32 { 16, 1, 0, 0 }\n"
    861                 "IMM FLT32 { 128, 32, 0, 0 }\n"
    862                 "\n"
    863                 "    BGNSUB\n"
    864                 "       I2F TEMP[1], SV[0]\n"
    865                 "       DIV TEMP[1], TEMP[1], IMM[1]\n"
    866                 "       SAMPLE TEMP[1], TEMP[1], SVIEW[0], SAMP[0]\n"
    867                 "       UMUL TEMP[0], SV[0], IMM[0]\n"
    868                 "       STORE RES[0].xyzw, TEMP[0], TEMP[1]\n"
    869                 "       RET\n"
    870                 "    ENDSUB\n";
    871 
    872         printf("- %s\n", __func__);
    873 
    874         init_prog(ctx, 0, 0, 0, src, NULL);
    875         init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
    876                  128, 32, test_sample_init);
    877         init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
    878                  512, 32, test_sample_init);
    879         init_compute_resources(ctx, (int []) { 1, -1 });
    880         init_sampler_views(ctx, (int []) { 0, -1 });
    881         init_sampler_states(ctx, 2);
    882         launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0, NULL);
    883         check_tex(ctx, 1, test_sample_expect, NULL);
    884         destroy_sampler_states(ctx);
    885         destroy_sampler_views(ctx);
    886         destroy_compute_resources(ctx);
    887         destroy_tex(ctx);
    888         destroy_prog(ctx);
    889 }
    890 
    891 /* test_many_kern */
    892 static void test_many_kern_expect(void *p, int s, int x, int y)
    893 {
    894         *(uint32_t *)p = x;
    895 }
    896 
    897 static void test_many_kern(struct context *ctx)
    898 {
    899         const char *src = "COMP\n"
    900                 "DCL RES[0], BUFFER, RAW, WR\n"
    901                 "DCL TEMP[0], LOCAL\n"
    902                 "IMM UINT32 { 0, 1, 2, 3 }\n"
    903                 "IMM UINT32 { 4, 0, 0, 0 }\n"
    904                 "\n"
    905                 "    BGNSUB\n"
    906                 "       UMUL TEMP[0].x, IMM[0].xxxx, IMM[1].xxxx\n"
    907                 "       STORE RES[0].x, TEMP[0], IMM[0].xxxx\n"
    908                 "       RET\n"
    909                 "    ENDSUB\n"
    910                 "    BGNSUB\n"
    911                 "       UMUL TEMP[0].x, IMM[0].yyyy, IMM[1].xxxx\n"
    912                 "       STORE RES[0].x, TEMP[0], IMM[0].yyyy\n"
    913                 "       RET\n"
    914                 "    ENDSUB\n"
    915                 "    BGNSUB\n"
    916                 "       UMUL TEMP[0].x, IMM[0].zzzz, IMM[1].xxxx\n"
    917                 "       STORE RES[0].x, TEMP[0], IMM[0].zzzz\n"
    918                 "       RET\n"
    919                 "    ENDSUB\n"
    920                 "    BGNSUB\n"
    921                 "       UMUL TEMP[0].x, IMM[0].wwww, IMM[1].xxxx\n"
    922                 "       STORE RES[0].x, TEMP[0], IMM[0].wwww\n"
    923                 "       RET\n"
    924                 "    ENDSUB\n";
    925 
    926         printf("- %s\n", __func__);
    927 
    928         init_prog(ctx, 0, 0, 0, src, NULL);
    929         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
    930                  16, 0, test_default_init);
    931         init_compute_resources(ctx, (int []) { 0, -1 });
    932         launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 0, NULL);
    933         launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 5, NULL);
    934         launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 10, NULL);
    935         launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 15, NULL);
    936         check_tex(ctx, 0, test_many_kern_expect, NULL);
    937         destroy_compute_resources(ctx);
    938         destroy_tex(ctx);
    939         destroy_prog(ctx);
    940 }
    941 
    942 /* test_constant */
    943 static void test_constant_init(void *p, int s, int x, int y)
    944 {
    945         *(float *)p = s ? 0xdeadbeef : 8.0 - (float)x;
    946 }
    947 
    948 static void test_constant_expect(void *p, int s, int x, int y)
    949 {
    950         *(float *)p = 8.0 - (float)x;
    951 }
    952 
    953 static void test_constant(struct context *ctx)
    954 {
    955         const char *src = "COMP\n"
    956                 "DCL RES[0], BUFFER, RAW\n"
    957                 "DCL RES[1], BUFFER, RAW, WR\n"
    958                 "DCL SV[0], BLOCK_ID[0]\n"
    959                 "DCL TEMP[0], LOCAL\n"
    960                 "DCL TEMP[1], LOCAL\n"
    961                 "IMM UINT32 { 4, 0, 0, 0 }\n"
    962                 "\n"
    963                 "    BGNSUB\n"
    964                 "       UMUL TEMP[0].x, SV[0], IMM[0]\n"
    965                 "       LOAD TEMP[1].x, RES[0], TEMP[0]\n"
    966                 "       STORE RES[1].x, TEMP[0], TEMP[1]\n"
    967                 "       RET\n"
    968                 "    ENDSUB\n";
    969 
    970         printf("- %s\n", __func__);
    971 
    972         init_prog(ctx, 0, 0, 0, src, NULL);
    973         init_tex(ctx, 0, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
    974                  256, 0, test_constant_init);
    975         init_tex(ctx, 1, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
    976                  256, 0, test_constant_init);
    977         init_compute_resources(ctx, (int []) { 0, 1, -1 });
    978         launch_grid(ctx, (uint []){1, 1, 1}, (uint []){64, 1, 1}, 0, NULL);
    979         check_tex(ctx, 1, test_constant_expect, NULL);
    980         destroy_compute_resources(ctx);
    981         destroy_tex(ctx);
    982         destroy_prog(ctx);
    983 }
    984 
    985 /* test_resource_indirect */
    986 static void test_resource_indirect_init(void *p, int s, int x, int y)
    987 {
    988         *(uint32_t *)p = s == 0 ? 0xdeadbeef :
    989                 s == 1 ? x % 2 :
    990                 s == 2 ? 2 * x :
    991                 2 * x + 1;
    992 }
    993 
    994 static void test_resource_indirect_expect(void *p, int s, int x, int y)
    995 {
    996         *(uint32_t *)p = 2 * x + (x % 2 ? 1 : 0);
    997 }
    998 
    999 static void test_resource_indirect(struct context *ctx)
   1000 {
   1001         const char *src = "COMP\n"
   1002                 "DCL RES[0], BUFFER, RAW, WR\n"
   1003                 "DCL RES[1..3], BUFFER, RAW\n"
   1004                 "DCL SV[0], BLOCK_ID[0]\n"
   1005                 "DCL TEMP[0], LOCAL\n"
   1006                 "DCL TEMP[1], LOCAL\n"
   1007                 "IMM UINT32 { 4, 0, 0, 0 }\n"
   1008                 "\n"
   1009                 "    BGNSUB\n"
   1010                 "       UMUL TEMP[0].x, SV[0], IMM[0]\n"
   1011                 "       LOAD TEMP[1].x, RES[1], TEMP[0]\n"
   1012                 "       LOAD TEMP[1].x, RES[TEMP[1].x+2], TEMP[0]\n"
   1013                 "       STORE RES[0].x, TEMP[0], TEMP[1]\n"
   1014                 "       RET\n"
   1015                 "    ENDSUB\n";
   1016 
   1017         printf("- %s\n", __func__);
   1018 
   1019         init_prog(ctx, 0, 0, 0, src, NULL);
   1020         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
   1021                  256, 0, test_resource_indirect_init);
   1022         init_tex(ctx, 1, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
   1023                  256, 0, test_resource_indirect_init);
   1024         init_tex(ctx, 2, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
   1025                  256, 0, test_resource_indirect_init);
   1026         init_tex(ctx, 3, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
   1027                  256, 0, test_resource_indirect_init);
   1028         init_compute_resources(ctx, (int []) { 0, 1, 2, 3, -1 });
   1029         launch_grid(ctx, (uint []){1, 1, 1}, (uint []){64, 1, 1}, 0, NULL);
   1030         check_tex(ctx, 0, test_resource_indirect_expect, NULL);
   1031         destroy_compute_resources(ctx);
   1032         destroy_tex(ctx);
   1033         destroy_prog(ctx);
   1034 }
   1035 
   1036 /* test_surface_ld */
   1037 enum pipe_format surface_fmts[] = {
   1038         PIPE_FORMAT_B8G8R8A8_UNORM,
   1039         PIPE_FORMAT_B8G8R8X8_UNORM,
   1040         PIPE_FORMAT_A8R8G8B8_UNORM,
   1041         PIPE_FORMAT_X8R8G8B8_UNORM,
   1042         PIPE_FORMAT_X8R8G8B8_UNORM,
   1043         PIPE_FORMAT_L8_UNORM,
   1044         PIPE_FORMAT_A8_UNORM,
   1045         PIPE_FORMAT_I8_UNORM,
   1046         PIPE_FORMAT_L8A8_UNORM,
   1047         PIPE_FORMAT_R32_FLOAT,
   1048         PIPE_FORMAT_R32G32_FLOAT,
   1049         PIPE_FORMAT_R32G32B32A32_FLOAT,
   1050         PIPE_FORMAT_R32_UNORM,
   1051         PIPE_FORMAT_R32G32_UNORM,
   1052         PIPE_FORMAT_R32G32B32A32_UNORM,
   1053         PIPE_FORMAT_R32_SNORM,
   1054         PIPE_FORMAT_R32G32_SNORM,
   1055         PIPE_FORMAT_R32G32B32A32_SNORM,
   1056         PIPE_FORMAT_R8_UINT,
   1057         PIPE_FORMAT_R8G8_UINT,
   1058         PIPE_FORMAT_R8G8B8A8_UINT,
   1059         PIPE_FORMAT_R8_SINT,
   1060         PIPE_FORMAT_R8G8_SINT,
   1061         PIPE_FORMAT_R8G8B8A8_SINT,
   1062         PIPE_FORMAT_R32_UINT,
   1063         PIPE_FORMAT_R32G32_UINT,
   1064         PIPE_FORMAT_R32G32B32A32_UINT,
   1065         PIPE_FORMAT_R32_SINT,
   1066         PIPE_FORMAT_R32G32_SINT,
   1067         PIPE_FORMAT_R32G32B32A32_SINT
   1068 };
   1069 
   1070 static void test_surface_ld_init0f(void *p, int s, int x, int y)
   1071 {
   1072         float v[] = { 1.0, -.75, .50, -.25 };
   1073         int i = 0;
   1074 
   1075         util_format_write_4f(surface_fmts[i], v, 0, p, 0, 0, 0, 1, 1);
   1076 }
   1077 
   1078 static void test_surface_ld_init0i(void *p, int s, int x, int y)
   1079 {
   1080         int v[] = { 0xffffffff, 0xffff, 0xff, 0xf };
   1081         int i = 0;
   1082 
   1083         util_format_write_4i(surface_fmts[i], v, 0, p, 0, 0, 0, 1, 1);
   1084 }
   1085 
   1086 static void test_surface_ld_expectf(void *p, int s, int x, int y)
   1087 {
   1088         float v[4], w[4];
   1089         int i = 0;
   1090 
   1091         test_surface_ld_init0f(v, s, x / 4, y);
   1092         util_format_read_4f(surface_fmts[i], w, 0, v, 0, 0, 0, 1, 1);
   1093         *(float *)p = w[x % 4];
   1094 }
   1095 
   1096 static void test_surface_ld_expecti(void *p, int s, int x, int y)
   1097 {
   1098         int32_t v[4], w[4];
   1099         int i = 0;
   1100 
   1101         test_surface_ld_init0i(v, s, x / 4, y);
   1102         util_format_read_4i(surface_fmts[i], w, 0, v, 0, 0, 0, 1, 1);
   1103         *(uint32_t *)p = w[x % 4];
   1104 }
   1105 
   1106 static void test_surface_ld(struct context *ctx)
   1107 {
   1108         const char *src = "COMP\n"
   1109                 "DCL RES[0], 2D\n"
   1110                 "DCL RES[1], 2D, RAW, WR\n"
   1111                 "DCL SV[0], BLOCK_ID[0]\n"
   1112                 "DCL TEMP[0], LOCAL\n"
   1113                 "DCL TEMP[1], LOCAL\n"
   1114                 "IMM UINT32 { 16, 1, 0, 0 }\n"
   1115                 "\n"
   1116                 "    BGNSUB\n"
   1117                 "       LOAD TEMP[1], RES[0], SV[0]\n"
   1118                 "       UMUL TEMP[0], SV[0], IMM[0]\n"
   1119                 "       STORE RES[1].xyzw, TEMP[0], TEMP[1]\n"
   1120                 "       RET\n"
   1121                 "    ENDSUB\n";
   1122         int i = 0;
   1123 
   1124         printf("- %s\n", __func__);
   1125 
   1126         init_prog(ctx, 0, 0, 0, src, NULL);
   1127 
   1128         for (i = 0; i < ARRAY_SIZE(surface_fmts); i++) {
   1129                 bool is_int = util_format_is_pure_integer(surface_fmts[i]);
   1130 
   1131                 printf("   - %s\n", util_format_name(surface_fmts[i]));
   1132 
   1133                 if (!ctx->screen->is_format_supported(ctx->screen,
   1134                        surface_fmts[i], PIPE_TEXTURE_2D, 1,
   1135                        PIPE_BIND_COMPUTE_RESOURCE)) {
   1136                    printf("(unsupported)\n");
   1137                    continue;
   1138                 }
   1139 
   1140                 init_tex(ctx, 0, PIPE_TEXTURE_2D, true, surface_fmts[i],
   1141                          128, 32, (is_int ? test_surface_ld_init0i : test_surface_ld_init0f));
   1142                 init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
   1143                          512, 32, test_default_init);
   1144                 init_compute_resources(ctx, (int []) { 0, 1, -1 });
   1145                 init_sampler_states(ctx, 2);
   1146                 launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0,
   1147                             NULL);
   1148                 check_tex(ctx, 1, (is_int ? test_surface_ld_expecti : test_surface_ld_expectf), NULL);
   1149                 destroy_sampler_states(ctx);
   1150                 destroy_compute_resources(ctx);
   1151                 destroy_tex(ctx);
   1152         }
   1153 
   1154         destroy_prog(ctx);
   1155 }
   1156 
   1157 /* test_surface_st */
   1158 static void test_surface_st_init0f(void *p, int s, int x, int y)
   1159 {
   1160         float v[] = { 1.0, -.75, 0.5, -.25 };
   1161         *(float *)p = v[x % 4];
   1162 }
   1163 
   1164 static void test_surface_st_init0i(void *p, int s, int x, int y)
   1165 {
   1166         int v[] = { 0xffffffff, 0xffff, 0xff, 0xf };
   1167         *(int32_t *)p = v[x % 4];
   1168 }
   1169 
   1170 static void test_surface_st_init1(void *p, int s, int x, int y)
   1171 {
   1172         int i = 0;
   1173         memset(p, 1, util_format_get_blocksize(surface_fmts[i]));
   1174 }
   1175 
   1176 static void test_surface_st_expectf(void *p, int s, int x, int y)
   1177 {
   1178         float vf[4];
   1179         int i = 0, j;
   1180 
   1181         for (j = 0; j < 4; j++)
   1182                 test_surface_st_init0f(&vf[j], s, 4 * x + j, y);
   1183         util_format_write_4f(surface_fmts[i], vf, 0, p, 0, 0, 0, 1, 1);
   1184 }
   1185 
   1186 static void test_surface_st_expects(void *p, int s, int x, int y)
   1187 {
   1188         int32_t v[4];
   1189         int i = 0, j;
   1190 
   1191         for (j = 0; j < 4; j++)
   1192                 test_surface_st_init0i(&v[j], s, 4 * x + j, y);
   1193         util_format_write_4i(surface_fmts[i], v, 0, p, 0, 0, 0, 1, 1);
   1194 }
   1195 
   1196 static void test_surface_st_expectu(void *p, int s, int x, int y)
   1197 {
   1198         uint32_t v[4];
   1199         int i = 0, j;
   1200 
   1201         for (j = 0; j < 4; j++)
   1202                 test_surface_st_init0i(&v[j], s, 4 * x + j, y);
   1203         util_format_write_4ui(surface_fmts[i], v, 0, p, 0, 0, 0, 1, 1);
   1204 }
   1205 
   1206 static bool test_surface_st_check(void *x, void *y, int sz)
   1207 {
   1208         int i = 0, j;
   1209 
   1210         if (util_format_is_float(surface_fmts[i])) {
   1211                 return fabs(*(float *)x - *(float *)y) < 3.92156863e-3;
   1212 
   1213         } else if ((sz % 4) == 0) {
   1214                 for (j = 0; j < sz / 4; j++)
   1215                         if (abs(((uint32_t *)x)[j] -
   1216                                 ((uint32_t *)y)[j]) > 1)
   1217                                 return false;
   1218                 return true;
   1219         } else {
   1220                 return !memcmp(x, y, sz);
   1221         }
   1222 }
   1223 
   1224 static void test_surface_st(struct context *ctx)
   1225 {
   1226         const char *src = "COMP\n"
   1227                 "DCL RES[0], 2D, RAW\n"
   1228                 "DCL RES[1], 2D, WR\n"
   1229                 "DCL SV[0], BLOCK_ID[0]\n"
   1230                 "DCL TEMP[0], LOCAL\n"
   1231                 "DCL TEMP[1], LOCAL\n"
   1232                 "IMM UINT32 { 16, 1, 0, 0 }\n"
   1233                 "\n"
   1234                 "    BGNSUB\n"
   1235                 "       UMUL TEMP[0], SV[0], IMM[0]\n"
   1236                 "       LOAD TEMP[1], RES[0], TEMP[0]\n"
   1237                 "       STORE RES[1], SV[0], TEMP[1]\n"
   1238                 "       RET\n"
   1239                 "    ENDSUB\n";
   1240         int i = 0;
   1241 
   1242         printf("- %s\n", __func__);
   1243 
   1244         init_prog(ctx, 0, 0, 0, src, NULL);
   1245 
   1246         for (i = 0; i < ARRAY_SIZE(surface_fmts); i++) {
   1247                 bool is_signed = (util_format_description(surface_fmts[i])
   1248                                   ->channel[0].type == UTIL_FORMAT_TYPE_SIGNED);
   1249                 bool is_int = util_format_is_pure_integer(surface_fmts[i]);
   1250 
   1251                 printf("   - %s\n", util_format_name(surface_fmts[i]));
   1252 
   1253                 if (!ctx->screen->is_format_supported(ctx->screen,
   1254                        surface_fmts[i], PIPE_TEXTURE_2D, 1,
   1255                        PIPE_BIND_COMPUTE_RESOURCE)) {
   1256                    printf("(unsupported)\n");
   1257                    continue;
   1258                 }
   1259 
   1260                 init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
   1261                          512, 32, (is_int ? test_surface_st_init0i : test_surface_st_init0f));
   1262                 init_tex(ctx, 1, PIPE_TEXTURE_2D, true, surface_fmts[i],
   1263                          128, 32, test_surface_st_init1);
   1264                 init_compute_resources(ctx, (int []) { 0, 1, -1 });
   1265                 init_sampler_states(ctx, 2);
   1266                 launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0,
   1267                             NULL);
   1268                 check_tex(ctx, 1, (is_int && is_signed ? test_surface_st_expects :
   1269                                    is_int && !is_signed ? test_surface_st_expectu :
   1270                                    test_surface_st_expectf), test_surface_st_check);
   1271                 destroy_sampler_states(ctx);
   1272                 destroy_compute_resources(ctx);
   1273                 destroy_tex(ctx);
   1274         }
   1275 
   1276         destroy_prog(ctx);
   1277 }
   1278 
   1279 /* test_barrier */
   1280 static void test_barrier_expect(void *p, int s, int x, int y)
   1281 {
   1282         *(uint32_t *)p = 31;
   1283 }
   1284 
   1285 static void test_barrier(struct context *ctx)
   1286 {
   1287         const char *src = "COMP\n"
   1288                 "DCL RES[0], BUFFER, RAW, WR\n"
   1289                 "DCL SV[0], BLOCK_ID[0]\n"
   1290                 "DCL SV[1], BLOCK_SIZE[0]\n"
   1291                 "DCL SV[2], THREAD_ID[0]\n"
   1292                 "DCL TEMP[0], LOCAL\n"
   1293                 "DCL TEMP[1], LOCAL\n"
   1294                 "DCL TEMP[2], LOCAL\n"
   1295                 "DCL TEMP[3], LOCAL\n"
   1296                 "IMM UINT32 { 1, 0, 0, 0 }\n"
   1297                 "IMM UINT32 { 4, 0, 0, 0 }\n"
   1298                 "IMM UINT32 { 32, 0, 0, 0 }\n"
   1299                 "\n"
   1300                 "    BGNSUB\n"
   1301                 "       UMUL TEMP[0].x, SV[2], IMM[1]\n"
   1302                 "       MOV TEMP[1].x, IMM[0].wwww\n"
   1303                 "       BGNLOOP\n"
   1304                 "               BARRIER\n"
   1305                 "               STORE RLOCAL.x, TEMP[0], TEMP[1]\n"
   1306                 "               BARRIER\n"
   1307                 "               MOV TEMP[2].x, IMM[0].wwww\n"
   1308                 "               BGNLOOP\n"
   1309                 "                       UMUL TEMP[3].x, TEMP[2], IMM[1]\n"
   1310                 "                       LOAD TEMP[3].x, RLOCAL, TEMP[3]\n"
   1311                 "                       USNE TEMP[3].x, TEMP[3], TEMP[1]\n"
   1312                 "                       IF TEMP[3]\n"
   1313                 "                               END\n"
   1314                 "                       ENDIF\n"
   1315                 "                       UADD TEMP[2].x, TEMP[2], IMM[0]\n"
   1316                 "                       USEQ TEMP[3].x, TEMP[2], SV[1]\n"
   1317                 "                       IF TEMP[3]\n"
   1318                 "                               BRK\n"
   1319                 "                       ENDIF\n"
   1320                 "               ENDLOOP\n"
   1321                 "               UADD TEMP[1].x, TEMP[1], IMM[0]\n"
   1322                 "               USEQ TEMP[2].x, TEMP[1], IMM[2]\n"
   1323                 "               IF TEMP[2]\n"
   1324                 "                       BRK\n"
   1325                 "               ENDIF\n"
   1326                 "       ENDLOOP\n"
   1327                 "       UMUL TEMP[1].x, SV[0], SV[1]\n"
   1328                 "       UMUL TEMP[1].x, TEMP[1], IMM[1]\n"
   1329                 "       UADD TEMP[1].x, TEMP[1], TEMP[0]\n"
   1330                 "       LOAD TEMP[0].x, RLOCAL, TEMP[0]\n"
   1331                 "       STORE RES[0].x, TEMP[1], TEMP[0]\n"
   1332                 "       RET\n"
   1333                 "    ENDSUB\n";
   1334 
   1335         printf("- %s\n", __func__);
   1336 
   1337         init_prog(ctx, 256, 0, 0, src, NULL);
   1338         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
   1339                  4096, 0, test_default_init);
   1340         init_compute_resources(ctx, (int []) { 0, -1 });
   1341         launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
   1342         check_tex(ctx, 0, test_barrier_expect, NULL);
   1343         destroy_compute_resources(ctx);
   1344         destroy_tex(ctx);
   1345         destroy_prog(ctx);
   1346 }
   1347 
   1348 /* test_atom_ops */
   1349 static void test_atom_ops_init(void *p, int s, int x, int y)
   1350 {
   1351         *(uint32_t *)p = 0xbad;
   1352 }
   1353 
   1354 static void test_atom_ops_expect(void *p, int s, int x, int y)
   1355 {
   1356         switch (x) {
   1357         case 0:
   1358                 *(uint32_t *)p = 0xce6c8eef;
   1359                 break;
   1360         case 1:
   1361                 *(uint32_t *)p = 0xdeadbeef;
   1362                 break;
   1363         case 2:
   1364                 *(uint32_t *)p = 0x11111111;
   1365                 break;
   1366         case 3:
   1367                 *(uint32_t *)p = 0x10011001;
   1368                 break;
   1369         case 4:
   1370                 *(uint32_t *)p = 0xdfbdbfff;
   1371                 break;
   1372         case 5:
   1373                 *(uint32_t *)p = 0x11111111;
   1374                 break;
   1375         case 6:
   1376                 *(uint32_t *)p = 0x11111111;
   1377                 break;
   1378         case 7:
   1379                 *(uint32_t *)p = 0xdeadbeef;
   1380                 break;
   1381         case 8:
   1382                 *(uint32_t *)p = 0xdeadbeef;
   1383                 break;
   1384         case 9:
   1385                 *(uint32_t *)p = 0x11111111;
   1386                 break;
   1387         }
   1388 }
   1389 
   1390 static void test_atom_ops(struct context *ctx, bool global)
   1391 {
   1392         const char *src = "COMP\n"
   1393                 "#ifdef TARGET_GLOBAL\n"
   1394                 "#define target RES[0]\n"
   1395                 "#else\n"
   1396                 "#define target RLOCAL\n"
   1397                 "#endif\n"
   1398                 ""
   1399                 "DCL RES[0], BUFFER, RAW, WR\n"
   1400                 "#define threadid SV[0]\n"
   1401                 "DCL threadid, THREAD_ID[0]\n"
   1402                 ""
   1403                 "#define offset TEMP[0]\n"
   1404                 "DCL offset, LOCAL\n"
   1405                 "#define tmp TEMP[1]\n"
   1406                 "DCL tmp, LOCAL\n"
   1407                 ""
   1408                 "#define k0 IMM[0]\n"
   1409                 "IMM UINT32 { 0, 0, 0, 0 }\n"
   1410                 "#define k1 IMM[1]\n"
   1411                 "IMM UINT32 { 1, 0, 0, 0 }\n"
   1412                 "#define k2 IMM[2]\n"
   1413                 "IMM UINT32 { 2, 0, 0, 0 }\n"
   1414                 "#define k3 IMM[3]\n"
   1415                 "IMM UINT32 { 3, 0, 0, 0 }\n"
   1416                 "#define k4 IMM[4]\n"
   1417                 "IMM UINT32 { 4, 0, 0, 0 }\n"
   1418                 "#define k5 IMM[5]\n"
   1419                 "IMM UINT32 { 5, 0, 0, 0 }\n"
   1420                 "#define k6 IMM[6]\n"
   1421                 "IMM UINT32 { 6, 0, 0, 0 }\n"
   1422                 "#define k7 IMM[7]\n"
   1423                 "IMM UINT32 { 7, 0, 0, 0 }\n"
   1424                 "#define k8 IMM[8]\n"
   1425                 "IMM UINT32 { 8, 0, 0, 0 }\n"
   1426                 "#define k9 IMM[9]\n"
   1427                 "IMM UINT32 { 9, 0, 0, 0 }\n"
   1428                 "#define korig IMM[10].xxxx\n"
   1429                 "#define karg IMM[10].yyyy\n"
   1430                 "IMM UINT32 { 3735928559, 286331153, 0, 0 }\n"
   1431                 "\n"
   1432                 "    BGNSUB\n"
   1433                 "       UMUL offset.x, threadid, k4\n"
   1434                 "       STORE target.x, offset, korig\n"
   1435                 "       USEQ tmp.x, threadid, k0\n"
   1436                 "       IF tmp\n"
   1437                 "               ATOMUADD tmp.x, target, offset, karg\n"
   1438                 "               ATOMUADD tmp.x, target, offset, tmp\n"
   1439                 "       ENDIF\n"
   1440                 "       USEQ tmp.x, threadid, k1\n"
   1441                 "       IF tmp\n"
   1442                 "               ATOMXCHG tmp.x, target, offset, karg\n"
   1443                 "               ATOMXCHG tmp.x, target, offset, tmp\n"
   1444                 "       ENDIF\n"
   1445                 "       USEQ tmp.x, threadid, k2\n"
   1446                 "       IF tmp\n"
   1447                 "               ATOMCAS tmp.x, target, offset, korig, karg\n"
   1448                 "               ATOMCAS tmp.x, target, offset, tmp, k0\n"
   1449                 "       ENDIF\n"
   1450                 "       USEQ tmp.x, threadid, k3\n"
   1451                 "       IF tmp\n"
   1452                 "               ATOMAND tmp.x, target, offset, karg\n"
   1453                 "               ATOMAND tmp.x, target, offset, tmp\n"
   1454                 "       ENDIF\n"
   1455                 "       USEQ tmp.x, threadid, k4\n"
   1456                 "       IF tmp\n"
   1457                 "               ATOMOR tmp.x, target, offset, karg\n"
   1458                 "               ATOMOR tmp.x, target, offset, tmp\n"
   1459                 "       ENDIF\n"
   1460                 "       USEQ tmp.x, threadid, k5\n"
   1461                 "       IF tmp\n"
   1462                 "               ATOMXOR tmp.x, target, offset, karg\n"
   1463                 "               ATOMXOR tmp.x, target, offset, tmp\n"
   1464                 "       ENDIF\n"
   1465                 "       USEQ tmp.x, threadid, k6\n"
   1466                 "       IF tmp\n"
   1467                 "               ATOMUMIN tmp.x, target, offset, karg\n"
   1468                 "               ATOMUMIN tmp.x, target, offset, tmp\n"
   1469                 "       ENDIF\n"
   1470                 "       USEQ tmp.x, threadid, k7\n"
   1471                 "       IF tmp\n"
   1472                 "               ATOMUMAX tmp.x, target, offset, karg\n"
   1473                 "               ATOMUMAX tmp.x, target, offset, tmp\n"
   1474                 "       ENDIF\n"
   1475                 "       USEQ tmp.x, threadid, k8\n"
   1476                 "       IF tmp\n"
   1477                 "               ATOMIMIN tmp.x, target, offset, karg\n"
   1478                 "               ATOMIMIN tmp.x, target, offset, tmp\n"
   1479                 "       ENDIF\n"
   1480                 "       USEQ tmp.x, threadid, k9\n"
   1481                 "       IF tmp\n"
   1482                 "               ATOMIMAX tmp.x, target, offset, karg\n"
   1483                 "               ATOMIMAX tmp.x, target, offset, tmp\n"
   1484                 "       ENDIF\n"
   1485                 "#ifdef TARGET_LOCAL\n"
   1486                 "       LOAD tmp.x, RLOCAL, offset\n"
   1487                 "       STORE RES[0].x, offset, tmp\n"
   1488                 "#endif\n"
   1489                 "       RET\n"
   1490                 "    ENDSUB\n";
   1491 
   1492         printf("- %s (%s)\n", __func__, global ? "global" : "local");
   1493 
   1494         init_prog(ctx, 40, 0, 0, src,
   1495                   (global ? "-DTARGET_GLOBAL" : "-DTARGET_LOCAL"));
   1496         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
   1497                  40, 0, test_atom_ops_init);
   1498         init_compute_resources(ctx, (int []) { 0, -1 });
   1499         launch_grid(ctx, (uint []){10, 1, 1}, (uint []){1, 1, 1}, 0, NULL);
   1500         check_tex(ctx, 0, test_atom_ops_expect, NULL);
   1501         destroy_compute_resources(ctx);
   1502         destroy_tex(ctx);
   1503         destroy_prog(ctx);
   1504 }
   1505 
   1506 /* test_atom_race */
   1507 static void test_atom_race_expect(void *p, int s, int x, int y)
   1508 {
   1509         *(uint32_t *)p = x & 0x20 ? 0x11111111 : 0xffffffff;
   1510 }
   1511 
   1512 static void test_atom_race(struct context *ctx, bool global)
   1513 {
   1514         const char *src = "COMP\n"
   1515                 "#ifdef TARGET_GLOBAL\n"
   1516                 "#define target RES[0]\n"
   1517                 "#else\n"
   1518                 "#define target RLOCAL\n"
   1519                 "#endif\n"
   1520                 ""
   1521                 "DCL RES[0], BUFFER, RAW, WR\n"
   1522                 ""
   1523                 "#define blockid SV[0]\n"
   1524                 "DCL blockid, BLOCK_ID[0]\n"
   1525                 "#define blocksz SV[1]\n"
   1526                 "DCL blocksz, BLOCK_SIZE[0]\n"
   1527                 "#define threadid SV[2]\n"
   1528                 "DCL threadid, THREAD_ID[0]\n"
   1529                 ""
   1530                 "#define offset TEMP[0]\n"
   1531                 "DCL offset, LOCAL\n"
   1532                 "#define arg TEMP[1]\n"
   1533                 "DCL arg, LOCAL\n"
   1534                 "#define count TEMP[2]\n"
   1535                 "DCL count, LOCAL\n"
   1536                 "#define vlocal TEMP[3]\n"
   1537                 "DCL vlocal, LOCAL\n"
   1538                 "#define vshared TEMP[4]\n"
   1539                 "DCL vshared, LOCAL\n"
   1540                 "#define last TEMP[5]\n"
   1541                 "DCL last, LOCAL\n"
   1542                 "#define tmp0 TEMP[6]\n"
   1543                 "DCL tmp0, LOCAL\n"
   1544                 "#define tmp1 TEMP[7]\n"
   1545                 "DCL tmp1, LOCAL\n"
   1546                 ""
   1547                 "#define k0 IMM[0]\n"
   1548                 "IMM UINT32 { 0, 0, 0, 0 }\n"
   1549                 "#define k1 IMM[1]\n"
   1550                 "IMM UINT32 { 1, 0, 0, 0 }\n"
   1551                 "#define k4 IMM[2]\n"
   1552                 "IMM UINT32 { 4, 0, 0, 0 }\n"
   1553                 "#define k32 IMM[3]\n"
   1554                 "IMM UINT32 { 32, 0, 0, 0 }\n"
   1555                 "#define k128 IMM[4]\n"
   1556                 "IMM UINT32 { 128, 0, 0, 0 }\n"
   1557                 "#define kdeadcafe IMM[5]\n"
   1558                 "IMM UINT32 { 3735931646, 0, 0, 0 }\n"
   1559                 "#define kallowed_set IMM[6]\n"
   1560                 "IMM UINT32 { 559035650, 0, 0, 0 }\n"
   1561                 "#define k11111111 IMM[7]\n"
   1562                 "IMM UINT32 { 286331153, 0, 0, 0 }\n"
   1563                 "\n"
   1564                 "    BGNSUB\n"
   1565                 "       MOV offset.x, threadid\n"
   1566                 "#ifdef TARGET_GLOBAL\n"
   1567                 "       UMUL tmp0.x, blockid, blocksz\n"
   1568                 "       UADD offset.x, offset, tmp0\n"
   1569                 "#endif\n"
   1570                 "       UMUL offset.x, offset, k4\n"
   1571                 "       USLT tmp0.x, threadid, k32\n"
   1572                 "       STORE target.x, offset, k0\n"
   1573                 "       BARRIER\n"
   1574                 "       IF tmp0\n"
   1575                 "               MOV vlocal.x, k0\n"
   1576                 "               MOV arg.x, kdeadcafe\n"
   1577                 "               BGNLOOP\n"
   1578                 "                       INEG arg.x, arg\n"
   1579                 "                       ATOMUADD vshared.x, target, offset, arg\n"
   1580                 "                       SFENCE target\n"
   1581                 "                       USNE tmp0.x, vshared, vlocal\n"
   1582                 "                       IF tmp0\n"
   1583                 "                               BRK\n"
   1584                 "                       ENDIF\n"
   1585                 "                       UADD vlocal.x, vlocal, arg\n"
   1586                 "               ENDLOOP\n"
   1587                 "               UADD vlocal.x, vshared, arg\n"
   1588                 "               LOAD vshared.x, target, offset\n"
   1589                 "               USEQ tmp0.x, vshared, vlocal\n"
   1590                 "               STORE target.x, offset, tmp0\n"
   1591                 "       ELSE\n"
   1592                 "               UADD offset.x, offset, -k128\n"
   1593                 "               MOV count.x, k0\n"
   1594                 "               MOV last.x, k0\n"
   1595                 "               BGNLOOP\n"
   1596                 "                       LOAD vshared.x, target, offset\n"
   1597                 "                       USEQ tmp0.x, vshared, kallowed_set.xxxx\n"
   1598                 "                       USEQ tmp1.x, vshared, kallowed_set.yyyy\n"
   1599                 "                       OR tmp0.x, tmp0, tmp1\n"
   1600                 "                       IF tmp0\n"
   1601                 "                               USEQ tmp0.x, vshared, last\n"
   1602                 "                               IF tmp0\n"
   1603                 "                                       CONT\n"
   1604                 "                               ENDIF\n"
   1605                 "                               MOV last.x, vshared\n"
   1606                 "                       ELSE\n"
   1607                 "                               END\n"
   1608                 "                       ENDIF\n"
   1609                 "                       UADD count.x, count, k1\n"
   1610                 "                       USEQ tmp0.x, count, k128\n"
   1611                 "                       IF tmp0\n"
   1612                 "                               BRK\n"
   1613                 "                       ENDIF\n"
   1614                 "               ENDLOOP\n"
   1615                 "               ATOMXCHG tmp0.x, target, offset, k11111111\n"
   1616                 "               UADD offset.x, offset, k128\n"
   1617                 "               ATOMXCHG tmp0.x, target, offset, k11111111\n"
   1618                 "               SFENCE target\n"
   1619                 "       ENDIF\n"
   1620                 "#ifdef TARGET_LOCAL\n"
   1621                 "       LOAD tmp0.x, RLOCAL, offset\n"
   1622                 "       UMUL tmp1.x, blockid, blocksz\n"
   1623                 "       UMUL tmp1.x, tmp1, k4\n"
   1624                 "       UADD offset.x, offset, tmp1\n"
   1625                 "       STORE RES[0].x, offset, tmp0\n"
   1626                 "#endif\n"
   1627                 "       RET\n"
   1628                 "    ENDSUB\n";
   1629 
   1630         printf("- %s (%s)\n", __func__, global ? "global" : "local");
   1631 
   1632         init_prog(ctx, 256, 0, 0, src,
   1633                   (global ? "-DTARGET_GLOBAL" : "-DTARGET_LOCAL"));
   1634         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
   1635                  4096, 0, test_default_init);
   1636         init_compute_resources(ctx, (int []) { 0, -1 });
   1637         launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
   1638         check_tex(ctx, 0, test_atom_race_expect, NULL);
   1639         destroy_compute_resources(ctx);
   1640         destroy_tex(ctx);
   1641         destroy_prog(ctx);
   1642 }
   1643 
   1644 int main(int argc, char *argv[])
   1645 {
   1646         struct context *ctx = CALLOC_STRUCT(context);
   1647 
   1648         unsigned tests = (argc > 1) ? strtoul(argv[1], NULL, 0) : ~0;
   1649 
   1650         init_ctx(ctx);
   1651 
   1652         if (tests & (1 << 0))
   1653            test_system_values(ctx);
   1654         if (tests & (1 << 1))
   1655            test_resource_access(ctx);
   1656         if (tests & (1 << 2))
   1657            test_function_calls(ctx);
   1658         if (tests & (1 << 3))
   1659            test_input_global(ctx);
   1660         if (tests & (1 << 4))
   1661            test_private(ctx);
   1662         if (tests & (1 << 5))
   1663            test_local(ctx);
   1664         if (tests & (1 << 6))
   1665            test_sample(ctx);
   1666         if (tests & (1 << 7))
   1667            test_many_kern(ctx);
   1668         if (tests & (1 << 8))
   1669            test_constant(ctx);
   1670         if (tests & (1 << 9))
   1671            test_resource_indirect(ctx);
   1672         if (tests & (1 << 10))
   1673            test_surface_ld(ctx);
   1674         if (tests & (1 << 11))
   1675            test_surface_st(ctx);
   1676         if (tests & (1 << 12))
   1677            test_barrier(ctx);
   1678         if (tests & (1 << 13))
   1679            test_atom_ops(ctx, true);
   1680         if (tests & (1 << 14))
   1681            test_atom_race(ctx, true);
   1682         if (tests & (1 << 15))
   1683            test_atom_ops(ctx, false);
   1684         if (tests & (1 << 16))
   1685            test_atom_race(ctx, false);
   1686 
   1687         destroy_ctx(ctx);
   1688 
   1689         return 0;
   1690 }
   1691