Home | History | Annotate | Download | only in trivial
      1 /*
      2  * Copyright (C) 2011 Francisco Jerez.
      3  * All Rights Reserved.
      4  *
      5  * Permission is hereby granted, free of charge, to any person obtaining
      6  * a copy of this software and associated documentation files (the
      7  * "Software"), to deal in the Software without restriction, including
      8  * without limitation the rights to use, copy, modify, merge, publish,
      9  * distribute, sublicense, and/or sell copies of the Software, and to
     10  * permit persons to whom the Software is furnished to do so, subject to
     11  * the following conditions:
     12  *
     13  * The above copyright notice and this permission notice (including the
     14  * next paragraph) shall be included in all copies or substantial
     15  * portions of the Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     19  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
     20  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
     21  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
     22  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
     23  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     24  *
     25  */
     26 
#include <assert.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>

#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_sampler.h"
#include "util/u_format.h"
#include "tgsi/tgsi_text.h"
#include "pipe-loader/pipe_loader.h"
     42 
/* Number of entries in each per-slot array of struct context. */
#define MAX_RESOURCES 4

/*
 * State shared by all the tests in this file: the loaded device, its
 * screen and context, plus the objects the helpers below create and
 * destroy between tests.
 */
struct context {
        /* Device handle filled in by pipe_loader_probe(). */
        struct pipe_loader_device *dev;
        /* Screen created from `dev`. */
        struct pipe_screen *screen;
        /* Context created on `screen`. */
        struct pipe_context *pipe;
        /* Compute state object returned by create_compute_state(). */
        void *hwcs;
        /* Sampler state objects returned by create_sampler_state(). */
        void *hwsmp[MAX_RESOURCES];
        /* Resources created by init_tex(), indexed by slot. */
        struct pipe_resource *tex[MAX_RESOURCES];
        /* Per-slot flag recorded by init_tex(); used as the surface's
         * `writable` field by init_compute_resources(). */
        bool tex_rw[MAX_RESOURCES];
        /* Sampler views created by init_sampler_views(). */
        struct pipe_sampler_view *view[MAX_RESOURCES];
        /* Surfaces created by init_compute_resources(). */
        struct pipe_surface *surf[MAX_RESOURCES];
};
     56 
     57 #define DUMP_COMPUTE_PARAM(p, c) do {                                   \
     58                 uint64_t __v[4];                                        \
     59                 int __i, __n;                                           \
     60                                                                         \
     61                 __n = ctx->screen->get_compute_param(ctx->screen, c, __v); \
     62                 printf("%s: {", #c);                                    \
     63                                                                         \
     64                 for (__i = 0; __i < __n / sizeof(*__v); ++__i)          \
     65                         printf(" %"PRIu64, __v[__i]);                   \
     66                                                                         \
     67                 printf(" }\n");                                         \
     68         } while (0)
     69 
     70 static void init_ctx(struct context *ctx)
     71 {
     72         int ret;
     73 
     74         ret = pipe_loader_probe(&ctx->dev, 1);
     75         assert(ret);
     76 
     77         ctx->screen = pipe_loader_create_screen(ctx->dev, PIPE_SEARCH_DIR);
     78         assert(ctx->screen);
     79 
     80         ctx->pipe = ctx->screen->context_create(ctx->screen, NULL);
     81         assert(ctx->pipe);
     82 
     83         DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_GRID_DIMENSION);
     84         DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_MAX_GRID_SIZE);
     85         DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
     86 }
     87 
     88 static void destroy_ctx(struct context *ctx)
     89 {
     90         ctx->pipe->destroy(ctx->pipe);
     91         ctx->screen->destroy(ctx->screen);
     92         pipe_loader_release(&ctx->dev, 1);
     93         FREE(ctx);
     94 }
     95 
     96 static char *
     97 preprocess_prog(struct context *ctx, const char *src, const char *defs)
     98 {
     99         const char header[] =
    100                 "#define RGLOBAL        RES[32767]\n"
    101                 "#define RLOCAL         RES[32766]\n"
    102                 "#define RPRIVATE       RES[32765]\n"
    103                 "#define RINPUT         RES[32764]\n";
    104         char cmd[512];
    105         char tmp[] = "/tmp/test-compute.tgsi-XXXXXX";
    106         char *buf;
    107         int fd, ret;
    108         struct stat st;
    109         FILE *p;
    110 
    111         /* Open a temporary file */
    112         fd = mkstemp(tmp);
    113         assert(fd >= 0);
    114         snprintf(cmd, sizeof(cmd), "cpp -P -nostdinc -undef %s > %s",
    115                  defs ? defs : "", tmp);
    116 
    117         /* Preprocess */
    118         p = popen(cmd, "w");
    119         fwrite(header, strlen(header), 1, p);
    120         fwrite(src, strlen(src), 1, p);
    121         ret = pclose(p);
    122         assert(!ret);
    123 
    124         /* Read back */
    125         ret = fstat(fd, &st);
    126         assert(!ret);
    127 
    128         buf = malloc(st.st_size + 1);
    129         ret = read(fd, buf, st.st_size);
    130         assert(ret == st.st_size);
    131         buf[ret] = 0;
    132 
    133         /* Clean up */
    134         close(fd);
    135         unlink(tmp);
    136 
    137         return buf;
    138 }
    139 
    140 static void init_prog(struct context *ctx, unsigned local_sz,
    141                       unsigned private_sz, unsigned input_sz,
    142                       const char *src, const char *defs)
    143 {
    144         struct pipe_context *pipe = ctx->pipe;
    145         struct tgsi_token prog[1024];
    146         struct pipe_compute_state cs = {
    147                 .prog = prog,
    148                 .req_local_mem = local_sz,
    149                 .req_private_mem = private_sz,
    150                 .req_input_mem = input_sz
    151         };
    152         char *psrc = preprocess_prog(ctx, src, defs);
    153         int ret;
    154 
    155         ret = tgsi_text_translate(psrc, prog, Elements(prog));
    156         assert(ret);
    157         free(psrc);
    158 
    159         ctx->hwcs = pipe->create_compute_state(pipe, &cs);
    160         assert(ctx->hwcs);
    161 
    162         pipe->bind_compute_state(pipe, ctx->hwcs);
    163 }
    164 
    165 static void destroy_prog(struct context *ctx)
    166 {
    167         struct pipe_context *pipe = ctx->pipe;
    168 
    169         pipe->delete_compute_state(pipe, ctx->hwcs);
    170         ctx->hwcs = NULL;
    171 }
    172 
    173 static void init_tex(struct context *ctx, int slot,
    174                      enum pipe_texture_target target, bool rw,
    175                      enum pipe_format format, int w, int h,
    176                      void (*init)(void *, int, int, int))
    177 {
    178         struct pipe_context *pipe = ctx->pipe;
    179         struct pipe_resource **tex = &ctx->tex[slot];
    180         struct pipe_resource ttex = {
    181                 .target = target,
    182                 .format = format,
    183                 .width0 = w,
    184                 .height0 = h,
    185                 .depth0 = 1,
    186                 .array_size = 1,
    187                 .bind = (PIPE_BIND_SAMPLER_VIEW |
    188                          PIPE_BIND_COMPUTE_RESOURCE |
    189                          PIPE_BIND_GLOBAL)
    190         };
    191         int dx = util_format_get_blocksize(format);
    192         int dy = util_format_get_stride(format, w);
    193         int nx = (target == PIPE_BUFFER ? (w / dx) :
    194                   util_format_get_nblocksx(format, w));
    195         int ny = (target == PIPE_BUFFER ? 1 :
    196                   util_format_get_nblocksy(format, h));
    197         struct pipe_transfer *xfer;
    198         char *map;
    199         int x, y;
    200 
    201         *tex = ctx->screen->resource_create(ctx->screen, &ttex);
    202         assert(*tex);
    203 
    204         xfer = pipe->get_transfer(pipe, *tex, 0, PIPE_TRANSFER_WRITE,
    205                                   &(struct pipe_box) { .width = w,
    206                                                   .height = h,
    207                                                   .depth = 1 });
    208         assert(xfer);
    209 
    210         map = pipe->transfer_map(pipe, xfer);
    211         assert(map);
    212 
    213         for (y = 0; y < ny; ++y) {
    214                 for (x = 0; x < nx; ++x) {
    215                         init(map + y * dy + x * dx, slot, x, y);
    216                 }
    217         }
    218 
    219         pipe->transfer_unmap(pipe, xfer);
    220         pipe->transfer_destroy(pipe, xfer);
    221 
    222         ctx->tex_rw[slot] = rw;
    223 }
    224 
    225 static bool default_check(void *x, void *y, int sz) {
    226         return !memcmp(x, y, sz);
    227 }
    228 
    229 static void check_tex(struct context *ctx, int slot,
    230                       void (*expect)(void *, int, int, int),
    231                       bool (*check)(void *, void *, int))
    232 {
    233         struct pipe_context *pipe = ctx->pipe;
    234         struct pipe_resource *tex = ctx->tex[slot];
    235         int dx = util_format_get_blocksize(tex->format);
    236         int dy = util_format_get_stride(tex->format, tex->width0);
    237         int nx = (tex->target == PIPE_BUFFER ? (tex->width0 / dx) :
    238                   util_format_get_nblocksx(tex->format, tex->width0));
    239         int ny = (tex->target == PIPE_BUFFER ? 1 :
    240                   util_format_get_nblocksy(tex->format, tex->height0));
    241         struct pipe_transfer *xfer;
    242         char *map;
    243         int x, y, i;
    244         int err = 0;
    245 
    246         if (!check)
    247                 check = default_check;
    248 
    249         xfer = pipe->get_transfer(pipe, tex, 0, PIPE_TRANSFER_READ,
    250                                   &(struct pipe_box) { .width = tex->width0,
    251                                         .height = tex->height0,
    252                                         .depth = 1 });
    253         assert(xfer);
    254 
    255         map = pipe->transfer_map(pipe, xfer);
    256         assert(map);
    257 
    258         for (y = 0; y < ny; ++y) {
    259                 for (x = 0; x < nx; ++x) {
    260                         uint32_t exp[4];
    261                         uint32_t *res = (uint32_t *)(map + y * dy + x * dx);
    262 
    263                         expect(exp, slot, x, y);
    264                         if (check(res, exp, dx) || (++err) > 20)
    265                                 continue;
    266 
    267                         if (dx < 4) {
    268                                 uint32_t u = 0, v = 0;
    269 
    270                                 for (i = 0; i < dx; i++) {
    271                                         u |= ((uint8_t *)exp)[i] << (8 * i);
    272                                         v |= ((uint8_t *)res)[i] << (8 * i);
    273                                 }
    274                                 printf("(%d, %d): got 0x%x, expected 0x%x\n",
    275                                        x, y, v, u);
    276                         } else {
    277                                 for (i = 0; i < dx / 4; i++) {
    278                                         printf("(%d, %d)[%d]: got 0x%x/%f,"
    279                                                " expected 0x%x/%f\n", x, y, i,
    280                                                res[i], ((float *)res)[i],
    281                                                exp[i], ((float *)exp)[i]);
    282                                 }
    283                         }
    284                 }
    285         }
    286 
    287         pipe->transfer_unmap(pipe, xfer);
    288         pipe->transfer_destroy(pipe, xfer);
    289 
    290         if (err)
    291                 printf("(%d, %d): \x1b[31mFAIL\x1b[0m (%d)\n", x, y, err);
    292         else
    293                 printf("(%d, %d): \x1b[32mOK\x1b[0m\n", x, y);
    294 }
    295 
    296 static void destroy_tex(struct context *ctx)
    297 {
    298         int i;
    299 
    300         for (i = 0; i < MAX_RESOURCES; ++i) {
    301                 if (ctx->tex[i])
    302                         pipe_resource_reference(&ctx->tex[i], NULL);
    303         }
    304 }
    305 
    306 static void init_sampler_views(struct context *ctx, const int *slots)
    307 {
    308         struct pipe_context *pipe = ctx->pipe;
    309         struct pipe_sampler_view tview;
    310         int i;
    311 
    312         for (i = 0; *slots >= 0; ++i, ++slots) {
    313                 u_sampler_view_default_template(&tview, ctx->tex[*slots],
    314                                                 ctx->tex[*slots]->format);
    315 
    316                 ctx->view[i] = pipe->create_sampler_view(pipe, ctx->tex[*slots],
    317                                                          &tview);
    318                 assert(ctx->view[i]);
    319         }
    320 
    321         pipe->set_compute_sampler_views(pipe, 0, i, ctx->view);
    322 }
    323 
    324 static void destroy_sampler_views(struct context *ctx)
    325 {
    326         struct pipe_context *pipe = ctx->pipe;
    327         int i;
    328 
    329         pipe->set_compute_sampler_views(pipe, 0, MAX_RESOURCES, NULL);
    330 
    331         for (i = 0; i < MAX_RESOURCES; ++i) {
    332                 if (ctx->view[i]) {
    333                         pipe->sampler_view_destroy(pipe, ctx->view[i]);
    334                         ctx->view[i] = NULL;
    335                 }
    336         }
    337 }
    338 
    339 static void init_compute_resources(struct context *ctx, const int *slots)
    340 {
    341         struct pipe_context *pipe = ctx->pipe;
    342         int i;
    343 
    344         for (i = 0; *slots >= 0; ++i, ++slots) {
    345                 struct pipe_surface tsurf = {
    346                         .format = ctx->tex[*slots]->format,
    347                         .usage = ctx->tex[*slots]->bind,
    348                         .writable = ctx->tex_rw[*slots]
    349                 };
    350 
    351                 if (ctx->tex[*slots]->target == PIPE_BUFFER)
    352                         tsurf.u.buf.last_element = ctx->tex[*slots]->width0 - 1;
    353 
    354                 ctx->surf[i] = pipe->create_surface(pipe, ctx->tex[*slots],
    355                                                     &tsurf);
    356                 assert(ctx->surf[i]);
    357         }
    358 
    359         pipe->set_compute_resources(pipe, 0, i, ctx->surf);
    360 }
    361 
    362 static void destroy_compute_resources(struct context *ctx)
    363 {
    364         struct pipe_context *pipe = ctx->pipe;
    365         int i;
    366 
    367         pipe->set_compute_resources(pipe, 0, MAX_RESOURCES, NULL);
    368 
    369         for (i = 0; i < MAX_RESOURCES; ++i) {
    370                 if (ctx->surf[i]) {
    371                         pipe->surface_destroy(pipe, ctx->surf[i]);
    372                         ctx->surf[i] = NULL;
    373                 }
    374         }
    375 }
    376 
    377 static void init_sampler_states(struct context *ctx, int n)
    378 {
    379         struct pipe_context *pipe = ctx->pipe;
    380         struct pipe_sampler_state smp = {
    381                 .normalized_coords = 1,
    382         };
    383         int i;
    384 
    385         for (i = 0; i < n; ++i) {
    386                 ctx->hwsmp[i] = pipe->create_sampler_state(pipe, &smp);
    387                 assert(ctx->hwsmp[i]);
    388         }
    389 
    390         pipe->bind_compute_sampler_states(pipe, 0, i, ctx->hwsmp);
    391 }
    392 
    393 static void destroy_sampler_states(struct context *ctx)
    394 {
    395         struct pipe_context *pipe = ctx->pipe;
    396         int i;
    397 
    398         pipe->bind_compute_sampler_states(pipe, 0, MAX_RESOURCES, NULL);
    399 
    400         for (i = 0; i < MAX_RESOURCES; ++i) {
    401                 if (ctx->hwsmp[i]) {
    402                         pipe->delete_sampler_state(pipe, ctx->hwsmp[i]);
    403                         ctx->hwsmp[i] = NULL;
    404                 }
    405         }
    406 }
    407 
    408 static void init_globals(struct context *ctx, const int *slots,
    409                          uint32_t **handles)
    410 {
    411         struct pipe_context *pipe = ctx->pipe;
    412         struct pipe_resource *res[MAX_RESOURCES];
    413         int i;
    414 
    415         for (i = 0; *slots >= 0; ++i, ++slots)
    416                 res[i] = ctx->tex[*slots];
    417 
    418         pipe->set_global_binding(pipe, 0, i, res, handles);
    419 }
    420 
    421 static void destroy_globals(struct context *ctx)
    422 {
    423         struct pipe_context *pipe = ctx->pipe;
    424 
    425         pipe->set_global_binding(pipe, 0, MAX_RESOURCES, NULL, NULL);
    426 }
    427 
    428 static void launch_grid(struct context *ctx, const uint *block_layout,
    429                         const uint *grid_layout, uint32_t pc,
    430                         const void *input)
    431 {
    432         struct pipe_context *pipe = ctx->pipe;
    433 
    434         pipe->launch_grid(pipe, block_layout, grid_layout, pc, input);
    435 }
    436 
    437 static void test_system_values(struct context *ctx)
    438 {
    439         const char *src = "COMP\n"
    440                 "DCL RES[0], BUFFER, RAW, WR\n"
    441                 "DCL SV[0], BLOCK_ID[0]\n"
    442                 "DCL SV[1], BLOCK_SIZE[0]\n"
    443                 "DCL SV[2], GRID_SIZE[0]\n"
    444                 "DCL SV[3], THREAD_ID[0]\n"
    445                 "DCL TEMP[0], LOCAL\n"
    446                 "DCL TEMP[1], LOCAL\n"
    447                 "IMM UINT32 { 64, 0, 0, 0 }\n"
    448                 "IMM UINT32 { 16, 0, 0, 0 }\n"
    449                 "IMM UINT32 { 0, 0, 0, 0 }\n"
    450                 "\n"
    451                 "BGNSUB"
    452                 "  UMUL TEMP[0], SV[0], SV[1]\n"
    453                 "  UADD TEMP[0], TEMP[0], SV[3]\n"
    454                 "  UMUL TEMP[1], SV[1], SV[2]\n"
    455                 "  UMUL TEMP[0].w, TEMP[0], TEMP[1].zzzz\n"
    456                 "  UMUL TEMP[0].zw, TEMP[0], TEMP[1].yyyy\n"
    457                 "  UMUL TEMP[0].yzw, TEMP[0], TEMP[1].xxxx\n"
    458                 "  UADD TEMP[0].xy, TEMP[0].xyxy, TEMP[0].zwzw\n"
    459                 "  UADD TEMP[0].x, TEMP[0].xxxx, TEMP[0].yyyy\n"
    460                 "  UMUL TEMP[0].x, TEMP[0], IMM[0]\n"
    461                 "  STORE RES[0].xyzw, TEMP[0], SV[0]\n"
    462                 "  UADD TEMP[0].x, TEMP[0], IMM[1]\n"
    463                 "  STORE RES[0].xyzw, TEMP[0], SV[1]\n"
    464                 "  UADD TEMP[0].x, TEMP[0], IMM[1]\n"
    465                 "  STORE RES[0].xyzw, TEMP[0], SV[2]\n"
    466                 "  UADD TEMP[0].x, TEMP[0], IMM[1]\n"
    467                 "  STORE RES[0].xyzw, TEMP[0], SV[3]\n"
    468                 "  RET\n"
    469                 "ENDSUB\n";
    470         void init(void *p, int s, int x, int y) {
    471                 *(uint32_t *)p = 0xdeadbeef;
    472         }
    473         void expect(void *p, int s, int x, int y) {
    474                 int id = x / 16, sv = (x % 16) / 4, c = x % 4;
    475                 int tid[] = { id % 20, (id % 240) / 20, id / 240, 0 };
    476                 int bsz[] = { 4, 3, 5, 1};
    477                 int gsz[] = { 5, 4, 1, 1};
    478 
    479                 switch (sv) {
    480                 case 0:
    481                         *(uint32_t *)p = tid[c] / bsz[c];
    482                         break;
    483                 case 1:
    484                         *(uint32_t *)p = bsz[c];
    485                         break;
    486                 case 2:
    487                         *(uint32_t *)p = gsz[c];
    488                         break;
    489                 case 3:
    490                         *(uint32_t *)p = tid[c] % bsz[c];
    491                         break;
    492                 }
    493         }
    494 
    495         printf("- %s\n", __func__);
    496 
    497         init_prog(ctx, 0, 0, 0, src, NULL);
    498         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
    499                  76800, 0, init);
    500         init_compute_resources(ctx, (int []) { 0, -1 });
    501         launch_grid(ctx, (uint []){4, 3, 5}, (uint []){5, 4, 1}, 0, NULL);
    502         check_tex(ctx, 0, expect, NULL);
    503         destroy_compute_resources(ctx);
    504         destroy_tex(ctx);
    505         destroy_prog(ctx);
    506 }
    507 
    508 static void test_resource_access(struct context *ctx)
    509 {
    510         const char *src = "COMP\n"
    511                 "DCL RES[0], BUFFER, RAW, WR\n"
    512                 "DCL RES[1], 2D, RAW, WR\n"
    513                 "DCL SV[0], BLOCK_ID[0]\n"
    514                 "DCL TEMP[0], LOCAL\n"
    515                 "DCL TEMP[1], LOCAL\n"
    516                 "IMM UINT32 { 15, 0, 0, 0 }\n"
    517                 "IMM UINT32 { 16, 1, 0, 0 }\n"
    518                 "\n"
    519                 "    BGNSUB\n"
    520                 "       UADD TEMP[0].x, SV[0].xxxx, SV[0].yyyy\n"
    521                 "       AND TEMP[0].x, TEMP[0], IMM[0]\n"
    522                 "       UMUL TEMP[0].x, TEMP[0], IMM[1]\n"
    523                 "       LOAD TEMP[0].xyzw, RES[0], TEMP[0]\n"
    524                 "       UMUL TEMP[1], SV[0], IMM[1]\n"
    525                 "       STORE RES[1].xyzw, TEMP[1], TEMP[0]\n"
    526                 "       RET\n"
    527                 "    ENDSUB\n";
    528         void init0(void *p, int s, int x, int y) {
    529                 *(float *)p = 8.0 - (float)x;
    530         }
    531         void init1(void *p, int s, int x, int y) {
    532                 *(uint32_t *)p = 0xdeadbeef;
    533         }
    534         void expect(void *p, int s, int x, int y) {
    535                 *(float *)p = 8.0 - (float)((x + 4*y) & 0x3f);
    536         }
    537 
    538         printf("- %s\n", __func__);
    539 
    540         init_prog(ctx, 0, 0, 0, src, NULL);
    541         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
    542                  256, 0, init0);
    543         init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
    544                  60, 12, init1);
    545         init_compute_resources(ctx, (int []) { 0, 1, -1 });
    546         launch_grid(ctx, (uint []){1, 1, 1}, (uint []){15, 12, 1}, 0, NULL);
    547         check_tex(ctx, 1, expect, NULL);
    548         destroy_compute_resources(ctx);
    549         destroy_tex(ctx);
    550         destroy_prog(ctx);
    551 }
    552 
    553 static void test_function_calls(struct context *ctx)
    554 {
    555         const char *src = "COMP\n"
    556                 "DCL RES[0], 2D, RAW, WR\n"
    557                 "DCL SV[0], BLOCK_ID[0]\n"
    558                 "DCL SV[1], BLOCK_SIZE[0]\n"
    559                 "DCL SV[2], GRID_SIZE[0]\n"
    560                 "DCL SV[3], THREAD_ID[0]\n"
    561                 "DCL TEMP[0]\n"
    562                 "DCL TEMP[1]\n"
    563                 "DCL TEMP[2], LOCAL\n"
    564                 "IMM UINT32 { 0, 11, 22, 33 }\n"
    565                 "IMM FLT32 { 11, 33, 55, 99 }\n"
    566                 "IMM UINT32 { 4, 1, 0, 0 }\n"
    567                 "IMM UINT32 { 12, 0, 0, 0 }\n"
    568                 "\n"
    569                 "00: BGNSUB\n"
    570                 "01:  UMUL TEMP[0].x, TEMP[0], TEMP[0]\n"
    571                 "02:  UADD TEMP[1].x, TEMP[1], IMM[2].yyyy\n"
    572                 "03:  USLT TEMP[0].x, TEMP[0], IMM[0]\n"
    573                 "04:  RET\n"
    574                 "05: ENDSUB\n"
    575                 "06: BGNSUB\n"
    576                 "07:  UMUL TEMP[0].x, TEMP[0], TEMP[0]\n"
    577                 "08:  UADD TEMP[1].x, TEMP[1], IMM[2].yyyy\n"
    578                 "09:  USLT TEMP[0].x, TEMP[0], IMM[0].yyyy\n"
    579                 "10:  IF TEMP[0].xxxx\n"
    580                 "11:   CAL :0\n"
    581                 "12:  ENDIF\n"
    582                 "13:  RET\n"
    583                 "14: ENDSUB\n"
    584                 "15: BGNSUB\n"
    585                 "16:  UMUL TEMP[2], SV[0], SV[1]\n"
    586                 "17:  UADD TEMP[2], TEMP[2], SV[3]\n"
    587                 "18:  UMUL TEMP[2], TEMP[2], IMM[2]\n"
    588                 "00:  MOV TEMP[1].x, IMM[2].wwww\n"
    589                 "19:  LOAD TEMP[0].x, RES[0].xxxx, TEMP[2]\n"
    590                 "20:  CAL :6\n"
    591                 "21:  STORE RES[0].x, TEMP[2], TEMP[1].xxxx\n"
    592                 "22:  RET\n"
    593                 "23: ENDSUB\n";
    594         void init(void *p, int s, int x, int y) {
    595                 *(uint32_t *)p = 15 * y + x;
    596         }
    597         void expect(void *p, int s, int x, int y) {
    598                 *(uint32_t *)p = (15 * y + x) < 4 ? 2 : 1 ;
    599         }
    600 
    601         printf("- %s\n", __func__);
    602 
    603         init_prog(ctx, 0, 0, 0, src, NULL);
    604         init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
    605                  15, 12, init);
    606         init_compute_resources(ctx, (int []) { 0, -1 });
    607         launch_grid(ctx, (uint []){3, 3, 3}, (uint []){5, 4, 1}, 15, NULL);
    608         check_tex(ctx, 0, expect, NULL);
    609         destroy_compute_resources(ctx);
    610         destroy_tex(ctx);
    611         destroy_prog(ctx);
    612 }
    613 
    614 static void test_input_global(struct context *ctx)
    615 {
    616         const char *src = "COMP\n"
    617                 "DCL SV[0], THREAD_ID[0]\n"
    618                 "DCL TEMP[0], LOCAL\n"
    619                 "DCL TEMP[1], LOCAL\n"
    620                 "IMM UINT32 { 8, 0, 0, 0 }\n"
    621                 "\n"
    622                 "    BGNSUB\n"
    623                 "       UMUL TEMP[0], SV[0], IMM[0]\n"
    624                 "       LOAD TEMP[1].xy, RINPUT, TEMP[0]\n"
    625                 "       LOAD TEMP[0].x, RGLOBAL, TEMP[1].yyyy\n"
    626                 "       UADD TEMP[1].x, TEMP[0], -TEMP[1]\n"
    627                 "       STORE RGLOBAL.x, TEMP[1].yyyy, TEMP[1]\n"
    628                 "       RET\n"
    629                 "    ENDSUB\n";
    630         void init(void *p, int s, int x, int y) {
    631                 *(uint32_t *)p = 0xdeadbeef;
    632         }
    633         void expect(void *p, int s, int x, int y) {
    634                 *(uint32_t *)p = 0xdeadbeef - (x == 0 ? 0x10001 + 2 * s : 0);
    635         }
    636         uint32_t input[8] = { 0x10001, 0x10002, 0x10003, 0x10004,
    637                               0x10005, 0x10006, 0x10007, 0x10008 };
    638 
    639         printf("- %s\n", __func__);
    640 
    641         init_prog(ctx, 0, 0, 32, src, NULL);
    642         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init);
    643         init_tex(ctx, 1, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init);
    644         init_tex(ctx, 2, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init);
    645         init_tex(ctx, 3, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init);
    646         init_globals(ctx, (int []){ 0, 1, 2, 3, -1 },
    647                      (uint32_t *[]){ &input[1], &input[3],
    648                                      &input[5], &input[7] });
    649         launch_grid(ctx, (uint []){4, 1, 1}, (uint []){1, 1, 1}, 0, input);
    650         check_tex(ctx, 0, expect, NULL);
    651         check_tex(ctx, 1, expect, NULL);
    652         check_tex(ctx, 2, expect, NULL);
    653         check_tex(ctx, 3, expect, NULL);
    654         destroy_globals(ctx);
    655         destroy_tex(ctx);
    656         destroy_prog(ctx);
    657 }
    658 
    659 static void test_private(struct context *ctx)
    660 {
    661         const char *src = "COMP\n"
    662                 "DCL RES[0], BUFFER, RAW, WR\n"
    663                 "DCL SV[0], BLOCK_ID[0]\n"
    664                 "DCL SV[1], BLOCK_SIZE[0]\n"
    665                 "DCL SV[2], THREAD_ID[0]\n"
    666                 "DCL TEMP[0], LOCAL\n"
    667                 "DCL TEMP[1], LOCAL\n"
    668                 "DCL TEMP[2], LOCAL\n"
    669                 "IMM UINT32 { 128, 0, 0, 0 }\n"
    670                 "IMM UINT32 { 4, 0, 0, 0 }\n"
    671                 "\n"
    672                 "    BGNSUB\n"
    673                 "       UMUL TEMP[0].x, SV[0], SV[1]\n"
    674                 "       UADD TEMP[0].x, TEMP[0], SV[2]\n"
    675                 "       MOV TEMP[1].x, IMM[0].wwww\n"
    676                 "       BGNLOOP\n"
    677                 "               USEQ TEMP[2].x, TEMP[1], IMM[0]\n"
    678                 "               IF TEMP[2]\n"
    679                 "                       BRK\n"
    680                 "               ENDIF\n"
    681                 "               UDIV TEMP[2].x, TEMP[1], IMM[1]\n"
    682                 "               UADD TEMP[2].x, TEMP[2], TEMP[0]\n"
    683                 "               STORE RPRIVATE.x, TEMP[1], TEMP[2]\n"
    684                 "               UADD TEMP[1].x, TEMP[1], IMM[1]\n"
    685                 "       ENDLOOP\n"
    686                 "       MOV TEMP[1].x, IMM[0].wwww\n"
    687                 "       UMUL TEMP[0].x, TEMP[0], IMM[0]\n"
    688                 "       BGNLOOP\n"
    689                 "               USEQ TEMP[2].x, TEMP[1], IMM[0]\n"
    690                 "               IF TEMP[2]\n"
    691                 "                       BRK\n"
    692                 "               ENDIF\n"
    693                 "               LOAD TEMP[2].x, RPRIVATE, TEMP[1]\n"
    694                 "               STORE RES[0].x, TEMP[0], TEMP[2]\n"
    695                 "               UADD TEMP[0].x, TEMP[0], IMM[1]\n"
    696                 "               UADD TEMP[1].x, TEMP[1], IMM[1]\n"
    697                 "       ENDLOOP\n"
    698                 "       RET\n"
    699                 "    ENDSUB\n";
    700         void init(void *p, int s, int x, int y) {
    701                 *(uint32_t *)p = 0xdeadbeef;
    702         }
    703         void expect(void *p, int s, int x, int y) {
    704                 *(uint32_t *)p = (x / 32) + x % 32;
    705         }
    706 
    707         printf("- %s\n", __func__);
    708 
    709         init_prog(ctx, 0, 128, 0, src, NULL);
    710         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
    711                  32768, 0, init);
    712         init_compute_resources(ctx, (int []) { 0, -1 });
    713         launch_grid(ctx, (uint []){16, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
    714         check_tex(ctx, 0, expect, NULL);
    715         destroy_compute_resources(ctx);
    716         destroy_tex(ctx);
    717         destroy_prog(ctx);
    718 }
    719 
    /*
     * Exercise RLOCAL as a communication channel between threads.
     *
     * What the TGSI program below does:
     *  - every thread computes its own offset THREAD_ID * 4, stores 0 there
     *    in RLOCAL and issues MFENCE RLOCAL;
     *  - threads with THREAD_ID < 32 pick the RLOCAL slot at own offset + 128
     *    as partner, spin until the partner slot reads 1, store 1 into their
     *    own slot, then spin until the partner slot reads 2;
     *  - all other threads pick the slot at own offset - 128 as partner, spin
     *    until it reads 0, store 1 into their own slot, spin until the
     *    partner reads 1, and finally store 2 into their own slot;
     *  - every thread then copies its own RLOCAL slot to RES[0] at offset
     *    BLOCK_ID * BLOCK_SIZE * 4 + own offset.
     *
     * The handshake therefore leaves 1 in the slots of the "< 32" threads and
     * 2 in the others, which is what expect() encodes (x & 0x20 ? 2 : 1).
     */
    720 static void test_local(struct context *ctx)
    721 {
    722         const char *src = "COMP\n"
    723                 "DCL RES[0], BUFFER, RAW, WR\n"
    724                 "DCL SV[0], BLOCK_ID[0]\n"
    725                 "DCL SV[1], BLOCK_SIZE[0]\n"
    726                 "DCL SV[2], THREAD_ID[0]\n"
    727                 "DCL TEMP[0], LOCAL\n"
    728                 "DCL TEMP[1], LOCAL\n"
    729                 "DCL TEMP[2], LOCAL\n"
    730                 "IMM UINT32 { 1, 0, 0, 0 }\n"
    731                 "IMM UINT32 { 2, 0, 0, 0 }\n"
    732                 "IMM UINT32 { 4, 0, 0, 0 }\n"
    733                 "IMM UINT32 { 32, 0, 0, 0 }\n"
    734                 "IMM UINT32 { 128, 0, 0, 0 }\n"
    735                 "\n"
    736                 "    BGNSUB\n"
    737                 "       UMUL TEMP[0].x, SV[2], IMM[2]\n"
    738                 "       STORE RLOCAL.x, TEMP[0], IMM[0].wwww\n"
    739                 "       MFENCE RLOCAL\n"
    740                 "       USLT TEMP[1].x, SV[2], IMM[3]\n"
    741                 "       IF TEMP[1]\n"
    742                 "               UADD TEMP[1].x, TEMP[0], IMM[4]\n"
    743                 "               BGNLOOP\n"
    744                 "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
    745                 "                       USEQ TEMP[2].x, TEMP[2], IMM[0]\n"
    746                 "                       IF TEMP[2]\n"
    747                 "                               BRK\n"
    748                 "                       ENDIF\n"
    749                 "               ENDLOOP\n"
    750                 "               STORE RLOCAL.x, TEMP[0], IMM[0]\n"
    751                 "               MFENCE RLOCAL\n"
    752                 "               BGNLOOP\n"
    753                 "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
    754                 "                       USEQ TEMP[2].x, TEMP[2], IMM[1]\n"
    755                 "                       IF TEMP[2]\n"
    756                 "                               BRK\n"
    757                 "                       ENDIF\n"
    758                 "               ENDLOOP\n"
    759                 "       ELSE\n"
    760                 "               UADD TEMP[1].x, TEMP[0], -IMM[4]\n"
    761                 "               BGNLOOP\n"
    762                 "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
    763                 "                       USEQ TEMP[2].x, TEMP[2], IMM[0].wwww\n"
    764                 "                       IF TEMP[2]\n"
    765                 "                               BRK\n"
    766                 "                       ENDIF\n"
    767                 "               ENDLOOP\n"
    768                 "               STORE RLOCAL.x, TEMP[0], IMM[0]\n"
    769                 "               MFENCE RLOCAL\n"
    770                 "               BGNLOOP\n"
    771                 "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
    772                 "                       USEQ TEMP[2].x, TEMP[2], IMM[0]\n"
    773                 "                       IF TEMP[2]\n"
    774                 "                               BRK\n"
    775                 "                       ENDIF\n"
    776                 "               ENDLOOP\n"
    777                 "               STORE RLOCAL.x, TEMP[0], IMM[1]\n"
    778                 "               MFENCE RLOCAL\n"
    779                 "       ENDIF\n"
    780                 "       UMUL TEMP[1].x, SV[0], SV[1]\n"
    781                 "       UMUL TEMP[1].x, TEMP[1], IMM[2]\n"
    782                 "       UADD TEMP[1].x, TEMP[1], TEMP[0]\n"
    783                 "       LOAD TEMP[0].x, RLOCAL, TEMP[0]\n"
    784                 "       STORE RES[0].x, TEMP[1], TEMP[0]\n"
    785                 "       RET\n"
    786                 "    ENDSUB\n";
                /* Initial contents of the output buffer: a poison word. */
    787         void init(void *p, int s, int x, int y) {
    788                 *(uint32_t *)p = 0xdeadbeef;
    789         }
                /*
                 * Expected output word; x is presumably the 32-bit element
                 * index passed in by check_tex() -- TODO confirm.
                 */
    790         void expect(void *p, int s, int x, int y) {
    791                 *(uint32_t *)p = x & 0x20 ? 2 : 1;
    792         }
    793 
    794         printf("- %s\n", __func__);
    795 
                /*
                 * NOTE(review): the 256 passed to init_prog() is presumably
                 * the RLOCAL size in bytes (64 slots of 4 bytes), and the two
                 * launch_grid() arrays presumably block size then grid size
                 * -- confirm against the helpers defined earlier in the file.
                 */
    796         init_prog(ctx, 256, 0, 0, src, NULL);
    797         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
    798                  4096, 0, init);
    799         init_compute_resources(ctx, (int []) { 0, -1 });
    800         launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
    801         check_tex(ctx, 0, expect, NULL);
    802         destroy_compute_resources(ctx);
    803         destroy_tex(ctx);
    804         destroy_prog(ctx);
    805 }
    806 
    /*
     * Exercise the SAMPLE instruction from a compute program.
     *
     * The TGSI program converts BLOCK_ID to float, divides it by the
     * immediate (128, 32, 0, 0), samples SVIEW[0] through SAMP[0] at that
     * coordinate and stores all four result components to RES[0] at the
     * address BLOCK_ID * (16, 1, 0, 0).
     *
     * Host side: texture 0 (128x32, R32_FLOAT) is bound as the sampler view
     * and texture 1 (512x32, R32_FLOAT) as the only compute resource; texture
     * 1 is the one verified by check_tex().
     */
    807 static void test_sample(struct context *ctx)
    808 {
    809         const char *src = "COMP\n"
    810                 "DCL SVIEW[0], 2D, FLOAT\n"
    811                 "DCL RES[0], 2D, RAW, WR\n"
    812                 "DCL SAMP[0]\n"
    813                 "DCL SV[0], BLOCK_ID[0]\n"
    814                 "DCL TEMP[0], LOCAL\n"
    815                 "DCL TEMP[1], LOCAL\n"
    816                 "IMM UINT32 { 16, 1, 0, 0 }\n"
    817                 "IMM FLT32 { 128, 32, 0, 0 }\n"
    818                 "\n"
    819                 "    BGNSUB\n"
    820                 "       I2F TEMP[1], SV[0]\n"
    821                 "       DIV TEMP[1], TEMP[1], IMM[1]\n"
    822                 "       SAMPLE TEMP[1], TEMP[1], SVIEW[0], SAMP[0]\n"
    823                 "       UMUL TEMP[0], SV[0], IMM[0]\n"
    824                 "       STORE RES[0].xyzw, TEMP[0], TEMP[1]\n"
    825                 "       RET\n"
    826                 "    ENDSUB\n";
                /*
                 * Fill callback shared by both textures: writes x * y when s
                 * is zero and 1 otherwise (s is presumably the texture slot
                 * -- TODO confirm against init_tex()).
                 */
    827         void init(void *p, int s, int x, int y) {
    828                 *(float *)p = s ? 1 : x * y;
    829         }
                /*
                 * Expected output: within each group of four floats the first
                 * is (x / 4) * y, the second and third are 0 and the fourth
                 * is 1.
                 */
    830         void expect(void *p, int s, int x, int y) {
    831                 switch (x % 4) {
    832                 case 0:
    833                         *(float *)p = x / 4 * y;
    834                         break;
    835                 case 1:
    836                 case 2:
    837                         *(float *)p = 0;
    838                         break;
    839                 case 3:
    840                         *(float *)p = 1;
    841                         break;
    842                 }
    843         }
    844 
    845         printf("- %s\n", __func__);
    846 
    847         init_prog(ctx, 0, 0, 0, src, NULL);
    848         init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
    849                  128, 32, init);
    850         init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
    851                  512, 32, init);
    852         init_compute_resources(ctx, (int []) { 1, -1 });
    853         init_sampler_views(ctx, (int []) { 0, -1 });
    854         init_sampler_states(ctx, 2);
    855         launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0, NULL);
    856         check_tex(ctx, 1, expect, NULL);
                /* Tear down in the reverse order of creation. */
    857         destroy_sampler_states(ctx);
    858         destroy_sampler_views(ctx);
    859         destroy_compute_resources(ctx);
    860         destroy_tex(ctx);
    861         destroy_prog(ctx);
    862 }
    863 
    /*
     * Exercise a program that contains several subroutines, launching each
     * one separately.
     *
     * The TGSI program has four BGNSUB/ENDSUB bodies.  Body k (k = 0..3)
     * multiplies component k of IMM[0] = { 0, 1, 2, 3 } by 4 to form an
     * offset and stores that same component to RES[0] at that offset, so
     * after all four launches word k of the buffer holds k, matching
     * expect().
     */
    864 static void test_many_kern(struct context *ctx)
    865 {
    866         const char *src = "COMP\n"
    867                 "DCL RES[0], BUFFER, RAW, WR\n"
    868                 "DCL TEMP[0], LOCAL\n"
    869                 "IMM UINT32 { 0, 1, 2, 3 }\n"
    870                 "IMM UINT32 { 4, 0, 0, 0 }\n"
    871                 "\n"
    872                 "    BGNSUB\n"
    873                 "       UMUL TEMP[0].x, IMM[0].xxxx, IMM[1].xxxx\n"
    874                 "       STORE RES[0].x, TEMP[0], IMM[0].xxxx\n"
    875                 "       RET\n"
    876                 "    ENDSUB\n"
    877                 "    BGNSUB\n"
    878                 "       UMUL TEMP[0].x, IMM[0].yyyy, IMM[1].xxxx\n"
    879                 "       STORE RES[0].x, TEMP[0], IMM[0].yyyy\n"
    880                 "       RET\n"
    881                 "    ENDSUB\n"
    882                 "    BGNSUB\n"
    883                 "       UMUL TEMP[0].x, IMM[0].zzzz, IMM[1].xxxx\n"
    884                 "       STORE RES[0].x, TEMP[0], IMM[0].zzzz\n"
    885                 "       RET\n"
    886                 "    ENDSUB\n"
    887                 "    BGNSUB\n"
    888                 "       UMUL TEMP[0].x, IMM[0].wwww, IMM[1].xxxx\n"
    889                 "       STORE RES[0].x, TEMP[0], IMM[0].wwww\n"
    890                 "       RET\n"
    891                 "    ENDSUB\n";
                /* Initial contents of the output buffer: a poison word. */
    892         void init(void *p, int s, int x, int y) {
    893                 *(uint32_t *)p = 0xdeadbeef;
    894         }
                /* Expected output: each word holds its own x value. */
    895         void expect(void *p, int s, int x, int y) {
    896                 *(uint32_t *)p = x;
    897         }
    898 
    899         printf("- %s\n", __func__);
    900 
    901         init_prog(ctx, 0, 0, 0, src, NULL);
    902         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
    903                  16, 0, init);
    904         init_compute_resources(ctx, (int []) { 0, -1 });
                /*
                 * One launch per subroutine.  NOTE(review): the fourth
                 * argument (0, 5, 10, 15) is presumably the instruction index
                 * of each BGNSUB -- every body above is five instructions
                 * long -- confirm against launch_grid().
                 */
    905         launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 0, NULL);
    906         launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 5, NULL);
    907         launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 10, NULL);
    908         launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 15, NULL);
    909         check_tex(ctx, 0, expect, NULL);
    910         destroy_compute_resources(ctx);
    911         destroy_tex(ctx);
    912         destroy_prog(ctx);
    913 }
    914 
    /*
     * Exercise loading from a resource that is declared without WR.
     *
     * The TGSI program computes the offset BLOCK_ID * 4, loads one word from
     * RES[0] (declared "BUFFER, RAW") and stores it to RES[1] (declared
     * "BUFFER, RAW, WR") at the same offset, i.e. it copies buffer 0 to
     * buffer 1.  Buffer 1 is then compared against the pattern buffer 0 was
     * filled with.
     */
    915 static void test_constant(struct context *ctx)
    916 {
    917         const char *src = "COMP\n"
    918                 "DCL RES[0], BUFFER, RAW\n"
    919                 "DCL RES[1], BUFFER, RAW, WR\n"
    920                 "DCL SV[0], BLOCK_ID[0]\n"
    921                 "DCL TEMP[0], LOCAL\n"
    922                 "DCL TEMP[1], LOCAL\n"
    923                 "IMM UINT32 { 4, 0, 0, 0 }\n"
    924                 "\n"
    925                 "    BGNSUB\n"
    926                 "       UMUL TEMP[0].x, SV[0], IMM[0]\n"
    927                 "       LOAD TEMP[1].x, RES[0], TEMP[0]\n"
    928                 "       STORE RES[1].x, TEMP[0], TEMP[1]\n"
    929                 "       RET\n"
    930                 "    ENDSUB\n";
                /*
                 * Fill callback: 8.0 - x when s is zero, the constant
                 * 0xdeadbeef converted to float otherwise (s is presumably
                 * the texture slot -- TODO confirm against init_tex()).
                 */
    931         void init(void *p, int s, int x, int y) {
    932                 *(float *)p = s ? 0xdeadbeef : 8.0 - (float)x;
    933         }
                /* Expected output: the same 8.0 - x pattern init() uses for s == 0. */
    934         void expect(void *p, int s, int x, int y) {
    935                 *(float *)p = 8.0 - (float)x;
    936         }
    937 
    938         printf("- %s\n", __func__);
    939 
    940         init_prog(ctx, 0, 0, 0, src, NULL);
                /*
                 * NOTE(review): the boolean passed to init_tex() is false for
                 * the source buffer and true for the destination; presumably
                 * it selects writability -- confirm against init_tex().
                 */
    941         init_tex(ctx, 0, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
    942                  256, 0, init);
    943         init_tex(ctx, 1, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
    944                  256, 0, init);
    945         init_compute_resources(ctx, (int []) { 0, 1, -1 });
    946         launch_grid(ctx, (uint []){1, 1, 1}, (uint []){64, 1, 1}, 0, NULL);
    947         check_tex(ctx, 1, expect, NULL);
    948         destroy_compute_resources(ctx);
    949         destroy_tex(ctx);
    950         destroy_prog(ctx);
    951 }
    952 
    /*
     * Exercise indirect resource addressing (RES[TEMP[..].x + 2]).
     *
     * The TGSI program computes the offset BLOCK_ID * 4, loads a selector
     * word from RES[1] at that offset, uses it to index RES[selector + 2]
     * for a second load at the same offset, and stores the result to RES[0].
     *
     * init() fills buffer 1 with x % 2 (the selector), buffer 2 with 2 * x
     * and buffer 3 with 2 * x + 1, so the value expected in buffer 0 is
     * 2 * x for even x and 2 * x + 1 for odd x.
     */
    953 static void test_resource_indirect(struct context *ctx)
    954 {
    955         const char *src = "COMP\n"
    956                 "DCL RES[0], BUFFER, RAW, WR\n"
    957                 "DCL RES[1..3], BUFFER, RAW\n"
    958                 "DCL SV[0], BLOCK_ID[0]\n"
    959                 "DCL TEMP[0], LOCAL\n"
    960                 "DCL TEMP[1], LOCAL\n"
    961                 "IMM UINT32 { 4, 0, 0, 0 }\n"
    962                 "\n"
    963                 "    BGNSUB\n"
    964                 "       UMUL TEMP[0].x, SV[0], IMM[0]\n"
    965                 "       LOAD TEMP[1].x, RES[1], TEMP[0]\n"
    966                 "       LOAD TEMP[1].x, RES[TEMP[1].x+2], TEMP[0]\n"
    967                 "       STORE RES[0].x, TEMP[0], TEMP[1]\n"
    968                 "       RET\n"
    969                 "    ENDSUB\n";
                /* Per-buffer fill pattern, selected by s (0..3). */
    970         void init(void *p, int s, int x, int y) {
    971                 *(uint32_t *)p = s == 0 ? 0xdeadbeef :
    972                    s == 1 ? x % 2 :
    973                    s == 2 ? 2 * x :
    974                    2 * x + 1;
    975         }
                /* Expected output: 2 * x, plus 1 when x is odd. */
    976         void expect(void *p, int s, int x, int y) {
    977            *(uint32_t *)p = 2 * x + (x % 2 ? 1 : 0);
    978         }
    979 
    980         printf("- %s\n", __func__);
    981 
    982         init_prog(ctx, 0, 0, 0, src, NULL);
    983         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
    984                  256, 0, init);
    985         init_tex(ctx, 1, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
    986                  256, 0, init);
    987         init_tex(ctx, 2, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
    988                  256, 0, init);
    989         init_tex(ctx, 3, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
    990                  256, 0, init);
    991         init_compute_resources(ctx, (int []) { 0, 1, 2, 3, -1 });
    992         launch_grid(ctx, (uint []){1, 1, 1}, (uint []){64, 1, 1}, 0, NULL);
    993         check_tex(ctx, 0, expect, NULL);
    994         destroy_compute_resources(ctx);
    995         destroy_tex(ctx);
    996         destroy_prog(ctx);
    997 }
    998 
    /*
     * Formats iterated over by test_surface_ld() and test_surface_st(); both
     * loop over the whole array using Elements(surface_fmts).
     *
     * NOTE(review): PIPE_FORMAT_X8R8G8B8_UNORM is listed twice in a row, so
     * that format is exercised twice per test.  This looks unintentional
     * (duplicate, or a different format was meant) -- confirm before
     * changing.
     */
    999 enum pipe_format surface_fmts[] = {
   1000         PIPE_FORMAT_B8G8R8A8_UNORM,
   1001         PIPE_FORMAT_B8G8R8X8_UNORM,
   1002         PIPE_FORMAT_A8R8G8B8_UNORM,
   1003         PIPE_FORMAT_X8R8G8B8_UNORM,
   1004         PIPE_FORMAT_X8R8G8B8_UNORM,
   1005         PIPE_FORMAT_L8_UNORM,
   1006         PIPE_FORMAT_A8_UNORM,
   1007         PIPE_FORMAT_I8_UNORM,
   1008         PIPE_FORMAT_L8A8_UNORM,
   1009         PIPE_FORMAT_R32_FLOAT,
   1010         PIPE_FORMAT_R32G32_FLOAT,
   1011         PIPE_FORMAT_R32G32B32A32_FLOAT,
   1012         PIPE_FORMAT_R32_UNORM,
   1013         PIPE_FORMAT_R32G32_UNORM,
   1014         PIPE_FORMAT_R32G32B32A32_UNORM,
   1015         PIPE_FORMAT_R32_SNORM,
   1016         PIPE_FORMAT_R32G32_SNORM,
   1017         PIPE_FORMAT_R32G32B32A32_SNORM,
   1018         PIPE_FORMAT_R8_UINT,
   1019         PIPE_FORMAT_R8G8_UINT,
   1020         PIPE_FORMAT_R8G8B8A8_UINT,
   1021         PIPE_FORMAT_R8_SINT,
   1022         PIPE_FORMAT_R8G8_SINT,
   1023         PIPE_FORMAT_R8G8B8A8_SINT,
   1024         PIPE_FORMAT_R32_UINT,
   1025         PIPE_FORMAT_R32G32_UINT,
   1026         PIPE_FORMAT_R32G32B32A32_UINT,
   1027         PIPE_FORMAT_R32_SINT,
   1028         PIPE_FORMAT_R32G32_SINT,
   1029         PIPE_FORMAT_R32G32B32A32_SINT
   1030 };
   1031 
   /*
    * Exercise typed loads from a 2D surface, once per entry of surface_fmts.
    *
    * The TGSI program loads from RES[0] (declared "2D", i.e. without RAW) at
    * the coordinate BLOCK_ID and stores all four components of the result to
    * RES[1] (declared "2D, RAW, WR") at BLOCK_ID * (16, 1, 0, 0).
    *
    * For every format the source texture (128x32, in the format under test)
    * is filled with one fixed four-component value packed by
    * util_format_write_4f() / util_format_write_4i(), and the destination
    * (512x32, R32_FLOAT) is checked against that value unpacked again by
    * util_format_read_4f() / util_format_read_4i().
    *
    * The program is created once and reused; textures, compute resources and
    * sampler states are created and destroyed on every iteration.
    */
   1032 static void test_surface_ld(struct context *ctx)
   1033 {
   1034         const char *src = "COMP\n"
   1035                 "DCL RES[0], 2D\n"
   1036                 "DCL RES[1], 2D, RAW, WR\n"
   1037                 "DCL SV[0], BLOCK_ID[0]\n"
   1038                 "DCL TEMP[0], LOCAL\n"
   1039                 "DCL TEMP[1], LOCAL\n"
   1040                 "IMM UINT32 { 16, 1, 0, 0 }\n"
   1041                 "\n"
   1042                 "    BGNSUB\n"
   1043                 "       LOAD TEMP[1], RES[0], SV[0]\n"
   1044                 "       UMUL TEMP[0], SV[0], IMM[0]\n"
   1045                 "       STORE RES[1].xyzw, TEMP[0], TEMP[1]\n"
   1046                 "       RET\n"
   1047                 "    ENDSUB\n";
                /*
                 * Index of the format under test.  The nested callbacks below
                 * read it, so it must stay the loop variable of the for loop.
                 */
   1048         int i = 0;
                /* Source fill for non-integer formats: pack a fixed float vector. */
   1049         void init0f(void *p, int s, int x, int y) {
   1050                 float v[] = { 1.0, -.75, .50, -.25 };
   1051                 util_format_write_4f(surface_fmts[i], v, 0,
   1052                                      p, 0, 0, 0, 1, 1);
   1053         }
                /* Source fill for pure-integer formats: pack a fixed int vector. */
   1054         void init0i(void *p, int s, int x, int y) {
   1055                 int v[] = { 0xffffffff, 0xffff, 0xff, 0xf };
   1056                 util_format_write_4i(surface_fmts[i], v, 0,
   1057                                      p, 0, 0, 0, 1, 1);
   1058         }
                /* Destination fill: a poison word. */
   1059         void init1(void *p, int s, int x, int y) {
   1060                 *(uint32_t *)p = 0xdeadbeef;
   1061         }
                /*
                 * Expected float output: pack the source value into the
                 * scratch array v with init0f(), unpack it into w and pick
                 * component x % 4.
                 */
   1062         void expectf(void *p, int s, int x, int y) {
   1063                 float v[4], w[4];
   1064                 init0f(v, s, x / 4, y);
   1065                 util_format_read_4f(surface_fmts[i], w, 0,
   1066                                     v, 0, 0, 0, 1, 1);
   1067                 *(float *)p = w[x % 4];
   1068         }
                /* Integer counterpart of expectf(), using init0i()/read_4i. */
   1069         void expecti(void *p, int s, int x, int y) {
   1070                 int32_t v[4], w[4];
   1071                 init0i(v, s, x / 4, y);
   1072                 util_format_read_4i(surface_fmts[i], w, 0,
   1073                                     v, 0, 0, 0, 1, 1);
   1074                 *(uint32_t *)p = w[x % 4];
   1075         }
   1076 
   1077         printf("- %s\n", __func__);
   1078 
   1079         init_prog(ctx, 0, 0, 0, src, NULL);
   1080 
   1081         for (i = 0; i < Elements(surface_fmts); i++) {
                        /* Pure-integer formats use the int fill/expect pair. */
   1082                 bool is_int = util_format_is_pure_integer(surface_fmts[i]);
   1083 
   1084                 printf("   - %s\n", util_format_name(surface_fmts[i]));
   1085 
   1086                 init_tex(ctx, 0, PIPE_TEXTURE_2D, true, surface_fmts[i],
   1087                          128, 32, (is_int ? init0i : init0f));
   1088                 init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
   1089                          512, 32, init1);
   1090                 init_compute_resources(ctx, (int []) { 0, 1, -1 });
   1091                 init_sampler_states(ctx, 2);
   1092                 launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0,
   1093                             NULL);
   1094                 check_tex(ctx, 1, (is_int ? expecti : expectf), NULL);
   1095                 destroy_sampler_states(ctx);
   1096                 destroy_compute_resources(ctx);
   1097                 destroy_tex(ctx);
   1098         }
   1099 
   1100         destroy_prog(ctx);
   1101 }
   1102 
   /*
    * Exercise typed stores to a 2D surface, once per entry of surface_fmts.
    *
    * The TGSI program loads from RES[0] (declared "2D, RAW") at
    * BLOCK_ID * (16, 1, 0, 0) and stores the loaded value to RES[1]
    * (declared "2D, WR", i.e. without RAW) at the coordinate BLOCK_ID.
    *
    * For every format the source texture (512x32, R32_FLOAT) is filled with
    * a repeating four-value pattern and the destination texture (128x32, in
    * the format under test) is compared against the same four values packed
    * by util_format_write_4f() / util_format_write_4i() /
    * util_format_write_4ui(), using the tolerant comparison in check().
    *
    * The program is created once and reused; textures, compute resources and
    * sampler states are created and destroyed on every iteration.
    */
   1103 static void test_surface_st(struct context *ctx)
   1104 {
   1105         const char *src = "COMP\n"
   1106                 "DCL RES[0], 2D, RAW\n"
   1107                 "DCL RES[1], 2D, WR\n"
   1108                 "DCL SV[0], BLOCK_ID[0]\n"
   1109                 "DCL TEMP[0], LOCAL\n"
   1110                 "DCL TEMP[1], LOCAL\n"
   1111                 "IMM UINT32 { 16, 1, 0, 0 }\n"
   1112                 "\n"
   1113                 "    BGNSUB\n"
   1114                 "       UMUL TEMP[0], SV[0], IMM[0]\n"
   1115                 "       LOAD TEMP[1], RES[0], TEMP[0]\n"
   1116                 "       STORE RES[1], SV[0], TEMP[1]\n"
   1117                 "       RET\n"
   1118                 "    ENDSUB\n";
                /*
                 * Index of the format under test.  The nested callbacks below
                 * read it, so it must stay the loop variable of the for loop.
                 */
   1119         int i = 0;
                /* Source fill for non-integer formats: element x gets v[x % 4]. */
   1120         void init0f(void *p, int s, int x, int y) {
   1121                 float v[] = { 1.0, -.75, 0.5, -.25 };
   1122                 *(float *)p = v[x % 4];
   1123         }
                /* Source fill for pure-integer formats: element x gets v[x % 4]. */
   1124         void init0i(void *p, int s, int x, int y) {
   1125                 int v[] = { 0xffffffff, 0xffff, 0xff, 0xf };
   1126                 *(int32_t *)p = v[x % 4];
   1127         }
                /* Destination fill: every byte of one block of the format set to 1. */
   1128         void init1(void *p, int s, int x, int y) {
   1129                 memset(p, 1, util_format_get_blocksize(surface_fmts[i]));
   1130         }
                /*
                 * Expected texel for non-integer formats: regenerate the four
                 * source floats for elements 4 * x .. 4 * x + 3 and pack them
                 * in the format under test.
                 */
   1131         void expectf(void *p, int s, int x, int y) {
   1132                 float vf[4];
   1133                 int j;
   1134 
   1135                 for (j = 0; j < 4; j++)
   1136                         init0f(&vf[j], s, 4 * x + j, y);
   1137                 util_format_write_4f(surface_fmts[i], vf, 0,
   1138                                      p, 0, 0, 0, 1, 1);
   1139         }
                /* Same as expectf() for signed integer formats (write_4i). */
   1140         void expects(void *p, int s, int x, int y) {
   1141                 int32_t v[4];
   1142                 int j;
   1143 
   1144                 for (j = 0; j < 4; j++)
   1145                         init0i(&v[j], s, 4 * x + j, y);
   1146                 util_format_write_4i(surface_fmts[i], v, 0,
   1147                                      p, 0, 0, 0, 1, 1);
   1148         }
                /* Same as expectf() for unsigned integer formats (write_4ui). */
   1149         void expectu(void *p, int s, int x, int y) {
   1150                 uint32_t v[4];
   1151                 int j;
   1152 
   1153                 for (j = 0; j < 4; j++)
   1154                         init0i(&v[j], s, 4 * x + j, y);
   1155                 util_format_write_4ui(surface_fmts[i], v, 0,
   1156                                       p, 0, 0, 0, 1, 1);
   1157         }
                /*
                 * Comparison callback handed to check_tex().  Returns true
                 * when the buffers x and y (sz bytes each) are considered
                 * equal:
                 *  - float formats: the first float of each differs by less
                 *    than 3.92156863e-3 (about 1/255);
                 *  - otherwise, when sz is a multiple of 4: every 32-bit
                 *    word differs by at most 1;
                 *  - otherwise: the bytes are identical.
                 *
                 * NOTE(review): the float branch looks at the first component
                 * only, so the remaining components of multi-channel float
                 * formats are not compared.  The word branch applies abs() to
                 * an unsigned difference and relies on its conversion to int.
                 * Both look worth tightening -- confirm intent first.
                 */
   1158         bool check(void *x, void *y, int sz) {
   1159                 int j;
   1160 
   1161                 if (util_format_is_float(surface_fmts[i])) {
   1162                         return fabs(*(float *)x - *(float *)y) < 3.92156863e-3;
   1163 
   1164                 } else if ((sz % 4) == 0) {
   1165                         for (j = 0; j < sz / 4; j++)
   1166                                 if (abs(((uint32_t *)x)[j] -
   1167                                         ((uint32_t *)y)[j]) > 1)
   1168                                         return false;
   1169                         return true;
   1170                 } else {
   1171                         return !memcmp(x, y, sz);
   1172                 }
   1173         }
   1174 
   1175         printf("- %s\n", __func__);
   1176 
   1177         init_prog(ctx, 0, 0, 0, src, NULL);
   1178 
   1179         for (i = 0; i < Elements(surface_fmts); i++) {
                        /* Signedness is taken from the type of channel 0. */
   1180                 bool is_signed = (util_format_description(surface_fmts[i])
   1181                                   ->channel[0].type == UTIL_FORMAT_TYPE_SIGNED);
   1182                 bool is_int = util_format_is_pure_integer(surface_fmts[i]);
   1183 
   1184                 printf("   - %s\n", util_format_name(surface_fmts[i]));
   1185 
   1186                 init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
   1187                          512, 32, (is_int ? init0i : init0f));
   1188                 init_tex(ctx, 1, PIPE_TEXTURE_2D, true, surface_fmts[i],
   1189                          128, 32, init1);
   1190                 init_compute_resources(ctx, (int []) { 0, 1, -1 });
   1191                 init_sampler_states(ctx, 2);
   1192                 launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0,
   1193                             NULL);
                        /*
                         * Signed integer -> expects, unsigned integer ->
                         * expectu, everything else -> expectf.
                         */
   1194                 check_tex(ctx, 1, (is_int && is_signed ? expects :
   1195                                    is_int && !is_signed ? expectu :
   1196                                    expectf), check);
   1197                 destroy_sampler_states(ctx);
   1198                 destroy_compute_resources(ctx);
   1199                 destroy_tex(ctx);
   1200         }
   1201 
   1202         destroy_prog(ctx);
   1203 }
   1204 
   /*
    * Exercise the BARRIER instruction.
    *
    * What the TGSI program below does:
    *  - every thread computes its own RLOCAL offset THREAD_ID * 4 and starts
    *    a round counter at 0;
    *  - each round it executes BARRIER, stores the counter to its own slot,
    *    executes BARRIER again, and then walks the slots at offsets 0, 4, ...
    *    for indices 0 .. BLOCK_SIZE - 1, executing END as soon as a slot
    *    differs from the counter;
    *  - the counter is incremented by 1 after every round and the outer loop
    *    breaks when it reaches 32;
    *  - finally every thread copies its own RLOCAL slot to RES[0] at offset
    *    BLOCK_ID * BLOCK_SIZE * 4 + own offset.
    *
    * The last value stored before the loop exits is 31, which is what
    * expect() requires in every output word.  A thread that hit END never
    * reaches the final STORE, so its word keeps the 0xdeadbeef fill.
    */
   1205 static void test_barrier(struct context *ctx)
   1206 {
   1207         const char *src = "COMP\n"
   1208                 "DCL RES[0], BUFFER, RAW, WR\n"
   1209                 "DCL SV[0], BLOCK_ID[0]\n"
   1210                 "DCL SV[1], BLOCK_SIZE[0]\n"
   1211                 "DCL SV[2], THREAD_ID[0]\n"
   1212                 "DCL TEMP[0], LOCAL\n"
   1213                 "DCL TEMP[1], LOCAL\n"
   1214                 "DCL TEMP[2], LOCAL\n"
   1215                 "DCL TEMP[3], LOCAL\n"
   1216                 "IMM UINT32 { 1, 0, 0, 0 }\n"
   1217                 "IMM UINT32 { 4, 0, 0, 0 }\n"
   1218                 "IMM UINT32 { 32, 0, 0, 0 }\n"
   1219                 "\n"
   1220                 "    BGNSUB\n"
   1221                 "       UMUL TEMP[0].x, SV[2], IMM[1]\n"
   1222                 "       MOV TEMP[1].x, IMM[0].wwww\n"
   1223                 "       BGNLOOP\n"
   1224                 "               BARRIER\n"
   1225                 "               STORE RLOCAL.x, TEMP[0], TEMP[1]\n"
   1226                 "               BARRIER\n"
   1227                 "               MOV TEMP[2].x, IMM[0].wwww\n"
   1228                 "               BGNLOOP\n"
   1229                 "                       UMUL TEMP[3].x, TEMP[2], IMM[1]\n"
   1230                 "                       LOAD TEMP[3].x, RLOCAL, TEMP[3]\n"
   1231                 "                       USNE TEMP[3].x, TEMP[3], TEMP[1]\n"
   1232                 "                       IF TEMP[3]\n"
   1233                 "                               END\n"
   1234                 "                       ENDIF\n"
   1235                 "                       UADD TEMP[2].x, TEMP[2], IMM[0]\n"
   1236                 "                       USEQ TEMP[3].x, TEMP[2], SV[1]\n"
   1237                 "                       IF TEMP[3]\n"
   1238                 "                               BRK\n"
   1239                 "                       ENDIF\n"
   1240                 "               ENDLOOP\n"
   1241                 "               UADD TEMP[1].x, TEMP[1], IMM[0]\n"
   1242                 "               USEQ TEMP[2].x, TEMP[1], IMM[2]\n"
   1243                 "               IF TEMP[2]\n"
   1244                 "                       BRK\n"
   1245                 "               ENDIF\n"
   1246                 "       ENDLOOP\n"
   1247                 "       UMUL TEMP[1].x, SV[0], SV[1]\n"
   1248                 "       UMUL TEMP[1].x, TEMP[1], IMM[1]\n"
   1249                 "       UADD TEMP[1].x, TEMP[1], TEMP[0]\n"
   1250                 "       LOAD TEMP[0].x, RLOCAL, TEMP[0]\n"
   1251                 "       STORE RES[0].x, TEMP[1], TEMP[0]\n"
   1252                 "       RET\n"
   1253                 "    ENDSUB\n";
                /* Initial contents of the output buffer: a poison word. */
   1254         void init(void *p, int s, int x, int y) {
   1255                 *(uint32_t *)p = 0xdeadbeef;
   1256         }
                /* Expected output: the value of the final round, 31, everywhere. */
   1257         void expect(void *p, int s, int x, int y) {
   1258                 *(uint32_t *)p = 31;
   1259         }
   1260 
   1261         printf("- %s\n", __func__);
   1262 
                /*
                 * NOTE(review): the 256 passed to init_prog() is presumably
                 * the RLOCAL size in bytes (64 slots of 4 bytes) -- confirm
                 * against init_prog().
                 */
   1263         init_prog(ctx, 256, 0, 0, src, NULL);
   1264         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
   1265                  4096, 0, init);
   1266         init_compute_resources(ctx, (int []) { 0, -1 });
   1267         launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
   1268         check_tex(ctx, 0, expect, NULL);
   1269         destroy_compute_resources(ctx);
   1270         destroy_tex(ctx);
   1271         destroy_prog(ctx);
   1272 }
   1273 
   1274 static void test_atom_ops(struct context *ctx, bool global)
   1275 {
   1276         const char *src = "COMP\n"
   1277                 "#ifdef TARGET_GLOBAL\n"
   1278                 "#define target RES[0]\n"
   1279                 "#else\n"
   1280                 "#define target RLOCAL\n"
   1281                 "#endif\n"
   1282                 ""
   1283                 "DCL RES[0], BUFFER, RAW, WR\n"
   1284                 "#define threadid SV[0]\n"
   1285                 "DCL threadid, THREAD_ID[0]\n"
   1286                 ""
   1287                 "#define offset TEMP[0]\n"
   1288                 "DCL offset, LOCAL\n"
   1289                 "#define tmp TEMP[1]\n"
   1290                 "DCL tmp, LOCAL\n"
   1291                 ""
   1292                 "#define k0 IMM[0]\n"
   1293                 "IMM UINT32 { 0, 0, 0, 0 }\n"
   1294                 "#define k1 IMM[1]\n"
   1295                 "IMM UINT32 { 1, 0, 0, 0 }\n"
   1296                 "#define k2 IMM[2]\n"
   1297                 "IMM UINT32 { 2, 0, 0, 0 }\n"
   1298                 "#define k3 IMM[3]\n"
   1299                 "IMM UINT32 { 3, 0, 0, 0 }\n"
   1300                 "#define k4 IMM[4]\n"
   1301                 "IMM UINT32 { 4, 0, 0, 0 }\n"
   1302                 "#define k5 IMM[5]\n"
   1303                 "IMM UINT32 { 5, 0, 0, 0 }\n"
   1304                 "#define k6 IMM[6]\n"
   1305                 "IMM UINT32 { 6, 0, 0, 0 }\n"
   1306                 "#define k7 IMM[7]\n"
   1307                 "IMM UINT32 { 7, 0, 0, 0 }\n"
   1308                 "#define k8 IMM[8]\n"
   1309                 "IMM UINT32 { 8, 0, 0, 0 }\n"
   1310                 "#define k9 IMM[9]\n"
   1311                 "IMM UINT32 { 9, 0, 0, 0 }\n"
   1312                 "#define korig IMM[10].xxxx\n"
   1313                 "#define karg IMM[10].yyyy\n"
   1314                 "IMM UINT32 { 3735928559, 286331153, 0, 0 }\n"
   1315                 "\n"
   1316                 "    BGNSUB\n"
   1317                 "       UMUL offset.x, threadid, k4\n"
   1318                 "       STORE target.x, offset, korig\n"
   1319                 "       USEQ tmp.x, threadid, k0\n"
   1320                 "       IF tmp\n"
   1321                 "               ATOMUADD tmp.x, target, offset, karg\n"
   1322                 "               ATOMUADD tmp.x, target, offset, tmp\n"
   1323                 "       ENDIF\n"
   1324                 "       USEQ tmp.x, threadid, k1\n"
   1325                 "       IF tmp\n"
   1326                 "               ATOMXCHG tmp.x, target, offset, karg\n"
   1327                 "               ATOMXCHG tmp.x, target, offset, tmp\n"
   1328                 "       ENDIF\n"
   1329                 "       USEQ tmp.x, threadid, k2\n"
   1330                 "       IF tmp\n"
   1331                 "               ATOMCAS tmp.x, target, offset, korig, karg\n"
   1332                 "               ATOMCAS tmp.x, target, offset, tmp, k0\n"
   1333                 "       ENDIF\n"
   1334                 "       USEQ tmp.x, threadid, k3\n"
   1335                 "       IF tmp\n"
   1336                 "               ATOMAND tmp.x, target, offset, karg\n"
   1337                 "               ATOMAND tmp.x, target, offset, tmp\n"
   1338                 "       ENDIF\n"
   1339                 "       USEQ tmp.x, threadid, k4\n"
   1340                 "       IF tmp\n"
   1341                 "               ATOMOR tmp.x, target, offset, karg\n"
   1342                 "               ATOMOR tmp.x, target, offset, tmp\n"
   1343                 "       ENDIF\n"
   1344                 "       USEQ tmp.x, threadid, k5\n"
   1345                 "       IF tmp\n"
   1346                 "               ATOMXOR tmp.x, target, offset, karg\n"
   1347                 "               ATOMXOR tmp.x, target, offset, tmp\n"
   1348                 "       ENDIF\n"
   1349                 "       USEQ tmp.x, threadid, k6\n"
   1350                 "       IF tmp\n"
   1351                 "               ATOMUMIN tmp.x, target, offset, karg\n"
   1352                 "               ATOMUMIN tmp.x, target, offset, tmp\n"
   1353                 "       ENDIF\n"
   1354                 "       USEQ tmp.x, threadid, k7\n"
   1355                 "       IF tmp\n"
   1356                 "               ATOMUMAX tmp.x, target, offset, karg\n"
   1357                 "               ATOMUMAX tmp.x, target, offset, tmp\n"
   1358                 "       ENDIF\n"
   1359                 "       USEQ tmp.x, threadid, k8\n"
   1360                 "       IF tmp\n"
   1361                 "               ATOMIMIN tmp.x, target, offset, karg\n"
   1362                 "               ATOMIMIN tmp.x, target, offset, tmp\n"
   1363                 "       ENDIF\n"
   1364                 "       USEQ tmp.x, threadid, k9\n"
   1365                 "       IF tmp\n"
   1366                 "               ATOMIMAX tmp.x, target, offset, karg\n"
   1367                 "               ATOMIMAX tmp.x, target, offset, tmp\n"
   1368                 "       ENDIF\n"
   1369                 "#ifdef TARGET_LOCAL\n"
   1370                 "       LOAD tmp.x, RLOCAL, offset\n"
   1371                 "       STORE RES[0].x, offset, tmp\n"
   1372                 "#endif\n"
   1373                 "       RET\n"
   1374                 "    ENDSUB\n";
   1375 
   1376         void init(void *p, int s, int x, int y) {
   1377                 *(uint32_t *)p = 0xbad;
   1378         }
   1379         void expect(void *p, int s, int x, int y) {
   1380                 switch (x) {
   1381                 case 0:
   1382                         *(uint32_t *)p = 0xce6c8eef;
   1383                         break;
   1384                 case 1:
   1385                         *(uint32_t *)p = 0xdeadbeef;
   1386                         break;
   1387                 case 2:
   1388                         *(uint32_t *)p = 0x11111111;
   1389                         break;
   1390                 case 3:
   1391                         *(uint32_t *)p = 0x10011001;
   1392                         break;
   1393                 case 4:
   1394                         *(uint32_t *)p = 0xdfbdbfff;
   1395                         break;
   1396                 case 5:
   1397                         *(uint32_t *)p = 0x11111111;
   1398                         break;
   1399                 case 6:
   1400                         *(uint32_t *)p = 0x11111111;
   1401                         break;
   1402                 case 7:
   1403                         *(uint32_t *)p = 0xdeadbeef;
   1404                         break;
   1405                 case 8:
   1406                         *(uint32_t *)p = 0xdeadbeef;
   1407                         break;
   1408                 case 9:
   1409                         *(uint32_t *)p = 0x11111111;
   1410                         break;
   1411                 }
   1412         }
   1413 
   1414         printf("- %s (%s)\n", __func__, global ? "global" : "local");
   1415 
   1416         init_prog(ctx, 40, 0, 0, src,
   1417                   (global ? "-DTARGET_GLOBAL" : "-DTARGET_LOCAL"));
   1418         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
   1419                  40, 0, init);
   1420         init_compute_resources(ctx, (int []) { 0, -1 });
   1421         launch_grid(ctx, (uint []){10, 1, 1}, (uint []){1, 1, 1}, 0, NULL);
   1422         check_tex(ctx, 0, expect, NULL);
   1423         destroy_compute_resources(ctx);
   1424         destroy_tex(ctx);
   1425         destroy_prog(ctx);
   1426 }
   1427 
   1428 static void test_atom_race(struct context *ctx, bool global)
   1429 {
   1430         const char *src = "COMP\n"
   1431                 "#ifdef TARGET_GLOBAL\n"
   1432                 "#define target RES[0]\n"
   1433                 "#else\n"
   1434                 "#define target RLOCAL\n"
   1435                 "#endif\n"
   1436                 ""
   1437                 "DCL RES[0], BUFFER, RAW, WR\n"
   1438                 ""
   1439                 "#define blockid SV[0]\n"
   1440                 "DCL blockid, BLOCK_ID[0]\n"
   1441                 "#define blocksz SV[1]\n"
   1442                 "DCL blocksz, BLOCK_SIZE[0]\n"
   1443                 "#define threadid SV[2]\n"
   1444                 "DCL threadid, THREAD_ID[0]\n"
   1445                 ""
   1446                 "#define offset TEMP[0]\n"
   1447                 "DCL offset, LOCAL\n"
   1448                 "#define arg TEMP[1]\n"
   1449                 "DCL arg, LOCAL\n"
   1450                 "#define count TEMP[2]\n"
   1451                 "DCL count, LOCAL\n"
   1452                 "#define vlocal TEMP[3]\n"
   1453                 "DCL vlocal, LOCAL\n"
   1454                 "#define vshared TEMP[4]\n"
   1455                 "DCL vshared, LOCAL\n"
   1456                 "#define last TEMP[5]\n"
   1457                 "DCL last, LOCAL\n"
   1458                 "#define tmp0 TEMP[6]\n"
   1459                 "DCL tmp0, LOCAL\n"
   1460                 "#define tmp1 TEMP[7]\n"
   1461                 "DCL tmp1, LOCAL\n"
   1462                 ""
   1463                 "#define k0 IMM[0]\n"
   1464                 "IMM UINT32 { 0, 0, 0, 0 }\n"
   1465                 "#define k1 IMM[1]\n"
   1466                 "IMM UINT32 { 1, 0, 0, 0 }\n"
   1467                 "#define k4 IMM[2]\n"
   1468                 "IMM UINT32 { 4, 0, 0, 0 }\n"
   1469                 "#define k32 IMM[3]\n"
   1470                 "IMM UINT32 { 32, 0, 0, 0 }\n"
   1471                 "#define k128 IMM[4]\n"
   1472                 "IMM UINT32 { 128, 0, 0, 0 }\n"
   1473                 "#define kdeadcafe IMM[5]\n"
   1474                 "IMM UINT32 { 3735931646, 0, 0, 0 }\n"
   1475                 "#define kallowed_set IMM[6]\n"
   1476                 "IMM UINT32 { 559035650, 0, 0, 0 }\n"
   1477                 "#define k11111111 IMM[7]\n"
   1478                 "IMM UINT32 { 286331153, 0, 0, 0 }\n"
   1479                 "\n"
   1480                 "    BGNSUB\n"
   1481                 "       MOV offset.x, threadid\n"
   1482                 "#ifdef TARGET_GLOBAL\n"
   1483                 "       UMUL tmp0.x, blockid, blocksz\n"
   1484                 "       UADD offset.x, offset, tmp0\n"
   1485                 "#endif\n"
   1486                 "       UMUL offset.x, offset, k4\n"
   1487                 "       USLT tmp0.x, threadid, k32\n"
   1488                 "       STORE target.x, offset, k0\n"
   1489                 "       BARRIER\n"
   1490                 "       IF tmp0\n"
   1491                 "               MOV vlocal.x, k0\n"
   1492                 "               MOV arg.x, kdeadcafe\n"
   1493                 "               BGNLOOP\n"
   1494                 "                       INEG arg.x, arg\n"
   1495                 "                       ATOMUADD vshared.x, target, offset, arg\n"
   1496                 "                       SFENCE target\n"
   1497                 "                       USNE tmp0.x, vshared, vlocal\n"
   1498                 "                       IF tmp0\n"
   1499                 "                               BRK\n"
   1500                 "                       ENDIF\n"
   1501                 "                       UADD vlocal.x, vlocal, arg\n"
   1502                 "               ENDLOOP\n"
   1503                 "               UADD vlocal.x, vshared, arg\n"
   1504                 "               LOAD vshared.x, target, offset\n"
   1505                 "               USEQ tmp0.x, vshared, vlocal\n"
   1506                 "               STORE target.x, offset, tmp0\n"
   1507                 "       ELSE\n"
   1508                 "               UADD offset.x, offset, -k128\n"
   1509                 "               MOV count.x, k0\n"
   1510                 "               MOV last.x, k0\n"
   1511                 "               BGNLOOP\n"
   1512                 "                       LOAD vshared.x, target, offset\n"
   1513                 "                       USEQ tmp0.x, vshared, kallowed_set.xxxx\n"
   1514                 "                       USEQ tmp1.x, vshared, kallowed_set.yyyy\n"
   1515                 "                       OR tmp0.x, tmp0, tmp1\n"
   1516                 "                       IF tmp0\n"
   1517                 "                               USEQ tmp0.x, vshared, last\n"
   1518                 "                               IF tmp0\n"
   1519                 "                                       CONT\n"
   1520                 "                               ENDIF\n"
   1521                 "                               MOV last.x, vshared\n"
   1522                 "                       ELSE\n"
   1523                 "                               END\n"
   1524                 "                       ENDIF\n"
   1525                 "                       UADD count.x, count, k1\n"
   1526                 "                       USEQ tmp0.x, count, k128\n"
   1527                 "                       IF tmp0\n"
   1528                 "                               BRK\n"
   1529                 "                       ENDIF\n"
   1530                 "               ENDLOOP\n"
   1531                 "               ATOMXCHG tmp0.x, target, offset, k11111111\n"
   1532                 "               UADD offset.x, offset, k128\n"
   1533                 "               ATOMXCHG tmp0.x, target, offset, k11111111\n"
   1534                 "               SFENCE target\n"
   1535                 "       ENDIF\n"
   1536                 "#ifdef TARGET_LOCAL\n"
   1537                 "       LOAD tmp0.x, RLOCAL, offset\n"
   1538                 "       UMUL tmp1.x, blockid, blocksz\n"
   1539                 "       UMUL tmp1.x, tmp1, k4\n"
   1540                 "       UADD offset.x, offset, tmp1\n"
   1541                 "       STORE RES[0].x, offset, tmp0\n"
   1542                 "#endif\n"
   1543                 "       RET\n"
   1544                 "    ENDSUB\n";
   1545 
   1546         void init(void *p, int s, int x, int y) {
   1547                 *(uint32_t *)p = 0xdeadbeef;
   1548         }
   1549         void expect(void *p, int s, int x, int y) {
   1550                 *(uint32_t *)p = x & 0x20 ? 0x11111111 : 0xffffffff;
   1551         }
   1552 
   1553         printf("- %s (%s)\n", __func__, global ? "global" : "local");
   1554 
   1555         init_prog(ctx, 256, 0, 0, src,
   1556                   (global ? "-DTARGET_GLOBAL" : "-DTARGET_LOCAL"));
   1557         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
   1558                  4096, 0, init);
   1559         init_compute_resources(ctx, (int []) { 0, -1 });
   1560         launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
   1561         check_tex(ctx, 0, expect, NULL);
   1562         destroy_compute_resources(ctx);
   1563         destroy_tex(ctx);
   1564         destroy_prog(ctx);
   1565 }
   1566 
   1567 int main(int argc, char *argv[])
   1568 {
   1569         struct context *ctx = CALLOC_STRUCT(context);
   1570 
   1571         init_ctx(ctx);
   1572         test_system_values(ctx);
   1573         test_resource_access(ctx);
   1574         test_function_calls(ctx);
   1575         test_input_global(ctx);
   1576         test_private(ctx);
   1577         test_local(ctx);
   1578         test_sample(ctx);
   1579         test_many_kern(ctx);
   1580         test_constant(ctx);
   1581         test_resource_indirect(ctx);
   1582         test_surface_ld(ctx);
   1583         test_surface_st(ctx);
   1584         test_barrier(ctx);
   1585         test_atom_ops(ctx, true);
   1586         test_atom_race(ctx, true);
   1587         test_atom_ops(ctx, false);
   1588         test_atom_race(ctx, false);
   1589         destroy_ctx(ctx);
   1590 
   1591         return 0;
   1592 }
   1593