Home | History | Annotate | Download | only in cl_12
      1 /*
      2  * Copyright 2017 Google Inc.
      3  *
      4  * Use of this source code is governed by a BSD-style license that can
      5  * be found in the LICENSE file.
      6  *
      7  */
      8 
      9 //
     10 //
     11 //
     12 
     13 #include <stdlib.h>
     14 #include <stdio.h>
     15 
     16 #include "hs/cl/hs_cl.h"
     17 
     18 #include "common/cl/assert_cl.h"
     19 
     20 #include "composition_cl_12.h"
     21 #include "config_cl.h"
     22 
     23 #include "context.h"
     24 #include "raster.h"
     25 #include "handle.h"
     26 
     27 #include "runtime_cl_12.h"
     28 
     29 #include "common.h"
     30 #include "tile.h"
     31 
     32 //
     33 // TTCK (32-BIT COMPARE) v1:
     34 //
     35 //  0                                                           63
     36 //  | PAYLOAD/TTSB/TTPB ID | PREFIX | ESCAPE | LAYER |  X  |  Y  |
     37 //  +----------------------+--------+--------+-------+-----+-----+
     38 //  |          30          |    1   |    1   |   18  |  7  |  7  |
     39 //
     40 //
     41 // TTCK (32-BIT COMPARE) v2:
     42 //
     43 //  0                                                           63
     44 //  | PAYLOAD/TTSB/TTPB ID | PREFIX | ESCAPE | LAYER |  X  |  Y  |
     45 //  +----------------------+--------+--------+-------+-----+-----+
     46 //  |          30          |    1   |    1   |   15  |  9  |  8  |
     47 //
     48 //
     49 // TTCK (64-BIT COMPARE) -- achieves 4K x 4K with an 8x16 tile:
     50 //
     51 //  0                                                           63
     52 //  | PAYLOAD/TTSB/TTPB ID | PREFIX | ESCAPE | LAYER |  X  |  Y  |
     53 //  +----------------------+--------+--------+-------+-----+-----+
     54 //  |          27          |    1   |    1   |   18  |  9  |  8  |
     55 //
     56 
//
// TTCK key overlay -- three views of the same 64-bit tile command key
// (bit layouts documented in the tables above). Field widths are
// supplied by the SKC_TTCK_* configuration macros.
//
union skc_ttck
{
  skc_ulong   u64;   // whole key as a single 64-bit scalar
  skc_uint2   u32v2; // lo/hi 32-bit halves

  // fully decomposed view -- the layer field is split across the
  // 32-bit boundary into layer_lo/layer_hi
  struct {
    skc_uint  id         : SKC_TTCK_LO_BITS_ID;     // payload / TTSB / TTPB block id
    skc_uint  prefix     : SKC_TTCK_LO_BITS_PREFIX;
    skc_uint  escape     : SKC_TTCK_LO_BITS_ESCAPE;
    skc_uint  layer_lo   : SKC_TTCK_LO_BITS_LAYER;  // low bits of layer
    skc_uint  layer_hi   : SKC_TTCK_HI_BITS_LAYER;  // high bits of layer
    skc_uint  x          : SKC_TTCK_HI_BITS_X;      // tile x
    skc_uint  y          : SKC_TTCK_HI_BITS_Y;      // tile y
  };

  // view exposing the full layer field as one 64-bit bitfield
  struct {
    skc_ulong na0        : SKC_TTCK_LO_BITS_ID_PREFIX_ESCAPE; // skip id+prefix+escape
    skc_ulong layer      : SKC_TTCK_BITS_LAYER;               // complete layer value
    skc_ulong na1        : SKC_TTCK_HI_BITS_YX;               // skip y:x
  };

  // view exposing the packed y:x coordinate in the hi word
  struct {
    skc_uint  na2;                                  // skip entire lo word
    skc_uint  na3        : SKC_TTCK_HI_BITS_LAYER;  // skip layer_hi
    skc_uint  yx         : SKC_TTCK_HI_BITS_YX;     // packed y:x
  };
};
     84 
//
// FIXME -- accept floats on host but convert to subpixel offsets
// before appending to command ring
//
// NOTE(review): these macros currently expand to the constant 0 and
// do NOT evaluate their argument -- so side effects in the argument
// expression (e.g. `*tys++` at the call sites below) are silently
// dropped. Confirm this is intended until the subpixel conversion is
// implemented.
//

#define SKC_PLACE_CMD_TX_CONVERT(f)  0
#define SKC_PLACE_CMD_TY_CONVERT(f)  0
     92 
     93 //
     94 // COMPOSITION PLACE
     95 //
     96 // This is a snapshot of the host-side command queue.
     97 //
     98 // Note that the composition command extent could be implemented as
     99 // either a mapped buffer or simply copied to an ephemeral extent.
    100 //
    101 // This implementation may vary between compute platforms.
    102 //
    103 
struct skc_composition_place
{
  struct skc_composition_impl      * impl; // backpointer to the owning composition impl

  cl_command_queue                   cq;   // in-order cq this snapshot executes on

  struct skc_extent_phw1g_tdrNs_snap cmds; // snapshot of the host place-command ring

  skc_subbuf_id_t                    id;   // temp suballocation id -- required to free this struct
};
    114 
//
// Forward declaration -- defined below. Returns the composition to
// the UNSEALED state, optionally blocking until that state is reached.
//

static
void
skc_composition_unseal_block(struct skc_composition_impl * const impl,
                             skc_bool                      const block);
    123 
//
// Drop one reference to the composition. On the final release, block
// until the composition is unsealed (a safe state for teardown) and
// then dispose of every host and device resource it owns.
//

static
void
skc_composition_pfn_release(struct skc_composition_impl * const impl)
{
  // drop a reference -- bail unless this was the last one
  if (--impl->composition->ref_count != 0)
    return;

  //
  // otherwise, dispose of all resources
  //

  // the unsealed state is a safe state to dispose of resources
  skc_composition_unseal_block(impl,true); // block

  struct skc_runtime * const runtime = impl->runtime;

  // free host composition
  skc_runtime_host_perm_free(runtime,impl->composition);

  // release the cq
  skc_runtime_release_cq_in_order(runtime,impl->cq);

  // release kernels
  cl(ReleaseKernel(impl->kernels.place));
  cl(ReleaseKernel(impl->kernels.segment));

  // release extents
  skc_extent_phw1g_tdrNs_free(runtime,&impl->cmds.extent);
  skc_extent_phrw_free       (runtime,&impl->saved.extent);
  skc_extent_phr_pdrw_free   (runtime,&impl->atomics);

  skc_extent_pdrw_free       (runtime,&impl->keys);
  skc_extent_pdrw_free       (runtime,&impl->offsets);

  // free composition impl
  skc_runtime_host_perm_free(runtime,impl);
}
    165 
    166 //
    167 //
    168 //
    169 
    170 static
    171 void
    172 skc_composition_place_grid_pfn_dispose(skc_grid_t const grid)
    173 {
    174   struct skc_composition_place * const place   = skc_grid_get_data(grid);
    175   struct skc_composition_impl  * const impl    = place->impl;
    176   struct skc_runtime           * const runtime = impl->runtime;
    177 
    178   // release cq
    179   skc_runtime_release_cq_in_order(runtime,place->cq);
    180 
    181   // unmap the snapshot (could be a copy)
    182   skc_extent_phw1g_tdrNs_snap_free(runtime,&place->cmds);
    183 
    184   // release place struct
    185   skc_runtime_host_temp_free(runtime,place,place->id);
    186 
    187   // release impl
    188   skc_composition_pfn_release(impl);
    189 }
    190 
    191 //
    192 //
    193 //
    194 
    195 static
    196 void
    197 skc_composition_place_read_complete(skc_grid_t const grid)
    198 {
    199   skc_grid_complete(grid);
    200 }
    201 
    202 static
    203 void
    204 skc_composition_place_read_cb(cl_event event, cl_int status, skc_grid_t const grid)
    205 {
    206   SKC_CL_CB(status);
    207 
    208   struct skc_composition_place * const place     = skc_grid_get_data(grid);
    209   struct skc_composition_impl  * const impl      = place->impl;
    210   struct skc_runtime           * const runtime   = impl->runtime;
    211   struct skc_scheduler         * const scheduler = runtime->scheduler;
    212 
    213   // as quickly as possible, enqueue next stage in pipeline to context command scheduler
    214   SKC_SCHEDULER_SCHEDULE(scheduler,skc_composition_place_read_complete,grid);
    215 }
    216 
//
// Execute one place grid: launch the PLACE kernel over the snapshot's
// commands, then asynchronously read the device atomics back to the
// host. Completion is signaled via an event callback.
//
static
void
skc_composition_place_grid_pfn_execute(skc_grid_t const grid)
{
  //
  // FILLS EXPAND
  //
  // need result of cmd counts before launching RASTERIZE grids
  //
  // - OpenCL 1.2: copy atomic counters back to host and launch RASTERIZE grids from host
  // - OpenCL 2.x: have a kernel size and launch RASTERIZE grids from device
  // - or launch a device-wide grid that feeds itself but that's unsatisfying
  //
  struct skc_composition_place * const place   = skc_grid_get_data(grid);
  struct skc_composition_impl  * const impl    = place->impl;
  struct skc_runtime           * const runtime = impl->runtime;

  // one work item per place command in this snapshot
  skc_uint  const work_size = skc_extent_ring_snap_count(place->cmds.snap);
  skc_uint4 const clip      = { 0, 0, SKC_UINT_MAX, SKC_UINT_MAX }; // maximal clip -- no clipping

  // initialize kernel args
  cl(SetKernelArg(impl->kernels.place,0,SKC_CL_ARG(impl->runtime->block_pool.blocks.drw)));
  cl(SetKernelArg(impl->kernels.place,1,SKC_CL_ARG(impl->atomics.drw)));
  cl(SetKernelArg(impl->kernels.place,2,SKC_CL_ARG(impl->keys.drw)));
  cl(SetKernelArg(impl->kernels.place,3,SKC_CL_ARG(place->cmds.drN)));
  cl(SetKernelArg(impl->kernels.place,4,SKC_CL_ARG(runtime->handle_pool.map.drw)));
  cl(SetKernelArg(impl->kernels.place,5,SKC_CL_ARG(clip))); // FIXME -- convert the clip to yx0/yx1 format
  cl(SetKernelArg(impl->kernels.place,6,SKC_CL_ARG(work_size)));

  // launch kernel
  skc_device_enqueue_kernel(runtime->device,
                            SKC_DEVICE_KERNEL_ID_PLACE,
                            place->cq,
                            impl->kernels.place,
                            work_size,
                            0,NULL,NULL);
  //
  // copy atomics back after every place launch
  //
  cl_event complete;

  skc_extent_phr_pdrw_read(&impl->atomics,place->cq,&complete);

  // the callback hops back onto the host scheduler to complete the grid
  cl(SetEventCallback(complete,CL_COMPLETE,skc_composition_place_read_cb,grid));
  cl(ReleaseEvent(complete));

  // flush command queue
  cl(Flush(place->cq));
}
    266 
    267 //
    268 //
    269 //
    270 
//
// Snapshot the host place-command ring and kick off a place grid to
// consume it. Retains the composition until the grid is disposed.
//
static
void
skc_composition_snap(struct skc_composition_impl * const impl)
{
  // hold a reference for the lifetime of the place grid
  skc_composition_retain(impl->composition);

  skc_subbuf_id_t id;

  // temp-alloc the place struct -- 'id' is required later to free it
  struct skc_composition_place * const place = skc_runtime_host_temp_alloc(impl->runtime,
                                                                           SKC_MEM_FLAGS_READ_WRITE,
                                                                           sizeof(*place),&id,NULL);

  // save the subbuf id
  place->id = id;

  // save backpointer
  place->impl = impl;

  // set grid data
  skc_grid_set_data(impl->grids.place,place);

  // acquire command queue
  place->cq = skc_runtime_acquire_cq_in_order(impl->runtime);

  // checkpoint the ring
  skc_extent_ring_checkpoint(&impl->cmds.ring);

  // make a snapshot
  skc_extent_phw1g_tdrNs_snap_init(impl->runtime,&impl->cmds.ring,&place->cmds);

  // allocate/map the snapshot's device extent (could be a copy)
  skc_extent_phw1g_tdrNs_snap_alloc(impl->runtime,
                                    &impl->cmds.extent,
                                    &place->cmds,
                                    place->cq,
                                    NULL);

  // force the place grid to start executing
  skc_grid_force(impl->grids.place);
}
    310 
    311 //
    312 //
    313 //
    314 
//
// Seal the composition: flush any work-in-progress place commands,
// transition to SEALING, and force-start the sort grid. No-op when
// already sealing or sealed.
//
static
void
skc_composition_pfn_seal(struct skc_composition_impl * const impl)
{
  // return if sealing or sealed
  if (impl->state >= SKC_COMPOSITION_STATE_SEALING)
    return;

  struct skc_runtime   * const runtime   = impl->runtime;
  struct skc_scheduler * const scheduler = runtime->scheduler;

  //
  // otherwise, wait for UNSEALING -> UNSEALED transition
  //
  if (impl->state == SKC_COMPOSITION_STATE_UNSEALING)
    {
      SKC_SCHEDULER_WAIT_WHILE(scheduler,impl->state != SKC_COMPOSITION_STATE_UNSEALED);
    }
  else // or we were already unsealed
    {
      // flush if there is work in progress
      skc_uint const count = skc_extent_ring_wip_count(&impl->cmds.ring);

      if (count > 0) {
        // snapshot the pending commands and launch a place grid
        skc_composition_snap(impl);
      }
    }

  //
  // now unsealed so we need to start sealing...
  //
  impl->state = SKC_COMPOSITION_STATE_SEALING;

  //
  // the seal operation implies we should force start all dependencies
  // that are still in a ready state
  //
  skc_grid_force(impl->grids.sort);
}
    354 
    355 //
    356 //
    357 //
    358 
    359 void
    360 skc_composition_sort_execute_complete(struct skc_composition_impl * const impl)
    361 {
    362   // we're sealed
    363   impl->state = SKC_COMPOSITION_STATE_SEALED;
    364 
    365   // this grid is done
    366   skc_grid_complete(impl->grids.sort);
    367 }
    368 
    369 static
    370 void
    371 skc_composition_sort_execute_cb(cl_event event, cl_int status, struct skc_composition_impl * const impl)
    372 {
    373   SKC_CL_CB(status);
    374 
    375   // as quickly as possible, enqueue next stage in pipeline to context command scheduler
    376   SKC_SCHEDULER_SCHEDULE(impl->runtime->scheduler,skc_composition_sort_execute_complete,impl);
    377 }
    378 
//
// Execute the sort phase of sealing on the composition's cq: sort the
// TTCK keys, launch SEGMENT_TTCK to find the start offset of each
// tile, then read the atomics back to the host. Completion is
// signaled via an event callback.
//
static
void
skc_composition_sort_grid_pfn_execute(skc_grid_t const grid)
{
  struct skc_composition_impl * const impl    = skc_grid_get_data(grid);
  struct skc_runtime          * const runtime = impl->runtime;

  // we should be sealing
  assert(impl->state == SKC_COMPOSITION_STATE_SEALING);

  // host-readable copy of the device atomics (read back by the place grids)
  struct skc_place_atomics * const atomics = impl->atomics.hr;

#ifndef NDEBUG
  fprintf(stderr,"composition sort: %u\n",atomics->keys);
#endif

  if (atomics->keys > 0)
    {
      uint32_t keys_padded_in, keys_padded_out;

      // pad the key count up to the sorter's required boundaries
      hs_cl_pad(runtime->hs,atomics->keys,&keys_padded_in,&keys_padded_out);

      // in-place sort of the TTCK keys
      hs_cl_sort(impl->runtime->hs,
                 impl->cq,
                 0,NULL,NULL,
                 impl->keys.drw,
                 NULL,
                 atomics->keys,
                 keys_padded_in,
                 keys_padded_out,
                 false);

      cl(SetKernelArg(impl->kernels.segment,0,SKC_CL_ARG(impl->keys.drw)));
      cl(SetKernelArg(impl->kernels.segment,1,SKC_CL_ARG(impl->offsets.drw)));
      cl(SetKernelArg(impl->kernels.segment,2,SKC_CL_ARG(impl->atomics.drw)));

      // find start of each tile
      skc_device_enqueue_kernel(runtime->device,
                                SKC_DEVICE_KERNEL_ID_SEGMENT_TTCK,
                                impl->cq,
                                impl->kernels.segment,
                                atomics->keys,
                                0,NULL,NULL);
    }

  cl_event complete;

  // next stage needs to know number of key segments
  skc_extent_phr_pdrw_read(&impl->atomics,impl->cq,&complete);

  // register a callback
  cl(SetEventCallback(complete,CL_COMPLETE,skc_composition_sort_execute_cb,impl));
  cl(ReleaseEvent(complete));

  // flush cq
  cl(Flush(impl->cq));
}
    436 
    437 //
    438 //
    439 //
    440 
    441 static
    442 void
    443 skc_composition_raster_release(struct skc_composition_impl * const impl)
    444 {
    445   //
    446   // reference counts to rasters can only be released when the
    447   // composition is unsealed and the atomics are reset.
    448   //
    449   skc_runtime_raster_device_release(impl->runtime,
    450                                     impl->saved.extent.hrw,
    451                                     impl->saved.count);
    452   // reset count
    453   impl->saved.count = 0;
    454 }
    455 
    456 //
    457 //
    458 //
    459 
//
// Return the composition to the UNSEALED state, attaching a fresh
// sort grid on the way.
//
// NOTE(review): the 'block' flag is only honored on the UNSEALING
// path; the SEALING path and the render-lock drain always wait --
// confirm this asymmetry is intended.
//
static
void
skc_composition_unseal_block(struct skc_composition_impl * const impl,
                             skc_bool                      const block)
{
  // return if already unsealed
  if (impl->state == SKC_COMPOSITION_STATE_UNSEALED)
    return;

  //
  // otherwise, we're going to need to pump the scheduler
  //
  struct skc_scheduler * const scheduler = impl->runtime->scheduler;

  //
  // wait for UNSEALING -> UNSEALED transition
  //
  if (impl->state == SKC_COMPOSITION_STATE_UNSEALING)
    {
      if (block) {
        SKC_SCHEDULER_WAIT_WHILE(scheduler,impl->state != SKC_COMPOSITION_STATE_UNSEALED);
      }
      return;
    }

  //
  // wait for SEALING -> SEALED transition ...
  //
  if (impl->state == SKC_COMPOSITION_STATE_SEALING)
    {
      // wait if sealing
      SKC_SCHEDULER_WAIT_WHILE(scheduler,impl->state != SKC_COMPOSITION_STATE_SEALED);
    }

  // wait for rendering locks to be released
  SKC_SCHEDULER_WAIT_WHILE(scheduler,impl->lock_count > 0);

  //
  // no need to visit UNSEALING state with this implementation
  //

  // acquire a new grid
  impl->grids.sort = SKC_GRID_DEPS_ATTACH(impl->runtime->deps,
                                          NULL,  // the composition state guards this
                                          impl,
                                          NULL,  // no waiting
                                          skc_composition_sort_grid_pfn_execute,
                                          NULL); // no dispose

  // mark composition as unsealed
  impl->state = SKC_COMPOSITION_STATE_UNSEALED;
}
    512 
    513 //
    514 // can only be called on a composition that was just unsealed
    515 //
static
void
skc_composition_reset(struct skc_composition_impl * const impl)
{
  // zero the device-side atomics (async -- no completion event requested)
  skc_extent_phr_pdrw_zero(&impl->atomics,impl->cq,NULL);

  // flush it so the zeroing is submitted
  cl(Flush(impl->cq));

  // release all the rasters saved by place()
  skc_composition_raster_release(impl);
}
    529 
    530 static
    531 void
    532 skc_composition_unseal_block_reset(struct skc_composition_impl * const impl,
    533                                    skc_bool                      const block,
    534                                    skc_bool                      const reset)
    535 {
    536   skc_composition_unseal_block(impl,block);
    537 
    538   if (reset) {
    539     skc_composition_reset(impl);
    540   }
    541 }
    542 
    543 //
    544 //
    545 //
    546 
static
void
skc_composition_pfn_unseal(struct skc_composition_impl * const impl, skc_bool const reset)
{
  // public unseal entry point -- never blocks the caller
  skc_composition_unseal_block_reset(impl,false,reset);
}
    553 
    554 //
    555 // only needs to create a grid
    556 //
    557 
static
void
skc_composition_place_create(struct skc_composition_impl * const impl)
{
  // acquire a grid
  // NOTE(review): &impl->grids.place is passed as the grid's address
  // slot -- presumably so the deps system can clear it on detach;
  // confirm against the grid implementation
  impl->grids.place = SKC_GRID_DEPS_ATTACH(impl->runtime->deps,
                                           &impl->grids.place,
                                           NULL,
                                           NULL, // no waiting
                                           skc_composition_place_grid_pfn_execute,
                                           skc_composition_place_grid_pfn_dispose);

  // assign happens-after relationship: sort cannot run until place completes
  skc_grid_happens_after_grid(impl->grids.sort,impl->grids.place);
}
    573 
    574 
//
// Place a batch of rasters into the composition.
//
// Validates and retains 'count' raster handles, appends the stripped
// handles to impl->saved, then copies one place command per raster
// into the command ring -- snapshotting (i.e. launching a place
// kernel) each time the ring's work-in-progress region fills.
//
// txs/tys are optional per-raster translations; either or both may be
// NULL (treated as zero).
//
// Returns SKC_ERR_SUCCESS, or the validation error with no commands
// appended.
//
static
skc_err
skc_composition_pfn_place(struct skc_composition_impl * const impl,
                          skc_raster_t          const *       rasters,
                          skc_layer_id          const *       layer_ids,
                          skc_float             const *       txs,
                          skc_float             const *       tys,
                          skc_uint                            count)
{
  // block and yield if not unsealed
  skc_composition_unseal_block(impl,true);

  //
  // validate and retain all rasters
  //
  skc_err err;

  err = skc_runtime_handle_device_validate_retain(impl->runtime,
                                                  SKC_TYPED_HANDLE_TYPE_IS_RASTER,
                                                  rasters,
                                                  count);
  if (err)
    return err;

  skc_runtime_handle_device_retain(impl->runtime,rasters,count);

  //
  // save the stripped handles
  //
  skc_raster_t * saved = impl->saved.extent.hrw;

  saved             += impl->saved.count;
  impl->saved.count += count;

  for (skc_uint ii=0; ii<count; ii++) {
    saved[ii] = SKC_TYPED_HANDLE_TO_HANDLE(*rasters++);
  }

  //
  // - declare the place grid happens after the raster
  // - copy place commands into ring
  //
  do {
    skc_uint rem;

    // find out how much room is left in then ring's snap
    // if the place ring is full -- let it drain
    SKC_SCHEDULER_WAIT_WHILE(impl->runtime->scheduler,(rem = skc_extent_ring_wip_rem(&impl->cmds.ring)) == 0);

    // append commands
    skc_uint avail = min(rem,count);

    // decrement count
    count -= avail;

    // launch a place kernel after copying commands?
    skc_bool const is_wip_full = (avail == rem);

    // if there is no place grid then create one
    if (impl->grids.place == NULL)
      {
        skc_composition_place_create(impl);
      }

    //
    // FIXME -- OPTIMIZATION? -- the ring_wip_index_inc() test can
    // be avoided by splitting into at most two intervals. It should
    // be plenty fast as is though so leave for now.
    //
    union skc_cmd_place * const cmds = impl->cmds.extent.hw1;

    //
    // NOTE(review): SKC_PLACE_CMD_T[XY]_CONVERT currently expand to 0
    // without evaluating their argument, so the `*txs++` / `*tys++`
    // expressions below are never executed and those pointers do not
    // advance -- revisit when the conversion FIXME is implemented.
    //
    if ((txs == NULL) && (tys == NULL))
      {
        while (avail-- > 0)
          {
            skc_raster_t const raster = *saved++;

            skc_grid_happens_after_handle(impl->grids.place,raster);

            cmds[skc_extent_ring_wip_index_inc(&impl->cmds.ring)] =
              (union skc_cmd_place){ raster, *layer_ids++, 0, 0 };
          }
      }
    else if (txs == NULL)
      {
        while (avail-- > 0)
          {
            skc_raster_t const raster = *saved++;

            skc_grid_happens_after_handle(impl->grids.place,raster);

            cmds[skc_extent_ring_wip_index_inc(&impl->cmds.ring)] =
              (union skc_cmd_place){ raster,
                                     *layer_ids++,
                                     0,
                                     SKC_PLACE_CMD_TY_CONVERT(*tys++) };
          }
      }
    else if (tys == NULL)
      {
        while (avail-- > 0)
          {
            skc_raster_t const raster = *saved++;

            skc_grid_happens_after_handle(impl->grids.place,raster);

            cmds[skc_extent_ring_wip_index_inc(&impl->cmds.ring)] =
              (union skc_cmd_place){ raster,
                                     *layer_ids++,
                                     SKC_PLACE_CMD_TX_CONVERT(*txs++),
                                     0 };
          }
      }
    else
      {
        while (avail-- > 0)
          {
            skc_raster_t const raster = *saved++;

            skc_grid_happens_after_handle(impl->grids.place,raster);

            cmds[skc_extent_ring_wip_index_inc(&impl->cmds.ring)] =
              (union skc_cmd_place){ raster,
                                     *layer_ids++,
                                     SKC_PLACE_CMD_TX_CONVERT(*txs++),
                                     SKC_PLACE_CMD_TY_CONVERT(*tys++) };
          }
      }

    // launch place kernel?
    if (is_wip_full) {
      skc_composition_snap(impl);
    }
  } while (count > 0);

  return SKC_ERR_SUCCESS;
}
    712 
    713 //
    714 //
    715 //
    716 
    717 static
    718 void
    719 skc_composition_pfn_bounds(struct skc_composition_impl * const impl, skc_int bounds[4])
    720 {
    721   //
    722   // FIXME -- not implemented yet
    723   //
    724   // impl bounds will be copied back after sealing
    725   //
    726   bounds[0] = SKC_INT_MIN;
    727   bounds[1] = SKC_INT_MIN;
    728   bounds[2] = SKC_INT_MAX;
    729   bounds[3] = SKC_INT_MAX;
    730 }
    731 
    732 //
    733 //
    734 //
    735 
    736 void
    737 skc_composition_retain_and_lock(struct skc_composition * const composition)
    738 {
    739   skc_composition_retain(composition);
    740 
    741   composition->impl->lock_count += 1;
    742 }
    743 
    744 void
    745 skc_composition_unlock_and_release(struct skc_composition * const composition)
    746 {
    747   composition->impl->lock_count -= 1;
    748 
    749   skc_composition_pfn_release(composition->impl);
    750 }
    751 
    752 //
    753 //
    754 //
    755 
//
// Create a composition object and its OpenCL 1.2 impl: allocates the
// host/device extents sized from the runtime config, acquires a cq
// and the PLACE/SEGMENT_TTCK kernels, and leaves the composition
// unsealed with zeroed atomics.
//
skc_err
skc_composition_cl_12_create(struct skc_context       * const context,
                             struct skc_composition * * const composition)
{
  struct skc_runtime * const runtime = context->runtime;

  // retain the context
  // skc_context_retain(context);

  // allocate impl
  struct skc_composition_impl * const impl = skc_runtime_host_perm_alloc(runtime,SKC_MEM_FLAGS_READ_WRITE,sizeof(*impl));

  // allocate composition
  (*composition)            = skc_runtime_host_perm_alloc(runtime,SKC_MEM_FLAGS_READ_WRITE,sizeof(**composition));

  (*composition)->context   = context;
  (*composition)->impl      = impl;
  (*composition)->ref_count = 1;

  // wire up the public pfn interface
  (*composition)->place     = skc_composition_pfn_place;
  (*composition)->unseal    = skc_composition_pfn_unseal;
  (*composition)->seal      = skc_composition_pfn_seal;
  (*composition)->bounds    = skc_composition_pfn_bounds;
  (*composition)->release   = skc_composition_pfn_release;

  // initialize impl
  impl->composition   = (*composition);
  impl->runtime       = runtime;

  SKC_ASSERT_STATE_INIT(impl,SKC_COMPOSITION_STATE_SEALED);

  impl->lock_count    = 0;

  impl->grids.sort    = NULL;
  impl->grids.place   = NULL;

  // acquire command queue for sealing/unsealing
  impl->cq            = skc_runtime_acquire_cq_in_order(runtime);

  // acquire kernels
  impl->kernels.place   = skc_device_acquire_kernel(runtime->device, SKC_DEVICE_KERNEL_ID_PLACE);
  impl->kernels.segment = skc_device_acquire_kernel(runtime->device, SKC_DEVICE_KERNEL_ID_SEGMENT_TTCK);

  // get config
  struct skc_config const * const config = runtime->config;

  // initialize ring size with config values
  skc_extent_ring_init(&impl->cmds.ring,
                       config->composition.cmds.elem_count,
                       config->composition.cmds.snap_count,
                       sizeof(union skc_cmd_place));

  skc_extent_phw1g_tdrNs_alloc(runtime,&impl->cmds.extent ,sizeof(union skc_cmd_place) * config->composition.cmds.elem_count);
  skc_extent_phrw_alloc       (runtime,&impl->saved.extent,sizeof(skc_raster_t)        * config->composition.raster_ids.elem_count);
  skc_extent_phr_pdrw_alloc   (runtime,&impl->atomics     ,sizeof(struct skc_place_atomics));

  skc_extent_pdrw_alloc       (runtime,&impl->keys        ,sizeof(skc_ttxk_t)          * config->composition.keys.elem_count);
  skc_extent_pdrw_alloc       (runtime,&impl->offsets     ,sizeof(skc_uint)            * (1u << SKC_TTCK_HI_BITS_YX)); // 1MB

  // nothing saved
  impl->saved.count = 0;

  // unseal the composition, zero the atomics, etc.
  skc_composition_unseal_block_reset(impl,false,true);

  return SKC_ERR_SUCCESS;
}
    823 
    824 //
    825 //
    826 //
    827