Home | History | Annotate | Download | only in api
      1 //
      2 // Copyright 2012 Francisco Jerez
      3 //
      4 // Permission is hereby granted, free of charge, to any person obtaining a
      5 // copy of this software and associated documentation files (the "Software"),
      6 // to deal in the Software without restriction, including without limitation
      7 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8 // and/or sell copies of the Software, and to permit persons to whom the
      9 // Software is furnished to do so, subject to the following conditions:
     10 //
     11 // The above copyright notice and this permission notice shall be included in
     12 // all copies or substantial portions of the Software.
     13 //
     14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17 // THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
     18 // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
     19 // OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     20 // SOFTWARE.
     21 //
     22 
     23 #include "api/util.hpp"
     24 #include "core/kernel.hpp"
     25 #include "core/event.hpp"
     26 
     27 using namespace clover;
     28 
     29 PUBLIC cl_kernel
     30 clCreateKernel(cl_program prog, const char *name,
     31                cl_int *errcode_ret) try {
     32    if (!prog)
     33       throw error(CL_INVALID_PROGRAM);
     34 
     35    if (!name)
     36       throw error(CL_INVALID_VALUE);
     37 
     38    if (prog->binaries().empty())
     39       throw error(CL_INVALID_PROGRAM_EXECUTABLE);
     40 
     41    auto sym = prog->binaries().begin()->second.sym(name);
     42 
     43    ret_error(errcode_ret, CL_SUCCESS);
     44    return new kernel(*prog, name, { sym.args.begin(), sym.args.end() });
     45 
     46 } catch (module::noent_error &e) {
     47    ret_error(errcode_ret, CL_INVALID_KERNEL_NAME);
     48    return NULL;
     49 
     50 } catch(error &e) {
     51    ret_error(errcode_ret, e);
     52    return NULL;
     53 }
     54 
     55 PUBLIC cl_int
     56 clCreateKernelsInProgram(cl_program prog, cl_uint count,
     57                          cl_kernel *kerns, cl_uint *count_ret) {
     58    if (!prog)
     59       throw error(CL_INVALID_PROGRAM);
     60 
     61    if (prog->binaries().empty())
     62       throw error(CL_INVALID_PROGRAM_EXECUTABLE);
     63 
     64    auto &syms = prog->binaries().begin()->second.syms;
     65 
     66    if (kerns && count < syms.size())
     67       throw error(CL_INVALID_VALUE);
     68 
     69    if (kerns)
     70       std::transform(syms.begin(), syms.end(), kerns,
     71                      [=](const module::symbol &sym) {
     72                         return new kernel(*prog, compat::string(sym.name),
     73                                           { sym.args.begin(), sym.args.end() });
     74                      });
     75 
     76    if (count_ret)
     77       *count_ret = syms.size();
     78 
     79    return CL_SUCCESS;
     80 }
     81 
     82 PUBLIC cl_int
     83 clRetainKernel(cl_kernel kern) {
     84    if (!kern)
     85       return CL_INVALID_KERNEL;
     86 
     87    kern->retain();
     88    return CL_SUCCESS;
     89 }
     90 
     91 PUBLIC cl_int
     92 clReleaseKernel(cl_kernel kern) {
     93    if (!kern)
     94       return CL_INVALID_KERNEL;
     95 
     96    if (kern->release())
     97       delete kern;
     98 
     99    return CL_SUCCESS;
    100 }
    101 
    102 PUBLIC cl_int
    103 clSetKernelArg(cl_kernel kern, cl_uint idx, size_t size,
    104                const void *value) try {
    105    if (!kern)
    106       throw error(CL_INVALID_KERNEL);
    107 
    108    if (idx >= kern->args.size())
    109       throw error(CL_INVALID_ARG_INDEX);
    110 
    111    kern->args[idx]->set(size, value);
    112 
    113    return CL_SUCCESS;
    114 
    115 } catch(error &e) {
    116    return e.get();
    117 }
    118 
    119 PUBLIC cl_int
    120 clGetKernelInfo(cl_kernel kern, cl_kernel_info param,
    121                 size_t size, void *buf, size_t *size_ret) {
    122    if (!kern)
    123       return CL_INVALID_KERNEL;
    124 
    125    switch (param) {
    126    case CL_KERNEL_FUNCTION_NAME:
    127       return string_property(buf, size, size_ret, kern->name());
    128 
    129    case CL_KERNEL_NUM_ARGS:
    130       return scalar_property<cl_uint>(buf, size, size_ret,
    131                                       kern->args.size());
    132 
    133    case CL_KERNEL_REFERENCE_COUNT:
    134       return scalar_property<cl_uint>(buf, size, size_ret,
    135                                       kern->ref_count());
    136 
    137    case CL_KERNEL_CONTEXT:
    138       return scalar_property<cl_context>(buf, size, size_ret,
    139                                          &kern->prog.ctx);
    140 
    141    case CL_KERNEL_PROGRAM:
    142       return scalar_property<cl_program>(buf, size, size_ret,
    143                                          &kern->prog);
    144 
    145    default:
    146       return CL_INVALID_VALUE;
    147    }
    148 }
    149 
    150 PUBLIC cl_int
    151 clGetKernelWorkGroupInfo(cl_kernel kern, cl_device_id dev,
    152                          cl_kernel_work_group_info param,
    153                          size_t size, void *buf, size_t *size_ret) {
    154    if (!kern)
    155       return CL_INVALID_KERNEL;
    156 
    157    if ((!dev && kern->prog.binaries().size() != 1) ||
    158        (dev && !kern->prog.binaries().count(dev)))
    159       return CL_INVALID_DEVICE;
    160 
    161    switch (param) {
    162    case CL_KERNEL_WORK_GROUP_SIZE:
    163       return scalar_property<size_t>(buf, size, size_ret,
    164                                      kern->max_block_size());
    165 
    166    case CL_KERNEL_COMPILE_WORK_GROUP_SIZE:
    167       return vector_property<size_t>(buf, size, size_ret,
    168                                      kern->block_size());
    169 
    170    case CL_KERNEL_LOCAL_MEM_SIZE:
    171       return scalar_property<cl_ulong>(buf, size, size_ret,
    172                                        kern->mem_local());
    173 
    174    case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
    175       return scalar_property<size_t>(buf, size, size_ret, 1);
    176 
    177    case CL_KERNEL_PRIVATE_MEM_SIZE:
    178       return scalar_property<cl_ulong>(buf, size, size_ret,
    179                                        kern->mem_private());
    180 
    181    default:
    182       return CL_INVALID_VALUE;
    183    }
    184 }
    185 
    186 namespace {
    187    ///
    188    /// Common argument checking shared by kernel invocation commands.
    189    ///
    190    void
    191    kernel_validate(cl_command_queue q, cl_kernel kern,
    192                    cl_uint dims, const size_t *grid_offset,
    193                    const size_t *grid_size, const size_t *block_size,
    194                    cl_uint num_deps, const cl_event *deps,
    195                    cl_event *ev) {
    196       if (!q)
    197          throw error(CL_INVALID_COMMAND_QUEUE);
    198 
    199       if (!kern)
    200          throw error(CL_INVALID_KERNEL);
    201 
    202       if (&kern->prog.ctx != &q->ctx ||
    203           any_of([&](const cl_event ev) {
    204                 return &ev->ctx != &q->ctx;
    205              }, deps, deps + num_deps))
    206          throw error(CL_INVALID_CONTEXT);
    207 
    208       if (bool(num_deps) != bool(deps) ||
    209           any_of(is_zero<cl_event>(), deps, deps + num_deps))
    210          throw error(CL_INVALID_EVENT_WAIT_LIST);
    211 
    212       if (any_of([](std::unique_ptr<kernel::argument> &arg) {
    213                return !arg->set();
    214             }, kern->args.begin(), kern->args.end()))
    215          throw error(CL_INVALID_KERNEL_ARGS);
    216 
    217       if (!kern->prog.binaries().count(&q->dev))
    218          throw error(CL_INVALID_PROGRAM_EXECUTABLE);
    219 
    220       if (dims < 1 || dims > q->dev.max_block_size().size())
    221          throw error(CL_INVALID_WORK_DIMENSION);
    222 
    223       if (!grid_size || any_of(is_zero<size_t>(), grid_size, grid_size + dims))
    224          throw error(CL_INVALID_GLOBAL_WORK_SIZE);
    225 
    226       if (block_size) {
    227          if (any_of([](size_t b, size_t max) {
    228                   return b == 0 || b > max;
    229                }, block_size, block_size + dims,
    230                q->dev.max_block_size().begin()))
    231             throw error(CL_INVALID_WORK_ITEM_SIZE);
    232 
    233          if (any_of([](size_t b, size_t g) {
    234                   return g % b;
    235                }, block_size, block_size + dims, grid_size))
    236             throw error(CL_INVALID_WORK_GROUP_SIZE);
    237 
    238          if (fold(std::multiplies<size_t>(), 1u,
    239                   block_size, block_size + dims) >
    240              q->dev.max_threads_per_block())
    241             throw error(CL_INVALID_WORK_GROUP_SIZE);
    242       }
    243    }
    244 
    245    ///
    246    /// Common event action shared by kernel invocation commands.
    247    ///
    248    std::function<void (event &)>
    249    kernel_op(cl_command_queue q, cl_kernel kern,
    250              const std::vector<size_t> &grid_offset,
    251              const std::vector<size_t> &grid_size,
    252              const std::vector<size_t> &block_size) {
    253       const std::vector<size_t> reduced_grid_size = map(
    254          std::divides<size_t>(), grid_size.begin(), grid_size.end(),
    255          block_size.begin());
    256 
    257       return [=](event &) {
    258          kern->launch(*q, grid_offset, reduced_grid_size, block_size);
    259       };
    260    }
    261 
    262    template<typename T, typename S>
    263    std::vector<T>
    264    opt_vector(const T *p, S n) {
    265       if (p)
    266          return { p, p + n };
    267       else
    268          return { n };
    269    }
    270 }
    271 
    272 PUBLIC cl_int
    273 clEnqueueNDRangeKernel(cl_command_queue q, cl_kernel kern,
    274                        cl_uint dims, const size_t *pgrid_offset,
    275                        const size_t *pgrid_size, const size_t *pblock_size,
    276                        cl_uint num_deps, const cl_event *deps,
    277                        cl_event *ev) try {
    278    const std::vector<size_t> grid_offset = opt_vector(pgrid_offset, dims);
    279    const std::vector<size_t> grid_size = opt_vector(pgrid_size, dims);
    280    const std::vector<size_t> block_size = opt_vector(pblock_size, dims);
    281 
    282    kernel_validate(q, kern, dims, pgrid_offset, pgrid_size, pblock_size,
    283                    num_deps, deps, ev);
    284 
    285    hard_event *hev = new hard_event(
    286       *q, CL_COMMAND_NDRANGE_KERNEL, { deps, deps + num_deps },
    287       kernel_op(q, kern, grid_offset, grid_size, block_size));
    288 
    289    ret_object(ev, hev);
    290    return CL_SUCCESS;
    291 
    292 } catch(error &e) {
    293    return e.get();
    294 }
    295 
    296 PUBLIC cl_int
    297 clEnqueueTask(cl_command_queue q, cl_kernel kern,
    298               cl_uint num_deps, const cl_event *deps,
    299               cl_event *ev) try {
    300    const std::vector<size_t> grid_offset = { 0 };
    301    const std::vector<size_t> grid_size = { 1 };
    302    const std::vector<size_t> block_size = { 1 };
    303 
    304    kernel_validate(q, kern, 1, grid_offset.data(), grid_size.data(),
    305                    block_size.data(), num_deps, deps, ev);
    306 
    307    hard_event *hev = new hard_event(
    308       *q, CL_COMMAND_TASK, { deps, deps + num_deps },
    309       kernel_op(q, kern, grid_offset, grid_size, block_size));
    310 
    311    ret_object(ev, hev);
    312    return CL_SUCCESS;
    313 
    314 } catch(error &e) {
    315    return e.get();
    316 }
    317 
    318 PUBLIC cl_int
    319 clEnqueueNativeKernel(cl_command_queue q, void (*func)(void *),
    320                       void *args, size_t args_size,
    321                       cl_uint obj_count, const cl_mem *obj_list,
    322                       const void **obj_args, cl_uint num_deps,
    323                       const cl_event *deps, cl_event *ev) {
    324    return CL_INVALID_OPERATION;
    325 }
    326