Home | History | Annotate | Download | only in api
      1 //
      2 // Copyright 2012 Francisco Jerez
      3 //
      4 // Permission is hereby granted, free of charge, to any person obtaining a
      5 // copy of this software and associated documentation files (the "Software"),
      6 // to deal in the Software without restriction, including without limitation
      7 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8 // and/or sell copies of the Software, and to permit persons to whom the
      9 // Software is furnished to do so, subject to the following conditions:
     10 //
     11 // The above copyright notice and this permission notice shall be included in
     12 // all copies or substantial portions of the Software.
     13 //
     14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
     18 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     19 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     20 // OTHER DEALINGS IN THE SOFTWARE.
     21 //
     22 
     23 #include "api/util.hpp"
     24 #include "core/kernel.hpp"
     25 #include "core/event.hpp"
     26 
     27 using namespace clover;
     28 
     29 CLOVER_API cl_kernel
     30 clCreateKernel(cl_program d_prog, const char *name, cl_int *r_errcode) try {
     31    auto &prog = obj(d_prog);
     32 
     33    if (!name)
     34       throw error(CL_INVALID_VALUE);
     35 
     36    auto &sym = find(name_equals(name), prog.symbols());
     37 
     38    ret_error(r_errcode, CL_SUCCESS);
     39    return new kernel(prog, name, range(sym.args));
     40 
     41 } catch (std::out_of_range &e) {
     42    ret_error(r_errcode, CL_INVALID_KERNEL_NAME);
     43    return NULL;
     44 
     45 } catch (error &e) {
     46    ret_error(r_errcode, e);
     47    return NULL;
     48 }
     49 
     50 CLOVER_API cl_int
     51 clCreateKernelsInProgram(cl_program d_prog, cl_uint count,
     52                          cl_kernel *rd_kerns, cl_uint *r_count) try {
     53    auto &prog = obj(d_prog);
     54    auto &syms = prog.symbols();
     55 
     56    if (rd_kerns && count < syms.size())
     57       throw error(CL_INVALID_VALUE);
     58 
     59    if (rd_kerns)
     60       copy(map([&](const module::symbol &sym) {
     61                return desc(new kernel(prog,
     62                                       std::string(sym.name.begin(),
     63                                                   sym.name.end()),
     64                                       range(sym.args)));
     65             }, syms),
     66          rd_kerns);
     67 
     68    if (r_count)
     69       *r_count = syms.size();
     70 
     71    return CL_SUCCESS;
     72 
     73 } catch (error &e) {
     74    return e.get();
     75 }
     76 
     77 CLOVER_API cl_int
     78 clRetainKernel(cl_kernel d_kern) try {
     79    obj(d_kern).retain();
     80    return CL_SUCCESS;
     81 
     82 } catch (error &e) {
     83    return e.get();
     84 }
     85 
     86 CLOVER_API cl_int
     87 clReleaseKernel(cl_kernel d_kern) try {
     88    if (obj(d_kern).release())
     89       delete pobj(d_kern);
     90 
     91    return CL_SUCCESS;
     92 
     93 } catch (error &e) {
     94    return e.get();
     95 }
     96 
     97 CLOVER_API cl_int
     98 clSetKernelArg(cl_kernel d_kern, cl_uint idx, size_t size,
     99                const void *value) try {
    100    obj(d_kern).args().at(idx).set(size, value);
    101    return CL_SUCCESS;
    102 
    103 } catch (std::out_of_range &e) {
    104    return CL_INVALID_ARG_INDEX;
    105 
    106 } catch (error &e) {
    107    return e.get();
    108 }
    109 
    110 CLOVER_API cl_int
    111 clGetKernelInfo(cl_kernel d_kern, cl_kernel_info param,
    112                 size_t size, void *r_buf, size_t *r_size) try {
    113    property_buffer buf { r_buf, size, r_size };
    114    auto &kern = obj(d_kern);
    115 
    116    switch (param) {
    117    case CL_KERNEL_FUNCTION_NAME:
    118       buf.as_string() = kern.name();
    119       break;
    120 
    121    case CL_KERNEL_NUM_ARGS:
    122       buf.as_scalar<cl_uint>() = kern.args().size();
    123       break;
    124 
    125    case CL_KERNEL_REFERENCE_COUNT:
    126       buf.as_scalar<cl_uint>() = kern.ref_count();
    127       break;
    128 
    129    case CL_KERNEL_CONTEXT:
    130       buf.as_scalar<cl_context>() = desc(kern.program().context());
    131       break;
    132 
    133    case CL_KERNEL_PROGRAM:
    134       buf.as_scalar<cl_program>() = desc(kern.program());
    135       break;
    136 
    137    default:
    138       throw error(CL_INVALID_VALUE);
    139    }
    140 
    141    return CL_SUCCESS;
    142 
    143 } catch (error &e) {
    144    return e.get();
    145 }
    146 
    147 CLOVER_API cl_int
    148 clGetKernelWorkGroupInfo(cl_kernel d_kern, cl_device_id d_dev,
    149                          cl_kernel_work_group_info param,
    150                          size_t size, void *r_buf, size_t *r_size) try {
    151    property_buffer buf { r_buf, size, r_size };
    152    auto &kern = obj(d_kern);
    153    auto &dev = (d_dev ? *pobj(d_dev) : unique(kern.program().devices()));
    154 
    155    if (!count(dev, kern.program().devices()))
    156       throw error(CL_INVALID_DEVICE);
    157 
    158    switch (param) {
    159    case CL_KERNEL_WORK_GROUP_SIZE:
    160       buf.as_scalar<size_t>() = dev.max_threads_per_block();
    161       break;
    162 
    163    case CL_KERNEL_COMPILE_WORK_GROUP_SIZE:
    164       buf.as_vector<size_t>() = kern.required_block_size();
    165       break;
    166 
    167    case CL_KERNEL_LOCAL_MEM_SIZE:
    168       buf.as_scalar<cl_ulong>() = kern.mem_local();
    169       break;
    170 
    171    case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
    172       buf.as_scalar<size_t>() = dev.subgroup_size();
    173       break;
    174 
    175    case CL_KERNEL_PRIVATE_MEM_SIZE:
    176       buf.as_scalar<cl_ulong>() = kern.mem_private();
    177       break;
    178 
    179    default:
    180       throw error(CL_INVALID_VALUE);
    181    }
    182 
    183    return CL_SUCCESS;
    184 
    185 } catch (error &e) {
    186    return e.get();
    187 
    188 } catch (std::out_of_range &e) {
    189    return CL_INVALID_DEVICE;
    190 }
    191 
    192 CLOVER_API cl_int
    193 clGetKernelArgInfo(cl_kernel d_kern,
    194                    cl_uint idx, cl_kernel_arg_info param,
    195                    size_t size, void *r_buf, size_t *r_size) {
    196    CLOVER_NOT_SUPPORTED_UNTIL("1.2");
    197    return CL_KERNEL_ARG_INFO_NOT_AVAILABLE;
    198 }
    199 
    200 namespace {
    201    ///
    202    /// Common argument checking shared by kernel invocation commands.
    203    ///
    204    void
    205    validate_common(const command_queue &q, kernel &kern,
    206                    const ref_vector<event> &deps) {
    207       if (kern.program().context() != q.context() ||
    208           any_of([&](const event &ev) {
    209                 return ev.context() != q.context();
    210              }, deps))
    211          throw error(CL_INVALID_CONTEXT);
    212 
    213       if (any_of([](kernel::argument &arg) {
    214                return !arg.set();
    215             }, kern.args()))
    216          throw error(CL_INVALID_KERNEL_ARGS);
    217 
    218       // If the command queue's device is not associated to the program, we get
    219       // a module, with no sections, which will also fail the following test.
    220       auto &m = kern.program().build(q.device()).binary;
    221       if (!any_of(type_equals(module::section::text_executable), m.secs))
    222          throw error(CL_INVALID_PROGRAM_EXECUTABLE);
    223    }
    224 
    225    std::vector<size_t>
    226    validate_grid_size(const command_queue &q, cl_uint dims,
    227                       const size_t *d_grid_size) {
    228       auto grid_size = range(d_grid_size, dims);
    229 
    230       if (dims < 1 || dims > q.device().max_block_size().size())
    231          throw error(CL_INVALID_WORK_DIMENSION);
    232 
    233       if (!d_grid_size || any_of(is_zero(), grid_size))
    234          throw error(CL_INVALID_GLOBAL_WORK_SIZE);
    235 
    236       return grid_size;
    237    }
    238 
    239    std::vector<size_t>
    240    validate_grid_offset(const command_queue &q, cl_uint dims,
    241                         const size_t *d_grid_offset) {
    242       if (d_grid_offset)
    243          return range(d_grid_offset, dims);
    244       else
    245          return std::vector<size_t>(dims, 0);
    246    }
    247 
    248    std::vector<size_t>
    249    validate_block_size(const command_queue &q, const kernel &kern,
    250                        cl_uint dims, const size_t *d_grid_size,
    251                        const size_t *d_block_size) {
    252       auto grid_size = range(d_grid_size, dims);
    253 
    254       if (d_block_size) {
    255          auto block_size = range(d_block_size, dims);
    256 
    257          if (any_of(is_zero(), block_size) ||
    258              any_of(greater(), block_size, q.device().max_block_size()))
    259             throw error(CL_INVALID_WORK_ITEM_SIZE);
    260 
    261          if (any_of(modulus(), grid_size, block_size))
    262             throw error(CL_INVALID_WORK_GROUP_SIZE);
    263 
    264          if (fold(multiplies(), 1u, block_size) >
    265              q.device().max_threads_per_block())
    266             throw error(CL_INVALID_WORK_GROUP_SIZE);
    267 
    268          return block_size;
    269 
    270       } else {
    271          return kern.optimal_block_size(q, grid_size);
    272       }
    273    }
    274 }
    275 
    276 CLOVER_API cl_int
    277 clEnqueueNDRangeKernel(cl_command_queue d_q, cl_kernel d_kern,
    278                        cl_uint dims, const size_t *d_grid_offset,
    279                        const size_t *d_grid_size, const size_t *d_block_size,
    280                        cl_uint num_deps, const cl_event *d_deps,
    281                        cl_event *rd_ev) try {
    282    auto &q = obj(d_q);
    283    auto &kern = obj(d_kern);
    284    auto deps = objs<wait_list_tag>(d_deps, num_deps);
    285    auto grid_size = validate_grid_size(q, dims, d_grid_size);
    286    auto grid_offset = validate_grid_offset(q, dims, d_grid_offset);
    287    auto block_size = validate_block_size(q, kern, dims,
    288                                          d_grid_size, d_block_size);
    289 
    290    validate_common(q, kern, deps);
    291 
    292    auto hev = create<hard_event>(
    293       q, CL_COMMAND_NDRANGE_KERNEL, deps,
    294       [=, &kern, &q](event &) {
    295          kern.launch(q, grid_offset, grid_size, block_size);
    296       });
    297 
    298    ret_object(rd_ev, hev);
    299    return CL_SUCCESS;
    300 
    301 } catch (error &e) {
    302    return e.get();
    303 }
    304 
    305 CLOVER_API cl_int
    306 clEnqueueTask(cl_command_queue d_q, cl_kernel d_kern,
    307               cl_uint num_deps, const cl_event *d_deps,
    308               cl_event *rd_ev) try {
    309    auto &q = obj(d_q);
    310    auto &kern = obj(d_kern);
    311    auto deps = objs<wait_list_tag>(d_deps, num_deps);
    312 
    313    validate_common(q, kern, deps);
    314 
    315    auto hev = create<hard_event>(
    316       q, CL_COMMAND_TASK, deps,
    317       [=, &kern, &q](event &) {
    318          kern.launch(q, { 0 }, { 1 }, { 1 });
    319       });
    320 
    321    ret_object(rd_ev, hev);
    322    return CL_SUCCESS;
    323 
    324 } catch (error &e) {
    325    return e.get();
    326 }
    327 
    328 CLOVER_API cl_int
    329 clEnqueueNativeKernel(cl_command_queue d_q, void (*func)(void *),
    330                       void *args, size_t args_size,
    331                       cl_uint num_mems, const cl_mem *d_mems,
    332                       const void **mem_handles, cl_uint num_deps,
    333                       const cl_event *d_deps, cl_event *rd_ev) {
    334    return CL_INVALID_OPERATION;
    335 }
    336