1 // 2 // Copyright 2012 Francisco Jerez 3 // 4 // Permission is hereby granted, free of charge, to any person obtaining a 5 // copy of this software and associated documentation files (the "Software"), 6 // to deal in the Software without restriction, including without limitation 7 // the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 // and/or sell copies of the Software, and to permit persons to whom the 9 // Software is furnished to do so, subject to the following conditions: 10 // 11 // The above copyright notice and this permission notice shall be included in 12 // all copies or substantial portions of the Software. 13 // 14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 // THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 18 // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 19 // OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 // SOFTWARE. 21 // 22 23 #include "api/util.hpp" 24 #include "core/kernel.hpp" 25 #include "core/event.hpp" 26 27 using namespace clover; 28 29 PUBLIC cl_kernel 30 clCreateKernel(cl_program prog, const char *name, 31 cl_int *errcode_ret) try { 32 if (!prog) 33 throw error(CL_INVALID_PROGRAM); 34 35 if (!name) 36 throw error(CL_INVALID_VALUE); 37 38 if (prog->binaries().empty()) 39 throw error(CL_INVALID_PROGRAM_EXECUTABLE); 40 41 auto sym = prog->binaries().begin()->second.sym(name); 42 43 ret_error(errcode_ret, CL_SUCCESS); 44 return new kernel(*prog, name, { sym.args.begin(), sym.args.end() }); 45 46 } catch (module::noent_error &e) { 47 ret_error(errcode_ret, CL_INVALID_KERNEL_NAME); 48 return NULL; 49 50 } catch(error &e) { 51 ret_error(errcode_ret, e); 52 return NULL; 53 } 54 55 PUBLIC cl_int 56 clCreateKernelsInProgram(cl_program prog, cl_uint count, 57 cl_kernel *kerns, cl_uint *count_ret) { 58 if (!prog) 59 throw error(CL_INVALID_PROGRAM); 60 61 if (prog->binaries().empty()) 62 throw error(CL_INVALID_PROGRAM_EXECUTABLE); 63 64 auto &syms = prog->binaries().begin()->second.syms; 65 66 if (kerns && count < syms.size()) 67 throw error(CL_INVALID_VALUE); 68 69 if (kerns) 70 std::transform(syms.begin(), syms.end(), kerns, 71 [=](const module::symbol &sym) { 72 return new kernel(*prog, compat::string(sym.name), 73 { sym.args.begin(), sym.args.end() }); 74 }); 75 76 if (count_ret) 77 *count_ret = syms.size(); 78 79 return CL_SUCCESS; 80 } 81 82 PUBLIC cl_int 83 clRetainKernel(cl_kernel kern) { 84 if (!kern) 85 return CL_INVALID_KERNEL; 86 87 kern->retain(); 88 return CL_SUCCESS; 89 } 90 91 PUBLIC cl_int 92 clReleaseKernel(cl_kernel kern) { 93 if (!kern) 94 return CL_INVALID_KERNEL; 95 96 if (kern->release()) 97 delete kern; 98 99 return CL_SUCCESS; 100 } 101 102 PUBLIC cl_int 103 clSetKernelArg(cl_kernel kern, cl_uint idx, size_t size, 104 const void *value) try { 105 if (!kern) 106 throw error(CL_INVALID_KERNEL); 107 108 if (idx >= kern->args.size()) 109 throw error(CL_INVALID_ARG_INDEX); 110 111 kern->args[idx]->set(size, value); 112 113 return CL_SUCCESS; 114 115 } catch(error &e) { 116 return e.get(); 117 } 118 119 PUBLIC cl_int 120 clGetKernelInfo(cl_kernel kern, cl_kernel_info param, 121 size_t size, void *buf, size_t *size_ret) { 122 if (!kern) 123 return CL_INVALID_KERNEL; 124 125 switch (param) { 126 case CL_KERNEL_FUNCTION_NAME: 127 return string_property(buf, size, size_ret, kern->name()); 128 129 case CL_KERNEL_NUM_ARGS: 130 return scalar_property<cl_uint>(buf, size, size_ret, 131 kern->args.size()); 132 133 case CL_KERNEL_REFERENCE_COUNT: 134 return scalar_property<cl_uint>(buf, size, size_ret, 135 kern->ref_count()); 136 137 case CL_KERNEL_CONTEXT: 138 return scalar_property<cl_context>(buf, size, size_ret, 139 &kern->prog.ctx); 140 141 case CL_KERNEL_PROGRAM: 142 return scalar_property<cl_program>(buf, size, size_ret, 143 &kern->prog); 144 145 default: 146 return CL_INVALID_VALUE; 147 } 148 } 149 150 PUBLIC cl_int 151 clGetKernelWorkGroupInfo(cl_kernel kern, cl_device_id dev, 152 cl_kernel_work_group_info param, 153 size_t size, void *buf, size_t *size_ret) { 154 if (!kern) 155 return CL_INVALID_KERNEL; 156 157 if ((!dev && kern->prog.binaries().size() != 1) || 158 (dev && !kern->prog.binaries().count(dev))) 159 return CL_INVALID_DEVICE; 160 161 switch (param) { 162 case CL_KERNEL_WORK_GROUP_SIZE: 163 return scalar_property<size_t>(buf, size, size_ret, 164 kern->max_block_size()); 165 166 case CL_KERNEL_COMPILE_WORK_GROUP_SIZE: 167 return vector_property<size_t>(buf, size, size_ret, 168 kern->block_size()); 169 170 case CL_KERNEL_LOCAL_MEM_SIZE: 171 return scalar_property<cl_ulong>(buf, size, size_ret, 172 kern->mem_local()); 173 174 case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: 175 return scalar_property<size_t>(buf, size, size_ret, 1); 176 177 case CL_KERNEL_PRIVATE_MEM_SIZE: 178 return scalar_property<cl_ulong>(buf, size, size_ret, 179 kern->mem_private()); 180 181 default: 182 return CL_INVALID_VALUE; 183 } 184 } 185 186 namespace { 187 /// 188 /// Common argument checking shared by kernel invocation commands. 189 /// 190 void 191 kernel_validate(cl_command_queue q, cl_kernel kern, 192 cl_uint dims, const size_t *grid_offset, 193 const size_t *grid_size, const size_t *block_size, 194 cl_uint num_deps, const cl_event *deps, 195 cl_event *ev) { 196 if (!q) 197 throw error(CL_INVALID_COMMAND_QUEUE); 198 199 if (!kern) 200 throw error(CL_INVALID_KERNEL); 201 202 if (&kern->prog.ctx != &q->ctx || 203 any_of([&](const cl_event ev) { 204 return &ev->ctx != &q->ctx; 205 }, deps, deps + num_deps)) 206 throw error(CL_INVALID_CONTEXT); 207 208 if (bool(num_deps) != bool(deps) || 209 any_of(is_zero<cl_event>(), deps, deps + num_deps)) 210 throw error(CL_INVALID_EVENT_WAIT_LIST); 211 212 if (any_of([](std::unique_ptr<kernel::argument> &arg) { 213 return !arg->set(); 214 }, kern->args.begin(), kern->args.end())) 215 throw error(CL_INVALID_KERNEL_ARGS); 216 217 if (!kern->prog.binaries().count(&q->dev)) 218 throw error(CL_INVALID_PROGRAM_EXECUTABLE); 219 220 if (dims < 1 || dims > q->dev.max_block_size().size()) 221 throw error(CL_INVALID_WORK_DIMENSION); 222 223 if (!grid_size || any_of(is_zero<size_t>(), grid_size, grid_size + dims)) 224 throw error(CL_INVALID_GLOBAL_WORK_SIZE); 225 226 if (block_size) { 227 if (any_of([](size_t b, size_t max) { 228 return b == 0 || b > max; 229 }, block_size, block_size + dims, 230 q->dev.max_block_size().begin())) 231 throw error(CL_INVALID_WORK_ITEM_SIZE); 232 233 if (any_of([](size_t b, size_t g) { 234 return g % b; 235 }, block_size, block_size + dims, grid_size)) 236 throw error(CL_INVALID_WORK_GROUP_SIZE); 237 238 if (fold(std::multiplies<size_t>(), 1u, 239 block_size, block_size + dims) > 240 q->dev.max_threads_per_block()) 241 throw error(CL_INVALID_WORK_GROUP_SIZE); 242 } 243 } 244 245 /// 246 /// Common event action shared by kernel invocation commands. 247 /// 248 std::function<void (event &)> 249 kernel_op(cl_command_queue q, cl_kernel kern, 250 const std::vector<size_t> &grid_offset, 251 const std::vector<size_t> &grid_size, 252 const std::vector<size_t> &block_size) { 253 const std::vector<size_t> reduced_grid_size = map( 254 std::divides<size_t>(), grid_size.begin(), grid_size.end(), 255 block_size.begin()); 256 257 return [=](event &) { 258 kern->launch(*q, grid_offset, reduced_grid_size, block_size); 259 }; 260 } 261 262 template<typename T, typename S> 263 std::vector<T> 264 opt_vector(const T *p, S n) { 265 if (p) 266 return { p, p + n }; 267 else 268 return { n }; 269 } 270 } 271 272 PUBLIC cl_int 273 clEnqueueNDRangeKernel(cl_command_queue q, cl_kernel kern, 274 cl_uint dims, const size_t *pgrid_offset, 275 const size_t *pgrid_size, const size_t *pblock_size, 276 cl_uint num_deps, const cl_event *deps, 277 cl_event *ev) try { 278 const std::vector<size_t> grid_offset = opt_vector(pgrid_offset, dims); 279 const std::vector<size_t> grid_size = opt_vector(pgrid_size, dims); 280 const std::vector<size_t> block_size = opt_vector(pblock_size, dims); 281 282 kernel_validate(q, kern, dims, pgrid_offset, pgrid_size, pblock_size, 283 num_deps, deps, ev); 284 285 hard_event *hev = new hard_event( 286 *q, CL_COMMAND_NDRANGE_KERNEL, { deps, deps + num_deps }, 287 kernel_op(q, kern, grid_offset, grid_size, block_size)); 288 289 ret_object(ev, hev); 290 return CL_SUCCESS; 291 292 } catch(error &e) { 293 return e.get(); 294 } 295 296 PUBLIC cl_int 297 clEnqueueTask(cl_command_queue q, cl_kernel kern, 298 cl_uint num_deps, const cl_event *deps, 299 cl_event *ev) try { 300 const std::vector<size_t> grid_offset = { 0 }; 301 const std::vector<size_t> grid_size = { 1 }; 302 const std::vector<size_t> block_size = { 1 }; 303 304 kernel_validate(q, kern, 1, grid_offset.data(), grid_size.data(), 305 block_size.data(), num_deps, deps, ev); 306 307 hard_event *hev = new hard_event( 308 *q, CL_COMMAND_TASK, { deps, deps + num_deps }, 309 kernel_op(q, kern, grid_offset, grid_size, block_size)); 310 311 ret_object(ev, hev); 312 return CL_SUCCESS; 313 314 } catch(error &e) { 315 return e.get(); 316 } 317 318 PUBLIC cl_int 319 clEnqueueNativeKernel(cl_command_queue q, void (*func)(void *), 320 void *args, size_t args_size, 321 cl_uint obj_count, const cl_mem *obj_list, 322 const void **obj_args, cl_uint num_deps, 323 const cl_event *deps, cl_event *ev) { 324 return CL_INVALID_OPERATION; 325 } 326