1 // 2 // Copyright 2012 Francisco Jerez 3 // 4 // Permission is hereby granted, free of charge, to any person obtaining a 5 // copy of this software and associated documentation files (the "Software"), 6 // to deal in the Software without restriction, including without limitation 7 // the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 // and/or sell copies of the Software, and to permit persons to whom the 9 // Software is furnished to do so, subject to the following conditions: 10 // 11 // The above copyright notice and this permission notice shall be included in 12 // all copies or substantial portions of the Software. 13 // 14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 // OTHER DEALINGS IN THE SOFTWARE. 21 // 22 23 #include "api/util.hpp" 24 #include "core/kernel.hpp" 25 #include "core/event.hpp" 26 27 using namespace clover; 28 29 CLOVER_API cl_kernel 30 clCreateKernel(cl_program d_prog, const char *name, cl_int *r_errcode) try { 31 auto &prog = obj(d_prog); 32 33 if (!name) 34 throw error(CL_INVALID_VALUE); 35 36 auto &sym = find(name_equals(name), prog.symbols()); 37 38 ret_error(r_errcode, CL_SUCCESS); 39 return new kernel(prog, name, range(sym.args)); 40 41 } catch (std::out_of_range &e) { 42 ret_error(r_errcode, CL_INVALID_KERNEL_NAME); 43 return NULL; 44 45 } catch (error &e) { 46 ret_error(r_errcode, e); 47 return NULL; 48 } 49 50 CLOVER_API cl_int 51 clCreateKernelsInProgram(cl_program d_prog, cl_uint count, 52 cl_kernel *rd_kerns, cl_uint *r_count) try { 53 auto &prog = obj(d_prog); 54 auto &syms = prog.symbols(); 55 56 if (rd_kerns && count < syms.size()) 57 throw error(CL_INVALID_VALUE); 58 59 if (rd_kerns) 60 copy(map([&](const module::symbol &sym) { 61 return desc(new kernel(prog, 62 std::string(sym.name.begin(), 63 sym.name.end()), 64 range(sym.args))); 65 }, syms), 66 rd_kerns); 67 68 if (r_count) 69 *r_count = syms.size(); 70 71 return CL_SUCCESS; 72 73 } catch (error &e) { 74 return e.get(); 75 } 76 77 CLOVER_API cl_int 78 clRetainKernel(cl_kernel d_kern) try { 79 obj(d_kern).retain(); 80 return CL_SUCCESS; 81 82 } catch (error &e) { 83 return e.get(); 84 } 85 86 CLOVER_API cl_int 87 clReleaseKernel(cl_kernel d_kern) try { 88 if (obj(d_kern).release()) 89 delete pobj(d_kern); 90 91 return CL_SUCCESS; 92 93 } catch (error &e) { 94 return e.get(); 95 } 96 97 CLOVER_API cl_int 98 clSetKernelArg(cl_kernel d_kern, cl_uint idx, size_t size, 99 const void *value) try { 100 obj(d_kern).args().at(idx).set(size, value); 101 return CL_SUCCESS; 102 103 } catch (std::out_of_range &e) { 104 return CL_INVALID_ARG_INDEX; 105 106 } catch (error &e) { 107 return e.get(); 108 } 109 110 CLOVER_API cl_int 111 clGetKernelInfo(cl_kernel d_kern, cl_kernel_info param, 112 size_t size, void *r_buf, size_t *r_size) try { 113 property_buffer buf { r_buf, size, r_size }; 114 auto &kern = obj(d_kern); 115 116 switch (param) { 117 case CL_KERNEL_FUNCTION_NAME: 118 buf.as_string() = kern.name(); 119 break; 120 121 case CL_KERNEL_NUM_ARGS: 122 buf.as_scalar<cl_uint>() = kern.args().size(); 123 break; 124 125 case CL_KERNEL_REFERENCE_COUNT: 126 buf.as_scalar<cl_uint>() = kern.ref_count(); 127 break; 128 129 case CL_KERNEL_CONTEXT: 130 buf.as_scalar<cl_context>() = desc(kern.program().context()); 131 break; 132 133 case CL_KERNEL_PROGRAM: 134 buf.as_scalar<cl_program>() = desc(kern.program()); 135 break; 136 137 default: 138 throw error(CL_INVALID_VALUE); 139 } 140 141 return CL_SUCCESS; 142 143 } catch (error &e) { 144 return e.get(); 145 } 146 147 CLOVER_API cl_int 148 clGetKernelWorkGroupInfo(cl_kernel d_kern, cl_device_id d_dev, 149 cl_kernel_work_group_info param, 150 size_t size, void *r_buf, size_t *r_size) try { 151 property_buffer buf { r_buf, size, r_size }; 152 auto &kern = obj(d_kern); 153 auto &dev = (d_dev ? *pobj(d_dev) : unique(kern.program().devices())); 154 155 if (!count(dev, kern.program().devices())) 156 throw error(CL_INVALID_DEVICE); 157 158 switch (param) { 159 case CL_KERNEL_WORK_GROUP_SIZE: 160 buf.as_scalar<size_t>() = dev.max_threads_per_block(); 161 break; 162 163 case CL_KERNEL_COMPILE_WORK_GROUP_SIZE: 164 buf.as_vector<size_t>() = kern.required_block_size(); 165 break; 166 167 case CL_KERNEL_LOCAL_MEM_SIZE: 168 buf.as_scalar<cl_ulong>() = kern.mem_local(); 169 break; 170 171 case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: 172 buf.as_scalar<size_t>() = dev.subgroup_size(); 173 break; 174 175 case CL_KERNEL_PRIVATE_MEM_SIZE: 176 buf.as_scalar<cl_ulong>() = kern.mem_private(); 177 break; 178 179 default: 180 throw error(CL_INVALID_VALUE); 181 } 182 183 return CL_SUCCESS; 184 185 } catch (error &e) { 186 return e.get(); 187 188 } catch (std::out_of_range &e) { 189 return CL_INVALID_DEVICE; 190 } 191 192 CLOVER_API cl_int 193 clGetKernelArgInfo(cl_kernel d_kern, 194 cl_uint idx, cl_kernel_arg_info param, 195 size_t size, void *r_buf, size_t *r_size) { 196 CLOVER_NOT_SUPPORTED_UNTIL("1.2"); 197 return CL_KERNEL_ARG_INFO_NOT_AVAILABLE; 198 } 199 200 namespace { 201 /// 202 /// Common argument checking shared by kernel invocation commands. 203 /// 204 void 205 validate_common(const command_queue &q, kernel &kern, 206 const ref_vector<event> &deps) { 207 if (kern.program().context() != q.context() || 208 any_of([&](const event &ev) { 209 return ev.context() != q.context(); 210 }, deps)) 211 throw error(CL_INVALID_CONTEXT); 212 213 if (any_of([](kernel::argument &arg) { 214 return !arg.set(); 215 }, kern.args())) 216 throw error(CL_INVALID_KERNEL_ARGS); 217 218 // If the command queue's device is not associated to the program, we get 219 // a module, with no sections, which will also fail the following test. 220 auto &m = kern.program().build(q.device()).binary; 221 if (!any_of(type_equals(module::section::text_executable), m.secs)) 222 throw error(CL_INVALID_PROGRAM_EXECUTABLE); 223 } 224 225 std::vector<size_t> 226 validate_grid_size(const command_queue &q, cl_uint dims, 227 const size_t *d_grid_size) { 228 auto grid_size = range(d_grid_size, dims); 229 230 if (dims < 1 || dims > q.device().max_block_size().size()) 231 throw error(CL_INVALID_WORK_DIMENSION); 232 233 if (!d_grid_size || any_of(is_zero(), grid_size)) 234 throw error(CL_INVALID_GLOBAL_WORK_SIZE); 235 236 return grid_size; 237 } 238 239 std::vector<size_t> 240 validate_grid_offset(const command_queue &q, cl_uint dims, 241 const size_t *d_grid_offset) { 242 if (d_grid_offset) 243 return range(d_grid_offset, dims); 244 else 245 return std::vector<size_t>(dims, 0); 246 } 247 248 std::vector<size_t> 249 validate_block_size(const command_queue &q, const kernel &kern, 250 cl_uint dims, const size_t *d_grid_size, 251 const size_t *d_block_size) { 252 auto grid_size = range(d_grid_size, dims); 253 254 if (d_block_size) { 255 auto block_size = range(d_block_size, dims); 256 257 if (any_of(is_zero(), block_size) || 258 any_of(greater(), block_size, q.device().max_block_size())) 259 throw error(CL_INVALID_WORK_ITEM_SIZE); 260 261 if (any_of(modulus(), grid_size, block_size)) 262 throw error(CL_INVALID_WORK_GROUP_SIZE); 263 264 if (fold(multiplies(), 1u, block_size) > 265 q.device().max_threads_per_block()) 266 throw error(CL_INVALID_WORK_GROUP_SIZE); 267 268 return block_size; 269 270 } else { 271 return kern.optimal_block_size(q, grid_size); 272 } 273 } 274 } 275 276 CLOVER_API cl_int 277 clEnqueueNDRangeKernel(cl_command_queue d_q, cl_kernel d_kern, 278 cl_uint dims, const size_t *d_grid_offset, 279 const size_t *d_grid_size, const size_t *d_block_size, 280 cl_uint num_deps, const cl_event *d_deps, 281 cl_event *rd_ev) try { 282 auto &q = obj(d_q); 283 auto &kern = obj(d_kern); 284 auto deps = objs<wait_list_tag>(d_deps, num_deps); 285 auto grid_size = validate_grid_size(q, dims, d_grid_size); 286 auto grid_offset = validate_grid_offset(q, dims, d_grid_offset); 287 auto block_size = validate_block_size(q, kern, dims, 288 d_grid_size, d_block_size); 289 290 validate_common(q, kern, deps); 291 292 auto hev = create<hard_event>( 293 q, CL_COMMAND_NDRANGE_KERNEL, deps, 294 [=, &kern, &q](event &) { 295 kern.launch(q, grid_offset, grid_size, block_size); 296 }); 297 298 ret_object(rd_ev, hev); 299 return CL_SUCCESS; 300 301 } catch (error &e) { 302 return e.get(); 303 } 304 305 CLOVER_API cl_int 306 clEnqueueTask(cl_command_queue d_q, cl_kernel d_kern, 307 cl_uint num_deps, const cl_event *d_deps, 308 cl_event *rd_ev) try { 309 auto &q = obj(d_q); 310 auto &kern = obj(d_kern); 311 auto deps = objs<wait_list_tag>(d_deps, num_deps); 312 313 validate_common(q, kern, deps); 314 315 auto hev = create<hard_event>( 316 q, CL_COMMAND_TASK, deps, 317 [=, &kern, &q](event &) { 318 kern.launch(q, { 0 }, { 1 }, { 1 }); 319 }); 320 321 ret_object(rd_ev, hev); 322 return CL_SUCCESS; 323 324 } catch (error &e) { 325 return e.get(); 326 } 327 328 CLOVER_API cl_int 329 clEnqueueNativeKernel(cl_command_queue d_q, void (*func)(void *), 330 void *args, size_t args_size, 331 cl_uint num_mems, const cl_mem *d_mems, 332 const void **mem_handles, cl_uint num_deps, 333 const cl_event *d_deps, cl_event *rd_ev) { 334 return CL_INVALID_OPERATION; 335 } 336