1 /* 2 * cl_kernel.cpp - CL kernel 3 * 4 * Copyright (c) 2015 Intel Corporation 5 * 6 * Licensed under the Apache License, Version 2.0 (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 * 18 * Author: Wind Yuan <feng.yuan (at) intel.com> 19 */ 20 21 #include "cl_kernel.h" 22 #include "cl_context.h" 23 #include "cl_device.h" 24 #include "file_handle.h" 25 26 #include <sys/stat.h> 27 28 #define ENABLE_DEBUG_KERNEL 0 29 30 #define XCAM_CL_KERNEL_DEFAULT_LOCAL_WORK_SIZE 0 31 32 namespace XCam { 33 34 CLKernel::KernelMap CLKernel::_kernel_map; 35 Mutex CLKernel::_kernel_map_mutex; 36 37 static char* 38 default_cache_path () { 39 static char path[XCAM_MAX_STR_SIZE] = {0}; 40 snprintf ( 41 path, XCAM_MAX_STR_SIZE - 1, 42 "%s/%s", std::getenv ("HOME"), ".xcam/"); 43 44 return path; 45 } 46 47 const char* CLKernel::_kernel_cache_path = default_cache_path (); 48 49 CLKernel::CLKernel (const SmartPtr<CLContext> &context, const char *name) 50 : _name (NULL) 51 , _kernel_id (NULL) 52 , _context (context) 53 { 54 XCAM_ASSERT (context.ptr ()); 55 //XCAM_ASSERT (name); 56 57 if (name) 58 _name = strndup (name, XCAM_MAX_STR_SIZE); 59 60 set_default_work_size (); 61 62 XCAM_OBJ_PROFILING_INIT; 63 } 64 65 CLKernel::~CLKernel () 66 { 67 destroy (); 68 if (_name) 69 xcam_free (_name); 70 } 71 72 void 73 CLKernel::destroy () 74 { 75 if (!_parent_kernel.ptr ()) 76 _context->destroy_kernel_id (_kernel_id); 77 } 78 79 static void 80 get_string_key_id (const char *str, uint32_t len, uint8_t key_id[8]) 81 { 82 uint32_t key[2]; 83 uint32_t *ptr = (uint32_t*)(str); 84 uint32_t aligned_len = 0; 85 uint32_t i = 0; 86 87 xcam_mem_clear (key); 88 if (!len) 89 len = strlen (str); 90 aligned_len = XCAM_ALIGN_DOWN (len, 8); 91 92 for (i = 0; i < aligned_len / 8; ++i) { 93 key[0] ^= ptr[0]; 94 key[1] ^= ptr[1]; 95 ptr += 2; 96 } 97 memcpy (key_id, key, 8); 98 len -= aligned_len; 99 str += aligned_len; 100 for (i = 0; i < len; ++i) { 101 key_id[i] ^= (uint8_t)str[i]; 102 } 103 } 104 105 XCamReturn 106 CLKernel::build_kernel (const XCamKernelInfo& info, const char* options) 107 { 108 KernelMap::iterator i_kernel; 109 SmartPtr<CLKernel> single_kernel; 110 char key_str[1024]; 111 uint8_t body_key[8]; 112 std::string key; 113 XCamReturn ret = XCAM_RETURN_NO_ERROR; 114 115 XCAM_FAIL_RETURN (ERROR, info.kernel_name, XCAM_RETURN_ERROR_PARAM, "build kernel failed since kernel name null"); 116 117 xcam_mem_clear (body_key); 118 get_string_key_id (info.kernel_body, info.kernel_body_len, body_key); 119 snprintf ( 120 key_str, sizeof(key_str), 121 "%s#%02x%02x%02x%02x%02x%02x%02x%02x#%s", 122 info.kernel_name, 123 body_key[0], body_key[1], body_key[2], body_key[3], body_key[4], body_key[5], body_key[6], body_key[7], 124 XCAM_STR(options)); 125 key = key_str; 126 127 char temp_filename[XCAM_MAX_STR_SIZE] = {0}; 128 char cache_filename[XCAM_MAX_STR_SIZE] = {0}; 129 FileHandle temp_file; 130 FileHandle cache_file; 131 size_t read_cache_size = 0; 132 size_t write_cache_size = 0; 133 uint8_t *kernel_cache = NULL; 134 bool load_cache = false; 135 struct timeval ts; 136 137 const char* cache_path = std::getenv ("XCAM_CL_KERNEL_CACHE_PATH"); 138 if (NULL == cache_path) { 139 cache_path = _kernel_cache_path; 140 } 141 142 snprintf ( 143 cache_filename, XCAM_MAX_STR_SIZE - 1, 144 "%s/%s", 145 cache_path, key_str); 146 147 { 148 SmartLock locker (_kernel_map_mutex); 149 150 i_kernel = _kernel_map.find (key); 151 if (i_kernel == _kernel_map.end ()) { 152 SmartPtr<CLContext> context = get_context (); 153 single_kernel = new CLKernel (context, info.kernel_name); 154 XCAM_ASSERT (single_kernel.ptr ()); 155 156 if (access (cache_path, F_OK) == -1) { 157 mkdir (cache_path, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); 158 } 159 160 ret = cache_file.open (cache_filename, "r"); 161 if (ret == XCAM_RETURN_NO_ERROR) { 162 cache_file.get_file_size (read_cache_size); 163 if (read_cache_size > 0) { 164 kernel_cache = (uint8_t*) xcam_malloc0 (sizeof (uint8_t) * (read_cache_size + 1)); 165 if (NULL != kernel_cache) { 166 cache_file.read_file (kernel_cache, read_cache_size); 167 cache_file.close (); 168 169 ret = single_kernel->load_from_binary (kernel_cache, read_cache_size); 170 xcam_free (kernel_cache); 171 kernel_cache = NULL; 172 173 XCAM_FAIL_RETURN ( 174 ERROR, ret == XCAM_RETURN_NO_ERROR, ret, 175 "build kernel(%s) from binary failed", key_str); 176 177 load_cache = true; 178 } 179 } 180 } else { 181 XCAM_LOG_DEBUG ("open kernel cache file to read failed ret(%d)", ret); 182 } 183 184 if (load_cache == false) { 185 ret = single_kernel->load_from_source (info.kernel_body, strlen (info.kernel_body), &kernel_cache, &write_cache_size, options); 186 XCAM_FAIL_RETURN ( 187 ERROR, ret == XCAM_RETURN_NO_ERROR, ret, 188 "build kernel(%s) from source failed", key_str); 189 } 190 191 _kernel_map.insert (std::make_pair (key, single_kernel)); 192 //_kernel_map[key] = single_kernel; 193 } else { 194 single_kernel = i_kernel->second; 195 } 196 } 197 198 if (load_cache == false && NULL != kernel_cache) { 199 gettimeofday (&ts, NULL); 200 snprintf ( 201 temp_filename, XCAM_MAX_STR_SIZE - 1, 202 "%s." XCAM_TIMESTAMP_FORMAT, 203 cache_filename, XCAM_TIMESTAMP_ARGS (XCAM_TIMEVAL_2_USEC (ts))); 204 205 ret = temp_file.open (temp_filename, "wb"); 206 if (ret == XCAM_RETURN_NO_ERROR) { 207 ret = temp_file.write_file (kernel_cache, write_cache_size); 208 temp_file.close (); 209 if (ret == XCAM_RETURN_NO_ERROR && write_cache_size > 0) { 210 rename (temp_filename, cache_filename); 211 } else { 212 remove (temp_filename); 213 } 214 } else { 215 XCAM_LOG_ERROR ("open kernel cache file to write failed ret(%d)", ret); 216 } 217 xcam_free (kernel_cache); 218 kernel_cache = NULL; 219 } 220 221 XCAM_FAIL_RETURN ( 222 ERROR, (single_kernel.ptr () && single_kernel->is_valid ()), XCAM_RETURN_ERROR_UNKNOWN, 223 "build kernel(%s) failed, unknown error", key_str); 224 225 ret = this->clone (single_kernel); 226 XCAM_FAIL_RETURN ( 227 ERROR, ret == XCAM_RETURN_NO_ERROR, ret, 228 "load kernel(%s) from kernel failed", key_str); 229 return ret; 230 } 231 232 XCamReturn 233 CLKernel::load_from_source ( 234 const char *source, size_t length, 235 uint8_t **gen_binary, size_t *binary_size, 236 const char *build_option) 237 { 238 cl_kernel new_kernel_id = NULL; 239 240 XCAM_ASSERT (source); 241 if (!source) { 242 XCAM_LOG_WARNING ("kernel:%s source empty", XCAM_STR (_name)); 243 return XCAM_RETURN_ERROR_PARAM; 244 } 245 246 if (_kernel_id) { 247 XCAM_LOG_WARNING ("kernel:%s already build yet", XCAM_STR (_name)); 248 return XCAM_RETURN_ERROR_PARAM; 249 } 250 251 XCAM_ASSERT (_context.ptr ()); 252 253 if (length == 0) 254 length = strlen (source); 255 256 new_kernel_id = 257 _context->generate_kernel_id ( 258 this, 259 (const uint8_t *)source, length, 260 CLContext::KERNEL_BUILD_SOURCE, 261 gen_binary, binary_size, 262 build_option); 263 XCAM_FAIL_RETURN( 264 WARNING, 265 new_kernel_id != NULL, 266 XCAM_RETURN_ERROR_CL, 267 "cl kernel(%s) load from source failed", XCAM_STR (_name)); 268 269 _kernel_id = new_kernel_id; 270 return XCAM_RETURN_NO_ERROR; 271 } 272 273 XCamReturn 274 CLKernel::load_from_binary (const uint8_t *binary, size_t length) 275 { 276 cl_kernel new_kernel_id = NULL; 277 278 XCAM_ASSERT (binary); 279 if (!binary || !length) { 280 XCAM_LOG_WARNING ("kernel:%s binary empty", XCAM_STR (_name)); 281 return XCAM_RETURN_ERROR_PARAM; 282 } 283 284 if (_kernel_id) { 285 XCAM_LOG_WARNING ("kernel:%s already build yet", XCAM_STR (_name)); 286 return XCAM_RETURN_ERROR_PARAM; 287 } 288 289 XCAM_ASSERT (_context.ptr ()); 290 291 new_kernel_id = 292 _context->generate_kernel_id ( 293 this, 294 binary, length, 295 CLContext::KERNEL_BUILD_BINARY, 296 NULL, NULL, 297 NULL); 298 XCAM_FAIL_RETURN( 299 WARNING, 300 new_kernel_id != NULL, 301 XCAM_RETURN_ERROR_CL, 302 "cl kernel(%s) load from binary failed", XCAM_STR (_name)); 303 304 _kernel_id = new_kernel_id; 305 return XCAM_RETURN_NO_ERROR; 306 } 307 308 XCamReturn 309 CLKernel::clone (SmartPtr<CLKernel> kernel) 310 { 311 XCAM_FAIL_RETURN ( 312 WARNING, 313 kernel.ptr () && kernel->is_valid (), 314 XCAM_RETURN_ERROR_CL, 315 "cl kernel(%s) load from kernel failed", XCAM_STR (_name)); 316 _kernel_id = kernel->get_kernel_id (); 317 _parent_kernel = kernel; 318 if (!_name && kernel->get_kernel_name ()) { 319 _name = strndup (kernel->get_kernel_name (), XCAM_MAX_STR_SIZE); 320 } 321 return XCAM_RETURN_NO_ERROR; 322 } 323 324 XCamReturn 325 CLKernel::set_arguments (const CLArgList &args, const CLWorkSize &work_size) 326 { 327 XCamReturn ret = XCAM_RETURN_NO_ERROR; 328 uint32_t i_count = 0; 329 330 XCAM_FAIL_RETURN ( 331 ERROR, _arg_list.empty (), XCAM_RETURN_ERROR_PARAM, 332 "cl image kernel(%s) arguments was already set, can NOT be set twice", get_kernel_name ()); 333 334 for (CLArgList::const_iterator iter = args.begin (); iter != args.end (); ++iter, ++i_count) { 335 const SmartPtr<CLArgument> &arg = *iter; 336 XCAM_FAIL_RETURN ( 337 WARNING, arg.ptr (), 338 XCAM_RETURN_ERROR_PARAM, "cl image kernel(%s) argc(%d) is NULL", get_kernel_name (), i_count); 339 340 void *adress = NULL; 341 uint32_t size = 0; 342 arg->get_value (adress, size); 343 ret = set_argument (i_count, adress, size); 344 XCAM_FAIL_RETURN ( 345 WARNING, ret == XCAM_RETURN_NO_ERROR, 346 ret, "cl image kernel(%s) set argc(%d) failed", get_kernel_name (), i_count); 347 } 348 349 ret = set_work_size (work_size); 350 XCAM_FAIL_RETURN ( 351 WARNING, ret == XCAM_RETURN_NO_ERROR, ret, 352 "cl image kernel(%s) set worksize(global:%dx%dx%d, local:%dx%dx%d) failed", 353 XCAM_STR(get_kernel_name ()), 354 (int)work_size.global[0], (int)work_size.global[1], (int)work_size.global[2], 355 (int)work_size.local[0], (int)work_size.local[1], (int)work_size.local[2]); 356 357 _arg_list = args; 358 return ret; 359 } 360 361 XCamReturn 362 CLKernel::set_argument (uint32_t arg_i, void *arg_addr, uint32_t arg_size) 363 { 364 cl_int error_code = clSetKernelArg (_kernel_id, arg_i, arg_size, arg_addr); 365 if (error_code != CL_SUCCESS) { 366 XCAM_LOG_DEBUG ("kernel(%s) set arg_i(%d) failed", _name, arg_i); 367 return XCAM_RETURN_ERROR_CL; 368 } 369 return XCAM_RETURN_NO_ERROR; 370 } 371 372 XCamReturn 373 CLKernel::set_work_size (const CLWorkSize &work_size) 374 { 375 uint32_t i = 0; 376 uint32_t work_group_size = 1; 377 const CLDevieInfo &dev_info = CLDevice::instance ()->get_device_info (); 378 379 XCAM_FAIL_RETURN ( 380 WARNING, 381 work_size.dim <= dev_info.max_work_item_dims, 382 XCAM_RETURN_ERROR_PARAM, 383 "kernel(%s) work dims(%d) greater than device max dims(%d)", 384 _name, work_size.dim, dev_info.max_work_item_dims); 385 386 for (i = 0; i < work_size.dim; ++i) { 387 work_group_size *= work_size.local [i]; 388 389 XCAM_FAIL_RETURN ( 390 WARNING, 391 work_size.local [i] <= dev_info.max_work_item_sizes [i], 392 XCAM_RETURN_ERROR_PARAM, 393 "kernel(%s) work item(%d) size:%d is greater than device max work item size(%d)", 394 _name, i, (uint32_t)work_size.local [i], (uint32_t)dev_info.max_work_item_sizes [i]); 395 } 396 397 XCAM_FAIL_RETURN ( 398 WARNING, 399 work_group_size == 0 || work_group_size <= dev_info.max_work_group_size, 400 XCAM_RETURN_ERROR_PARAM, 401 "kernel(%s) work-group-size:%d is greater than device max work-group-size(%d)", 402 _name, work_group_size, (uint32_t)dev_info.max_work_group_size); 403 404 _work_size = work_size; 405 406 return XCAM_RETURN_NO_ERROR; 407 } 408 409 void 410 CLKernel::set_default_work_size () 411 { 412 _work_size.dim = XCAM_DEFAULT_IMAGE_DIM; 413 for (uint32_t i = 0; i < _work_size.dim; ++i) { 414 //_global_work_size [i] = XCAM_CL_KERNEL_DEFAULT_GLOBAL_WORK_SIZE; 415 _work_size.local [i] = XCAM_CL_KERNEL_DEFAULT_LOCAL_WORK_SIZE; 416 } 417 } 418 419 struct KernelUserData { 420 SmartPtr<CLKernel> kernel; 421 SmartPtr<CLEvent> event; 422 CLArgList arg_list; 423 424 KernelUserData (const SmartPtr<CLKernel> &k, SmartPtr<CLEvent> &e) 425 : kernel (k) 426 , event (e) 427 {} 428 }; 429 430 void 431 CLKernel::event_notify (cl_event event, cl_int status, void* data) 432 { 433 KernelUserData *kernel_data = (KernelUserData *)data; 434 XCAM_ASSERT (event == kernel_data->event->get_event_id ()); 435 XCAM_UNUSED (status); 436 XCAM_UNUSED (event); 437 438 delete kernel_data; 439 } 440 441 XCamReturn 442 CLKernel::execute ( 443 const SmartPtr<CLKernel> self, 444 bool block, 445 CLEventList &events, 446 SmartPtr<CLEvent> &event_out) 447 { 448 XCAM_ASSERT (self.ptr () == this); 449 XCAM_ASSERT (_context.ptr ()); 450 SmartPtr<CLEvent> kernel_event = event_out; 451 452 if (!block && !kernel_event.ptr ()) { 453 kernel_event = new CLEvent; 454 } 455 456 #if ENABLE_DEBUG_KERNEL 457 XCAM_OBJ_PROFILING_START; 458 #endif 459 460 XCamReturn ret = _context->execute_kernel (self, NULL, events, kernel_event); 461 462 XCAM_FAIL_RETURN ( 463 ERROR, 464 ret == XCAM_RETURN_NO_ERROR, 465 ret, 466 "kernel(%s) execute failed", XCAM_STR(_name)); 467 468 469 if (block) { 470 _context->finish (); 471 } else { 472 XCAM_ASSERT (kernel_event.ptr () && kernel_event->get_event_id ()); 473 KernelUserData *user_data = new KernelUserData (self, kernel_event); 474 user_data->arg_list.swap (_arg_list); 475 ret = _context->set_event_callback (kernel_event, CL_COMPLETE, event_notify, user_data); 476 if (ret != XCAM_RETURN_NO_ERROR) { 477 XCAM_LOG_WARNING ("kernel(%s) set event callback failed", XCAM_STR (_name)); 478 _context->finish (); 479 delete user_data; 480 } 481 } 482 _arg_list.clear (); 483 484 #if ENABLE_DEBUG_KERNEL 485 _context->finish (); 486 char name[1024]; 487 snprintf (name, 1024, "%s-%p", XCAM_STR (_name), this); 488 XCAM_OBJ_PROFILING_END (name, XCAM_OBJ_DUR_FRAME_NUM); 489 #endif 490 return ret; 491 } 492 493 }; 494