Home | History | Annotate | Download | only in ocl
      1 /*
      2  * cl_kernel.cpp - CL kernel
      3  *
      4  *  Copyright (c) 2015 Intel Corporation
      5  *
      6  * Licensed under the Apache License, Version 2.0 (the "License");
      7  * you may not use this file except in compliance with the License.
      8  * You may obtain a copy of the License at
      9  *
     10  *      http://www.apache.org/licenses/LICENSE-2.0
     11  *
     12  * Unless required by applicable law or agreed to in writing, software
     13  * distributed under the License is distributed on an "AS IS" BASIS,
     14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15  * See the License for the specific language governing permissions and
     16  * limitations under the License.
     17  *
     18  * Author: Wind Yuan <feng.yuan (at) intel.com>
     19  */
     20 
     21 #include "cl_kernel.h"
     22 #include "cl_context.h"
     23 #include "cl_device.h"
     24 #include "file_handle.h"
     25 
     26 #include <sys/stat.h>
     27 
// Set to 1 to compile in the per-kernel profiling code in CLKernel::execute
// (profiling start/end around the launch plus an extra context finish).
#define ENABLE_DEBUG_KERNEL 0

// Local work size written to every dimension by CLKernel::set_default_work_size.
#define XCAM_CL_KERNEL_DEFAULT_LOCAL_WORK_SIZE 0
     31 
     32 namespace XCam {
     33 
// Table of already-built kernels shared by all CLKernel instances; build_kernel
// looks entries up by a key string made of kernel name, body hash and options.
CLKernel::KernelMap CLKernel::_kernel_map;
// Held by build_kernel while it searches and inserts into _kernel_map.
Mutex CLKernel::_kernel_map_mutex;
     36 
     37 static char*
     38 default_cache_path () {
     39     static char path[XCAM_MAX_STR_SIZE] = {0};
     40     snprintf (
     41         path, XCAM_MAX_STR_SIZE - 1,
     42         "%s/%s", std::getenv ("HOME"), ".xcam/");
     43 
     44     return path;
     45 }
     46 
// Default kernel cache directory, computed once during static initialization;
// build_kernel uses it when XCAM_CL_KERNEL_CACHE_PATH is not set in the environment.
const char* CLKernel::_kernel_cache_path = default_cache_path ();
     48 
     49 CLKernel::CLKernel (const SmartPtr<CLContext> &context, const char *name)
     50     : _name (NULL)
     51     , _kernel_id (NULL)
     52     , _context (context)
     53 {
     54     XCAM_ASSERT (context.ptr ());
     55     //XCAM_ASSERT (name);
     56 
     57     if (name)
     58         _name = strndup (name, XCAM_MAX_STR_SIZE);
     59 
     60     set_default_work_size ();
     61 
     62     XCAM_OBJ_PROFILING_INIT;
     63 }
     64 
     65 CLKernel::~CLKernel ()
     66 {
     67     destroy ();
     68     if (_name)
     69         xcam_free (_name);
     70 }
     71 
     72 void
     73 CLKernel::destroy ()
     74 {
     75     if (!_parent_kernel.ptr ())
     76         _context->destroy_kernel_id (_kernel_id);
     77 }
     78 
     79 static void
     80 get_string_key_id (const char *str, uint32_t len, uint8_t key_id[8])
     81 {
     82     uint32_t key[2];
     83     uint32_t *ptr = (uint32_t*)(str);
     84     uint32_t aligned_len = 0;
     85     uint32_t i = 0;
     86 
     87     xcam_mem_clear (key);
     88     if (!len)
     89         len = strlen (str);
     90     aligned_len = XCAM_ALIGN_DOWN (len, 8);
     91 
     92     for (i = 0; i < aligned_len / 8; ++i) {
     93         key[0] ^= ptr[0];
     94         key[1] ^= ptr[1];
     95         ptr += 2;
     96     }
     97     memcpy (key_id, key, 8);
     98     len -= aligned_len;
     99     str += aligned_len;
    100     for (i = 0; i < len; ++i) {
    101         key_id[i] ^= (uint8_t)str[i];
    102     }
    103 }
    104 
    105 XCamReturn
    106 CLKernel::build_kernel (const XCamKernelInfo& info, const char* options)
    107 {
    108     KernelMap::iterator i_kernel;
    109     SmartPtr<CLKernel> single_kernel;
    110     char key_str[1024];
    111     uint8_t body_key[8];
    112     std::string key;
    113     XCamReturn ret = XCAM_RETURN_NO_ERROR;
    114 
    115     XCAM_FAIL_RETURN (ERROR, info.kernel_name, XCAM_RETURN_ERROR_PARAM, "build kernel failed since kernel name null");
    116 
    117     xcam_mem_clear (body_key);
    118     get_string_key_id (info.kernel_body, info.kernel_body_len, body_key);
    119     snprintf (
    120         key_str, sizeof(key_str),
    121         "%s#%02x%02x%02x%02x%02x%02x%02x%02x#%s",
    122         info.kernel_name,
    123         body_key[0], body_key[1], body_key[2], body_key[3], body_key[4], body_key[5], body_key[6], body_key[7],
    124         XCAM_STR(options));
    125     key = key_str;
    126 
    127     char temp_filename[XCAM_MAX_STR_SIZE] = {0};
    128     char cache_filename[XCAM_MAX_STR_SIZE] = {0};
    129     FileHandle temp_file;
    130     FileHandle cache_file;
    131     size_t read_cache_size = 0;
    132     size_t write_cache_size = 0;
    133     uint8_t *kernel_cache = NULL;
    134     bool load_cache = false;
    135     struct timeval ts;
    136 
    137     const char* cache_path = std::getenv ("XCAM_CL_KERNEL_CACHE_PATH");
    138     if (NULL == cache_path) {
    139         cache_path = _kernel_cache_path;
    140     }
    141 
    142     snprintf (
    143         cache_filename, XCAM_MAX_STR_SIZE - 1,
    144         "%s/%s",
    145         cache_path, key_str);
    146 
    147     {
    148         SmartLock locker (_kernel_map_mutex);
    149 
    150         i_kernel = _kernel_map.find (key);
    151         if (i_kernel == _kernel_map.end ()) {
    152             SmartPtr<CLContext>  context = get_context ();
    153             single_kernel = new CLKernel (context, info.kernel_name);
    154             XCAM_ASSERT (single_kernel.ptr ());
    155 
    156             if (access (cache_path, F_OK) == -1) {
    157                 mkdir (cache_path, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
    158             }
    159 
    160             ret = cache_file.open (cache_filename, "r");
    161             if (ret == XCAM_RETURN_NO_ERROR) {
    162                 cache_file.get_file_size (read_cache_size);
    163                 if (read_cache_size > 0) {
    164                     kernel_cache = (uint8_t*) xcam_malloc0 (sizeof (uint8_t) * (read_cache_size + 1));
    165                     if (NULL != kernel_cache) {
    166                         cache_file.read_file (kernel_cache, read_cache_size);
    167                         cache_file.close ();
    168 
    169                         ret = single_kernel->load_from_binary (kernel_cache, read_cache_size);
    170                         xcam_free (kernel_cache);
    171                         kernel_cache = NULL;
    172 
    173                         XCAM_FAIL_RETURN (
    174                             ERROR, ret == XCAM_RETURN_NO_ERROR, ret,
    175                             "build kernel(%s) from binary failed", key_str);
    176 
    177                         load_cache = true;
    178                     }
    179                 }
    180             } else {
    181                 XCAM_LOG_DEBUG ("open kernel cache file to read failed ret(%d)", ret);
    182             }
    183 
    184             if (load_cache == false) {
    185                 ret = single_kernel->load_from_source (info.kernel_body, strlen (info.kernel_body), &kernel_cache, &write_cache_size, options);
    186                 XCAM_FAIL_RETURN (
    187                     ERROR, ret == XCAM_RETURN_NO_ERROR, ret,
    188                     "build kernel(%s) from source failed", key_str);
    189             }
    190 
    191             _kernel_map.insert (std::make_pair (key, single_kernel));
    192             //_kernel_map[key] = single_kernel;
    193         } else {
    194             single_kernel = i_kernel->second;
    195         }
    196     }
    197 
    198     if (load_cache == false && NULL != kernel_cache) {
    199         gettimeofday (&ts, NULL);
    200         snprintf (
    201             temp_filename, XCAM_MAX_STR_SIZE - 1,
    202             "%s." XCAM_TIMESTAMP_FORMAT,
    203             cache_filename, XCAM_TIMESTAMP_ARGS (XCAM_TIMEVAL_2_USEC (ts)));
    204 
    205         ret = temp_file.open (temp_filename, "wb");
    206         if (ret == XCAM_RETURN_NO_ERROR) {
    207             ret = temp_file.write_file (kernel_cache, write_cache_size);
    208             temp_file.close ();
    209             if (ret == XCAM_RETURN_NO_ERROR && write_cache_size > 0) {
    210                 rename (temp_filename, cache_filename);
    211             } else {
    212                 remove (temp_filename);
    213             }
    214         } else {
    215             XCAM_LOG_ERROR ("open kernel cache file to write failed ret(%d)", ret);
    216         }
    217         xcam_free (kernel_cache);
    218         kernel_cache = NULL;
    219     }
    220 
    221     XCAM_FAIL_RETURN (
    222         ERROR, (single_kernel.ptr () && single_kernel->is_valid ()), XCAM_RETURN_ERROR_UNKNOWN,
    223         "build kernel(%s) failed, unknown error", key_str);
    224 
    225     ret = this->clone (single_kernel);
    226     XCAM_FAIL_RETURN (
    227         ERROR, ret == XCAM_RETURN_NO_ERROR, ret,
    228         "load kernel(%s) from kernel failed", key_str);
    229     return ret;
    230 }
    231 
    232 XCamReturn
    233 CLKernel::load_from_source (
    234     const char *source, size_t length,
    235     uint8_t **gen_binary, size_t *binary_size,
    236     const char *build_option)
    237 {
    238     cl_kernel new_kernel_id = NULL;
    239 
    240     XCAM_ASSERT (source);
    241     if (!source) {
    242         XCAM_LOG_WARNING ("kernel:%s source empty", XCAM_STR (_name));
    243         return XCAM_RETURN_ERROR_PARAM;
    244     }
    245 
    246     if (_kernel_id) {
    247         XCAM_LOG_WARNING ("kernel:%s already build yet", XCAM_STR (_name));
    248         return XCAM_RETURN_ERROR_PARAM;
    249     }
    250 
    251     XCAM_ASSERT (_context.ptr ());
    252 
    253     if (length == 0)
    254         length = strlen (source);
    255 
    256     new_kernel_id =
    257         _context->generate_kernel_id (
    258             this,
    259             (const uint8_t *)source, length,
    260             CLContext::KERNEL_BUILD_SOURCE,
    261             gen_binary, binary_size,
    262             build_option);
    263     XCAM_FAIL_RETURN(
    264         WARNING,
    265         new_kernel_id != NULL,
    266         XCAM_RETURN_ERROR_CL,
    267         "cl kernel(%s) load from source failed", XCAM_STR (_name));
    268 
    269     _kernel_id = new_kernel_id;
    270     return XCAM_RETURN_NO_ERROR;
    271 }
    272 
    273 XCamReturn
    274 CLKernel::load_from_binary (const uint8_t *binary, size_t length)
    275 {
    276     cl_kernel new_kernel_id = NULL;
    277 
    278     XCAM_ASSERT (binary);
    279     if (!binary || !length) {
    280         XCAM_LOG_WARNING ("kernel:%s binary empty", XCAM_STR (_name));
    281         return XCAM_RETURN_ERROR_PARAM;
    282     }
    283 
    284     if (_kernel_id) {
    285         XCAM_LOG_WARNING ("kernel:%s already build yet", XCAM_STR (_name));
    286         return XCAM_RETURN_ERROR_PARAM;
    287     }
    288 
    289     XCAM_ASSERT (_context.ptr ());
    290 
    291     new_kernel_id =
    292         _context->generate_kernel_id (
    293             this,
    294             binary, length,
    295             CLContext::KERNEL_BUILD_BINARY,
    296             NULL, NULL,
    297             NULL);
    298     XCAM_FAIL_RETURN(
    299         WARNING,
    300         new_kernel_id != NULL,
    301         XCAM_RETURN_ERROR_CL,
    302         "cl kernel(%s) load from binary failed", XCAM_STR (_name));
    303 
    304     _kernel_id = new_kernel_id;
    305     return XCAM_RETURN_NO_ERROR;
    306 }
    307 
    308 XCamReturn
    309 CLKernel::clone (SmartPtr<CLKernel> kernel)
    310 {
    311     XCAM_FAIL_RETURN (
    312         WARNING,
    313         kernel.ptr () && kernel->is_valid (),
    314         XCAM_RETURN_ERROR_CL,
    315         "cl kernel(%s) load from kernel failed", XCAM_STR (_name));
    316     _kernel_id = kernel->get_kernel_id ();
    317     _parent_kernel = kernel;
    318     if (!_name && kernel->get_kernel_name ()) {
    319         _name = strndup (kernel->get_kernel_name (), XCAM_MAX_STR_SIZE);
    320     }
    321     return XCAM_RETURN_NO_ERROR;
    322 }
    323 
    324 XCamReturn
    325 CLKernel::set_arguments (const CLArgList &args, const CLWorkSize &work_size)
    326 {
    327     XCamReturn ret = XCAM_RETURN_NO_ERROR;
    328     uint32_t i_count = 0;
    329 
    330     XCAM_FAIL_RETURN (
    331         ERROR, _arg_list.empty (), XCAM_RETURN_ERROR_PARAM,
    332         "cl image kernel(%s) arguments was already set, can NOT be set twice", get_kernel_name ());
    333 
    334     for (CLArgList::const_iterator iter = args.begin (); iter != args.end (); ++iter, ++i_count) {
    335         const SmartPtr<CLArgument> &arg = *iter;
    336         XCAM_FAIL_RETURN (
    337             WARNING, arg.ptr (),
    338             XCAM_RETURN_ERROR_PARAM, "cl image kernel(%s) argc(%d) is NULL", get_kernel_name (), i_count);
    339 
    340         void *adress = NULL;
    341         uint32_t size = 0;
    342         arg->get_value (adress, size);
    343         ret = set_argument (i_count, adress, size);
    344         XCAM_FAIL_RETURN (
    345             WARNING, ret == XCAM_RETURN_NO_ERROR,
    346             ret, "cl image kernel(%s) set argc(%d) failed", get_kernel_name (), i_count);
    347     }
    348 
    349     ret = set_work_size (work_size);
    350     XCAM_FAIL_RETURN (
    351         WARNING, ret == XCAM_RETURN_NO_ERROR, ret,
    352         "cl image kernel(%s) set worksize(global:%dx%dx%d, local:%dx%dx%d) failed",
    353         XCAM_STR(get_kernel_name ()),
    354         (int)work_size.global[0], (int)work_size.global[1], (int)work_size.global[2],
    355         (int)work_size.local[0], (int)work_size.local[1], (int)work_size.local[2]);
    356 
    357     _arg_list = args;
    358     return ret;
    359 }
    360 
    361 XCamReturn
    362 CLKernel::set_argument (uint32_t arg_i, void *arg_addr, uint32_t arg_size)
    363 {
    364     cl_int error_code = clSetKernelArg (_kernel_id, arg_i, arg_size, arg_addr);
    365     if (error_code != CL_SUCCESS) {
    366         XCAM_LOG_DEBUG ("kernel(%s) set arg_i(%d) failed", _name, arg_i);
    367         return XCAM_RETURN_ERROR_CL;
    368     }
    369     return XCAM_RETURN_NO_ERROR;
    370 }
    371 
    372 XCamReturn
    373 CLKernel::set_work_size (const CLWorkSize &work_size)
    374 {
    375     uint32_t i = 0;
    376     uint32_t work_group_size = 1;
    377     const CLDevieInfo &dev_info = CLDevice::instance ()->get_device_info ();
    378 
    379     XCAM_FAIL_RETURN (
    380         WARNING,
    381         work_size.dim <= dev_info.max_work_item_dims,
    382         XCAM_RETURN_ERROR_PARAM,
    383         "kernel(%s) work dims(%d) greater than device max dims(%d)",
    384         _name, work_size.dim, dev_info.max_work_item_dims);
    385 
    386     for (i = 0; i < work_size.dim; ++i) {
    387         work_group_size *= work_size.local [i];
    388 
    389         XCAM_FAIL_RETURN (
    390             WARNING,
    391             work_size.local [i] <= dev_info.max_work_item_sizes [i],
    392             XCAM_RETURN_ERROR_PARAM,
    393             "kernel(%s) work item(%d) size:%d is greater than device max work item size(%d)",
    394             _name, i, (uint32_t)work_size.local [i], (uint32_t)dev_info.max_work_item_sizes [i]);
    395     }
    396 
    397     XCAM_FAIL_RETURN (
    398         WARNING,
    399         work_group_size == 0 || work_group_size <= dev_info.max_work_group_size,
    400         XCAM_RETURN_ERROR_PARAM,
    401         "kernel(%s) work-group-size:%d is greater than device max work-group-size(%d)",
    402         _name, work_group_size, (uint32_t)dev_info.max_work_group_size);
    403 
    404     _work_size = work_size;
    405 
    406     return XCAM_RETURN_NO_ERROR;
    407 }
    408 
    409 void
    410 CLKernel::set_default_work_size ()
    411 {
    412     _work_size.dim = XCAM_DEFAULT_IMAGE_DIM;
    413     for (uint32_t i = 0; i < _work_size.dim; ++i) {
    414         //_global_work_size [i] = XCAM_CL_KERNEL_DEFAULT_GLOBAL_WORK_SIZE;
    415         _work_size.local [i] = XCAM_CL_KERNEL_DEFAULT_LOCAL_WORK_SIZE;
    416     }
    417 }
    418 
    419 struct KernelUserData {
    420     SmartPtr<CLKernel>  kernel;
    421     SmartPtr<CLEvent>   event;
    422     CLArgList           arg_list;
    423 
    424     KernelUserData (const SmartPtr<CLKernel> &k, SmartPtr<CLEvent> &e)
    425         : kernel (k)
    426         , event (e)
    427     {}
    428 };
    429 
    430 void
    431 CLKernel::event_notify (cl_event event, cl_int status, void* data)
    432 {
    433     KernelUserData *kernel_data = (KernelUserData *)data;
    434     XCAM_ASSERT (event == kernel_data->event->get_event_id ());
    435     XCAM_UNUSED (status);
    436     XCAM_UNUSED (event);
    437 
    438     delete kernel_data;
    439 }
    440 
    441 XCamReturn
    442 CLKernel::execute (
    443     const SmartPtr<CLKernel> self,
    444     bool block,
    445     CLEventList &events,
    446     SmartPtr<CLEvent> &event_out)
    447 {
    448     XCAM_ASSERT (self.ptr () == this);
    449     XCAM_ASSERT (_context.ptr ());
    450     SmartPtr<CLEvent> kernel_event = event_out;
    451 
    452     if (!block && !kernel_event.ptr ()) {
    453         kernel_event = new CLEvent;
    454     }
    455 
    456 #if ENABLE_DEBUG_KERNEL
    457     XCAM_OBJ_PROFILING_START;
    458 #endif
    459 
    460     XCamReturn ret = _context->execute_kernel (self, NULL, events, kernel_event);
    461 
    462     XCAM_FAIL_RETURN (
    463         ERROR,
    464         ret == XCAM_RETURN_NO_ERROR,
    465         ret,
    466         "kernel(%s) execute failed", XCAM_STR(_name));
    467 
    468 
    469     if (block) {
    470         _context->finish ();
    471     } else {
    472         XCAM_ASSERT (kernel_event.ptr () && kernel_event->get_event_id ());
    473         KernelUserData *user_data = new KernelUserData (self, kernel_event);
    474         user_data->arg_list.swap (_arg_list);
    475         ret = _context->set_event_callback (kernel_event, CL_COMPLETE, event_notify, user_data);
    476         if (ret != XCAM_RETURN_NO_ERROR) {
    477             XCAM_LOG_WARNING ("kernel(%s) set event callback failed", XCAM_STR (_name));
    478             _context->finish ();
    479             delete user_data;
    480         }
    481     }
    482     _arg_list.clear ();
    483 
    484 #if ENABLE_DEBUG_KERNEL
    485     _context->finish ();
    486     char name[1024];
    487     snprintf (name, 1024, "%s-%p", XCAM_STR (_name), this);
    488     XCAM_OBJ_PROFILING_END (name, XCAM_OBJ_DUR_FRAME_NUM);
    489 #endif
    490     return ret;
    491 }
    492 
    493 };
    494