Home | History | Annotate | Download | only in ocl
      1 /*
      2  * cl_newtonemapping_handler.cpp - CL tonemapping handler
      3  *
      4  *  Copyright (c) 2015 Intel Corporation
      5  *
      6  * Licensed under the Apache License, Version 2.0 (the "License");
      7  * you may not use this file except in compliance with the License.
      8  * You may obtain a copy of the License at
      9  *
     10  *      http://www.apache.org/licenses/LICENSE-2.0
     11  *
     12  * Unless required by applicable law or agreed to in writing, software
     13  * distributed under the License is distributed on an "AS IS" BASIS,
     14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15  * See the License for the specific language governing permissions and
     16  * limitations under the License.
     17  *
     18  *  Author: Wu Junkai <junkai.wu (at) intel.com>
     19  */
     20 
     21 #include "cl_utils.h"
     22 #include "cl_newtonemapping_handler.h"
     23 
     24 namespace XCam {
     25 
/*
 * Build description of the tone-mapping OpenCL kernel.
 * First field is the kernel name; the second is whatever
 * kernel_newtonemapping.clx expands to (presumably the kernel source as a
 * string literal -- confirm against the build rules that generate .clx files).
 * The trailing 0 initializes the field that follows the kernel body; its exact
 * meaning is defined by XCamKernelInfo, which is declared outside this file.
 */
static const XCamKernelInfo kernel_tone_mapping_pipe_info = {
    "kernel_newtonemapping",
#include "kernel_newtonemapping.clx"
    , 0,
};
     31 
     32 CLNewTonemappingImageKernel::CLNewTonemappingImageKernel (
     33     const SmartPtr<CLContext> &context, const char *name)
     34     : CLImageKernel (context, name)
     35 {
     36 }
     37 
     38 static void
     39 haleq(int *y, int *hist, int *hist_leq, int left, int right, int level, int index_left, int index_right)
     40 {
     41     int l;
     42     float e, le;
     43 
     44     l = (left + right) / 2;
     45     int num_left = left > 0 ? hist[left - 1] : 0;
     46     int pixel_num = hist[right] - num_left;
     47     e = y[num_left + pixel_num / 2];
     48 
     49     if(e != 0)
     50     {
     51         le = 0.5f * (e - l) + l;
     52     }
     53     else
     54     {
     55         le = l;
     56     }
     57 
     58     int index = (index_left + index_right) / 2;
     59     hist_leq[index] = (int)(le + 0.5f);
     60 
     61     if(level > 5) return;
     62 
     63     haleq (y, hist, hist_leq, left, (int)(le + 0.5f), level + 1, index_left, index);
     64     haleq (y, hist, hist_leq, (int)(le + 0.5f) + 1, right, level + 1, index + 1, index_right);
     65 }
     66 
     67 static void
     68 block_split_haleq(int* hist, int hist_bin_count, int pixel_num, int block_start_index, float* y_max, float* y_avg, float* map_hist)
     69 {
     70     int block_id = block_start_index / hist_bin_count;
     71 
     72     for(int i = hist_bin_count - 1; i >= 0; i--)
     73     {
     74         if(hist[i] > 0)
     75         {
     76             y_max[block_id] = i;
     77             break;
     78         }
     79     }
     80 
     81     for(int i = 0; i < hist_bin_count; i++)
     82     {
     83         y_avg[block_id] += i * hist[i];
     84     }
     85 
     86     y_max[block_id] = y_max[block_id] + 1;
     87     y_avg[block_id] = y_avg[block_id] / pixel_num;
     88 
     89     int *hist_log = (int *) xcam_malloc0 (hist_bin_count * sizeof (int));
     90     int *sort_y = (int *) xcam_malloc0 ((pixel_num + 1) * sizeof (int));
     91     int *map_index_leq = (int *) xcam_malloc0 (hist_bin_count * sizeof (int));
     92     int *map_index_log = (int *) xcam_malloc0 (hist_bin_count * sizeof (int));
     93     XCAM_ASSERT (hist_log && sort_y && map_index_leq && map_index_log);
     94 
     95     int thres = (int)(1500 * 1500 / (y_avg[block_id] * y_avg[block_id] + 1) * 600);
     96     int y_max0 = (y_max[block_id] > thres) ? thres : y_max[block_id];
     97     int y_max1 = (y_max[block_id] - thres) > 0 ? (y_max[block_id] - thres) : 0;
     98 
     99     float t0 = 0.01f * y_max0 + 0.001f;
    100     float t1 = 0.001f * y_max1 + 0.001f;
    101     float max0_log = log(y_max0 + t0);
    102     float max1_log = log(y_max1 + t1);
    103     float t0_log = log(t0);
    104     float t1_log = log(t1);
    105     float factor0;
    106 
    107     if(y_max[block_id] < thres)
    108     {
    109         factor0 = (hist_bin_count - 1) / (max0_log - t0_log + 0.001f);
    110     }
    111     else
    112         factor0 = y_max0 / (max0_log - t0_log + 0.001f);
    113 
    114     float factor1 = y_max1 / (max1_log - t1_log + 0.001f);
    115 
    116     if(y_max[block_id] < thres)
    117     {
    118         for(int i = 0; i < y_max[block_id]; i++)
    119         {
    120             int index = (int)((log(i + t0) - t0_log) * factor0 + 0.5f);
    121             hist_log[index] += hist[i];
    122             map_index_log[i] = index;
    123         }
    124     }
    125     else
    126     {
    127         for(int i = 0; i < y_max0; i++)
    128         {
    129             int index = (int)((log(i + t0) - t0_log) * factor0 + 0.5f);
    130             hist_log[index] += hist[i];
    131             map_index_log[i] = index;
    132         }
    133 
    134         for(int i = y_max0; i < y_max[block_id]; i++)
    135         {
    136             int r = y_max[block_id] - i;
    137             int index = (int)((log(r + t1) - t1_log) * factor1 + 0.5f);
    138             index = y_max[block_id] - index;
    139             hist_log[index] += hist[i];
    140             map_index_log[i] = index;
    141         }
    142     }
    143 
    144     for(int i = y_max[block_id]; i < hist_bin_count; i++)
    145     {
    146         hist_log[map_index_log[(int)y_max[block_id] - 1]] += hist[i];
    147         map_index_log[i] = map_index_log[(int)y_max[block_id] - 1];
    148     }
    149 
    150     int sort_index = 1;
    151     for(int i = 0; i < hist_bin_count; i++)
    152     {
    153         for(int l = 0; l < hist_log[i]; l++)
    154         {
    155             sort_y[sort_index] = i;
    156             sort_index++;
    157         }
    158     }
    159     sort_y[0] = 0;
    160 
    161     for(int i = 1; i < hist_bin_count; i++)
    162     {
    163         hist_log[i] += hist_log[i - 1];
    164     }
    165 
    166     int map_leq_index[256];
    167 
    168     haleq(sort_y, hist_log, map_leq_index, 0, hist_bin_count - 1, 0, 0, 255);
    169 
    170     map_leq_index[255] = hist_bin_count;
    171     map_leq_index[0] = 0;
    172 
    173     for(int i = 1; i < 255; i++)
    174     {
    175         if(i % 2 == 0) map_leq_index[i] = (map_leq_index[i - 1] + map_leq_index[i + 1]) / 2;
    176         if(map_leq_index[i] < map_leq_index[i - 1])
    177             map_leq_index[i] = map_leq_index[i - 1];
    178     }
    179 
    180     for(int i = 0; i < 255; i++)
    181     {
    182         for(int k = map_leq_index[i]; k < map_leq_index[i + 1]; k++)
    183         {
    184             map_index_leq[k] = (float)i;
    185         }
    186     }
    187 
    188     for(int i = 0; i < hist_bin_count; i++)
    189     {
    190         map_hist[i + block_start_index] = map_index_leq[map_index_log[i]] / 255.0f;
    191     }
    192 
    193     y_max[block_id] = y_max[block_id] / hist_bin_count;
    194     y_avg[block_id] = y_avg[block_id] / hist_bin_count;
    195 
    196     xcam_free (hist_log);
    197     hist_log = NULL;
    198     xcam_free (map_index_leq);
    199     map_index_leq = NULL;
    200     xcam_free (map_index_log);
    201     map_index_log = NULL;
    202     xcam_free (sort_y);
    203     sort_y = NULL;
    204 }
    205 
    206 CLNewTonemappingImageHandler::CLNewTonemappingImageHandler (
    207     const SmartPtr<CLContext> &context, const char *name)
    208     : CLImageHandler (context, name)
    209     , _output_format (XCAM_PIX_FMT_SGRBG16_planar)
    210     , _block_factor (4)
    211 {
    212     for(int i = 0; i < 65536; i++)
    213     {
    214         _map_hist[i] = i;
    215     }
    216 
    217     for(int i = 0; i < 4 * 4; i++)
    218     {
    219         _y_max[i] = 0.0f;
    220         _y_avg[i] = 0.0f;
    221     }
    222 }
    223 
    224 bool
    225 CLNewTonemappingImageHandler::set_tonemapping_kernel(SmartPtr<CLNewTonemappingImageKernel> &kernel)
    226 {
    227     SmartPtr<CLImageKernel> image_kernel = kernel;
    228     add_kernel (image_kernel);
    229     _tonemapping_kernel = kernel;
    230     return true;
    231 }
    232 
    233 XCamReturn
    234 CLNewTonemappingImageHandler::prepare_buffer_pool_video_info (
    235     const VideoBufferInfo &input,
    236     VideoBufferInfo &output)
    237 {
    238     bool format_inited = output.init (_output_format, input.width, input.height);
    239 
    240     XCAM_FAIL_RETURN (
    241         WARNING,
    242         format_inited,
    243         XCAM_RETURN_ERROR_PARAM,
    244         "CL image handler(%s) output format(%s) unsupported",
    245         get_name (), xcam_fourcc_to_string (_output_format));
    246 
    247     return XCAM_RETURN_NO_ERROR;
    248 }
    249 
    250 XCamReturn
    251 CLNewTonemappingImageHandler::prepare_parameters (
    252     SmartPtr<VideoBuffer> &input, SmartPtr<VideoBuffer> &output)
    253 {
    254     SmartPtr<CLContext> context = get_context ();
    255     const VideoBufferInfo &video_info = input->get_video_info ();
    256     CLArgList args;
    257     CLWorkSize work_size;
    258 
    259     XCAM_ASSERT (_tonemapping_kernel.ptr ());
    260 
    261     CLImageDesc desc;
    262     desc.format.image_channel_order = CL_RGBA;
    263     desc.format.image_channel_data_type = CL_UNORM_INT16;
    264     desc.width = video_info.aligned_width / 4;
    265     desc.height = video_info.aligned_height * 4;
    266     desc.row_pitch = video_info.strides[0];
    267     desc.array_size = 4;
    268     desc.slice_pitch = video_info.strides [0] * video_info.aligned_height;
    269 
    270     SmartPtr<CLImage> image_in = convert_to_climage (context, input, desc);
    271     SmartPtr<CLImage> image_out = convert_to_climage (context, output, desc);
    272     int image_width = video_info.aligned_width;
    273     int image_height = video_info.aligned_height;
    274 
    275     XCAM_FAIL_RETURN (
    276         WARNING,
    277         image_in->is_valid () && image_out->is_valid (),
    278         XCAM_RETURN_ERROR_MEM,
    279         "cl image handler(%s) in/out memory not available", XCAM_STR (get_name ()));
    280 
    281     SmartPtr<X3aStats> stats;
    282     SmartPtr<CLVideoBuffer> cl_buf = input.dynamic_cast_ptr<CLVideoBuffer> ();
    283     if (cl_buf.ptr ()) {
    284         stats = cl_buf->find_3a_stats ();
    285     }
    286 #if HAVE_LIBDRM
    287     else {
    288         SmartPtr<DrmBoBuffer> bo_buf = input.dynamic_cast_ptr<DrmBoBuffer> ();
    289         stats = bo_buf->find_3a_stats ();
    290     }
    291 #endif
    292     XCAM_FAIL_RETURN (
    293         ERROR, stats.ptr (), XCAM_RETURN_ERROR_MEM,
    294         "new tonemapping handler prepare_arguments find_3a_stats failed");
    295 
    296     XCam3AStats *stats_ptr = stats->get_stats ();
    297     XCAM_FAIL_RETURN (
    298         ERROR, stats_ptr, XCAM_RETURN_ERROR_MEM,
    299         "new tonemapping handler prepare_arguments get_stats failed");
    300 
    301     int block_factor = 4;
    302     int width_per_block = stats_ptr->info.width / block_factor;
    303     int height_per_block = stats_ptr->info.height / block_factor;
    304     int height_last_block = height_per_block + stats_ptr->info.height % block_factor;
    305     int hist_bin_count = 1 << stats_ptr->info.bit_depth;
    306 
    307     int *hist_per_block = (int *) xcam_malloc0 (hist_bin_count * sizeof (int));
    308     XCAM_ASSERT (hist_per_block);
    309 
    310     for(int block_row = 0; block_row < block_factor; block_row++)
    311     {
    312         for(int block_col = 0; block_col < block_factor; block_col++)
    313         {
    314             int block_start_index = (block_row * block_factor + block_col) * hist_bin_count;
    315             int start_index = block_row * height_per_block * stats_ptr->info.width + block_col * width_per_block;
    316 
    317             for(int i = 0; i < hist_bin_count; i++)
    318             {
    319                 hist_per_block[i] = 0;
    320             }
    321 
    322             if(block_row == block_factor - 1)
    323             {
    324                 height_per_block = height_last_block;
    325             }
    326 
    327             int block_totalnum = width_per_block * height_per_block;
    328             for(int i = 0; i < height_per_block; i++)
    329             {
    330                 for(int j = 0; j < width_per_block; j++)
    331                 {
    332                     int y = stats_ptr->stats[start_index + i * stats_ptr->info.width + j].avg_y;
    333                     hist_per_block[y]++;
    334                 }
    335             }
    336 
    337             block_split_haleq (hist_per_block, hist_bin_count, block_totalnum, block_start_index, _y_max, _y_avg, _map_hist);
    338         }
    339     }
    340 
    341     xcam_free (hist_per_block);
    342     hist_per_block = NULL;
    343 
    344     SmartPtr<CLBuffer> y_max_buffer = new CLBuffer(
    345         context, sizeof(float) * block_factor * block_factor,
    346         CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, &_y_max);
    347 
    348     SmartPtr<CLBuffer> y_avg_buffer = new CLBuffer(
    349         context, sizeof(float) * block_factor * block_factor,
    350         CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, &_y_avg);
    351 
    352     SmartPtr<CLBuffer> map_hist_buffer = new CLBuffer(
    353         context, sizeof(float) * hist_bin_count * block_factor * block_factor,
    354         CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, &_map_hist);
    355 
    356     //set args;
    357     args.push_back (new CLMemArgument (image_in));
    358     args.push_back (new CLMemArgument (image_out));
    359     args.push_back (new CLMemArgument (y_max_buffer));
    360     args.push_back (new CLMemArgument (y_avg_buffer));
    361     args.push_back (new CLMemArgument (map_hist_buffer));
    362     args.push_back (new CLArgumentT<int> (image_width));
    363     args.push_back (new CLArgumentT<int> (image_height));
    364 
    365     const CLImageDesc out_info = image_out->get_image_desc ();
    366     work_size.dim = XCAM_DEFAULT_IMAGE_DIM;
    367     work_size.global[0] = out_info.width;
    368     work_size.global[1] = out_info.height / 4;
    369     work_size.local[0] = 8;
    370     work_size.local[1] = 8;
    371 
    372     XCAM_ASSERT (_tonemapping_kernel.ptr ());
    373     XCamReturn ret = _tonemapping_kernel->set_arguments (args, work_size);
    374     XCAM_FAIL_RETURN (
    375         WARNING, ret == XCAM_RETURN_NO_ERROR, ret,
    376         "new tone mapping kernel set arguments failed.");
    377 
    378     return XCAM_RETURN_NO_ERROR;
    379 }
    380 
    381 
    382 SmartPtr<CLImageHandler>
    383 create_cl_newtonemapping_image_handler (const SmartPtr<CLContext> &context)
    384 {
    385     SmartPtr<CLNewTonemappingImageHandler> tonemapping_handler;
    386     SmartPtr<CLNewTonemappingImageKernel> tonemapping_kernel;
    387 
    388     tonemapping_kernel = new CLNewTonemappingImageKernel (context, "kernel_newtonemapping");
    389     XCAM_ASSERT (tonemapping_kernel.ptr ());
    390     XCAM_FAIL_RETURN (
    391         ERROR, tonemapping_kernel->build_kernel (kernel_tone_mapping_pipe_info, NULL) == XCAM_RETURN_NO_ERROR, NULL,
    392         "build new tonemapping kernel(%s) failed", kernel_tone_mapping_pipe_info.kernel_name);
    393 
    394     XCAM_ASSERT (tonemapping_kernel->is_valid ());
    395     tonemapping_handler = new CLNewTonemappingImageHandler(context, "cl_handler_newtonemapping");
    396     tonemapping_handler->set_tonemapping_kernel(tonemapping_kernel);
    397 
    398     return tonemapping_handler;
    399 }
    400 
    401 };
    402