Home | History | Annotate | Download | only in ocl
      1 /*
      2  * cl_newwavelet_denoise_handler.cpp - CL wavelet denoise handler
      3  *
      4  *  Copyright (c) 2015 Intel Corporation
      5  *
      6  * Licensed under the Apache License, Version 2.0 (the "License");
      7  * you may not use this file except in compliance with the License.
      8  * You may obtain a copy of the License at
      9  *
     10  *      http://www.apache.org/licenses/LICENSE-2.0
     11  *
     12  * Unless required by applicable law or agreed to in writing, software
     13  * distributed under the License is distributed on an "AS IS" BASIS,
     14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15  * See the License for the specific language governing permissions and
     16  * limitations under the License.
     17  *
     18  * Author: Wei Zong <wei.zong (at) intel.com>
     19  */
     20 
     21 #include "cl_utils.h"
     22 #include "cl_context.h"
     23 #include "cl_device.h"
     24 #include "cl_newwavelet_denoise_handler.h"
     25 
     26 #define WAVELET_DECOMPOSITION_LEVELS 4
     27 
     28 namespace XCam {
     29 
     30 enum {
     31     KernelWaveletDecompose = 0,
     32     KernelWaveletReconstruct,
     33     KernelWaveletNoiseEstimate,
     34     KernelWaveletThreshold,
     35 };
     36 
/*
 * Kernel descriptor table: one entry per wavelet kernel, each giving the
 * kernel entry-point name followed by the kernel source text pulled in from
 * the generated .clx file and a trailing 0 field.
 * Decomposition/reconstruction share kernel_wavelet_haar.clx, while the
 * variance/thresholding kernels share kernel_wavelet_coeff.clx.
 * NOTE(review): entry order mirrors the KernelWavelet* enum above; keep the
 * two in sync if either is changed.
 */
static const XCamKernelInfo kernel_new_wavelet_info[] = {
    {
        // Haar analysis (decomposition) kernel
        "kernel_wavelet_haar_decomposition",
#include "kernel_wavelet_haar.clx"
        , 0,
    },
    {
        // Haar synthesis (reconstruction) kernel
        "kernel_wavelet_haar_reconstruction",
#include "kernel_wavelet_haar.clx"
        , 0,
    },
    {
        // coefficient variance kernel
        "kernel_wavelet_coeff_variance",
#include "kernel_wavelet_coeff.clx"
        , 0,
    },
    {
        // coefficient thresholding kernel
        "kernel_wavelet_coeff_thresholding",
#include "kernel_wavelet_coeff.clx"
        , 0,
    },
};
     59 
     60 
     61 CLWaveletNoiseEstimateKernel::CLWaveletNoiseEstimateKernel (
     62     const SmartPtr<CLContext> &context,
     63     const char *name,
     64     SmartPtr<CLNewWaveletDenoiseImageHandler> &handler,
     65     uint32_t channel,
     66     uint32_t subband,
     67     uint32_t layer)
     68     : CLImageKernel (context, name)
     69     , _decomposition_levels (WAVELET_DECOMPOSITION_LEVELS)
     70     , _channel (channel)
     71     , _subband (subband)
     72     , _current_layer (layer)
     73     , _analog_gain (-1.0)
     74     , _handler (handler)
     75 {
     76 }
     77 
     78 SmartPtr<CLImage>
     79 CLWaveletNoiseEstimateKernel::get_input_buffer ()
     80 {
     81     SmartPtr<VideoBuffer> input = _handler->get_input_buf ();
     82     const VideoBufferInfo & video_info = input->get_video_info ();
     83 
     84     SmartPtr<CLImage> image;
     85     SmartPtr<CLWaveletDecompBuffer> buffer = _handler->get_decomp_buffer (_channel, _current_layer);
     86     XCAM_ASSERT (buffer.ptr ());
     87 
     88     if (_subband == CL_WAVELET_SUBBAND_HL) {
     89         image = buffer->hl[0];
     90     } else if (_subband == CL_WAVELET_SUBBAND_LH) {
     91         image = buffer->lh[0];
     92     } else if (_subband == CL_WAVELET_SUBBAND_HH) {
     93         image = buffer->hh[0];
     94     } else {
     95         image = buffer->ll;
     96     }
     97 
     98     float current_ag = _handler->get_denoise_config ().analog_gain;
     99     if ((_analog_gain == -1.0f) ||
    100             (fabs(_analog_gain - current_ag) > 0.2)) {
    101 
    102         if ((_current_layer == 1) && (_subband == CL_WAVELET_SUBBAND_HH)) {
    103             _analog_gain = current_ag;
    104             estimate_noise_variance (video_info, buffer->hh[0], buffer->noise_variance);
    105             _handler->set_estimated_noise_variation (buffer->noise_variance);
    106         } else {
    107             _handler->get_estimated_noise_variation (buffer->noise_variance);
    108         }
    109     } else {
    110         _handler->get_estimated_noise_variation (buffer->noise_variance);
    111     }
    112     return image;
    113 }
    114 
    115 SmartPtr<CLImage>
    116 CLWaveletNoiseEstimateKernel::get_output_buffer ()
    117 {
    118     SmartPtr<CLImage> image;
    119     SmartPtr<CLWaveletDecompBuffer> buffer = _handler->get_decomp_buffer (_channel, _current_layer);
    120     XCAM_ASSERT (buffer.ptr ());
    121 
    122     if (_subband == CL_WAVELET_SUBBAND_HL) {
    123         image = buffer->hl[1];
    124     } else if (_subband == CL_WAVELET_SUBBAND_LH) {
    125         image = buffer->lh[1];
    126     } else if (_subband == CL_WAVELET_SUBBAND_HH) {
    127         image = buffer->hh[1];
    128     } else {
    129         image = buffer->ll;
    130     }
    131     return image;
    132 }
    133 
    134 XCamReturn
    135 CLWaveletNoiseEstimateKernel::prepare_arguments (
    136     CLArgList &args, CLWorkSize &work_size)
    137 {
    138     SmartPtr<CLContext> context = get_context ();
    139 
    140     SmartPtr<CLImage> image_in = get_input_buffer ();
    141     SmartPtr<CLImage> image_out = get_output_buffer ();
    142 
    143     CLImageDesc cl_desc = image_in->get_image_desc ();
    144     uint32_t cl_width = XCAM_ALIGN_UP (cl_desc.width, 2);
    145     uint32_t cl_height = XCAM_ALIGN_UP (cl_desc.height, 2);
    146 
    147     XCAM_FAIL_RETURN (
    148         WARNING,
    149         image_in->is_valid () && image_out->is_valid (),
    150         XCAM_RETURN_ERROR_MEM,
    151         "cl image kernel(%s) in/out memory not available", get_kernel_name ());
    152 
    153     //set args;
    154     args.push_back (new CLMemArgument (image_in));
    155     args.push_back (new CLMemArgument (image_out));
    156     args.push_back (new CLArgumentT<uint32_t> (_current_layer));
    157 
    158     work_size.dim = XCAM_DEFAULT_IMAGE_DIM;
    159     work_size.local[0] = 8;
    160     work_size.local[1] = 8;
    161     work_size.global[0] = XCAM_ALIGN_UP (cl_width, work_size.local[0]);
    162     work_size.global[1] = XCAM_ALIGN_UP (cl_height, work_size.local[1]);
    163 
    164     return XCAM_RETURN_NO_ERROR;
    165 }
    166 
    167 XCamReturn
    168 CLWaveletNoiseEstimateKernel::estimate_noise_variance (const VideoBufferInfo & video_info, SmartPtr<CLImage> image, float* noise_var)
    169 {
    170     XCamReturn ret = XCAM_RETURN_NO_ERROR;
    171 
    172     SmartPtr<CLEvent> map_event = new CLEvent;
    173     void *buf_ptr = NULL;
    174 
    175     CLImageDesc cl_desc = image->get_image_desc ();
    176     uint32_t cl_width = XCAM_ALIGN_UP (cl_desc.width, 2);
    177     uint32_t cl_height = XCAM_ALIGN_UP (cl_desc.height, 2);
    178 
    179     uint32_t image_width = cl_width << 2;
    180     uint32_t image_height = cl_height;
    181 
    182     size_t origin[3] = {0, 0, 0};
    183     size_t row_pitch = cl_desc.row_pitch;
    184     size_t slice_pitch = 0;
    185     size_t region[3] = {cl_width, cl_height, 1};
    186 
    187     ret = image->enqueue_map (buf_ptr,
    188                               origin, region,
    189                               &row_pitch, &slice_pitch,
    190                               CL_MAP_READ,
    191                               CLEvent::EmptyList,
    192                               map_event);
    193     if (ret != XCAM_RETURN_NO_ERROR) {
    194         XCAM_LOG_ERROR ("wavelet noise variance buffer enqueue map failed");
    195     }
    196     XCAM_ASSERT (map_event->get_event_id ());
    197 
    198     ret = map_event->wait ();
    199     if (ret != XCAM_RETURN_NO_ERROR) {
    200         XCAM_LOG_ERROR ("wavelet noise variance buffer enqueue map event wait failed");
    201     }
    202 
    203     uint8_t* pixel = (uint8_t*)buf_ptr;
    204     uint32_t pixel_count = image_width * image_height;
    205     uint32_t pixel_sum = 0;
    206 
    207     uint32_t median_thresh = pixel_count >> 1;
    208     float median = 0;
    209     float noise_std_deviation = 0;
    210 
    211     uint32_t hist_bin_count = 1 << video_info.color_bits;
    212     uint32_t hist_y[128] = {0};
    213     uint32_t hist_u[128] = {0};
    214     uint32_t hist_v[128] = {0};
    215 
    216     if (_channel == CL_IMAGE_CHANNEL_Y) {
    217         for (uint32_t i = 0; i < image_width; i++) {
    218             for (uint32_t j = 0; j < image_height; j++) {
    219                 uint8_t base = (pixel[i + j * row_pitch] <= 127) ? 127 : 128;
    220                 hist_y[abs(pixel[i + j * row_pitch] - base)]++;
    221             }
    222         }
    223         pixel_sum = 0;
    224         median = 0;
    225         for (uint32_t i = 0; i < (hist_bin_count - 1); i++) {
    226             pixel_sum += hist_y[i];
    227             if (pixel_sum >= median_thresh) {
    228                 median = i;
    229                 break;
    230             }
    231         }
    232         noise_std_deviation = median / 0.6745;
    233         noise_var[0] = noise_std_deviation * noise_std_deviation;
    234     }
    235     if (_channel == CL_IMAGE_CHANNEL_UV) {
    236         for (uint32_t i = 0; i < (image_width / 2); i++) {
    237             for (uint32_t j = 0; j < image_height; j++) {
    238                 uint8_t base = (pixel[2 * i + j * row_pitch] <= 127) ? 127 : 128;
    239                 hist_u[abs(pixel[2 * i + j * row_pitch] - base)]++;
    240                 base = (pixel[2 * i + 1 + j * row_pitch] <= 127) ? 127 : 128;
    241                 hist_v[abs(pixel[2 * i + 1 + j * row_pitch] - base)]++;
    242             }
    243         }
    244         pixel_sum = 0;
    245         median = 0;
    246         for (uint32_t i = 0; i < (hist_bin_count - 1); i++) {
    247             pixel_sum += hist_u[i];
    248             if (pixel_sum >= median_thresh >> 1) {
    249                 median = i;
    250                 break;
    251             }
    252         }
    253         noise_std_deviation = median / 0.6745;
    254         noise_var[1] = noise_std_deviation * noise_std_deviation;
    255 
    256         pixel_sum = 0;
    257         median = 0;
    258         for (uint32_t i = 0; i < (hist_bin_count - 1); i++) {
    259             pixel_sum += hist_v[i];
    260             if (pixel_sum >= median_thresh >> 1) {
    261                 median = i;
    262                 break;
    263             }
    264         }
    265         noise_std_deviation = median / 0.6745;
    266         noise_var[2] = noise_std_deviation * noise_std_deviation;
    267     }
    268 
    269     map_event.release ();
    270 
    271     SmartPtr<CLEvent> unmap_event = new CLEvent;
    272     ret = image->enqueue_unmap (buf_ptr, CLEvent::EmptyList, unmap_event);
    273     if (ret != XCAM_RETURN_NO_ERROR) {
    274         XCAM_LOG_ERROR ("wavelet noise variance buffer enqueue unmap failed");
    275     }
    276     XCAM_ASSERT (unmap_event->get_event_id ());
    277 
    278     ret = unmap_event->wait ();
    279     if (ret != XCAM_RETURN_NO_ERROR) {
    280         XCAM_LOG_ERROR ("wavelet noise variance buffer enqueue unmap event wait failed");
    281     }
    282     unmap_event.release ();
    283 
    284     return ret;
    285 }
    286 
    287 CLWaveletThresholdingKernel::CLWaveletThresholdingKernel (
    288     const SmartPtr<CLContext> &context,
    289     const char *name,
    290     SmartPtr<CLNewWaveletDenoiseImageHandler> &handler,
    291     uint32_t channel,
    292     uint32_t layer)
    293     : CLImageKernel (context, name, true)
    294     , _decomposition_levels (WAVELET_DECOMPOSITION_LEVELS)
    295     , _channel (channel)
    296     , _current_layer (layer)
    297     , _handler (handler)
    298 {
    299 }
    300 
    301 XCamReturn
    302 CLWaveletThresholdingKernel::prepare_arguments (
    303     CLArgList &args, CLWorkSize &work_size)
    304 {
    305     SmartPtr<CLContext> context = get_context ();
    306     float noise_variance[2];
    307 
    308     xcam_mem_clear (noise_variance);
    309     _decomposition_levels = WAVELET_DECOMPOSITION_LEVELS;
    310     float soft_threshold = _handler->get_denoise_config ().threshold[0];
    311     float hard_threshold = _handler->get_denoise_config ().threshold[1];
    312     float anolog_gain_weight = 1.0 + 100 *  _handler->get_denoise_config ().analog_gain;
    313 
    314     SmartPtr<CLWaveletDecompBuffer> buffer;
    315     buffer = _handler->get_decomp_buffer (_channel, _current_layer);
    316 
    317     CLImageDesc cl_desc = buffer->ll->get_image_desc ();
    318 
    319     float weight = 4;
    320     if (_channel == CL_IMAGE_CHANNEL_Y) {
    321         noise_variance[0] = buffer->noise_variance[0] * weight;
    322         noise_variance[1] = buffer->noise_variance[0] * weight;
    323     } else {
    324         noise_variance[0] = buffer->noise_variance[1] * weight;
    325         noise_variance[1] = buffer->noise_variance[2] * weight;
    326     }
    327 #if 0
    328     {
    329         SmartPtr<CLImage> save_image = buffer->hh[0];
    330         _handler->dump_coeff (save_image, _channel, _current_layer, CL_WAVELET_SUBBAND_HH);
    331     }
    332 #endif
    333     if (_channel == CL_IMAGE_CHANNEL_Y) {
    334         args.push_back (new CLArgumentT<float> (noise_variance[0]));
    335         args.push_back (new CLArgumentT<float> (noise_variance[0]));
    336     } else {
    337         args.push_back (new CLArgumentT<float> (noise_variance[0]));
    338         args.push_back (new CLArgumentT<float> (noise_variance[1]));
    339     }
    340 
    341     args.push_back (new CLMemArgument (buffer->hl[0]));
    342     args.push_back (new CLMemArgument (buffer->hl[1]));
    343     args.push_back (new CLMemArgument (buffer->hl[2]));
    344 
    345     args.push_back (new CLMemArgument (buffer->lh[0]));
    346     args.push_back (new CLMemArgument (buffer->lh[1]));
    347     args.push_back (new CLMemArgument (buffer->lh[2]));
    348 
    349     args.push_back (new CLMemArgument (buffer->hh[0]));
    350     args.push_back (new CLMemArgument (buffer->hh[1]));
    351     args.push_back (new CLMemArgument (buffer->hh[2]));
    352 
    353     args.push_back (new CLArgumentT<uint32_t> (_current_layer));
    354     args.push_back (new CLArgumentT<uint32_t> (_decomposition_levels));
    355     args.push_back (new CLArgumentT<float> (hard_threshold));
    356     args.push_back (new CLArgumentT<float> (soft_threshold));
    357     args.push_back (new CLArgumentT<float> (anolog_gain_weight));
    358 
    359     uint32_t cl_width = XCAM_ALIGN_UP (cl_desc.width, 2);
    360     uint32_t cl_height = XCAM_ALIGN_UP (cl_desc.height, 2);
    361 
    362     //set args;
    363     work_size.dim = XCAM_DEFAULT_IMAGE_DIM;
    364     work_size.local[0] = 8;
    365     work_size.local[1] = 4;
    366     work_size.global[0] = XCAM_ALIGN_UP (cl_width , work_size.local[0]);
    367     work_size.global[1] = XCAM_ALIGN_UP (cl_height, work_size.local[1]);
    368 
    369     return XCAM_RETURN_NO_ERROR;
    370 }
    371 
    372 CLWaveletTransformKernel::CLWaveletTransformKernel (
    373     const SmartPtr<CLContext> &context,
    374     const char *name,
    375     SmartPtr<CLNewWaveletDenoiseImageHandler> &handler,
    376     CLWaveletFilterBank fb,
    377     uint32_t channel,
    378     uint32_t layer,
    379     bool bayes_shrink)
    380     : CLImageKernel (context, name, true)
    381     , _filter_bank (fb)
    382     , _decomposition_levels (WAVELET_DECOMPOSITION_LEVELS)
    383     , _channel (channel)
    384     , _current_layer (layer)
    385     , _bayes_shrink (bayes_shrink)
    386     , _handler (handler)
    387 {
    388 }
    389 
    390 XCamReturn
    391 CLWaveletTransformKernel::prepare_arguments (
    392     CLArgList &args, CLWorkSize &work_size)
    393 {
    394     SmartPtr<VideoBuffer> input = _handler->get_input_buf ();
    395     SmartPtr<VideoBuffer> output = _handler->get_output_buf ();
    396     SmartPtr<CLContext> context = get_context ();
    397 
    398     const VideoBufferInfo & video_info_in = input->get_video_info ();
    399     const VideoBufferInfo & video_info_out = output->get_video_info ();
    400 
    401     _decomposition_levels = WAVELET_DECOMPOSITION_LEVELS;
    402     float soft_threshold = _handler->get_denoise_config ().threshold[0];
    403     float hard_threshold = _handler->get_denoise_config ().threshold[1];
    404 
    405     CLImageDesc cl_desc_in, cl_desc_out;
    406     cl_desc_in.format.image_channel_data_type = CL_UNORM_INT8;
    407     cl_desc_in.format.image_channel_order = CL_RGBA;
    408     cl_desc_in.width = XCAM_ALIGN_UP (video_info_in.width, 4) / 4;
    409     cl_desc_in.height = video_info_in.height;
    410     cl_desc_in.row_pitch = video_info_in.strides[0];
    411 
    412     cl_desc_out.format.image_channel_data_type = CL_UNORM_INT8;
    413     cl_desc_out.format.image_channel_order = CL_RGBA;
    414     cl_desc_out.width = XCAM_ALIGN_UP (video_info_out.width, 4) / 4;
    415     cl_desc_out.height = video_info_out.height;
    416     cl_desc_out.row_pitch = video_info_out.strides[0];
    417 
    418     SmartPtr<CLImage> image_in = convert_to_climage (context, input, cl_desc_in, video_info_in.offsets[0]);
    419     SmartPtr<CLImage> image_out = convert_to_climage (context, output, cl_desc_out, video_info_out.offsets[0]);
    420 
    421     cl_desc_in.height = XCAM_ALIGN_UP (video_info_in.height, 2) / 2;
    422     cl_desc_in.row_pitch = video_info_in.strides[1];
    423 
    424     cl_desc_out.height = XCAM_ALIGN_UP (video_info_out.height, 2) / 2;
    425     cl_desc_out.row_pitch = video_info_out.strides[1];
    426 
    427     SmartPtr<CLImage> image_in_uv = convert_to_climage (context, input, cl_desc_in, video_info_in.offsets[1]);
    428     SmartPtr<CLImage> image_out_uv = convert_to_climage (context, output, cl_desc_out, video_info_out.offsets[1]);
    429 
    430     XCAM_FAIL_RETURN (
    431         WARNING,
    432         image_in->is_valid () && image_in_uv->is_valid () &&
    433         image_out->is_valid () && image_out_uv->is_valid(),
    434         XCAM_RETURN_ERROR_MEM,
    435         "cl image kernel(%s) in/out memory not available", get_kernel_name ());
    436 
    437     //set args;
    438     work_size.dim = XCAM_DEFAULT_IMAGE_DIM;
    439     work_size.local[0] = 8;
    440     work_size.local[1] = 4;
    441     if (_channel == CL_IMAGE_CHANNEL_Y) {
    442         work_size.global[0] = XCAM_ALIGN_UP ((video_info_in.width >> _current_layer) / 4 , work_size.local[0]);
    443         work_size.global[1] = XCAM_ALIGN_UP (video_info_in.height >> _current_layer, work_size.local[1]);
    444     } else if (_channel == CL_IMAGE_CHANNEL_UV) {
    445         work_size.global[0] = XCAM_ALIGN_UP ((video_info_in.width >> _current_layer) / 4 , work_size.local[0]);
    446         work_size.global[1] = XCAM_ALIGN_UP (video_info_in.height >> (_current_layer + 1), work_size.local[1]);
    447     }
    448 
    449     SmartPtr<CLWaveletDecompBuffer> buffer;
    450     if (_current_layer == 1) {
    451         if (_filter_bank == CL_WAVELET_HAAR_ANALYSIS) {
    452             if (_channel == CL_IMAGE_CHANNEL_Y) {
    453                 args.push_back (new CLMemArgument (image_in));
    454             } else if (_channel == CL_IMAGE_CHANNEL_UV) {
    455                 args.push_back (new CLMemArgument (image_in_uv));
    456             }
    457         } else if (_filter_bank == CL_WAVELET_HAAR_SYNTHESIS) {
    458             if (_channel == CL_IMAGE_CHANNEL_Y) {
    459                 args.push_back (new CLMemArgument (image_out));
    460             } else if (_channel == CL_IMAGE_CHANNEL_UV) {
    461                 args.push_back (new CLMemArgument (image_out_uv));
    462             }
    463         }
    464     } else {
    465         buffer = get_decomp_buffer (_channel, _current_layer - 1);
    466         args.push_back (new CLMemArgument (buffer->ll));
    467     }
    468 
    469     buffer = get_decomp_buffer (_channel, _current_layer);
    470     args.push_back (new CLMemArgument (buffer->ll));
    471 
    472     if (_bayes_shrink == true) {
    473         if (_filter_bank == CL_WAVELET_HAAR_ANALYSIS) {
    474             args.push_back (new CLMemArgument (buffer->hl[0]));
    475             args.push_back (new CLMemArgument (buffer->lh[0]));
    476             args.push_back (new CLMemArgument (buffer->hh[0]));
    477         } else if (_filter_bank == CL_WAVELET_HAAR_SYNTHESIS) {
    478             args.push_back (new CLMemArgument (buffer->hl[2]));
    479             args.push_back (new CLMemArgument (buffer->lh[2]));
    480             args.push_back (new CLMemArgument (buffer->hh[2]));
    481         }
    482     } else {
    483         args.push_back (new CLMemArgument (buffer->hl[0]));
    484         args.push_back (new CLMemArgument (buffer->lh[0]));
    485         args.push_back (new CLMemArgument (buffer->hh[0]));
    486     }
    487 
    488     args.push_back (new CLArgumentT<uint32_t> (_current_layer));
    489     args.push_back (new CLArgumentT<uint32_t> (_decomposition_levels));
    490     args.push_back (new CLArgumentT<float> (hard_threshold));
    491     args.push_back (new CLArgumentT<float> (soft_threshold));
    492 
    493     return XCAM_RETURN_NO_ERROR;
    494 }
    495 
    496 SmartPtr<CLWaveletDecompBuffer>
    497 CLWaveletTransformKernel::get_decomp_buffer (uint32_t channel, int layer)
    498 {
    499     SmartPtr<CLWaveletDecompBuffer> buffer;
    500     if (_handler.ptr ()) {
    501         buffer = _handler->get_decomp_buffer (channel, layer);
    502     }
    503 
    504     if (!buffer.ptr ()) {
    505         XCAM_LOG_ERROR ("get channel(%d) layer(%d) decomposition buffer failed!", channel, layer);
    506     }
    507     XCAM_ASSERT (buffer.ptr ());
    508     return buffer;
    509 }
    510 
    511 CLNewWaveletDenoiseImageHandler::CLNewWaveletDenoiseImageHandler (
    512     const SmartPtr<CLContext> &context, const char *name, uint32_t channel)
    513     : CLImageHandler (context, name)
    514     , _channel (channel)
    515 {
    516     _config.decomposition_levels = 5;
    517     _config.threshold[0] = 0.5;
    518     _config.threshold[1] = 5.0;
    519     xcam_mem_clear (_noise_variance);
    520 }
    521 
    522 XCamReturn
    523 CLNewWaveletDenoiseImageHandler::prepare_output_buf (SmartPtr<VideoBuffer> &input, SmartPtr<VideoBuffer> &output)
    524 {
    525     XCamReturn ret = XCAM_RETURN_NO_ERROR;
    526     CLImageHandler::prepare_output_buf(input, output);
    527 
    528     SmartPtr<CLContext> context = get_context ();
    529     const VideoBufferInfo & video_info = input->get_video_info ();
    530     CLImageDesc cl_desc;
    531     SmartPtr<CLWaveletDecompBuffer> decompBuffer;
    532 
    533     CLImage::video_info_2_cl_image_desc (video_info, cl_desc);
    534 
    535     _decompBufferList.clear ();
    536 
    537     if (_channel & CL_IMAGE_CHANNEL_Y) {
    538         for (int layer = 1; layer <= WAVELET_DECOMPOSITION_LEVELS; layer++) {
    539             decompBuffer = new CLWaveletDecompBuffer ();
    540             if (decompBuffer.ptr ()) {
    541                 decompBuffer->width = XCAM_ALIGN_UP (video_info.width, 1 << layer) >> layer;
    542                 decompBuffer->height = XCAM_ALIGN_UP (video_info.height, 1 << layer) >> layer;
    543                 decompBuffer->width = XCAM_ALIGN_UP (decompBuffer->width, 4);
    544                 decompBuffer->height = XCAM_ALIGN_UP (decompBuffer->height, 2);
    545 
    546                 decompBuffer->channel = CL_IMAGE_CHANNEL_Y;
    547                 decompBuffer->layer = layer;
    548                 decompBuffer->noise_variance[0] = 0;
    549 
    550                 cl_desc.width = decompBuffer->width / 4;
    551                 cl_desc.height = decompBuffer->height;
    552                 cl_desc.slice_pitch = 0;
    553                 cl_desc.format.image_channel_order = CL_RGBA;
    554                 cl_desc.format.image_channel_data_type = CL_UNORM_INT8;
    555 
    556                 decompBuffer->ll = new CLImage2D (context, cl_desc);
    557 
    558                 decompBuffer->hl[0] = new CLImage2D (context, cl_desc);
    559                 decompBuffer->lh[0] = new CLImage2D (context, cl_desc);
    560                 decompBuffer->hh[0] = new CLImage2D (context, cl_desc);
    561                 /*
    562                                 uint32_t width = decompBuffer->width / 4;
    563                                 uint32_t height = decompBuffer->height;
    564                                 SmartPtr<CLBuffer> hh_buffer = new CLBuffer (
    565                                     context, sizeof(uint8_t) * width * height,
    566                                     CL_MEM_READ_WRITE, NULL);
    567                                 CLImageDesc hh_desc;
    568                                 hh_desc.format = {CL_RGBA, CL_UNORM_INT8};
    569                                 hh_desc.width = width;
    570                                 hh_desc.height = height;
    571                                 hh_desc.row_pitch = sizeof(uint8_t) * width;
    572                                 hh_desc.slice_pitch = 0;
    573                                 hh_desc.size = 0;
    574                                 hh_desc.array_size = 0;
    575 
    576                                 decompBuffer->hh[0] = new CLImage2D (
    577                                     context, hh_desc, 0, hh_buffer);
    578                 */
    579 
    580                 cl_desc.format.image_channel_data_type = CL_UNORM_INT16;
    581                 decompBuffer->hl[1] = new CLImage2D (context, cl_desc);
    582                 decompBuffer->lh[1] = new CLImage2D (context, cl_desc);
    583                 decompBuffer->hh[1] = new CLImage2D (context, cl_desc);
    584 
    585                 cl_desc.format.image_channel_data_type = CL_UNORM_INT8;
    586                 decompBuffer->hl[2] = new CLImage2D (context, cl_desc);
    587                 decompBuffer->lh[2] = new CLImage2D (context, cl_desc);
    588                 decompBuffer->hh[2] = new CLImage2D (context, cl_desc);
    589 
    590                 _decompBufferList.push_back (decompBuffer);
    591             } else {
    592                 XCAM_LOG_ERROR ("create Y decomposition buffer failed!");
    593                 ret = XCAM_RETURN_ERROR_MEM;
    594             }
    595         }
    596     }
    597 
    598     if (_channel & CL_IMAGE_CHANNEL_UV) {
    599         for (int layer = 1; layer <= WAVELET_DECOMPOSITION_LEVELS; layer++) {
    600             decompBuffer = new CLWaveletDecompBuffer ();
    601             if (decompBuffer.ptr ()) {
    602                 decompBuffer->width = XCAM_ALIGN_UP (video_info.width, 1 << layer) >> layer;
    603                 decompBuffer->height = XCAM_ALIGN_UP (video_info.height, 1 << (layer + 1)) >> (layer + 1);
    604                 decompBuffer->width = XCAM_ALIGN_UP (decompBuffer->width, 4);
    605                 decompBuffer->height = XCAM_ALIGN_UP (decompBuffer->height, 2);
    606 
    607                 decompBuffer->channel = CL_IMAGE_CHANNEL_UV;
    608                 decompBuffer->layer = layer;
    609                 decompBuffer->noise_variance[1] = 0;
    610                 decompBuffer->noise_variance[2] = 0;
    611 
    612                 cl_desc.width = decompBuffer->width / 4;
    613                 cl_desc.height = decompBuffer->height;
    614                 cl_desc.slice_pitch = 0;
    615                 cl_desc.format.image_channel_order = CL_RGBA;
    616                 cl_desc.format.image_channel_data_type = CL_UNORM_INT8;
    617 
    618                 decompBuffer->ll = new CLImage2D (context, cl_desc);
    619 
    620                 decompBuffer->hl[0] = new CLImage2D (context, cl_desc);
    621                 decompBuffer->lh[0] = new CLImage2D (context, cl_desc);
    622                 decompBuffer->hh[0] = new CLImage2D (context, cl_desc);
    623                 /*
    624                                 uint32_t width = decompBuffer->width / 4;
    625                                 uint32_t height = decompBuffer->height;
    626                                 SmartPtr<CLBuffer> hh_buffer = new CLBuffer (
    627                                     context, sizeof(uint8_t) * width * height,
    628                                     CL_MEM_READ_WRITE, NULL);
    629                                 CLImageDesc hh_desc;
    630                                 hh_desc.format = {CL_RGBA, CL_UNORM_INT8};
    631                                 hh_desc.width = width;
    632                                 hh_desc.height = height;
    633                                 hh_desc.row_pitch = sizeof(uint8_t) * width;
    634                                 hh_desc.slice_pitch = 0;
    635                                 hh_desc.size = 0;
    636                                 hh_desc.array_size = 0;
    637                                 decompBuffer->hh[0] = new CLImage2D (
    638                                     context, hh_desc, 0, hh_buffer);
    639                 */
    640                 cl_desc.format.image_channel_data_type = CL_UNORM_INT16;
    641                 decompBuffer->hl[1] = new CLImage2D (context, cl_desc);
    642                 decompBuffer->lh[1] = new CLImage2D (context, cl_desc);
    643                 decompBuffer->hh[1] = new CLImage2D (context, cl_desc);
    644 
    645                 cl_desc.format.image_channel_data_type = CL_UNORM_INT8;
    646                 decompBuffer->hl[2] = new CLImage2D (context, cl_desc);
    647                 decompBuffer->lh[2] = new CLImage2D (context, cl_desc);
    648                 decompBuffer->hh[2] = new CLImage2D (context, cl_desc);
    649 
    650                 _decompBufferList.push_back (decompBuffer);
    651             } else {
    652                 XCAM_LOG_ERROR ("create UV decomposition buffer failed!");
    653                 ret = XCAM_RETURN_ERROR_MEM;
    654             }
    655         }
    656     }
    657     return ret;
    658 }
    659 
    660 bool
    661 CLNewWaveletDenoiseImageHandler::set_denoise_config (const XCam3aResultWaveletNoiseReduction& config)
    662 {
    663     _config = config;
    664 
    665     return true;
    666 }
    667 
    668 SmartPtr<CLWaveletDecompBuffer>
    669 CLNewWaveletDenoiseImageHandler::get_decomp_buffer (uint32_t channel, int layer)
    670 {
    671     SmartPtr<CLWaveletDecompBuffer> buffer;
    672 
    673     for (CLWaveletDecompBufferList::iterator it = _decompBufferList.begin ();
    674             it != _decompBufferList.end (); ++it) {
    675         if ((channel == (*it)->channel) && (layer == (*it)->layer))
    676             buffer = (*it);
    677     }
    678     return buffer;
    679 }
    680 
    681 void
    682 CLNewWaveletDenoiseImageHandler::set_estimated_noise_variation (float* noise_var)
    683 {
    684     if (noise_var == NULL) {
    685         XCAM_LOG_ERROR ("invalid input noise variation!");
    686         return;
    687     }
    688     _noise_variance[0] = noise_var[0];
    689     _noise_variance[1] = noise_var[1];
    690     _noise_variance[2] = noise_var[2];
    691 }
    692 
    693 void
    694 CLNewWaveletDenoiseImageHandler::get_estimated_noise_variation (float* noise_var)
    695 {
    696     if (noise_var == NULL) {
    697         XCAM_LOG_ERROR ("invalid output parameters!");
    698         return;
    699     }
    700     noise_var[0] = _noise_variance[0];
    701     noise_var[1] = _noise_variance[1];
    702     noise_var[2] = _noise_variance[2];
    703 }
    704 
    705 void
    706 CLNewWaveletDenoiseImageHandler::dump_coeff (SmartPtr<CLImage> image, uint32_t channel, uint32_t layer, uint32_t subband)
    707 {
    708     FILE *file;
    709 
    710     void *buf_ptr = NULL;
    711     SmartPtr<CLEvent> map_event = new CLEvent;
    712 
    713     CLImageDesc cl_desc = image->get_image_desc ();
    714 
    715     uint32_t cl_width = XCAM_ALIGN_UP (cl_desc.width, 2);
    716     uint32_t cl_height = XCAM_ALIGN_UP (cl_desc.height, 2);
    717 
    718     size_t origin[3] = {0, 0, 0};
    719     size_t row_pitch = cl_desc.row_pitch;
    720     size_t slice_pitch = 0;
    721     size_t region[3] = {cl_width, cl_height, 1};
    722 
    723     image->enqueue_map (buf_ptr,
    724                         origin, region,
    725                         &row_pitch, &slice_pitch,
    726                         CL_MAP_READ,
    727                         CLEvent::EmptyList,
    728                         map_event);
    729     XCAM_ASSERT (map_event->get_event_id ());
    730 
    731     map_event->wait ();
    732 
    733     uint8_t* pixel = (uint8_t*)buf_ptr;
    734     uint32_t pixel_count = row_pitch * cl_height;
    735 
    736     char file_name[512];
    737     snprintf (file_name, sizeof(file_name),
    738               "wavelet_cl_coeff_"
    739               "channel%d_"
    740               "layer%d_"
    741               "subband%d_"
    742               "rowpitch%d_"
    743               "width%dxheight%d"
    744               ".raw",
    745               channel, layer, subband, (uint32_t)row_pitch, cl_width, cl_height);
    746     file = fopen(file_name, "wb");
    747 
    748     if (file != NULL) {
    749         if (fwrite (pixel, pixel_count, 1, file) <= 0) {
    750             XCAM_LOG_WARNING ("write frame failed.");
    751         }
    752         fclose (file);
    753     }
    754     map_event.release ();
    755 
    756     SmartPtr<CLEvent> unmap_event = new CLEvent;
    757     image->enqueue_unmap (buf_ptr, CLEvent::EmptyList, unmap_event);
    758     XCAM_ASSERT (unmap_event->get_event_id ());
    759 
    760     unmap_event->wait ();
    761     unmap_event.release ();
    762 }
    763 
    764 static SmartPtr<CLWaveletTransformKernel>
    765 create_kernel_haar_decomposition (
    766     const SmartPtr<CLContext> &context,
    767     SmartPtr<CLNewWaveletDenoiseImageHandler> handler,
    768     uint32_t channel,
    769     uint32_t layer,
    770     bool bayes_shrink)
    771 {
    772     SmartPtr<CLWaveletTransformKernel> haar_decomp_kernel;
    773 
    774     char build_options[1024];
    775     xcam_mem_clear (build_options);
    776 
    777     snprintf (build_options, sizeof (build_options),
    778               " -DWAVELET_DENOISE_Y=%d "
    779               " -DWAVELET_DENOISE_UV=%d ",
    780               (channel == CL_IMAGE_CHANNEL_Y ? 1 : 0),
    781               (channel == CL_IMAGE_CHANNEL_UV ? 1 : 0));
    782 
    783     haar_decomp_kernel = new CLWaveletTransformKernel (context, "kernel_wavelet_haar_decomposition",
    784             handler, CL_WAVELET_HAAR_ANALYSIS, channel, layer, bayes_shrink);
    785 
    786     XCAM_ASSERT (haar_decomp_kernel.ptr ());
    787     XCAM_FAIL_RETURN (
    788         WARNING,
    789         haar_decomp_kernel->build_kernel (kernel_new_wavelet_info[KernelWaveletDecompose], build_options) == XCAM_RETURN_NO_ERROR,
    790         NULL,
    791         "wavelet denoise build kernel(%s) failed", kernel_new_wavelet_info[KernelWaveletDecompose].kernel_name);
    792     XCAM_ASSERT (haar_decomp_kernel->is_valid ());
    793 
    794     return haar_decomp_kernel;
    795 }
    796 
    797 static SmartPtr<CLWaveletTransformKernel>
    798 create_kernel_haar_reconstruction (
    799     const SmartPtr<CLContext> &context,
    800     SmartPtr<CLNewWaveletDenoiseImageHandler> handler,
    801     uint32_t channel,
    802     uint32_t layer,
    803     bool bayes_shrink)
    804 {
    805     SmartPtr<CLWaveletTransformKernel> haar_reconstruction_kernel;
    806 
    807     char build_options[1024];
    808     xcam_mem_clear (build_options);
    809     snprintf (build_options, sizeof (build_options),
    810               " -DWAVELET_DENOISE_Y=%d "
    811               " -DWAVELET_DENOISE_UV=%d "
    812               " -DWAVELET_BAYES_SHRINK=%d",
    813               (channel == CL_IMAGE_CHANNEL_Y ? 1 : 0),
    814               (channel == CL_IMAGE_CHANNEL_UV ? 1 : 0),
    815               (bayes_shrink == true ? 1 : 0));
    816 
    817     haar_reconstruction_kernel = new CLWaveletTransformKernel (context, "kernel_wavelet_haar_reconstruction",
    818             handler, CL_WAVELET_HAAR_SYNTHESIS, channel, layer, bayes_shrink);
    819 
    820     XCAM_ASSERT (haar_reconstruction_kernel.ptr ());
    821     XCAM_FAIL_RETURN (
    822         WARNING,
    823         haar_reconstruction_kernel->build_kernel (kernel_new_wavelet_info[KernelWaveletReconstruct], build_options) == XCAM_RETURN_NO_ERROR,
    824         NULL,
    825         "wavelet denoise build kernel(%s) failed", kernel_new_wavelet_info[KernelWaveletReconstruct].kernel_name);
    826     XCAM_ASSERT (haar_reconstruction_kernel->is_valid ());
    827 
    828     return haar_reconstruction_kernel;
    829 }
    830 
    831 static SmartPtr<CLWaveletNoiseEstimateKernel>
    832 create_kernel_noise_estimation (
    833     const SmartPtr<CLContext> &context,
    834     SmartPtr<CLNewWaveletDenoiseImageHandler> handler,
    835     uint32_t channel, uint32_t subband, uint32_t layer)
    836 {
    837     SmartPtr<CLWaveletNoiseEstimateKernel> estimation_kernel;
    838 
    839     char build_options[1024];
    840     xcam_mem_clear (build_options);
    841 
    842     snprintf (build_options, sizeof (build_options),
    843               " -DWAVELET_DENOISE_Y=%d "
    844               " -DWAVELET_DENOISE_UV=%d ",
    845               (channel == CL_IMAGE_CHANNEL_Y ? 1 : 0),
    846               (channel == CL_IMAGE_CHANNEL_UV ? 1 : 0));
    847 
    848     estimation_kernel = new CLWaveletNoiseEstimateKernel (
    849         context, "kernel_wavelet_coeff_variance", handler, channel, subband, layer);
    850     XCAM_ASSERT (estimation_kernel.ptr ());
    851     XCAM_FAIL_RETURN (
    852         WARNING,
    853         estimation_kernel->build_kernel (kernel_new_wavelet_info[KernelWaveletNoiseEstimate], build_options) == XCAM_RETURN_NO_ERROR,
    854         NULL,
    855         "wavelet denoise build kernel(%s) failed", kernel_new_wavelet_info[KernelWaveletNoiseEstimate].kernel_name);
    856     XCAM_ASSERT (estimation_kernel->is_valid ());
    857 
    858     return estimation_kernel;
    859 }
    860 
    861 static SmartPtr<CLWaveletThresholdingKernel>
    862 create_kernel_thresholding (
    863     const SmartPtr<CLContext> &context,
    864     SmartPtr<CLNewWaveletDenoiseImageHandler> handler,
    865     uint32_t channel, uint32_t layer)
    866 {
    867     SmartPtr<CLWaveletThresholdingKernel> threshold_kernel;
    868 
    869     char build_options[1024];
    870     xcam_mem_clear (build_options);
    871 
    872     snprintf (build_options, sizeof (build_options),
    873               " -DWAVELET_DENOISE_Y=%d "
    874               " -DWAVELET_DENOISE_UV=%d ",
    875               (channel == CL_IMAGE_CHANNEL_Y ? 1 : 0),
    876               (channel == CL_IMAGE_CHANNEL_UV ? 1 : 0));
    877 
    878     threshold_kernel = new CLWaveletThresholdingKernel (context,
    879             "kernel_wavelet_coeff_thresholding",
    880             handler, channel, layer);
    881     XCAM_ASSERT (threshold_kernel.ptr ());
    882     XCAM_FAIL_RETURN (
    883         WARNING,
    884         threshold_kernel->build_kernel (kernel_new_wavelet_info[KernelWaveletThreshold], build_options) == XCAM_RETURN_NO_ERROR,
    885         NULL,
    886         "wavelet denoise build kernel(%s) failed", kernel_new_wavelet_info[KernelWaveletThreshold].kernel_name);
    887     XCAM_ASSERT (threshold_kernel->is_valid ());
    888 
    889     return threshold_kernel;
    890 }
    891 
    892 SmartPtr<CLImageHandler>
    893 create_cl_newwavelet_denoise_image_handler (
    894     const SmartPtr<CLContext> &context, uint32_t channel, bool bayes_shrink)
    895 {
    896     SmartPtr<CLNewWaveletDenoiseImageHandler> wavelet_handler;
    897     SmartPtr<CLWaveletTransformKernel> haar_decomposition_kernel;
    898     SmartPtr<CLWaveletTransformKernel> haar_reconstruction_kernel;
    899 
    900     wavelet_handler = new CLNewWaveletDenoiseImageHandler (context, "cl_newwavelet_denoise_handler", channel);
    901     XCAM_ASSERT (wavelet_handler.ptr ());
    902 
    903     if (channel & CL_IMAGE_CHANNEL_Y) {
    904         for (int layer = 1; layer <= WAVELET_DECOMPOSITION_LEVELS; layer++) {
    905             SmartPtr<CLImageKernel> image_kernel =
    906                 create_kernel_haar_decomposition (context, wavelet_handler, CL_IMAGE_CHANNEL_Y, layer, bayes_shrink);
    907             wavelet_handler->add_kernel (image_kernel);
    908         }
    909 
    910         if (bayes_shrink) {
    911             for (int layer = 1; layer <= WAVELET_DECOMPOSITION_LEVELS; layer++) {
    912                 SmartPtr<CLImageKernel> image_kernel;
    913 
    914                 image_kernel = create_kernel_noise_estimation (context, wavelet_handler,
    915                                CL_IMAGE_CHANNEL_Y, CL_WAVELET_SUBBAND_HH, layer);
    916                 wavelet_handler->add_kernel (image_kernel);
    917 
    918                 image_kernel = create_kernel_noise_estimation (context, wavelet_handler,
    919                                CL_IMAGE_CHANNEL_Y, CL_WAVELET_SUBBAND_LH, layer);
    920                 wavelet_handler->add_kernel (image_kernel);
    921 
    922                 image_kernel = create_kernel_noise_estimation (context, wavelet_handler,
    923                                CL_IMAGE_CHANNEL_Y, CL_WAVELET_SUBBAND_HL, layer);
    924                 wavelet_handler->add_kernel (image_kernel);
    925             }
    926             for (int layer = 1; layer <= WAVELET_DECOMPOSITION_LEVELS; layer++) {
    927                 SmartPtr<CLImageKernel> image_kernel;
    928                 image_kernel = create_kernel_thresholding (context, wavelet_handler, CL_IMAGE_CHANNEL_Y, layer);
    929                 wavelet_handler->add_kernel (image_kernel);
    930             }
    931         }
    932 
    933         for (int layer = WAVELET_DECOMPOSITION_LEVELS; layer >= 1; layer--) {
    934             SmartPtr<CLImageKernel> image_kernel =
    935                 create_kernel_haar_reconstruction (context, wavelet_handler, CL_IMAGE_CHANNEL_Y, layer, bayes_shrink);
    936             wavelet_handler->add_kernel (image_kernel);
    937         }
    938     }
    939 
    940     if (channel & CL_IMAGE_CHANNEL_UV) {
    941         for (int layer = 1; layer <= WAVELET_DECOMPOSITION_LEVELS; layer++) {
    942             SmartPtr<CLImageKernel> image_kernel =
    943                 create_kernel_haar_decomposition (context, wavelet_handler, CL_IMAGE_CHANNEL_UV, layer, bayes_shrink);
    944             wavelet_handler->add_kernel (image_kernel);
    945         }
    946 
    947         if (bayes_shrink) {
    948             for (int layer = 1; layer <= WAVELET_DECOMPOSITION_LEVELS; layer++) {
    949                 SmartPtr<CLImageKernel> image_kernel;
    950 
    951                 image_kernel = create_kernel_noise_estimation (context, wavelet_handler,
    952                                CL_IMAGE_CHANNEL_UV, CL_WAVELET_SUBBAND_HH, layer);
    953                 wavelet_handler->add_kernel (image_kernel);
    954 
    955                 image_kernel = create_kernel_noise_estimation (context, wavelet_handler,
    956                                CL_IMAGE_CHANNEL_UV, CL_WAVELET_SUBBAND_LH, layer);
    957                 wavelet_handler->add_kernel (image_kernel);
    958 
    959                 image_kernel = create_kernel_noise_estimation (context, wavelet_handler,
    960                                CL_IMAGE_CHANNEL_UV, CL_WAVELET_SUBBAND_HL, layer);
    961                 wavelet_handler->add_kernel (image_kernel);
    962             }
    963             for (int layer = 1; layer <= WAVELET_DECOMPOSITION_LEVELS; layer++) {
    964                 SmartPtr<CLImageKernel> image_kernel;
    965                 image_kernel = create_kernel_thresholding (context, wavelet_handler, CL_IMAGE_CHANNEL_UV, layer);
    966                 wavelet_handler->add_kernel (image_kernel);
    967             }
    968         }
    969 
    970         for (int layer = WAVELET_DECOMPOSITION_LEVELS; layer >= 1; layer--) {
    971             SmartPtr<CLImageKernel> image_kernel =
    972                 create_kernel_haar_reconstruction (context, wavelet_handler, CL_IMAGE_CHANNEL_UV, layer, bayes_shrink);
    973             wavelet_handler->add_kernel (image_kernel);
    974         }
    975     }
    976 
    977     return wavelet_handler;
    978 }
    979 
    980 };
    981