Home | History | Annotate | Download | only in opencl
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
     14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
     15 // Third party copyrights are property of their respective owners.
     16 //
     17 // @Authors
     18 //    Shengen Yan,yanshengen (at) gmail.com
     19 //
     20 // Redistribution and use in source and binary forms, with or without modification,
     21 // are permitted provided that the following conditions are met:
     22 //
     23 //   * Redistribution's of source code must retain the above copyright notice,
     24 //     this list of conditions and the following disclaimer.
     25 //
     26 //   * Redistribution's in binary form must reproduce the above copyright notice,
     27 //     this list of conditions and the following disclaimer in the documentation
     28 //     and/or other materials provided with the distribution.
     29 //
     30 //   * The name of the copyright holders may not be used to endorse or promote products
     31 //     derived from this software without specific prior written permission.
     32 //
     33 // This software is provided by the copyright holders and contributors as is and
     34 // any express or implied warranties, including, but not limited to, the implied
     35 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     36 // In no event shall the Intel Corporation or contributors be liable for any direct,
     37 // indirect, incidental, special, exemplary, or consequential damages
     38 // (including, but not limited to, procurement of substitute goods or services;
     39 // loss of use, data, or profits; or business interruption) however caused
     40 // and on any theory of liability, whether in contract, strict liability,
     41 // or tort (including negligence or otherwise) arising in any way out of
     42 // the use of this software, even if advised of the possibility of such damage.
     43 //
     44 //M*/
     45 
     46 ///////////////////////////////////////////////////////////////////////////////////////////////////
     47 /////////////////////////////////Macro for border type////////////////////////////////////////////
     48 /////////////////////////////////////////////////////////////////////////////////////////////////
     49 
     50 #ifdef BORDER_CONSTANT
     51 #elif defined BORDER_REPLICATE
     52 #define EXTRAPOLATE(x, maxV) \
     53     { \
     54         x = max(min(x, maxV - 1), 0); \
     55     }
     56 #elif defined BORDER_WRAP
     57 #define EXTRAPOLATE(x, maxV) \
     58     { \
     59         if (x < 0) \
     60             x -= ((x - maxV + 1) / maxV) * maxV; \
     61         if (x >= maxV) \
     62             x %= maxV; \
     63     }
     64 #elif defined(BORDER_REFLECT) || defined(BORDER_REFLECT101)
     65 #define EXTRAPOLATE_(x, maxV, delta) \
     66     { \
     67         if (maxV == 1) \
     68             x = 0; \
     69         else \
     70             do \
     71             { \
     72                 if ( x < 0 ) \
     73                     x = -x - 1 + delta; \
     74                 else \
     75                     x = maxV - 1 - (x - maxV) - delta; \
     76             } \
     77             while (x >= maxV || x < 0); \
     78     }
     79 #ifdef BORDER_REFLECT
     80 #define EXTRAPOLATE(x, maxV) EXTRAPOLATE_(x, maxV, 0)
     81 #else
     82 #define EXTRAPOLATE(x, maxV) EXTRAPOLATE_(x, maxV, 1)
     83 #endif
     84 #else
     85 #error No extrapolation method
     86 #endif
     87 
     88 #define THREADS 256
     89 
     90 ///////////////////////////////////////////////////////////////////////////////////////////////////
     91 /////////////////////////////////////calcHarris////////////////////////////////////////////////////
     92 ///////////////////////////////////////////////////////////////////////////////////////////////////
     93 
     94 __kernel void corner(__global const float * Dx, int dx_step, int dx_offset, int dx_whole_rows, int dx_whole_cols,
     95                      __global const float * Dy, int dy_step, int dy_offset, int dy_whole_rows, int dy_whole_cols,
     96                      __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, float k)
     97 {
     98     int col = get_local_id(0);
     99     int gX = get_group_id(0);
    100     int gY = get_group_id(1);
    101     int gly = get_global_id(1);
    102 
    103     int dx_x_off = (dx_offset % dx_step) >> 2;
    104     int dx_y_off = dx_offset / dx_step;
    105     int dy_x_off = (dy_offset % dy_step) >> 2;
    106     int dy_y_off = dy_offset / dy_step;
    107     int dst_x_off = (dst_offset % dst_step) >> 2;
    108     int dst_y_off = dst_offset / dst_step;
    109 
    110     int dx_startX = gX * (THREADS-ksX+1) - anX + dx_x_off;
    111     int dx_startY = (gY << 1) - anY + dx_y_off;
    112     int dy_startX = gX * (THREADS-ksX+1) - anX + dy_x_off;
    113     int dy_startY = (gY << 1) - anY + dy_y_off;
    114     int dst_startX = gX * (THREADS-ksX+1) + dst_x_off;
    115     int dst_startY = (gY << 1) + dst_y_off;
    116 
    117     float data[3][ksY+1];
    118     __local float temp[6][THREADS];
    119 
    120 #ifdef BORDER_CONSTANT
    121     for (int i=0; i < ksY+1; i++)
    122     {
    123         bool dx_con = dx_startX+col >= 0 && dx_startX+col < dx_whole_cols && dx_startY+i >= 0 && dx_startY+i < dx_whole_rows;
    124         int indexDx = mad24(dx_startY+i, dx_step>>2, dx_startX+col);
    125         float dx_s = dx_con ? Dx[indexDx] : 0.0f;
    126 
    127         bool dy_con = dy_startX+col >= 0 && dy_startX+col < dy_whole_cols && dy_startY+i >= 0 && dy_startY+i < dy_whole_rows;
    128         int indexDy = mad24(dy_startY+i, dy_step>>2, dy_startX+col);
    129         float dy_s = dy_con ? Dy[indexDy] : 0.0f;
    130 
    131         data[0][i] = dx_s * dx_s;
    132         data[1][i] = dx_s * dy_s;
    133         data[2][i] = dy_s * dy_s;
    134     }
    135 #else
    136     int clamped_col = min(2*dst_cols, col);
    137     for (int i=0; i < ksY+1; i++)
    138     {
    139         int dx_selected_row = dx_startY+i, dx_selected_col = dx_startX+clamped_col;
    140         EXTRAPOLATE(dx_selected_row, dx_whole_rows)
    141         EXTRAPOLATE(dx_selected_col, dx_whole_cols)
    142         float dx_s = Dx[mad24(dx_selected_row, dx_step>>2, dx_selected_col)];
    143 
    144         int dy_selected_row = dy_startY+i, dy_selected_col = dy_startX+clamped_col;
    145         EXTRAPOLATE(dy_selected_row, dy_whole_rows)
    146         EXTRAPOLATE(dy_selected_col, dy_whole_cols)
    147         float dy_s = Dy[mad24(dy_selected_row, dy_step>>2, dy_selected_col)];
    148 
    149         data[0][i] = dx_s * dx_s;
    150         data[1][i] = dx_s * dy_s;
    151         data[2][i] = dy_s * dy_s;
    152     }
    153 #endif
    154     float sum0 = 0.0f, sum1 = 0.0f, sum2 = 0.0f;
    155     for (int i=1; i < ksY; i++)
    156     {
    157         sum0 += data[0][i];
    158         sum1 += data[1][i];
    159         sum2 += data[2][i];
    160     }
    161 
    162     float sum01 = sum0 + data[0][0];
    163     float sum02 = sum0 + data[0][ksY];
    164     temp[0][col] = sum01;
    165     temp[1][col] = sum02;
    166     float sum11 = sum1 + data[1][0];
    167     float sum12 = sum1 + data[1][ksY];
    168     temp[2][col] = sum11;
    169     temp[3][col] = sum12;
    170     float sum21 = sum2 + data[2][0];
    171     float sum22 = sum2 + data[2][ksY];
    172     temp[4][col] = sum21;
    173     temp[5][col] = sum22;
    174     barrier(CLK_LOCAL_MEM_FENCE);
    175 
    176     if (col < (THREADS - (ksX - 1)))
    177     {
    178         col += anX;
    179         int posX = dst_startX - dst_x_off + col - anX;
    180         int posY = (gly << 1);
    181         int till = (ksX + 1) & 1;
    182         float tmp_sum[6] = { 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f };
    183         for (int k=0; k<6; k++)
    184         {
    185             float temp_sum = 0;
    186             for (int i=-anX; i<=anX - till; i++)
    187                 temp_sum += temp[k][col+i];
    188             tmp_sum[k] = temp_sum;
    189         }
    190 
    191 #ifdef CORNER_HARRIS
    192         if (posX < dst_cols && (posY) < dst_rows)
    193         {
    194             int dst_index = mad24(dst_step, dst_startY, (int)sizeof(float) * (dst_startX + col - anX));
    195             *(__global float *)(dst + dst_index) =
    196                     tmp_sum[0] * tmp_sum[4] - tmp_sum[2] * tmp_sum[2] - k * (tmp_sum[0] + tmp_sum[4]) * (tmp_sum[0] + tmp_sum[4]);
    197         }
    198         if (posX < dst_cols && (posY + 1) < dst_rows)
    199         {
    200             int dst_index = mad24(dst_step, dst_startY + 1, (int)sizeof(float) * (dst_startX + col - anX));
    201             *(__global float *)(dst + dst_index) =
    202                     tmp_sum[1] * tmp_sum[5] - tmp_sum[3] * tmp_sum[3] - k * (tmp_sum[1] + tmp_sum[5]) * (tmp_sum[1] + tmp_sum[5]);
    203         }
    204 #elif defined CORNER_MINEIGENVAL
    205         if (posX < dst_cols && (posY) < dst_rows)
    206         {
    207             int dst_index = mad24(dst_step, dst_startY, (int)sizeof(float) * (dst_startX + col - anX));
    208             float a = tmp_sum[0] * 0.5f;
    209             float b = tmp_sum[2];
    210             float c = tmp_sum[4] * 0.5f;
    211             *(__global float *)(dst + dst_index) = (float)((a+c) - native_sqrt((a-c)*(a-c) + b*b));
    212         }
    213         if (posX < dst_cols && (posY + 1) < dst_rows)
    214         {
    215             int dst_index = mad24(dst_step, dst_startY + 1, (int)sizeof(float) * (dst_startX + col - anX));
    216             float a = tmp_sum[1] * 0.5f;
    217             float b = tmp_sum[3];
    218             float c = tmp_sum[5] * 0.5f;
    219             *(__global float *)(dst + dst_index) = (float)((a+c) - native_sqrt((a-c)*(a-c) + b*b));
    220         }
    221 #else
    222 #error "No such corners type"
    223 #endif
    224     }
    225 }
    226