Home | History | Annotate | Download | only in opencl
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
     14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
     15 // Third party copyrights are property of their respective owners.
     16 //
     17 // @Authors
     18 //    Peng Xiao, pengxiao (at) multicorewareinc.com
     19 //
     20 // Redistribution and use in source and binary forms, with or without modification,
     21 // are permitted provided that the following conditions are met:
     22 //
     23 //   * Redistribution's of source code must retain the above copyright notice,
     24 //     this list of conditions and the following disclaimer.
     25 //
     26 //   * Redistribution's in binary form must reproduce the above copyright notice,
     27 //     this list of conditions and the following disclaimer in the documentation
     28 //     and/or other materials provided with the distribution.
     29 //
     30 //   * The name of the copyright holders may not be used to endorse or promote products
     31 //     derived from this software without specific prior written permission.
     32 //
     33 // This software is provided by the copyright holders and contributors as is and
     34 // any express or implied warranties, including, but not limited to, the implied
     35 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     36 // In no event shall the Intel Corporation or contributors be liable for any direct,
     37 // indirect, incidental, special, exemplary, or consequential damages
     38 // (including, but not limited to, procurement of substitute goods or services;
     39 // loss of use, data, or profits; or business interruption) however caused
     40 // and on any theory of liability, whether in contract, strict liability,
     41 // or tort (including negligence or otherwise) arising in any way out of
     42 // the use of this software, even if advised of the possibility of such damage.
     43 //
     44 //M*/
     45 
     46 #define TG22 0.4142135623730950488016887242097f
     47 #define TG67 2.4142135623730950488016887242097f
     48 
     49 #ifdef WITH_SOBEL
     50 
     51 #if cn == 1
     52 #define loadpix(addr) convert_floatN(*(__global const TYPE *)(addr))
     53 #else
     54 #define loadpix(addr) convert_floatN(vload3(0, (__global const TYPE *)(addr)))
     55 #endif
     56 #define storepix(value, addr) *(__global int *)(addr) = (int)(value)
     57 
     58 /*
     59     stage1_with_sobel:
     60         Sobel operator
     61         Calc magnitudes
     62         Non maxima suppression
     63         Double thresholding
     64 */
     65 
     66 __constant int prev[4][2] = {
     67     { 0, -1 },
     68     { -1, 1 },
     69     { -1, 0 },
     70     { -1, -1 }
     71 };
     72 
     73 __constant int next[4][2] = {
     74     { 0, 1 },
     75     { 1, -1 },
     76     { 1, 0 },
     77     { 1, 1 }
     78 };
     79 
     80 inline float3 sobel(int idx, __local const floatN *smem)
     81 {
     82     // result: x, y, mag
     83     float3 res;
     84 
     85     floatN dx = fma(2, smem[idx + GRP_SIZEX + 6] - smem[idx + GRP_SIZEX + 4],
     86         smem[idx + 2] - smem[idx] + smem[idx + 2 * GRP_SIZEX + 10] - smem[idx + 2 * GRP_SIZEX + 8]);
     87 
     88     floatN dy = fma(2, smem[idx + 1] - smem[idx + 2 * GRP_SIZEX + 9],
     89         smem[idx + 2] - smem[idx + 2 * GRP_SIZEX + 10] + smem[idx] - smem[idx + 2 * GRP_SIZEX + 8]);
     90 
     91 #ifdef L2GRAD
     92     floatN magN = fma(dx, dx, dy * dy);
     93 #else
     94     floatN magN = fabs(dx) + fabs(dy);
     95 #endif
     96 #if cn == 1
     97     res.z = magN;
     98     res.x = dx;
     99     res.y = dy;
    100 #else
    101     res.z = max(magN.x, max(magN.y, magN.z));
    102     if (res.z == magN.y)
    103     {
    104         dx.x = dx.y;
    105         dy.x = dy.y;
    106     }
    107     else if (res.z == magN.z)
    108     {
    109         dx.x = dx.z;
    110         dy.x = dy.z;
    111     }
    112     res.x = dx.x;
    113     res.y = dy.x;
    114 #endif
    115 
    116     return res;
    117 }
    118 
    119 __kernel void stage1_with_sobel(__global const uchar *src, int src_step, int src_offset, int rows, int cols,
    120                                 __global uchar *map, int map_step, int map_offset,
    121                                 float low_thr, float high_thr)
    122 {
    123     __local floatN smem[(GRP_SIZEX + 4) * (GRP_SIZEY + 4)];
    124 
    125     int lidx = get_local_id(0);
    126     int lidy = get_local_id(1);
    127 
    128     int start_x = GRP_SIZEX * get_group_id(0);
    129     int start_y = GRP_SIZEY * get_group_id(1);
    130 
    131     int i = lidx + lidy * GRP_SIZEX;
    132     for (int j = i;  j < (GRP_SIZEX + 4) * (GRP_SIZEY + 4); j += GRP_SIZEX * GRP_SIZEY)
    133     {
    134         int x = clamp(start_x - 2 + (j % (GRP_SIZEX + 4)), 0, cols - 1);
    135         int y = clamp(start_y - 2 + (j / (GRP_SIZEX + 4)), 0, rows - 1);
    136         smem[j] = loadpix(src + mad24(y, src_step, mad24(x, cn * (int)sizeof(TYPE), src_offset)));
    137     }
    138 
    139     barrier(CLK_LOCAL_MEM_FENCE);
    140 
    141     //// Sobel, Magnitude
    142     //
    143 
    144     __local float mag[(GRP_SIZEX + 2) * (GRP_SIZEY + 2)];
    145 
    146     lidx++;
    147     lidy++;
    148 
    149     if (i < GRP_SIZEX + 2)
    150     {
    151         int grp_sizey = min(GRP_SIZEY + 1, rows - start_y);
    152         mag[i] = (sobel(i, smem)).z;
    153         mag[i + grp_sizey * (GRP_SIZEX + 2)] = (sobel(i + grp_sizey * (GRP_SIZEX + 4), smem)).z;
    154     }
    155     if (i < GRP_SIZEY + 2)
    156     {
    157         int grp_sizex = min(GRP_SIZEX + 1, cols - start_x);
    158         mag[i * (GRP_SIZEX + 2)] = (sobel(i * (GRP_SIZEX + 4), smem)).z;
    159         mag[i * (GRP_SIZEX + 2) + grp_sizex] = (sobel(i * (GRP_SIZEX + 4) + grp_sizex, smem)).z;
    160     }
    161 
    162     int idx = lidx + lidy * (GRP_SIZEX + 4);
    163     i = lidx + lidy * (GRP_SIZEX + 2);
    164 
    165     float3 res = sobel(idx, smem);
    166     mag[i] = res.z;
    167     barrier(CLK_LOCAL_MEM_FENCE);
    168 
    169     int x = (int) res.x;
    170     int y = (int) res.y;
    171 
    172     //// Threshold + Non maxima suppression
    173     //
    174 
    175     /*
    176         Sector numbers
    177 
    178         3   2   1
    179          *  *  *
    180           * * *
    181         0*******0
    182           * * *
    183          *  *  *
    184         1   2   3
    185 
    186         We need to determine arctg(dy / dx) to one of the four directions: 0, 45, 90 or 135 degrees.
    187         Therefore if abs(dy / dx) belongs to the interval
    188         [0, tg(22.5)]           -> 0 direction
    189         [tg(22.5), tg(67.5)]    -> 1 or 3
    190         [tg(67,5), +oo)         -> 2
    191 
    192         Since tg(67.5) = 1 / tg(22.5), if we take
    193         a = abs(dy / dx) * tg(22.5) and b = abs(dy / dx) * tg(67.5)
    194         we can get another intervals
    195 
    196         in case a:
    197         [0, tg(22.5)^2]     -> 0
    198         [tg(22.5)^2, 1]     -> 1, 3
    199         [1, +oo)            -> 2
    200 
    201         in case b:
    202         [0, 1]              -> 0
    203         [1, tg(67.5)^2]     -> 1,3
    204         [tg(67.5)^2, +oo)   -> 2
    205 
    206         that can help to find direction without conditions.
    207 
    208         0 - might belong to an edge
    209         1 - pixel doesn't belong to an edge
    210         2 - belong to an edge
    211     */
    212 
    213     int gidx = get_global_id(0);
    214     int gidy = get_global_id(1);
    215 
    216     if (gidx >= cols || gidy >= rows)
    217         return;
    218 
    219     float mag0 = mag[i];
    220 
    221     int value = 1;
    222     if (mag0 > low_thr)
    223     {
    224         int a = (y / (float)x) * TG22;
    225         int b = (y / (float)x) * TG67;
    226 
    227         a = min((int)abs(a), 1) + 1;
    228         b = min((int)abs(b), 1);
    229 
    230         //  a = { 1, 2 }
    231         //  b = { 0, 1 }
    232         //  a * b = { 0, 1, 2 } - directions that we need ( + 3 if x ^ y < 0)
    233 
    234         int dir3 = (a * b) & (((x ^ y) & 0x80000000) >> 31); // if a = 1, b = 1, dy ^ dx < 0
    235         int dir = a * b + 2 * dir3;
    236         float prev_mag = mag[(lidy + prev[dir][0]) * (GRP_SIZEX + 2) + lidx + prev[dir][1]];
    237         float next_mag = mag[(lidy + next[dir][0]) * (GRP_SIZEX + 2) + lidx + next[dir][1]] + (dir & 1);
    238 
    239         if (mag0 > prev_mag && mag0 >= next_mag)
    240         {
    241             value = (mag0 > high_thr) ? 2 : 0;
    242         }
    243     }
    244 
    245     storepix(value, map + mad24(gidy, map_step, mad24(gidx, (int)sizeof(int), map_offset)));
    246 }
    247 
    248 #elif defined WITHOUT_SOBEL
    249 
    250 /*
    251     stage1_without_sobel:
    252         Calc magnitudes
    253         Non maxima suppression
    254         Double thresholding
    255 */
    256 
    257 #define loadpix(addr) (__global short *)(addr)
    258 #define storepix(val, addr) *(__global int *)(addr) = (int)(val)
    259 
    260 #ifdef L2GRAD
    261 #define dist(x, y) ((int)(x) * (x) + (int)(y) * (y))
    262 #else
    263 #define dist(x, y) (abs(x) + abs(y))
    264 #endif
    265 
    266 __constant int prev[4][2] = {
    267     { 0, -1 },
    268     { -1, -1 },
    269     { -1, 0 },
    270     { -1, 1 }
    271 };
    272 
    273 __constant int next[4][2] = {
    274     { 0, 1 },
    275     { 1, 1 },
    276     { 1, 0 },
    277     { 1, -1 }
    278 };
    279 
    280 __kernel void stage1_without_sobel(__global const uchar *dxptr, int dx_step, int dx_offset,
    281                                    __global const uchar *dyptr, int dy_step, int dy_offset,
    282                                    __global uchar *map, int map_step, int map_offset, int rows, int cols,
    283                                    int low_thr, int high_thr)
    284 {
    285     int start_x = get_group_id(0) * GRP_SIZEX;
    286     int start_y = get_group_id(1) * GRP_SIZEY;
    287 
    288     int lidx = get_local_id(0);
    289     int lidy = get_local_id(1);
    290 
    291     __local int mag[(GRP_SIZEX + 2) * (GRP_SIZEY + 2)];
    292     __local short2 sigma[(GRP_SIZEX + 2) * (GRP_SIZEY + 2)];
    293 
    294 #pragma unroll
    295     for (int i = lidx + lidy * GRP_SIZEX; i < (GRP_SIZEX + 2) * (GRP_SIZEY + 2); i += GRP_SIZEX * GRP_SIZEY)
    296     {
    297         int x = clamp(start_x - 1 + i % (GRP_SIZEX + 2), 0, cols - 1);
    298         int y = clamp(start_y - 1 + i / (GRP_SIZEX + 2), 0, rows - 1);
    299 
    300         int dx_index = mad24(y, dx_step, mad24(x, cn * (int)sizeof(short), dx_offset));
    301         int dy_index = mad24(y, dy_step, mad24(x, cn * (int)sizeof(short), dy_offset));
    302 
    303         __global short *dx = loadpix(dxptr + dx_index);
    304         __global short *dy = loadpix(dyptr + dy_index);
    305 
    306         int mag0 = dist(dx[0], dy[0]);
    307 #if cn > 1
    308         short cdx = dx[0], cdy = dy[0];
    309 #pragma unroll
    310         for (int j = 1; j < cn; ++j)
    311         {
    312             int mag1 = dist(dx[j], dy[j]);
    313             if (mag1 > mag0)
    314             {
    315                 mag0 = mag1;
    316                 cdx = dx[j];
    317                 cdy = dy[j];
    318             }
    319         }
    320         dx[0] = cdx;
    321         dy[0] = cdy;
    322 #endif
    323         mag[i] = mag0;
    324         sigma[i] = (short2)(dx[0], dy[0]);
    325     }
    326 
    327     barrier(CLK_LOCAL_MEM_FENCE);
    328 
    329     int gidx = get_global_id(0);
    330     int gidy = get_global_id(1);
    331 
    332     if (gidx >= cols || gidy >= rows)
    333         return;
    334 
    335     lidx++;
    336     lidy++;
    337 
    338     int mag0 = mag[lidx + lidy * (GRP_SIZEX + 2)];
    339     short x = (sigma[lidx + lidy * (GRP_SIZEX + 2)]).x;
    340     short y = (sigma[lidx + lidy * (GRP_SIZEX + 2)]).y;
    341 
    342     int value = 1;
    343     if (mag0 > low_thr)
    344     {
    345         int a = (y / (float)x) * TG22;
    346         int b = (y / (float)x) * TG67;
    347 
    348         a = min((int)abs(a), 1) + 1;
    349         b = min((int)abs(b), 1);
    350 
    351         int dir3 = (a * b) & (((x ^ y) & 0x80000000) >> 31);
    352         int dir = a * b + 2 * dir3;
    353         int prev_mag = mag[(lidy + prev[dir][0]) * (GRP_SIZEX + 2) + lidx + prev[dir][1]];
    354         int next_mag = mag[(lidy + next[dir][0]) * (GRP_SIZEX + 2) + lidx + next[dir][1]] + (dir & 1);
    355 
    356         if (mag0 > prev_mag && mag0 >= next_mag)
    357         {
    358             value = (mag0 > high_thr) ? 2 : 0;
    359         }
    360     }
    361 
    362     storepix(value, map + mad24(gidy, map_step, mad24(gidx, (int)sizeof(int), map_offset)));
    363 }
    364 
    365 #undef TG22
    366 #undef CANNY_SHIFT
    367 
    368 #elif defined STAGE2
    369 /*
    370     stage2:
    371         hysteresis (add edges labeled 0 if they are connected with an edge labeled 2)
    372 */
    373 
    374 #define loadpix(addr) *(__global int *)(addr)
    375 #define storepix(val, addr) *(__global int *)(addr) = (int)(val)
    376 #define LOCAL_TOTAL (LOCAL_X*LOCAL_Y)
    377 #define l_stack_size (4*LOCAL_TOTAL)
    378 #define p_stack_size 8
    379 
    380 __constant short move_dir[2][8] = {
    381     { -1, -1, -1, 0, 0, 1, 1, 1 },
    382     { -1, 0, 1, -1, 1, -1, 0, 1 }
    383 };
    384 
    385 __kernel void stage2_hysteresis(__global uchar *map_ptr, int map_step, int map_offset, int rows, int cols)
    386 {
    387     map_ptr += map_offset;
    388 
    389     int x = get_global_id(0);
    390     int y = get_global_id(1) * PIX_PER_WI;
    391 
    392     int lid = get_local_id(0) + get_local_id(1) * LOCAL_X;
    393 
    394     __local ushort2 l_stack[l_stack_size];
    395     __local int l_counter;
    396 
    397     if (lid == 0)
    398         l_counter = 0;
    399     barrier(CLK_LOCAL_MEM_FENCE);
    400 
    401     if (x < cols)
    402     {
    403         __global uchar* map = map_ptr + mad24(y, map_step, x * (int)sizeof(int));
    404 
    405         #pragma unroll
    406         for (int cy = 0; cy < PIX_PER_WI; ++cy)
    407         {
    408             if (y < rows)
    409             {
    410                 int type = loadpix(map);
    411                 if (type == 2)
    412                 {
    413                     l_stack[atomic_inc(&l_counter)] = (ushort2)(x, y);
    414                 }
    415 
    416                 y++;
    417                 map += map_step;
    418             }
    419         }
    420     }
    421     barrier(CLK_LOCAL_MEM_FENCE);
    422 
    423     ushort2 p_stack[p_stack_size];
    424     int p_counter = 0;
    425 
    426     while(l_counter != 0)
    427     {
    428         int mod = l_counter % LOCAL_TOTAL;
    429         int pix_per_thr = l_counter / LOCAL_TOTAL + ((lid < mod) ? 1 : 0);
    430 
    431         for (int i = 0; i < pix_per_thr; ++i)
    432         {
    433             int index = atomic_dec(&l_counter) - 1;
    434             if (index < 0)
    435                continue;
    436             ushort2 pos = l_stack[ index ];
    437 
    438             #pragma unroll
    439             for (int j = 0; j < 8; ++j)
    440             {
    441                 ushort posx = pos.x + move_dir[0][j];
    442                 ushort posy = pos.y + move_dir[1][j];
    443                 if (posx < 0 || posy < 0 || posx >= cols || posy >= rows)
    444                     continue;
    445                 __global uchar *addr = map_ptr + mad24(posy, map_step, posx * (int)sizeof(int));
    446                 int type = loadpix(addr);
    447                 if (type == 0)
    448                 {
    449                     p_stack[p_counter++] = (ushort2)(posx, posy);
    450                     storepix(2, addr);
    451                 }
    452             }
    453         }
    454         barrier(CLK_LOCAL_MEM_FENCE);
    455         if (l_counter < 0)
    456             l_counter = 0;
    457         barrier(CLK_LOCAL_MEM_FENCE);
    458 
    459         while (p_counter > 0)
    460         {
    461             l_stack[ atomic_inc(&l_counter) ] = p_stack[--p_counter];
    462         }
    463         barrier(CLK_LOCAL_MEM_FENCE);
    464     }
    465 }
    466 
    467 #elif defined GET_EDGES
    468 
    469 // Get the edge result. egde type of value 2 will be marked as an edge point and set to 255. Otherwise 0.
    470 // map      edge type mappings
    471 // dst      edge output
    472 
    473 __kernel void getEdges(__global const uchar *mapptr, int map_step, int map_offset, int rows, int cols,
    474                        __global uchar *dst, int dst_step, int dst_offset)
    475 {
    476     int x = get_global_id(0);
    477     int y = get_global_id(1) * PIX_PER_WI;
    478 
    479     if (x < cols)
    480     {
    481         int map_index = mad24(map_step, y, mad24(x, (int)sizeof(int), map_offset));
    482         int dst_index = mad24(dst_step, y, x + dst_offset);
    483 
    484         #pragma unroll
    485         for (int cy = 0; cy < PIX_PER_WI; ++cy)
    486         {
    487             if (y < rows)
    488             {
    489                 __global const int * map = (__global const int *)(mapptr + map_index);
    490                 dst[dst_index] = (uchar)(-(map[0] >> 1));
    491 
    492                 y++;
    493                 map_index += map_step;
    494                 dst_index += dst_step;
    495             }
    496         }
    497     }
    498 }
    499 
    500 #endif
    501