Home | History | Annotate | Download | only in opencl
      1 // This file is part of OpenCV project.
      2 // It is subject to the license terms in the LICENSE file found in the top-level directory
      3 // of this distribution and at http://opencv.org/license.html.
      4 
      5 // Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
      6 // Third party copyrights are property of their respective owners.
      7 
      8 ///////////////////////////////////////////////////////////////////////////////////////////////////
      9 /////////////////////////////////Macro for border type////////////////////////////////////////////
     10 /////////////////////////////////////////////////////////////////////////////////////////////////
     11 
     12 #ifdef BORDER_CONSTANT
     13 //CCCCCC|abcdefgh|CCCCCCC
     14 #define EXTRAPOLATE(x, maxV)
     15 #elif defined BORDER_REPLICATE
     16 //aaaaaa|abcdefgh|hhhhhhh
     17 #define EXTRAPOLATE(x, maxV) \
     18     { \
     19         (x) = clamp((x), 0, (maxV)-1); \
     20     }
     21 #elif defined BORDER_WRAP
     22 //cdefgh|abcdefgh|abcdefg
     23 #define EXTRAPOLATE(x, maxV) \
     24     { \
     25         (x) = ( (x) + (maxV) ) % (maxV); \
     26     }
     27 #elif defined BORDER_REFLECT
     28 //fedcba|abcdefgh|hgfedcb
     29 #define EXTRAPOLATE(x, maxV) \
     30     { \
     31         (x) = min( mad24((maxV)-1,2,-(x))+1 , max((x),-(x)-1) ); \
     32     }
     33 #elif defined BORDER_REFLECT_101 || defined BORDER_REFLECT101
     34 //gfedcb|abcdefgh|gfedcba
     35 #define EXTRAPOLATE(x, maxV) \
     36     { \
     37         (x) = min( mad24((maxV)-1,2,-(x)), max((x),-(x)) ); \
     38     }
     39 #else
     40 #error No extrapolation method
     41 #endif
     42 
     43 #define SRC(_x,_y) convert_float(((global SRCTYPE*)(Src+(_y)*src_step))[_x])
     44 
     45 #ifdef BORDER_CONSTANT
     46 //CCCCCC|abcdefgh|CCCCCCC
     47 #define ELEM(_x,_y,r_edge,t_edge,const_v) (_x)<0 | (_x) >= (r_edge) | (_y)<0 | (_y) >= (t_edge) ? (const_v) : SRC((_x),(_y))
     48 #else
     49 #define ELEM(_x,_y,r_edge,t_edge,const_v) SRC((_x),(_y))
     50 #endif
     51 
     52 #define DSTX(_x,_y) (((global float*)(DstX+DstXOffset+(_y)*DstXPitch))[_x])
     53 #define DSTY(_x,_y) (((global float*)(DstY+DstYOffset+(_y)*DstYPitch))[_x])
     54 
     55 #define INIT_AND_READ_LOCAL_SOURCE(width, height, fill_const, kernel_border) \
     56     int srcX = x + srcOffsetX - (kernel_border); \
     57     int srcY = y + srcOffsetY - (kernel_border); \
     58     int xb = srcX; \
     59     int yb = srcY; \
     60     \
     61     EXTRAPOLATE(xb, (width)); \
     62     EXTRAPOLATE(yb, (height)); \
     63     lsmem[liy][lix] = ELEM(xb, yb, (width), (height), (fill_const) ); \
     64     \
     65     if(lix < ((kernel_border)*2)) \
     66     { \
     67         int xb = srcX+BLK_X; \
     68         EXTRAPOLATE(xb,(width)); \
     69         lsmem[liy][lix+BLK_X] = ELEM(xb, yb, (width), (height), (fill_const) ); \
     70     } \
     71     if(liy< ((kernel_border)*2)) \
     72     { \
     73         int yb = srcY+BLK_Y; \
     74         EXTRAPOLATE(yb, (height)); \
     75         lsmem[liy+BLK_Y][lix] = ELEM(xb, yb, (width), (height), (fill_const) ); \
     76     } \
     77     if(lix<((kernel_border)*2) && liy<((kernel_border)*2)) \
     78     { \
     79         int xb = srcX+BLK_X; \
     80         int yb = srcY+BLK_Y; \
     81         EXTRAPOLATE(xb,(width)); \
     82         EXTRAPOLATE(yb,(height)); \
     83         lsmem[liy+BLK_Y][lix+BLK_X] = ELEM(xb, yb, (width), (height), (fill_const) ); \
     84     }
     85 
     86 __kernel void sobel3(__global const uchar * Src, int src_step, int srcOffsetX, int srcOffsetY,
     87                      __global uchar * DstX, int DstXPitch, int DstXOffset,
     88                      __global uchar * DstY, int DstYPitch, int DstYOffset, int dstHeight, int dstWidth,
     89                      int height, int width, float scale)
     90 {
     91     __local float lsmem[BLK_Y+2][BLK_X+2];
     92 
     93     int lix = get_local_id(0);
     94     int liy = get_local_id(1);
     95 
     96     int x = (int)get_global_id(0);
     97     int y = (int)get_global_id(1);
     98 
     99     INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 1)
    100     barrier(CLK_LOCAL_MEM_FENCE);
    101 
    102     if( x >= dstWidth || y >=dstHeight )  return;
    103 
    104     float u1 = lsmem[liy][lix];
    105     float u2 = lsmem[liy][lix+1];
    106     float u3 = lsmem[liy][lix+2];
    107 
    108     float m1 = lsmem[liy+1][lix];
    109     float m3 = lsmem[liy+1][lix+2];
    110 
    111     float b1 = lsmem[liy+2][lix];
    112     float b2 = lsmem[liy+2][lix+1];
    113     float b3 = lsmem[liy+2][lix+2];
    114 
    115     //calc and store dx and dy;//
    116 #ifdef SCHARR
    117     DSTX(x,y) = mad(10.0f, m3 - m1, 3.0f * (u3 - u1 + b3 - b1)) * scale;
    118     DSTY(x,y) = mad(10.0f, b2 - u2, 3.0f * (b1 - u1 + b3 - u3)) * scale;
    119 #else
    120     DSTX(x,y) = mad(2.0f, m3 - m1, u3 - u1 + b3 - b1) * scale;
    121     DSTY(x,y) = mad(2.0f, b2 - u2, b1 - u1 + b3 - u3) * scale;
    122 #endif
    123 }
    124 
    125 __kernel void sobel5(__global const uchar * Src, int src_step, int srcOffsetX, int srcOffsetY,
    126                      __global uchar * DstX, int DstXPitch, int DstXOffset,
    127                      __global uchar * DstY, int DstYPitch, int DstYOffset, int dstHeight, int dstWidth,
    128                      int height, int width, float scale)
    129 {
    130     __local float lsmem[BLK_Y+4][BLK_X+4];
    131 
    132     int lix = get_local_id(0);
    133     int liy = get_local_id(1);
    134 
    135     int x = (int)get_global_id(0);
    136     int y = (int)get_global_id(1);
    137 
    138     INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 2)
    139     barrier(CLK_LOCAL_MEM_FENCE);
    140 
    141     if( x >= dstWidth || y >=dstHeight )  return;
    142 
    143     float t1 = lsmem[liy][lix];
    144     float t2 = lsmem[liy][lix+1];
    145     float t3 = lsmem[liy][lix+2];
    146     float t4 = lsmem[liy][lix+3];
    147     float t5 = lsmem[liy][lix+4];
    148 
    149     float u1 = lsmem[liy+1][lix];
    150     float u2 = lsmem[liy+1][lix+1];
    151     float u3 = lsmem[liy+1][lix+2];
    152     float u4 = lsmem[liy+1][lix+3];
    153     float u5 = lsmem[liy+1][lix+4];
    154 
    155     float m1 = lsmem[liy+2][lix];
    156     float m2 = lsmem[liy+2][lix+1];
    157     float m4 = lsmem[liy+2][lix+3];
    158     float m5 = lsmem[liy+2][lix+4];
    159 
    160     float l1 = lsmem[liy+3][lix];
    161     float l2 = lsmem[liy+3][lix+1];
    162     float l3 = lsmem[liy+3][lix+2];
    163     float l4 = lsmem[liy+3][lix+3];
    164     float l5 = lsmem[liy+3][lix+4];
    165 
    166     float b1 = lsmem[liy+4][lix];
    167     float b2 = lsmem[liy+4][lix+1];
    168     float b3 = lsmem[liy+4][lix+2];
    169     float b4 = lsmem[liy+4][lix+3];
    170     float b5 = lsmem[liy+4][lix+4];
    171 
    172     //calc and store dx and dy;//
    173     DSTX(x,y) = scale *
    174         mad(12.0f, m4 - m2,
    175             mad(6.0f, m5 - m1,
    176                 mad(8.0f, u4 - u2 + l4 - l2,
    177                     mad(4.0f, u5 - u1 + l5 - l1,
    178                         mad(2.0f, t4 - t2 + b4 - b2, t5 - t1 + b5 - b1 )
    179                         )
    180                     )
    181                 )
    182             );
    183 
    184     DSTY(x,y) = scale *
    185         mad(12.0f, l3 - u3,
    186             mad(6.0f, b3 - t3,
    187                 mad(8.0f, l2 - u2 + l4 - u4,
    188                     mad(4.0f, b2 - t2 + b4 - t4,
    189                         mad(2.0f, l1 - u1 + l5 - u5, b1 - t1 + b5 - t5 )
    190                         )
    191                     )
    192                 )
    193             );
    194 }
    195 
    196 __kernel void sobel7(__global const uchar * Src, int src_step, int srcOffsetX, int srcOffsetY,
    197                      __global uchar * DstX, int DstXPitch, int DstXOffset,
    198                      __global uchar * DstY, int DstYPitch, int DstYOffset, int dstHeight, int dstWidth,
    199                      int height, int width, float scale)
    200 {
    201     __local float lsmem[BLK_Y+6][BLK_X+6];
    202 
    203     int lix = get_local_id(0);
    204     int liy = get_local_id(1);
    205 
    206     int x = (int)get_global_id(0);
    207     int y = (int)get_global_id(1);
    208 
    209     INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 3)
    210     barrier(CLK_LOCAL_MEM_FENCE);
    211 
    212     if( x >= dstWidth || y >=dstHeight )  return;
    213 
    214     float tt1 = lsmem[liy][lix];
    215     float tt2 = lsmem[liy][lix+1];
    216     float tt3 = lsmem[liy][lix+2];
    217     float tt4 = lsmem[liy][lix+3];
    218     float tt5 = lsmem[liy][lix+4];
    219     float tt6 = lsmem[liy][lix+5];
    220     float tt7 = lsmem[liy][lix+6];
    221 
    222     float t1 = lsmem[liy+1][lix];
    223     float t2 = lsmem[liy+1][lix+1];
    224     float t3 = lsmem[liy+1][lix+2];
    225     float t4 = lsmem[liy+1][lix+3];
    226     float t5 = lsmem[liy+1][lix+4];
    227     float t6 = lsmem[liy+1][lix+5];
    228     float t7 = lsmem[liy+1][lix+6];
    229 
    230     float u1 = lsmem[liy+2][lix];
    231     float u2 = lsmem[liy+2][lix+1];
    232     float u3 = lsmem[liy+2][lix+2];
    233     float u4 = lsmem[liy+2][lix+3];
    234     float u5 = lsmem[liy+2][lix+4];
    235     float u6 = lsmem[liy+2][lix+5];
    236     float u7 = lsmem[liy+2][lix+6];
    237 
    238     float m1 = lsmem[liy+3][lix];
    239     float m2 = lsmem[liy+3][lix+1];
    240     float m3 = lsmem[liy+3][lix+2];
    241     float m5 = lsmem[liy+3][lix+4];
    242     float m6 = lsmem[liy+3][lix+5];
    243     float m7 = lsmem[liy+3][lix+6];
    244 
    245     float l1 = lsmem[liy+4][lix];
    246     float l2 = lsmem[liy+4][lix+1];
    247     float l3 = lsmem[liy+4][lix+2];
    248     float l4 = lsmem[liy+4][lix+3];
    249     float l5 = lsmem[liy+4][lix+4];
    250     float l6 = lsmem[liy+4][lix+5];
    251     float l7 = lsmem[liy+4][lix+6];
    252 
    253     float b1 = lsmem[liy+5][lix];
    254     float b2 = lsmem[liy+5][lix+1];
    255     float b3 = lsmem[liy+5][lix+2];
    256     float b4 = lsmem[liy+5][lix+3];
    257     float b5 = lsmem[liy+5][lix+4];
    258     float b6 = lsmem[liy+5][lix+5];
    259     float b7 = lsmem[liy+5][lix+6];
    260 
    261     float bb1 = lsmem[liy+6][lix];
    262     float bb2 = lsmem[liy+6][lix+1];
    263     float bb3 = lsmem[liy+6][lix+2];
    264     float bb4 = lsmem[liy+6][lix+3];
    265     float bb5 = lsmem[liy+6][lix+4];
    266     float bb6 = lsmem[liy+6][lix+5];
    267     float bb7 = lsmem[liy+6][lix+6];
    268 
    269     //calc and store dx and dy
    270     DSTX(x,y) = scale *
    271         mad(100.0f, m5 - m3,
    272             mad(80.0f, m6 - m2,
    273                 mad(20.0f, m7 - m1,
    274                     mad(75.0f, u5 - u3 + l5 - l3,
    275                         mad(60.0f, u6 - u2 + l6 - l2,
    276                             mad(15.0f, u7 - u1 + l7 - l1,
    277                                 mad(30.0f, t5 - t3 + b5 - b3,
    278                                     mad(24.0f, t6 - t2 + b6 - b2,
    279                                         mad(6.0f, t7 - t1 + b7 - b1,
    280                                             mad(5.0f, tt5 - tt3 + bb5 - bb3,
    281                                                 mad(4.0f, tt6 - tt2 + bb6 - bb2, tt7 - tt1 + bb7 - bb1 )
    282                                                 )
    283                                             )
    284                                         )
    285                                     )
    286                                 )
    287                             )
    288                         )
    289                     )
    290                 )
    291             );
    292 
    293     DSTY(x,y) = scale *
    294         mad(100.0f, l4 - u4,
    295             mad(80.0f, b4 - t4,
    296                 mad(20.0f, bb4 - tt4,
    297                     mad(75.0f, l5 - u5 + l3 - u3,
    298                         mad(60.0f, b5 - t5 + b3 - t3,
    299                             mad(15.0f, bb5 - tt5 + bb3 - tt3,
    300                                 mad(30.0f, l6 - u6 + l2 - u2,
    301                                     mad(24.0f, b6 - t6 + b2 - t2,
    302                                         mad(6.0f, bb6 - tt6 + bb2 - tt2,
    303                                             mad(5.0f, l7 - u7 + l1 - u1,
    304                                                 mad(4.0f, b7 - t7 + b1 - t1, bb7 - tt7 + bb1 - tt1 )
    305                                                 )
    306                                             )
    307                                         )
    308                                     )
    309                                 )
    310                             )
    311                         )
    312                     )
    313                 )
    314             );
    315 }
    316