Home | History | Annotate | Download | only in opencl
      1 // OpenCL port of the FAST corner detector.
      2 // Copyright (C) 2014, Itseez Inc. See the license at http://opencv.org
      3 
      4 inline int cornerScore(__global const uchar* img, int step)
      5 {
      6     int k, tofs, v = img[0], a0 = 0, b0;
      7     int d[16];
      8     #define LOAD2(idx, ofs) \
      9         tofs = ofs; d[idx] = (short)(v - img[tofs]); d[idx+8] = (short)(v - img[-tofs])
     10     LOAD2(0, 3);
     11     LOAD2(1, -step+3);
     12     LOAD2(2, -step*2+2);
     13     LOAD2(3, -step*3+1);
     14     LOAD2(4, -step*3);
     15     LOAD2(5, -step*3-1);
     16     LOAD2(6, -step*2-2);
     17     LOAD2(7, -step-3);
     18 
     19     #pragma unroll
     20     for( k = 0; k < 16; k += 2 )
     21     {
     22         int a = min((int)d[(k+1)&15], (int)d[(k+2)&15]);
     23         a = min(a, (int)d[(k+3)&15]);
     24         a = min(a, (int)d[(k+4)&15]);
     25         a = min(a, (int)d[(k+5)&15]);
     26         a = min(a, (int)d[(k+6)&15]);
     27         a = min(a, (int)d[(k+7)&15]);
     28         a = min(a, (int)d[(k+8)&15]);
     29         a0 = max(a0, min(a, (int)d[k&15]));
     30         a0 = max(a0, min(a, (int)d[(k+9)&15]));
     31     }
     32 
     33     b0 = -a0;
     34     #pragma unroll
     35     for( k = 0; k < 16; k += 2 )
     36     {
     37         int b = max((int)d[(k+1)&15], (int)d[(k+2)&15]);
     38         b = max(b, (int)d[(k+3)&15]);
     39         b = max(b, (int)d[(k+4)&15]);
     40         b = max(b, (int)d[(k+5)&15]);
     41         b = max(b, (int)d[(k+6)&15]);
     42         b = max(b, (int)d[(k+7)&15]);
     43         b = max(b, (int)d[(k+8)&15]);
     44 
     45         b0 = min(b0, max(b, (int)d[k]));
     46         b0 = min(b0, max(b, (int)d[(k+9)&15]));
     47     }
     48 
     49     return -b0-1;
     50 }
     51 
     52 __kernel
     53 void FAST_findKeypoints(
     54     __global const uchar * _img, int step, int img_offset,
     55     int img_rows, int img_cols,
     56     volatile __global int* kp_loc,
     57     int max_keypoints, int threshold )
     58 {
     59     int j = get_global_id(0) + 3;
     60     int i = get_global_id(1) + 3;
     61 
     62     if (i < img_rows - 3 && j < img_cols - 3)
     63     {
     64         __global const uchar* img = _img + mad24(i, step, j + img_offset);
     65         int v = img[0], t0 = v - threshold, t1 = v + threshold;
     66         int k, tofs, v0, v1;
     67         int m0 = 0, m1 = 0;
     68 
     69         #define UPDATE_MASK(idx, ofs) \
     70             tofs = ofs; v0 = img[tofs]; v1 = img[-tofs]; \
     71             m0 |= ((v0 < t0) << idx) | ((v1 < t0) << (8 + idx)); \
     72             m1 |= ((v0 > t1) << idx) | ((v1 > t1) << (8 + idx))
     73 
     74         UPDATE_MASK(0, 3);
     75         if( (m0 | m1) == 0 )
     76             return;
     77 
     78         UPDATE_MASK(2, -step*2+2);
     79         UPDATE_MASK(4, -step*3);
     80         UPDATE_MASK(6, -step*2-2);
     81 
     82         #define EVEN_MASK (1+4+16+64)
     83 
     84         if( ((m0 | (m0 >> 8)) & EVEN_MASK) != EVEN_MASK &&
     85             ((m1 | (m1 >> 8)) & EVEN_MASK) != EVEN_MASK )
     86             return;
     87 
     88         UPDATE_MASK(1, -step+3);
     89         UPDATE_MASK(3, -step*3+1);
     90         UPDATE_MASK(5, -step*3-1);
     91         UPDATE_MASK(7, -step-3);
     92         if( ((m0 | (m0 >> 8)) & 255) != 255 &&
     93             ((m1 | (m1 >> 8)) & 255) != 255 )
     94             return;
     95 
     96         m0 |= m0 << 16;
     97         m1 |= m1 << 16;
     98 
     99         #define CHECK0(i) ((m0 & (511 << i)) == (511 << i))
    100         #define CHECK1(i) ((m1 & (511 << i)) == (511 << i))
    101 
    102         if( CHECK0(0) + CHECK0(1) + CHECK0(2) + CHECK0(3) +
    103             CHECK0(4) + CHECK0(5) + CHECK0(6) + CHECK0(7) +
    104             CHECK0(8) + CHECK0(9) + CHECK0(10) + CHECK0(11) +
    105             CHECK0(12) + CHECK0(13) + CHECK0(14) + CHECK0(15) +
    106 
    107             CHECK1(0) + CHECK1(1) + CHECK1(2) + CHECK1(3) +
    108             CHECK1(4) + CHECK1(5) + CHECK1(6) + CHECK1(7) +
    109             CHECK1(8) + CHECK1(9) + CHECK1(10) + CHECK1(11) +
    110             CHECK1(12) + CHECK1(13) + CHECK1(14) + CHECK1(15) == 0 )
    111             return;
    112 
    113         {
    114             int idx = atomic_inc(kp_loc);
    115             if( idx < max_keypoints )
    116             {
    117                 kp_loc[1 + 2*idx] = j;
    118                 kp_loc[2 + 2*idx] = i;
    119             }
    120         }
    121     }
    122 }
    123 
    124 ///////////////////////////////////////////////////////////////////////////
    125 // nonmaxSupression
    126 
    127 __kernel
    128 void FAST_nonmaxSupression(
    129     __global const int* kp_in, volatile __global int* kp_out,
    130     __global const uchar * _img, int step, int img_offset,
    131     int rows, int cols, int counter, int max_keypoints)
    132 {
    133     const int idx = get_global_id(0);
    134 
    135     if (idx < counter)
    136     {
    137         int x = kp_in[1 + 2*idx];
    138         int y = kp_in[2 + 2*idx];
    139         __global const uchar* img = _img + mad24(y, step, x + img_offset);
    140 
    141         int s = cornerScore(img, step);
    142 
    143         if( (x < 4 || s > cornerScore(img-1, step)) +
    144             (y < 4 || s > cornerScore(img-step, step)) != 2 )
    145             return;
    146         if( (x >= cols - 4 || s > cornerScore(img+1, step)) +
    147             (y >= rows - 4 || s > cornerScore(img+step, step)) +
    148             (x < 4 || y < 4 || s > cornerScore(img-step-1, step)) +
    149             (x >= cols - 4 || y < 4 || s > cornerScore(img-step+1, step)) +
    150             (x < 4 || y >= rows - 4 || s > cornerScore(img+step-1, step)) +
    151             (x >= cols - 4 || y >= rows - 4 || s > cornerScore(img+step+1, step)) == 6)
    152         {
    153             int new_idx = atomic_inc(kp_out);
    154             if( new_idx < max_keypoints )
    155             {
    156                 kp_out[1 + 3*new_idx] = x;
    157                 kp_out[2 + 3*new_idx] = y;
    158                 kp_out[3 + 3*new_idx] = s;
    159             }
    160         }
    161     }
    162 }
    163