Home | History | Annotate | Download | only in opencl
      1 // OpenCL port of the ORB feature detector and descriptor extractor
      2 // Copyright (C) 2014, Itseez Inc. See the license at http://opencv.org
      3 //
      4 // The original code has been contributed by Peter Andreas Entschev, peter (a] entschev.com
      5 
      6 #define LAYERINFO_SIZE 1
      7 #define LAYERINFO_OFS 0
      8 #define KEYPOINT_SIZE 3
      9 #define ORIENTED_KEYPOINT_SIZE 4
     10 #define KEYPOINT_X 0
     11 #define KEYPOINT_Y 1
     12 #define KEYPOINT_Z 2
     13 #define KEYPOINT_ANGLE 3
     14 
     15 /////////////////////////////////////////////////////////////
     16 
     17 #ifdef ORB_RESPONSES
     18 
     19 __kernel void
     20 ORB_HarrisResponses(__global const uchar* imgbuf, int imgstep, int imgoffset0,
     21                     __global const int* layerinfo, __global const int* keypoints,
     22                     __global float* responses, int nkeypoints )
     23 {
     24     int idx = get_global_id(0);
     25     if( idx < nkeypoints )
     26     {
     27         __global const int* kpt = keypoints + idx*KEYPOINT_SIZE;
     28         __global const int* layer = layerinfo + kpt[KEYPOINT_Z]*LAYERINFO_SIZE;
     29         __global const uchar* img = imgbuf + imgoffset0 + layer[LAYERINFO_OFS] +
     30             (kpt[KEYPOINT_Y] - blockSize/2)*imgstep + (kpt[KEYPOINT_X] - blockSize/2);
     31 
     32         int i, j;
     33         int a = 0, b = 0, c = 0;
     34         for( i = 0; i < blockSize; i++, img += imgstep-blockSize )
     35         {
     36             for( j = 0; j < blockSize; j++, img++ )
     37             {
     38                 int Ix = (img[1] - img[-1])*2 + img[-imgstep+1] - img[-imgstep-1] + img[imgstep+1] - img[imgstep-1];
     39                 int Iy = (img[imgstep] - img[-imgstep])*2 + img[imgstep-1] - img[-imgstep-1] + img[imgstep+1] - img[-imgstep+1];
     40                 a += Ix*Ix;
     41                 b += Iy*Iy;
     42                 c += Ix*Iy;
     43             }
     44         }
     45         responses[idx] = ((float)a * b - (float)c * c - HARRIS_K * (float)(a + b) * (a + b))*scale_sq_sq;
     46     }
     47 }
     48 
     49 #endif
     50 
     51 /////////////////////////////////////////////////////////////
     52 
     53 #ifdef ORB_ANGLES
     54 
     55 #define _DBL_EPSILON 2.2204460492503131e-16f
     56 #define atan2_p1 (0.9997878412794807f*57.29577951308232f)
     57 #define atan2_p3 (-0.3258083974640975f*57.29577951308232f)
     58 #define atan2_p5 (0.1555786518463281f*57.29577951308232f)
     59 #define atan2_p7 (-0.04432655554792128f*57.29577951308232f)
     60 
     61 inline float fastAtan2( float y, float x )
     62 {
     63     float ax = fabs(x), ay = fabs(y);
     64     float a, c, c2;
     65     if( ax >= ay )
     66     {
     67         c = ay/(ax + _DBL_EPSILON);
     68         c2 = c*c;
     69         a = (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c;
     70     }
     71     else
     72     {
     73         c = ax/(ay + _DBL_EPSILON);
     74         c2 = c*c;
     75         a = 90.f - (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c;
     76     }
     77     if( x < 0 )
     78         a = 180.f - a;
     79     if( y < 0 )
     80         a = 360.f - a;
     81     return a;
     82 }
     83 
     84 
     85 __kernel void
     86 ORB_ICAngle(__global const uchar* imgbuf, int imgstep, int imgoffset0,
     87             __global const int* layerinfo, __global const int* keypoints,
     88             __global float* responses, const __global int* u_max,
     89             int nkeypoints, int half_k )
     90 {
     91     int idx = get_global_id(0);
     92     if( idx < nkeypoints )
     93     {
     94         __global const int* kpt = keypoints + idx*KEYPOINT_SIZE;
     95 
     96         __global const int* layer = layerinfo + kpt[KEYPOINT_Z]*LAYERINFO_SIZE;
     97         __global const uchar* center = imgbuf + imgoffset0 + layer[LAYERINFO_OFS] +
     98             kpt[KEYPOINT_Y]*imgstep + kpt[KEYPOINT_X];
     99 
    100         int u, v, m_01 = 0, m_10 = 0;
    101 
    102         // Treat the center line differently, v=0
    103         for( u = -half_k; u <= half_k; u++ )
    104             m_10 += u * center[u];
    105 
    106         // Go line by line in the circular patch
    107         for( v = 1; v <= half_k; v++ )
    108         {
    109             // Proceed over the two lines
    110             int v_sum = 0;
    111             int d = u_max[v];
    112             for( u = -d; u <= d; u++ )
    113             {
    114                 int val_plus = center[u + v*imgstep], val_minus = center[u - v*imgstep];
    115                 v_sum += (val_plus - val_minus);
    116                 m_10 += u * (val_plus + val_minus);
    117             }
    118             m_01 += v * v_sum;
    119         }
    120 
    121         // we do not use OpenCL's atan2 intrinsic,
    122         // because we want to get _exactly_ the same results as the CPU version
    123         responses[idx] = fastAtan2((float)m_01, (float)m_10);
    124     }
    125 }
    126 
    127 #endif
    128 
    129 /////////////////////////////////////////////////////////////
    130 
    131 #ifdef ORB_DESCRIPTORS
    132 
    133 __kernel void
    134 ORB_computeDescriptor(__global const uchar* imgbuf, int imgstep, int imgoffset0,
    135                       __global const int* layerinfo, __global const int* keypoints,
    136                       __global uchar* _desc, const __global int* pattern,
    137                       int nkeypoints, int dsize )
    138 {
    139     int idx = get_global_id(0);
    140     if( idx < nkeypoints )
    141     {
    142         int i;
    143         __global const int* kpt = keypoints + idx*ORIENTED_KEYPOINT_SIZE;
    144 
    145         __global const int* layer = layerinfo + kpt[KEYPOINT_Z]*LAYERINFO_SIZE;
    146         __global const uchar* center = imgbuf + imgoffset0 + layer[LAYERINFO_OFS] +
    147                                 kpt[KEYPOINT_Y]*imgstep + kpt[KEYPOINT_X];
    148         float angle = as_float(kpt[KEYPOINT_ANGLE]);
    149         angle *= 0.01745329251994329547f;
    150 
    151         float cosa;
    152         float sina = sincos(angle, &cosa);
    153 
    154         __global uchar* desc = _desc + idx*dsize;
    155 
    156         #define GET_VALUE(idx) \
    157             center[mad24(convert_int_rte(pattern[(idx)*2] * sina + pattern[(idx)*2+1] * cosa), imgstep, \
    158                         convert_int_rte(pattern[(idx)*2] * cosa - pattern[(idx)*2+1] * sina))]
    159 
    160         for( i = 0; i < dsize; i++ )
    161         {
    162             int val;
    163         #if WTA_K == 2
    164             int t0, t1;
    165 
    166             t0 = GET_VALUE(0); t1 = GET_VALUE(1);
    167             val = t0 < t1;
    168 
    169             t0 = GET_VALUE(2); t1 = GET_VALUE(3);
    170             val |= (t0 < t1) << 1;
    171 
    172             t0 = GET_VALUE(4); t1 = GET_VALUE(5);
    173             val |= (t0 < t1) << 2;
    174 
    175             t0 = GET_VALUE(6); t1 = GET_VALUE(7);
    176             val |= (t0 < t1) << 3;
    177 
    178             t0 = GET_VALUE(8); t1 = GET_VALUE(9);
    179             val |= (t0 < t1) << 4;
    180 
    181             t0 = GET_VALUE(10); t1 = GET_VALUE(11);
    182             val |= (t0 < t1) << 5;
    183 
    184             t0 = GET_VALUE(12); t1 = GET_VALUE(13);
    185             val |= (t0 < t1) << 6;
    186 
    187             t0 = GET_VALUE(14); t1 = GET_VALUE(15);
    188             val |= (t0 < t1) << 7;
    189 
    190             pattern += 16*2;
    191 
    192         #elif WTA_K == 3
    193             int t0, t1, t2;
    194 
    195             t0 = GET_VALUE(0); t1 = GET_VALUE(1); t2 = GET_VALUE(2);
    196             val = t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0);
    197 
    198             t0 = GET_VALUE(3); t1 = GET_VALUE(4); t2 = GET_VALUE(5);
    199             val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 2;
    200 
    201             t0 = GET_VALUE(6); t1 = GET_VALUE(7); t2 = GET_VALUE(8);
    202             val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 4;
    203 
    204             t0 = GET_VALUE(9); t1 = GET_VALUE(10); t2 = GET_VALUE(11);
    205             val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 6;
    206 
    207             pattern += 12*2;
    208 
    209         #elif WTA_K == 4
    210             int t0, t1, t2, t3, k;
    211             int a, b;
    212 
    213             t0 = GET_VALUE(0); t1 = GET_VALUE(1);
    214             t2 = GET_VALUE(2); t3 = GET_VALUE(3);
    215             a = 0, b = 2;
    216             if( t1 > t0 ) t0 = t1, a = 1;
    217             if( t3 > t2 ) t2 = t3, b = 3;
    218             k = t0 > t2 ? a : b;
    219             val = k;
    220 
    221             t0 = GET_VALUE(4); t1 = GET_VALUE(5);
    222             t2 = GET_VALUE(6); t3 = GET_VALUE(7);
    223             a = 0, b = 2;
    224             if( t1 > t0 ) t0 = t1, a = 1;
    225             if( t3 > t2 ) t2 = t3, b = 3;
    226             k = t0 > t2 ? a : b;
    227             val |= k << 2;
    228 
    229             t0 = GET_VALUE(8); t1 = GET_VALUE(9);
    230             t2 = GET_VALUE(10); t3 = GET_VALUE(11);
    231             a = 0, b = 2;
    232             if( t1 > t0 ) t0 = t1, a = 1;
    233             if( t3 > t2 ) t2 = t3, b = 3;
    234             k = t0 > t2 ? a : b;
    235             val |= k << 4;
    236 
    237             t0 = GET_VALUE(12); t1 = GET_VALUE(13);
    238             t2 = GET_VALUE(14); t3 = GET_VALUE(15);
    239             a = 0, b = 2;
    240             if( t1 > t0 ) t0 = t1, a = 1;
    241             if( t3 > t2 ) t2 = t3, b = 3;
    242             k = t0 > t2 ? a : b;
    243             val |= k << 6;
    244 
    245             pattern += 16*2;
    246         #else
    247             #error "unknown/undefined WTA_K value; should be 2, 3 or 4"
    248         #endif
    249             desc[i] = (uchar)val;
    250         }
    251     }
    252 }
    253 
    254 #endif
    255