1 // OpenCL port of the FAST corner detector. 2 // Copyright (C) 2014, Itseez Inc. See the license at http://opencv.org 3 4 inline int cornerScore(__global const uchar* img, int step) 5 { 6 int k, tofs, v = img[0], a0 = 0, b0; 7 int d[16]; 8 #define LOAD2(idx, ofs) \ 9 tofs = ofs; d[idx] = (short)(v - img[tofs]); d[idx+8] = (short)(v - img[-tofs]) 10 LOAD2(0, 3); 11 LOAD2(1, -step+3); 12 LOAD2(2, -step*2+2); 13 LOAD2(3, -step*3+1); 14 LOAD2(4, -step*3); 15 LOAD2(5, -step*3-1); 16 LOAD2(6, -step*2-2); 17 LOAD2(7, -step-3); 18 19 #pragma unroll 20 for( k = 0; k < 16; k += 2 ) 21 { 22 int a = min((int)d[(k+1)&15], (int)d[(k+2)&15]); 23 a = min(a, (int)d[(k+3)&15]); 24 a = min(a, (int)d[(k+4)&15]); 25 a = min(a, (int)d[(k+5)&15]); 26 a = min(a, (int)d[(k+6)&15]); 27 a = min(a, (int)d[(k+7)&15]); 28 a = min(a, (int)d[(k+8)&15]); 29 a0 = max(a0, min(a, (int)d[k&15])); 30 a0 = max(a0, min(a, (int)d[(k+9)&15])); 31 } 32 33 b0 = -a0; 34 #pragma unroll 35 for( k = 0; k < 16; k += 2 ) 36 { 37 int b = max((int)d[(k+1)&15], (int)d[(k+2)&15]); 38 b = max(b, (int)d[(k+3)&15]); 39 b = max(b, (int)d[(k+4)&15]); 40 b = max(b, (int)d[(k+5)&15]); 41 b = max(b, (int)d[(k+6)&15]); 42 b = max(b, (int)d[(k+7)&15]); 43 b = max(b, (int)d[(k+8)&15]); 44 45 b0 = min(b0, max(b, (int)d[k])); 46 b0 = min(b0, max(b, (int)d[(k+9)&15])); 47 } 48 49 return -b0-1; 50 } 51 52 __kernel 53 void FAST_findKeypoints( 54 __global const uchar * _img, int step, int img_offset, 55 int img_rows, int img_cols, 56 volatile __global int* kp_loc, 57 int max_keypoints, int threshold ) 58 { 59 int j = get_global_id(0) + 3; 60 int i = get_global_id(1) + 3; 61 62 if (i < img_rows - 3 && j < img_cols - 3) 63 { 64 __global const uchar* img = _img + mad24(i, step, j + img_offset); 65 int v = img[0], t0 = v - threshold, t1 = v + threshold; 66 int k, tofs, v0, v1; 67 int m0 = 0, m1 = 0; 68 69 #define UPDATE_MASK(idx, ofs) \ 70 tofs = ofs; v0 = img[tofs]; v1 = img[-tofs]; \ 71 m0 |= ((v0 < t0) << idx) | ((v1 < t0) << (8 + idx)); \ 72 m1 |= ((v0 > t1) << idx) | ((v1 > t1) << (8 + idx)) 73 74 UPDATE_MASK(0, 3); 75 if( (m0 | m1) == 0 ) 76 return; 77 78 UPDATE_MASK(2, -step*2+2); 79 UPDATE_MASK(4, -step*3); 80 UPDATE_MASK(6, -step*2-2); 81 82 #define EVEN_MASK (1+4+16+64) 83 84 if( ((m0 | (m0 >> 8)) & EVEN_MASK) != EVEN_MASK && 85 ((m1 | (m1 >> 8)) & EVEN_MASK) != EVEN_MASK ) 86 return; 87 88 UPDATE_MASK(1, -step+3); 89 UPDATE_MASK(3, -step*3+1); 90 UPDATE_MASK(5, -step*3-1); 91 UPDATE_MASK(7, -step-3); 92 if( ((m0 | (m0 >> 8)) & 255) != 255 && 93 ((m1 | (m1 >> 8)) & 255) != 255 ) 94 return; 95 96 m0 |= m0 << 16; 97 m1 |= m1 << 16; 98 99 #define CHECK0(i) ((m0 & (511 << i)) == (511 << i)) 100 #define CHECK1(i) ((m1 & (511 << i)) == (511 << i)) 101 102 if( CHECK0(0) + CHECK0(1) + CHECK0(2) + CHECK0(3) + 103 CHECK0(4) + CHECK0(5) + CHECK0(6) + CHECK0(7) + 104 CHECK0(8) + CHECK0(9) + CHECK0(10) + CHECK0(11) + 105 CHECK0(12) + CHECK0(13) + CHECK0(14) + CHECK0(15) + 106 107 CHECK1(0) + CHECK1(1) + CHECK1(2) + CHECK1(3) + 108 CHECK1(4) + CHECK1(5) + CHECK1(6) + CHECK1(7) + 109 CHECK1(8) + CHECK1(9) + CHECK1(10) + CHECK1(11) + 110 CHECK1(12) + CHECK1(13) + CHECK1(14) + CHECK1(15) == 0 ) 111 return; 112 113 { 114 int idx = atomic_inc(kp_loc); 115 if( idx < max_keypoints ) 116 { 117 kp_loc[1 + 2*idx] = j; 118 kp_loc[2 + 2*idx] = i; 119 } 120 } 121 } 122 } 123 124 /////////////////////////////////////////////////////////////////////////// 125 // nonmaxSupression 126 127 __kernel 128 void FAST_nonmaxSupression( 129 __global const int* kp_in, volatile __global int* kp_out, 130 __global const uchar * _img, int step, int img_offset, 131 int rows, int cols, int counter, int max_keypoints) 132 { 133 const int idx = get_global_id(0); 134 135 if (idx < counter) 136 { 137 int x = kp_in[1 + 2*idx]; 138 int y = kp_in[2 + 2*idx]; 139 __global const uchar* img = _img + mad24(y, step, x + img_offset); 140 141 int s = cornerScore(img, step); 142 143 if( (x < 4 || s > cornerScore(img-1, step)) + 144 (y < 4 || s > cornerScore(img-step, step)) != 2 ) 145 return; 146 if( (x >= cols - 4 || s > cornerScore(img+1, step)) + 147 (y >= rows - 4 || s > cornerScore(img+step, step)) + 148 (x < 4 || y < 4 || s > cornerScore(img-step-1, step)) + 149 (x >= cols - 4 || y < 4 || s > cornerScore(img-step+1, step)) + 150 (x < 4 || y >= rows - 4 || s > cornerScore(img+step-1, step)) + 151 (x >= cols - 4 || y >= rows - 4 || s > cornerScore(img+step+1, step)) == 6) 152 { 153 int new_idx = atomic_inc(kp_out); 154 if( new_idx < max_keypoints ) 155 { 156 kp_out[1 + 3*new_idx] = x; 157 kp_out[2 + 3*new_idx] = y; 158 kp_out[3 + 3*new_idx] = s; 159 } 160 } 161 } 162 } 163