1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15 // Copyright (C) 2013, OpenCV Foundation, all rights reserved. 16 // Third party copyrights are property of their respective owners. 17 // 18 // Redistribution and use in source and binary forms, with or without modification, 19 // are permitted provided that the following conditions are met: 20 // 21 // * Redistribution's of source code must retain the above copyright notice, 22 // this list of conditions and the following disclaimer. 23 // 24 // * Redistribution's in binary form must reproduce the above copyright notice, 25 // this list of conditions and the following disclaimer in the documentation 26 // and/or other materials provided with the distribution. 27 // 28 // * The name of the copyright holders may not be used to endorse or promote products 29 // derived from this software without specific prior written permission. 30 // 31 // This software is provided by the copyright holders and contributors "as is" and 32 // any express or implied warranties, including, but not limited to, the implied 33 // warranties of merchantability and fitness for a particular purpose are disclaimed. 34 // In no event shall the Intel Corporation or contributors be liable for any direct, 35 // indirect, incidental, special, exemplary, or consequential damages 36 // (including, but not limited to, procurement of substitute goods or services; 37 // loss of use, data, or profits; or business interruption) however caused 38 // and on any theory of liability, whether in contract, strict liability, 39 // or tort (including negligence or otherwise) arising in any way out of 40 // the use of this software, even if advised of the possibility of such damage. 41 // 42 //M*/ 43 44 /* 45 This is a variation of 46 "Stereo Processing by Semiglobal Matching and Mutual Information" 47 by Heiko Hirschmuller. 48 49 We match blocks rather than individual pixels, thus the algorithm is called 50 SGBM (Semi-global block matching) 51 */ 52 53 #include "precomp.hpp" 54 #include <limits.h> 55 56 namespace cv 57 { 58 59 typedef uchar PixType; 60 typedef short CostType; 61 typedef short DispType; 62 63 enum { NR = 16, NR2 = NR/2 }; 64 65 66 struct StereoSGBMParams 67 { 68 StereoSGBMParams() 69 { 70 minDisparity = numDisparities = 0; 71 SADWindowSize = 0; 72 P1 = P2 = 0; 73 disp12MaxDiff = 0; 74 preFilterCap = 0; 75 uniquenessRatio = 0; 76 speckleWindowSize = 0; 77 speckleRange = 0; 78 mode = StereoSGBM::MODE_SGBM; 79 } 80 81 StereoSGBMParams( int _minDisparity, int _numDisparities, int _SADWindowSize, 82 int _P1, int _P2, int _disp12MaxDiff, int _preFilterCap, 83 int _uniquenessRatio, int _speckleWindowSize, int _speckleRange, 84 int _mode ) 85 { 86 minDisparity = _minDisparity; 87 numDisparities = _numDisparities; 88 SADWindowSize = _SADWindowSize; 89 P1 = _P1; 90 P2 = _P2; 91 disp12MaxDiff = _disp12MaxDiff; 92 preFilterCap = _preFilterCap; 93 uniquenessRatio = _uniquenessRatio; 94 speckleWindowSize = _speckleWindowSize; 95 speckleRange = _speckleRange; 96 mode = _mode; 97 } 98 99 int minDisparity; 100 int numDisparities; 101 int SADWindowSize; 102 int preFilterCap; 103 int uniquenessRatio; 104 int P1; 105 int P2; 106 int speckleWindowSize; 107 int speckleRange; 108 int disp12MaxDiff; 109 int mode; 110 }; 111 112 /* 113 For each pixel row1[x], max(-maxD, 0) <= minX <= x < maxX <= width - max(0, -minD), 114 and for each disparity minD<=d<maxD the function 115 computes the cost (cost[(x-minX)*(maxD - minD) + (d - minD)]), depending on the difference between 116 row1[x] and row2[x-d]. The subpixel algorithm from 117 "Depth Discontinuities by Pixel-to-Pixel Stereo" by Stan Birchfield and C. Tomasi 118 is used, hence the suffix BT. 119 120 the temporary buffer should contain width2*2 elements 121 */ 122 static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y, 123 int minD, int maxD, CostType* cost, 124 PixType* buffer, const PixType* tab, 125 int tabOfs, int ) 126 { 127 int x, c, width = img1.cols, cn = img1.channels(); 128 int minX1 = std::max(-maxD, 0), maxX1 = width + std::min(minD, 0); 129 int minX2 = std::max(minX1 - maxD, 0), maxX2 = std::min(maxX1 - minD, width); 130 int D = maxD - minD, width1 = maxX1 - minX1, width2 = maxX2 - minX2; 131 const PixType *row1 = img1.ptr<PixType>(y), *row2 = img2.ptr<PixType>(y); 132 PixType *prow1 = buffer + width2*2, *prow2 = prow1 + width*cn*2; 133 134 tab += tabOfs; 135 136 for( c = 0; c < cn*2; c++ ) 137 { 138 prow1[width*c] = prow1[width*c + width-1] = 139 prow2[width*c] = prow2[width*c + width-1] = tab[0]; 140 } 141 142 int n1 = y > 0 ? -(int)img1.step : 0, s1 = y < img1.rows-1 ? (int)img1.step : 0; 143 int n2 = y > 0 ? -(int)img2.step : 0, s2 = y < img2.rows-1 ? (int)img2.step : 0; 144 145 if( cn == 1 ) 146 { 147 for( x = 1; x < width-1; x++ ) 148 { 149 prow1[x] = tab[(row1[x+1] - row1[x-1])*2 + row1[x+n1+1] - row1[x+n1-1] + row1[x+s1+1] - row1[x+s1-1]]; 150 prow2[width-1-x] = tab[(row2[x+1] - row2[x-1])*2 + row2[x+n2+1] - row2[x+n2-1] + row2[x+s2+1] - row2[x+s2-1]]; 151 152 prow1[x+width] = row1[x]; 153 prow2[width-1-x+width] = row2[x]; 154 } 155 } 156 else 157 { 158 for( x = 1; x < width-1; x++ ) 159 { 160 prow1[x] = tab[(row1[x*3+3] - row1[x*3-3])*2 + row1[x*3+n1+3] - row1[x*3+n1-3] + row1[x*3+s1+3] - row1[x*3+s1-3]]; 161 prow1[x+width] = tab[(row1[x*3+4] - row1[x*3-2])*2 + row1[x*3+n1+4] - row1[x*3+n1-2] + row1[x*3+s1+4] - row1[x*3+s1-2]]; 162 prow1[x+width*2] = tab[(row1[x*3+5] - row1[x*3-1])*2 + row1[x*3+n1+5] - row1[x*3+n1-1] + row1[x*3+s1+5] - row1[x*3+s1-1]]; 163 164 prow2[width-1-x] = tab[(row2[x*3+3] - row2[x*3-3])*2 + row2[x*3+n2+3] - row2[x*3+n2-3] + row2[x*3+s2+3] - row2[x*3+s2-3]]; 165 prow2[width-1-x+width] = tab[(row2[x*3+4] - row2[x*3-2])*2 + row2[x*3+n2+4] - row2[x*3+n2-2] + row2[x*3+s2+4] - row2[x*3+s2-2]]; 166 prow2[width-1-x+width*2] = tab[(row2[x*3+5] - row2[x*3-1])*2 + row2[x*3+n2+5] - row2[x*3+n2-1] + row2[x*3+s2+5] - row2[x*3+s2-1]]; 167 168 prow1[x+width*3] = row1[x*3]; 169 prow1[x+width*4] = row1[x*3+1]; 170 prow1[x+width*5] = row1[x*3+2]; 171 172 prow2[width-1-x+width*3] = row2[x*3]; 173 prow2[width-1-x+width*4] = row2[x*3+1]; 174 prow2[width-1-x+width*5] = row2[x*3+2]; 175 } 176 } 177 178 memset( cost, 0, width1*D*sizeof(cost[0]) ); 179 180 buffer -= minX2; 181 cost -= minX1*D + minD; // simplify the cost indices inside the loop 182 183 #if CV_SSE2 184 volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2); 185 #endif 186 187 #if 1 188 for( c = 0; c < cn*2; c++, prow1 += width, prow2 += width ) 189 { 190 int diff_scale = c < cn ? 0 : 2; 191 192 // precompute 193 // v0 = min(row2[x-1/2], row2[x], row2[x+1/2]) and 194 // v1 = max(row2[x-1/2], row2[x], row2[x+1/2]) and 195 for( x = minX2; x < maxX2; x++ ) 196 { 197 int v = prow2[x]; 198 int vl = x > 0 ? (v + prow2[x-1])/2 : v; 199 int vr = x < width-1 ? (v + prow2[x+1])/2 : v; 200 int v0 = std::min(vl, vr); v0 = std::min(v0, v); 201 int v1 = std::max(vl, vr); v1 = std::max(v1, v); 202 buffer[x] = (PixType)v0; 203 buffer[x + width2] = (PixType)v1; 204 } 205 206 for( x = minX1; x < maxX1; x++ ) 207 { 208 int u = prow1[x]; 209 int ul = x > 0 ? (u + prow1[x-1])/2 : u; 210 int ur = x < width-1 ? (u + prow1[x+1])/2 : u; 211 int u0 = std::min(ul, ur); u0 = std::min(u0, u); 212 int u1 = std::max(ul, ur); u1 = std::max(u1, u); 213 214 #if CV_SSE2 215 if( useSIMD ) 216 { 217 __m128i _u = _mm_set1_epi8((char)u), _u0 = _mm_set1_epi8((char)u0); 218 __m128i _u1 = _mm_set1_epi8((char)u1), z = _mm_setzero_si128(); 219 __m128i ds = _mm_cvtsi32_si128(diff_scale); 220 221 for( int d = minD; d < maxD; d += 16 ) 222 { 223 __m128i _v = _mm_loadu_si128((const __m128i*)(prow2 + width-x-1 + d)); 224 __m128i _v0 = _mm_loadu_si128((const __m128i*)(buffer + width-x-1 + d)); 225 __m128i _v1 = _mm_loadu_si128((const __m128i*)(buffer + width-x-1 + d + width2)); 226 __m128i c0 = _mm_max_epu8(_mm_subs_epu8(_u, _v1), _mm_subs_epu8(_v0, _u)); 227 __m128i c1 = _mm_max_epu8(_mm_subs_epu8(_v, _u1), _mm_subs_epu8(_u0, _v)); 228 __m128i diff = _mm_min_epu8(c0, c1); 229 230 c0 = _mm_load_si128((__m128i*)(cost + x*D + d)); 231 c1 = _mm_load_si128((__m128i*)(cost + x*D + d + 8)); 232 233 _mm_store_si128((__m128i*)(cost + x*D + d), _mm_adds_epi16(c0, _mm_srl_epi16(_mm_unpacklo_epi8(diff,z), ds))); 234 _mm_store_si128((__m128i*)(cost + x*D + d + 8), _mm_adds_epi16(c1, _mm_srl_epi16(_mm_unpackhi_epi8(diff,z), ds))); 235 } 236 } 237 else 238 #endif 239 { 240 for( int d = minD; d < maxD; d++ ) 241 { 242 int v = prow2[width-x-1 + d]; 243 int v0 = buffer[width-x-1 + d]; 244 int v1 = buffer[width-x-1 + d + width2]; 245 int c0 = std::max(0, u - v1); c0 = std::max(c0, v0 - u); 246 int c1 = std::max(0, v - u1); c1 = std::max(c1, u0 - v); 247 248 cost[x*D + d] = (CostType)(cost[x*D+d] + (std::min(c0, c1) >> diff_scale)); 249 } 250 } 251 } 252 } 253 #else 254 for( c = 0; c < cn*2; c++, prow1 += width, prow2 += width ) 255 { 256 for( x = minX1; x < maxX1; x++ ) 257 { 258 int u = prow1[x]; 259 #if CV_SSE2 260 if( useSIMD ) 261 { 262 __m128i _u = _mm_set1_epi8(u), z = _mm_setzero_si128(); 263 264 for( int d = minD; d < maxD; d += 16 ) 265 { 266 __m128i _v = _mm_loadu_si128((const __m128i*)(prow2 + width-1-x + d)); 267 __m128i diff = _mm_adds_epu8(_mm_subs_epu8(_u,_v), _mm_subs_epu8(_v,_u)); 268 __m128i c0 = _mm_load_si128((__m128i*)(cost + x*D + d)); 269 __m128i c1 = _mm_load_si128((__m128i*)(cost + x*D + d + 8)); 270 271 _mm_store_si128((__m128i*)(cost + x*D + d), _mm_adds_epi16(c0, _mm_unpacklo_epi8(diff,z))); 272 _mm_store_si128((__m128i*)(cost + x*D + d + 8), _mm_adds_epi16(c1, _mm_unpackhi_epi8(diff,z))); 273 } 274 } 275 else 276 #endif 277 { 278 for( int d = minD; d < maxD; d++ ) 279 { 280 int v = prow2[width-1-x + d]; 281 cost[x*D + d] = (CostType)(cost[x*D + d] + (CostType)std::abs(u - v)); 282 } 283 } 284 } 285 } 286 #endif 287 } 288 289 290 /* 291 computes disparity for "roi" in img1 w.r.t. img2 and write it to disp1buf. 292 that is, disp1buf(x, y)=d means that img1(x+roi.x, y+roi.y) ~ img2(x+roi.x-d, y+roi.y). 293 minD <= d < maxD. 294 disp2full is the reverse disparity map, that is: 295 disp2full(x+roi.x,y+roi.y)=d means that img2(x+roi.x, y+roi.y) ~ img1(x+roi.x+d, y+roi.y) 296 297 note that disp1buf will have the same size as the roi and 298 disp2full will have the same size as img1 (or img2). 299 On exit disp2buf is not the final disparity, it is an intermediate result that becomes 300 final after all the tiles are processed. 301 302 the disparity in disp1buf is written with sub-pixel accuracy 303 (4 fractional bits, see StereoSGBM::DISP_SCALE), 304 using quadratic interpolation, while the disparity in disp2buf 305 is written as is, without interpolation. 306 307 disp2cost also has the same size as img1 (or img2). 308 It contains the minimum current cost, used to find the best disparity, corresponding to the minimal cost. 309 */ 310 static void computeDisparitySGBM( const Mat& img1, const Mat& img2, 311 Mat& disp1, const StereoSGBMParams& params, 312 Mat& buffer ) 313 { 314 #if CV_SSE2 315 static const uchar LSBTab[] = 316 { 317 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 318 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 319 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 320 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 321 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 322 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 323 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 324 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 325 }; 326 327 volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2); 328 #endif 329 330 const int ALIGN = 16; 331 const int DISP_SHIFT = StereoMatcher::DISP_SHIFT; 332 const int DISP_SCALE = (1 << DISP_SHIFT); 333 const CostType MAX_COST = SHRT_MAX; 334 335 int minD = params.minDisparity, maxD = minD + params.numDisparities; 336 Size SADWindowSize; 337 SADWindowSize.width = SADWindowSize.height = params.SADWindowSize > 0 ? params.SADWindowSize : 5; 338 int ftzero = std::max(params.preFilterCap, 15) | 1; 339 int uniquenessRatio = params.uniquenessRatio >= 0 ? params.uniquenessRatio : 10; 340 int disp12MaxDiff = params.disp12MaxDiff > 0 ? params.disp12MaxDiff : 1; 341 int P1 = params.P1 > 0 ? params.P1 : 2, P2 = std::max(params.P2 > 0 ? params.P2 : 5, P1+1); 342 int k, width = disp1.cols, height = disp1.rows; 343 int minX1 = std::max(-maxD, 0), maxX1 = width + std::min(minD, 0); 344 int D = maxD - minD, width1 = maxX1 - minX1; 345 int INVALID_DISP = minD - 1, INVALID_DISP_SCALED = INVALID_DISP*DISP_SCALE; 346 int SW2 = SADWindowSize.width/2, SH2 = SADWindowSize.height/2; 347 bool fullDP = params.mode == StereoSGBM::MODE_HH; 348 int npasses = fullDP ? 2 : 1; 349 const int TAB_OFS = 256*4, TAB_SIZE = 256 + TAB_OFS*2; 350 PixType clipTab[TAB_SIZE]; 351 352 for( k = 0; k < TAB_SIZE; k++ ) 353 clipTab[k] = (PixType)(std::min(std::max(k - TAB_OFS, -ftzero), ftzero) + ftzero); 354 355 if( minX1 >= maxX1 ) 356 { 357 disp1 = Scalar::all(INVALID_DISP_SCALED); 358 return; 359 } 360 361 CV_Assert( D % 16 == 0 ); 362 363 // NR - the number of directions. the loop on x below that computes Lr assumes that NR == 8. 364 // if you change NR, please, modify the loop as well. 365 int D2 = D+16, NRD2 = NR2*D2; 366 367 // the number of L_r(.,.) and min_k L_r(.,.) lines in the buffer: 368 // for 8-way dynamic programming we need the current row and 369 // the previous row, i.e. 2 rows in total 370 const int NLR = 2; 371 const int LrBorder = NLR - 1; 372 373 // for each possible stereo match (img1(x,y) <=> img2(x-d,y)) 374 // we keep pixel difference cost (C) and the summary cost over NR directions (S). 375 // we also keep all the partial costs for the previous line L_r(x,d) and also min_k L_r(x, k) 376 size_t costBufSize = width1*D; 377 size_t CSBufSize = costBufSize*(fullDP ? height : 1); 378 size_t minLrSize = (width1 + LrBorder*2)*NR2, LrSize = minLrSize*D2; 379 int hsumBufNRows = SH2*2 + 2; 380 size_t totalBufSize = (LrSize + minLrSize)*NLR*sizeof(CostType) + // minLr[] and Lr[] 381 costBufSize*(hsumBufNRows + 1)*sizeof(CostType) + // hsumBuf, pixdiff 382 CSBufSize*2*sizeof(CostType) + // C, S 383 width*16*img1.channels()*sizeof(PixType) + // temp buffer for computing per-pixel cost 384 width*(sizeof(CostType) + sizeof(DispType)) + 1024; // disp2cost + disp2 385 386 if( buffer.empty() || !buffer.isContinuous() || 387 buffer.cols*buffer.rows*buffer.elemSize() < totalBufSize ) 388 buffer.create(1, (int)totalBufSize, CV_8U); 389 390 // summary cost over different (nDirs) directions 391 CostType* Cbuf = (CostType*)alignPtr(buffer.ptr(), ALIGN); 392 CostType* Sbuf = Cbuf + CSBufSize; 393 CostType* hsumBuf = Sbuf + CSBufSize; 394 CostType* pixDiff = hsumBuf + costBufSize*hsumBufNRows; 395 396 CostType* disp2cost = pixDiff + costBufSize + (LrSize + minLrSize)*NLR; 397 DispType* disp2ptr = (DispType*)(disp2cost + width); 398 PixType* tempBuf = (PixType*)(disp2ptr + width); 399 400 // add P2 to every C(x,y). it saves a few operations in the inner loops 401 for( k = 0; k < width1*D; k++ ) 402 Cbuf[k] = (CostType)P2; 403 404 for( int pass = 1; pass <= npasses; pass++ ) 405 { 406 int x1, y1, x2, y2, dx, dy; 407 408 if( pass == 1 ) 409 { 410 y1 = 0; y2 = height; dy = 1; 411 x1 = 0; x2 = width1; dx = 1; 412 } 413 else 414 { 415 y1 = height-1; y2 = -1; dy = -1; 416 x1 = width1-1; x2 = -1; dx = -1; 417 } 418 419 CostType *Lr[NLR]={0}, *minLr[NLR]={0}; 420 421 for( k = 0; k < NLR; k++ ) 422 { 423 // shift Lr[k] and minLr[k] pointers, because we allocated them with the borders, 424 // and will occasionally use negative indices with the arrays 425 // we need to shift Lr[k] pointers by 1, to give the space for d=-1. 426 // however, then the alignment will be imperfect, i.e. bad for SSE, 427 // thus we shift the pointers by 8 (8*sizeof(short) == 16 - ideal alignment) 428 Lr[k] = pixDiff + costBufSize + LrSize*k + NRD2*LrBorder + 8; 429 memset( Lr[k] - LrBorder*NRD2 - 8, 0, LrSize*sizeof(CostType) ); 430 minLr[k] = pixDiff + costBufSize + LrSize*NLR + minLrSize*k + NR2*LrBorder; 431 memset( minLr[k] - LrBorder*NR2, 0, minLrSize*sizeof(CostType) ); 432 } 433 434 for( int y = y1; y != y2; y += dy ) 435 { 436 int x, d; 437 DispType* disp1ptr = disp1.ptr<DispType>(y); 438 CostType* C = Cbuf + (!fullDP ? 0 : y*costBufSize); 439 CostType* S = Sbuf + (!fullDP ? 0 : y*costBufSize); 440 441 if( pass == 1 ) // compute C on the first pass, and reuse it on the second pass, if any. 442 { 443 int dy1 = y == 0 ? 0 : y + SH2, dy2 = y == 0 ? SH2 : dy1; 444 445 for( k = dy1; k <= dy2; k++ ) 446 { 447 CostType* hsumAdd = hsumBuf + (std::min(k, height-1) % hsumBufNRows)*costBufSize; 448 449 if( k < height ) 450 { 451 calcPixelCostBT( img1, img2, k, minD, maxD, pixDiff, tempBuf, clipTab, TAB_OFS, ftzero ); 452 453 memset(hsumAdd, 0, D*sizeof(CostType)); 454 for( x = 0; x <= SW2*D; x += D ) 455 { 456 int scale = x == 0 ? SW2 + 1 : 1; 457 for( d = 0; d < D; d++ ) 458 hsumAdd[d] = (CostType)(hsumAdd[d] + pixDiff[x + d]*scale); 459 } 460 461 if( y > 0 ) 462 { 463 const CostType* hsumSub = hsumBuf + (std::max(y - SH2 - 1, 0) % hsumBufNRows)*costBufSize; 464 const CostType* Cprev = !fullDP || y == 0 ? C : C - costBufSize; 465 466 for( x = D; x < width1*D; x += D ) 467 { 468 const CostType* pixAdd = pixDiff + std::min(x + SW2*D, (width1-1)*D); 469 const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0); 470 471 #if CV_SSE2 472 if( useSIMD ) 473 { 474 for( d = 0; d < D; d += 8 ) 475 { 476 __m128i hv = _mm_load_si128((const __m128i*)(hsumAdd + x - D + d)); 477 __m128i Cx = _mm_load_si128((__m128i*)(Cprev + x + d)); 478 hv = _mm_adds_epi16(_mm_subs_epi16(hv, 479 _mm_load_si128((const __m128i*)(pixSub + d))), 480 _mm_load_si128((const __m128i*)(pixAdd + d))); 481 Cx = _mm_adds_epi16(_mm_subs_epi16(Cx, 482 _mm_load_si128((const __m128i*)(hsumSub + x + d))), 483 hv); 484 _mm_store_si128((__m128i*)(hsumAdd + x + d), hv); 485 _mm_store_si128((__m128i*)(C + x + d), Cx); 486 } 487 } 488 else 489 #endif 490 { 491 for( d = 0; d < D; d++ ) 492 { 493 int hv = hsumAdd[x + d] = (CostType)(hsumAdd[x - D + d] + pixAdd[d] - pixSub[d]); 494 C[x + d] = (CostType)(Cprev[x + d] + hv - hsumSub[x + d]); 495 } 496 } 497 } 498 } 499 else 500 { 501 for( x = D; x < width1*D; x += D ) 502 { 503 const CostType* pixAdd = pixDiff + std::min(x + SW2*D, (width1-1)*D); 504 const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0); 505 506 for( d = 0; d < D; d++ ) 507 hsumAdd[x + d] = (CostType)(hsumAdd[x - D + d] + pixAdd[d] - pixSub[d]); 508 } 509 } 510 } 511 512 if( y == 0 ) 513 { 514 int scale = k == 0 ? SH2 + 1 : 1; 515 for( x = 0; x < width1*D; x++ ) 516 C[x] = (CostType)(C[x] + hsumAdd[x]*scale); 517 } 518 } 519 520 // also, clear the S buffer 521 for( k = 0; k < width1*D; k++ ) 522 S[k] = 0; 523 } 524 525 // clear the left and the right borders 526 memset( Lr[0] - NRD2*LrBorder - 8, 0, NRD2*LrBorder*sizeof(CostType) ); 527 memset( Lr[0] + width1*NRD2 - 8, 0, NRD2*LrBorder*sizeof(CostType) ); 528 memset( minLr[0] - NR2*LrBorder, 0, NR2*LrBorder*sizeof(CostType) ); 529 memset( minLr[0] + width1*NR2, 0, NR2*LrBorder*sizeof(CostType) ); 530 531 /* 532 [formula 13 in the paper] 533 compute L_r(p, d) = C(p, d) + 534 min(L_r(p-r, d), 535 L_r(p-r, d-1) + P1, 536 L_r(p-r, d+1) + P1, 537 min_k L_r(p-r, k) + P2) - min_k L_r(p-r, k) 538 where p = (x,y), r is one of the directions. 539 we process all the directions at once: 540 0: r=(-dx, 0) 541 1: r=(-1, -dy) 542 2: r=(0, -dy) 543 3: r=(1, -dy) 544 4: r=(-2, -dy) 545 5: r=(-1, -dy*2) 546 6: r=(1, -dy*2) 547 7: r=(2, -dy) 548 */ 549 for( x = x1; x != x2; x += dx ) 550 { 551 int xm = x*NR2, xd = xm*D2; 552 553 int delta0 = minLr[0][xm - dx*NR2] + P2, delta1 = minLr[1][xm - NR2 + 1] + P2; 554 int delta2 = minLr[1][xm + 2] + P2, delta3 = minLr[1][xm + NR2 + 3] + P2; 555 556 CostType* Lr_p0 = Lr[0] + xd - dx*NRD2; 557 CostType* Lr_p1 = Lr[1] + xd - NRD2 + D2; 558 CostType* Lr_p2 = Lr[1] + xd + D2*2; 559 CostType* Lr_p3 = Lr[1] + xd + NRD2 + D2*3; 560 561 Lr_p0[-1] = Lr_p0[D] = Lr_p1[-1] = Lr_p1[D] = 562 Lr_p2[-1] = Lr_p2[D] = Lr_p3[-1] = Lr_p3[D] = MAX_COST; 563 564 CostType* Lr_p = Lr[0] + xd; 565 const CostType* Cp = C + x*D; 566 CostType* Sp = S + x*D; 567 568 #if CV_SSE2 569 if( useSIMD ) 570 { 571 __m128i _P1 = _mm_set1_epi16((short)P1); 572 573 __m128i _delta0 = _mm_set1_epi16((short)delta0); 574 __m128i _delta1 = _mm_set1_epi16((short)delta1); 575 __m128i _delta2 = _mm_set1_epi16((short)delta2); 576 __m128i _delta3 = _mm_set1_epi16((short)delta3); 577 __m128i _minL0 = _mm_set1_epi16((short)MAX_COST); 578 579 for( d = 0; d < D; d += 8 ) 580 { 581 __m128i Cpd = _mm_load_si128((const __m128i*)(Cp + d)); 582 __m128i L0, L1, L2, L3; 583 584 L0 = _mm_load_si128((const __m128i*)(Lr_p0 + d)); 585 L1 = _mm_load_si128((const __m128i*)(Lr_p1 + d)); 586 L2 = _mm_load_si128((const __m128i*)(Lr_p2 + d)); 587 L3 = _mm_load_si128((const __m128i*)(Lr_p3 + d)); 588 589 L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d - 1)), _P1)); 590 L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d + 1)), _P1)); 591 592 L1 = _mm_min_epi16(L1, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p1 + d - 1)), _P1)); 593 L1 = _mm_min_epi16(L1, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p1 + d + 1)), _P1)); 594 595 L2 = _mm_min_epi16(L2, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p2 + d - 1)), _P1)); 596 L2 = _mm_min_epi16(L2, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p2 + d + 1)), _P1)); 597 598 L3 = _mm_min_epi16(L3, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p3 + d - 1)), _P1)); 599 L3 = _mm_min_epi16(L3, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p3 + d + 1)), _P1)); 600 601 L0 = _mm_min_epi16(L0, _delta0); 602 L0 = _mm_adds_epi16(_mm_subs_epi16(L0, _delta0), Cpd); 603 604 L1 = _mm_min_epi16(L1, _delta1); 605 L1 = _mm_adds_epi16(_mm_subs_epi16(L1, _delta1), Cpd); 606 607 L2 = _mm_min_epi16(L2, _delta2); 608 L2 = _mm_adds_epi16(_mm_subs_epi16(L2, _delta2), Cpd); 609 610 L3 = _mm_min_epi16(L3, _delta3); 611 L3 = _mm_adds_epi16(_mm_subs_epi16(L3, _delta3), Cpd); 612 613 _mm_store_si128( (__m128i*)(Lr_p + d), L0); 614 _mm_store_si128( (__m128i*)(Lr_p + d + D2), L1); 615 _mm_store_si128( (__m128i*)(Lr_p + d + D2*2), L2); 616 _mm_store_si128( (__m128i*)(Lr_p + d + D2*3), L3); 617 618 __m128i t0 = _mm_min_epi16(_mm_unpacklo_epi16(L0, L2), _mm_unpackhi_epi16(L0, L2)); 619 __m128i t1 = _mm_min_epi16(_mm_unpacklo_epi16(L1, L3), _mm_unpackhi_epi16(L1, L3)); 620 t0 = _mm_min_epi16(_mm_unpacklo_epi16(t0, t1), _mm_unpackhi_epi16(t0, t1)); 621 _minL0 = _mm_min_epi16(_minL0, t0); 622 623 __m128i Sval = _mm_load_si128((const __m128i*)(Sp + d)); 624 625 L0 = _mm_adds_epi16(L0, L1); 626 L2 = _mm_adds_epi16(L2, L3); 627 Sval = _mm_adds_epi16(Sval, L0); 628 Sval = _mm_adds_epi16(Sval, L2); 629 630 _mm_store_si128((__m128i*)(Sp + d), Sval); 631 } 632 633 _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 8)); 634 _mm_storel_epi64((__m128i*)&minLr[0][xm], _minL0); 635 } 636 else 637 #endif 638 { 639 int minL0 = MAX_COST, minL1 = MAX_COST, minL2 = MAX_COST, minL3 = MAX_COST; 640 641 for( d = 0; d < D; d++ ) 642 { 643 int Cpd = Cp[d], L0, L1, L2, L3; 644 645 L0 = Cpd + std::min((int)Lr_p0[d], std::min(Lr_p0[d-1] + P1, std::min(Lr_p0[d+1] + P1, delta0))) - delta0; 646 L1 = Cpd + std::min((int)Lr_p1[d], std::min(Lr_p1[d-1] + P1, std::min(Lr_p1[d+1] + P1, delta1))) - delta1; 647 L2 = Cpd + std::min((int)Lr_p2[d], std::min(Lr_p2[d-1] + P1, std::min(Lr_p2[d+1] + P1, delta2))) - delta2; 648 L3 = Cpd + std::min((int)Lr_p3[d], std::min(Lr_p3[d-1] + P1, std::min(Lr_p3[d+1] + P1, delta3))) - delta3; 649 650 Lr_p[d] = (CostType)L0; 651 minL0 = std::min(minL0, L0); 652 653 Lr_p[d + D2] = (CostType)L1; 654 minL1 = std::min(minL1, L1); 655 656 Lr_p[d + D2*2] = (CostType)L2; 657 minL2 = std::min(minL2, L2); 658 659 Lr_p[d + D2*3] = (CostType)L3; 660 minL3 = std::min(minL3, L3); 661 662 Sp[d] = saturate_cast<CostType>(Sp[d] + L0 + L1 + L2 + L3); 663 } 664 minLr[0][xm] = (CostType)minL0; 665 minLr[0][xm+1] = (CostType)minL1; 666 minLr[0][xm+2] = (CostType)minL2; 667 minLr[0][xm+3] = (CostType)minL3; 668 } 669 } 670 671 if( pass == npasses ) 672 { 673 for( x = 0; x < width; x++ ) 674 { 675 disp1ptr[x] = disp2ptr[x] = (DispType)INVALID_DISP_SCALED; 676 disp2cost[x] = MAX_COST; 677 } 678 679 for( x = width1 - 1; x >= 0; x-- ) 680 { 681 CostType* Sp = S + x*D; 682 int minS = MAX_COST, bestDisp = -1; 683 684 if( npasses == 1 ) 685 { 686 int xm = x*NR2, xd = xm*D2; 687 688 int minL0 = MAX_COST; 689 int delta0 = minLr[0][xm + NR2] + P2; 690 CostType* Lr_p0 = Lr[0] + xd + NRD2; 691 Lr_p0[-1] = Lr_p0[D] = MAX_COST; 692 CostType* Lr_p = Lr[0] + xd; 693 694 const CostType* Cp = C + x*D; 695 696 #if CV_SSE2 697 if( useSIMD ) 698 { 699 __m128i _P1 = _mm_set1_epi16((short)P1); 700 __m128i _delta0 = _mm_set1_epi16((short)delta0); 701 702 __m128i _minL0 = _mm_set1_epi16((short)minL0); 703 __m128i _minS = _mm_set1_epi16(MAX_COST), _bestDisp = _mm_set1_epi16(-1); 704 __m128i _d8 = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7), _8 = _mm_set1_epi16(8); 705 706 for( d = 0; d < D; d += 8 ) 707 { 708 __m128i Cpd = _mm_load_si128((const __m128i*)(Cp + d)), L0; 709 710 L0 = _mm_load_si128((const __m128i*)(Lr_p0 + d)); 711 L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d - 1)), _P1)); 712 L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d + 1)), _P1)); 713 L0 = _mm_min_epi16(L0, _delta0); 714 L0 = _mm_adds_epi16(_mm_subs_epi16(L0, _delta0), Cpd); 715 716 _mm_store_si128((__m128i*)(Lr_p + d), L0); 717 _minL0 = _mm_min_epi16(_minL0, L0); 718 L0 = _mm_adds_epi16(L0, *(__m128i*)(Sp + d)); 719 _mm_store_si128((__m128i*)(Sp + d), L0); 720 721 __m128i mask = _mm_cmpgt_epi16(_minS, L0); 722 _minS = _mm_min_epi16(_minS, L0); 723 _bestDisp = _mm_xor_si128(_bestDisp, _mm_and_si128(_mm_xor_si128(_bestDisp,_d8), mask)); 724 _d8 = _mm_adds_epi16(_d8, _8); 725 } 726 727 short CV_DECL_ALIGNED(16) bestDispBuf[8]; 728 _mm_store_si128((__m128i*)bestDispBuf, _bestDisp); 729 730 _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 8)); 731 _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 4)); 732 _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 2)); 733 734 __m128i qS = _mm_min_epi16(_minS, _mm_srli_si128(_minS, 8)); 735 qS = _mm_min_epi16(qS, _mm_srli_si128(qS, 4)); 736 qS = _mm_min_epi16(qS, _mm_srli_si128(qS, 2)); 737 738 minLr[0][xm] = (CostType)_mm_cvtsi128_si32(_minL0); 739 minS = (CostType)_mm_cvtsi128_si32(qS); 740 741 qS = _mm_shuffle_epi32(_mm_unpacklo_epi16(qS, qS), 0); 742 qS = _mm_cmpeq_epi16(_minS, qS); 743 int idx = _mm_movemask_epi8(_mm_packs_epi16(qS, qS)) & 255; 744 745 bestDisp = bestDispBuf[LSBTab[idx]]; 746 } 747 else 748 #endif 749 { 750 for( d = 0; d < D; d++ ) 751 { 752 int L0 = Cp[d] + std::min((int)Lr_p0[d], std::min(Lr_p0[d-1] + P1, std::min(Lr_p0[d+1] + P1, delta0))) - delta0; 753 754 Lr_p[d] = (CostType)L0; 755 minL0 = std::min(minL0, L0); 756 757 int Sval = Sp[d] = saturate_cast<CostType>(Sp[d] + L0); 758 if( Sval < minS ) 759 { 760 minS = Sval; 761 bestDisp = d; 762 } 763 } 764 minLr[0][xm] = (CostType)minL0; 765 } 766 } 767 else 768 { 769 for( d = 0; d < D; d++ ) 770 { 771 int Sval = Sp[d]; 772 if( Sval < minS ) 773 { 774 minS = Sval; 775 bestDisp = d; 776 } 777 } 778 } 779 780 for( d = 0; d < D; d++ ) 781 { 782 if( Sp[d]*(100 - uniquenessRatio) < minS*100 && std::abs(bestDisp - d) > 1 ) 783 break; 784 } 785 if( d < D ) 786 continue; 787 d = bestDisp; 788 int _x2 = x + minX1 - d - minD; 789 if( disp2cost[_x2] > minS ) 790 { 791 disp2cost[_x2] = (CostType)minS; 792 disp2ptr[_x2] = (DispType)(d + minD); 793 } 794 795 if( 0 < d && d < D-1 ) 796 { 797 // do subpixel quadratic interpolation: 798 // fit parabola into (x1=d-1, y1=Sp[d-1]), (x2=d, y2=Sp[d]), (x3=d+1, y3=Sp[d+1]) 799 // then find minimum of the parabola. 800 int denom2 = std::max(Sp[d-1] + Sp[d+1] - 2*Sp[d], 1); 801 d = d*DISP_SCALE + ((Sp[d-1] - Sp[d+1])*DISP_SCALE + denom2)/(denom2*2); 802 } 803 else 804 d *= DISP_SCALE; 805 disp1ptr[x + minX1] = (DispType)(d + minD*DISP_SCALE); 806 } 807 808 for( x = minX1; x < maxX1; x++ ) 809 { 810 // we round the computed disparity both towards -inf and +inf and check 811 // if either of the corresponding disparities in disp2 is consistent. 812 // This is to give the computed disparity a chance to look valid if it is. 813 int d1 = disp1ptr[x]; 814 if( d1 == INVALID_DISP_SCALED ) 815 continue; 816 int _d = d1 >> DISP_SHIFT; 817 int d_ = (d1 + DISP_SCALE-1) >> DISP_SHIFT; 818 int _x = x - _d, x_ = x - d_; 819 if( 0 <= _x && _x < width && disp2ptr[_x] >= minD && std::abs(disp2ptr[_x] - _d) > disp12MaxDiff && 820 0 <= x_ && x_ < width && disp2ptr[x_] >= minD && std::abs(disp2ptr[x_] - d_) > disp12MaxDiff ) 821 disp1ptr[x] = (DispType)INVALID_DISP_SCALED; 822 } 823 } 824 825 // now shift the cyclic buffers 826 std::swap( Lr[0], Lr[1] ); 827 std::swap( minLr[0], minLr[1] ); 828 } 829 } 830 } 831 832 class StereoSGBMImpl : public StereoSGBM 833 { 834 public: 835 StereoSGBMImpl() 836 { 837 params = StereoSGBMParams(); 838 } 839 840 StereoSGBMImpl( int _minDisparity, int _numDisparities, int _SADWindowSize, 841 int _P1, int _P2, int _disp12MaxDiff, int _preFilterCap, 842 int _uniquenessRatio, int _speckleWindowSize, int _speckleRange, 843 int _mode ) 844 { 845 params = StereoSGBMParams( _minDisparity, _numDisparities, _SADWindowSize, 846 _P1, _P2, _disp12MaxDiff, _preFilterCap, 847 _uniquenessRatio, _speckleWindowSize, _speckleRange, 848 _mode ); 849 } 850 851 void compute( InputArray leftarr, InputArray rightarr, OutputArray disparr ) 852 { 853 Mat left = leftarr.getMat(), right = rightarr.getMat(); 854 CV_Assert( left.size() == right.size() && left.type() == right.type() && 855 left.depth() == CV_8U ); 856 857 disparr.create( left.size(), CV_16S ); 858 Mat disp = disparr.getMat(); 859 860 computeDisparitySGBM( left, right, disp, params, buffer ); 861 medianBlur(disp, disp, 3); 862 863 if( params.speckleWindowSize > 0 ) 864 filterSpeckles(disp, (params.minDisparity - 1)*StereoMatcher::DISP_SCALE, params.speckleWindowSize, 865 StereoMatcher::DISP_SCALE*params.speckleRange, buffer); 866 } 867 868 int getMinDisparity() const { return params.minDisparity; } 869 void setMinDisparity(int minDisparity) { params.minDisparity = minDisparity; } 870 871 int getNumDisparities() const { return params.numDisparities; } 872 void setNumDisparities(int numDisparities) { params.numDisparities = numDisparities; } 873 874 int getBlockSize() const { return params.SADWindowSize; } 875 void setBlockSize(int blockSize) { params.SADWindowSize = blockSize; } 876 877 int getSpeckleWindowSize() const { return params.speckleWindowSize; } 878 void setSpeckleWindowSize(int speckleWindowSize) { params.speckleWindowSize = speckleWindowSize; } 879 880 int getSpeckleRange() const { return params.speckleRange; } 881 void setSpeckleRange(int speckleRange) { params.speckleRange = speckleRange; } 882 883 int getDisp12MaxDiff() const { return params.disp12MaxDiff; } 884 void setDisp12MaxDiff(int disp12MaxDiff) { params.disp12MaxDiff = disp12MaxDiff; } 885 886 int getPreFilterCap() const { return params.preFilterCap; } 887 void setPreFilterCap(int preFilterCap) { params.preFilterCap = preFilterCap; } 888 889 int getUniquenessRatio() const { return params.uniquenessRatio; } 890 void setUniquenessRatio(int uniquenessRatio) { params.uniquenessRatio = uniquenessRatio; } 891 892 int getP1() const { return params.P1; } 893 void setP1(int P1) { params.P1 = P1; } 894 895 int getP2() const { return params.P2; } 896 void setP2(int P2) { params.P2 = P2; } 897 898 int getMode() const { return params.mode; } 899 void setMode(int mode) { params.mode = mode; } 900 901 void write(FileStorage& fs) const 902 { 903 fs << "name" << name_ 904 << "minDisparity" << params.minDisparity 905 << "numDisparities" << params.numDisparities 906 << "blockSize" << params.SADWindowSize 907 << "speckleWindowSize" << params.speckleWindowSize 908 << "speckleRange" << params.speckleRange 909 << "disp12MaxDiff" << params.disp12MaxDiff 910 << "preFilterCap" << params.preFilterCap 911 << "uniquenessRatio" << params.uniquenessRatio 912 << "P1" << params.P1 913 << "P2" << params.P2 914 << "mode" << params.mode; 915 } 916 917 void read(const FileNode& fn) 918 { 919 FileNode n = fn["name"]; 920 CV_Assert( n.isString() && String(n) == name_ ); 921 params.minDisparity = (int)fn["minDisparity"]; 922 params.numDisparities = (int)fn["numDisparities"]; 923 params.SADWindowSize = (int)fn["blockSize"]; 924 params.speckleWindowSize = (int)fn["speckleWindowSize"]; 925 params.speckleRange = (int)fn["speckleRange"]; 926 params.disp12MaxDiff = (int)fn["disp12MaxDiff"]; 927 params.preFilterCap = (int)fn["preFilterCap"]; 928 params.uniquenessRatio = (int)fn["uniquenessRatio"]; 929 params.P1 = (int)fn["P1"]; 930 params.P2 = (int)fn["P2"]; 931 params.mode = (int)fn["mode"]; 932 } 933 934 StereoSGBMParams params; 935 Mat buffer; 936 static const char* name_; 937 }; 938 939 const char* StereoSGBMImpl::name_ = "StereoMatcher.SGBM"; 940 941 942 Ptr<StereoSGBM> StereoSGBM::create(int minDisparity, int numDisparities, int SADWindowSize, 943 int P1, int P2, int disp12MaxDiff, 944 int preFilterCap, int uniquenessRatio, 945 int speckleWindowSize, int speckleRange, 946 int mode) 947 { 948 return Ptr<StereoSGBM>( 949 new StereoSGBMImpl(minDisparity, numDisparities, SADWindowSize, 950 P1, P2, disp12MaxDiff, 951 preFilterCap, uniquenessRatio, 952 speckleWindowSize, speckleRange, 953 mode)); 954 } 955 956 Rect getValidDisparityROI( Rect roi1, Rect roi2, 957 int minDisparity, 958 int numberOfDisparities, 959 int SADWindowSize ) 960 { 961 int SW2 = SADWindowSize/2; 962 int minD = minDisparity, maxD = minDisparity + numberOfDisparities - 1; 963 964 int xmin = std::max(roi1.x, roi2.x + maxD) + SW2; 965 int xmax = std::min(roi1.x + roi1.width, roi2.x + roi2.width - minD) - SW2; 966 int ymin = std::max(roi1.y, roi2.y) + SW2; 967 int ymax = std::min(roi1.y + roi1.height, roi2.y + roi2.height) - SW2; 968 969 Rect r(xmin, ymin, xmax - xmin, ymax - ymin); 970 971 return r.width > 0 && r.height > 0 ? r : Rect(); 972 } 973 974 typedef cv::Point_<short> Point2s; 975 976 template <typename T> 977 void filterSpecklesImpl(cv::Mat& img, int newVal, int maxSpeckleSize, int maxDiff, cv::Mat& _buf) 978 { 979 using namespace cv; 980 981 int width = img.cols, height = img.rows, npixels = width*height; 982 size_t bufSize = npixels*(int)(sizeof(Point2s) + sizeof(int) + sizeof(uchar)); 983 if( !_buf.isContinuous() || _buf.empty() || _buf.cols*_buf.rows*_buf.elemSize() < bufSize ) 984 _buf.create(1, (int)bufSize, CV_8U); 985 986 uchar* buf = _buf.ptr(); 987 int i, j, dstep = (int)(img.step/sizeof(T)); 988 int* labels = (int*)buf; 989 buf += npixels*sizeof(labels[0]); 990 Point2s* wbuf = (Point2s*)buf; 991 buf += npixels*sizeof(wbuf[0]); 992 uchar* rtype = (uchar*)buf; 993 int curlabel = 0; 994 995 // clear out label assignments 996 memset(labels, 0, npixels*sizeof(labels[0])); 997 998 for( i = 0; i < height; i++ ) 999 { 1000 T* ds = img.ptr<T>(i); 1001 int* ls = labels + width*i; 1002 1003 for( j = 0; j < width; j++ ) 1004 { 1005 if( ds[j] != newVal ) // not a bad disparity 1006 { 1007 if( ls[j] ) // has a label, check for bad label 1008 { 1009 if( rtype[ls[j]] ) // small region, zero out disparity 1010 ds[j] = (T)newVal; 1011 } 1012 // no label, assign and propagate 1013 else 1014 { 1015 Point2s* ws = wbuf; // initialize wavefront 1016 Point2s p((short)j, (short)i); // current pixel 1017 curlabel++; // next label 1018 int count = 0; // current region size 1019 ls[j] = curlabel; 1020 1021 // wavefront propagation 1022 while( ws >= wbuf ) // wavefront not empty 1023 { 1024 count++; 1025 // put neighbors onto wavefront 1026 T* dpp = &img.at<T>(p.y, p.x); 1027 T dp = *dpp; 1028 int* lpp = labels + width*p.y + p.x; 1029 1030 if( p.y < height-1 && !lpp[+width] && dpp[+dstep] != newVal && std::abs(dp - dpp[+dstep]) <= maxDiff ) 1031 { 1032 lpp[+width] = curlabel; 1033 *ws++ = Point2s(p.x, p.y+1); 1034 } 1035 1036 if( p.y > 0 && !lpp[-width] && dpp[-dstep] != newVal && std::abs(dp - dpp[-dstep]) <= maxDiff ) 1037 { 1038 lpp[-width] = curlabel; 1039 *ws++ = Point2s(p.x, p.y-1); 1040 } 1041 1042 if( p.x < width-1 && !lpp[+1] && dpp[+1] != newVal && std::abs(dp - dpp[+1]) <= maxDiff ) 1043 { 1044 lpp[+1] = curlabel; 1045 *ws++ = Point2s(p.x+1, p.y); 1046 } 1047 1048 if( p.x > 0 && !lpp[-1] && dpp[-1] != newVal && std::abs(dp - dpp[-1]) <= maxDiff ) 1049 { 1050 lpp[-1] = curlabel; 1051 *ws++ = Point2s(p.x-1, p.y); 1052 } 1053 1054 // pop most recent and propagate 1055 // NB: could try least recent, maybe better convergence 1056 p = *--ws; 1057 } 1058 1059 // assign label type 1060 if( count <= maxSpeckleSize ) // speckle region 1061 { 1062 rtype[ls[j]] = 1; // small region label 1063 ds[j] = (T)newVal; 1064 } 1065 else 1066 rtype[ls[j]] = 0; // large region label 1067 } 1068 } 1069 } 1070 } 1071 } 1072 1073 } 1074 1075 void cv::filterSpeckles( InputOutputArray _img, double _newval, int maxSpeckleSize, 1076 double _maxDiff, InputOutputArray __buf ) 1077 { 1078 Mat img = _img.getMat(); 1079 int type = img.type(); 1080 Mat temp, &_buf = __buf.needed() ? __buf.getMatRef() : temp; 1081 CV_Assert( type == CV_8UC1 || type == CV_16SC1 ); 1082 1083 int newVal = cvRound(_newval), maxDiff = cvRound(_maxDiff); 1084 1085 #if IPP_VERSION_X100 >= 801 1086 CV_IPP_CHECK() 1087 { 1088 Ipp32s bufsize = 0; 1089 IppiSize roisize = { img.cols, img.rows }; 1090 IppDataType datatype = type == CV_8UC1 ? ipp8u : ipp16s; 1091 1092 if (!__buf.needed() && (type == CV_8UC1 || type == CV_16SC1)) 1093 { 1094 IppStatus status = ippiMarkSpecklesGetBufferSize(roisize, datatype, CV_MAT_CN(type), &bufsize); 1095 Ipp8u * buffer = ippsMalloc_8u(bufsize); 1096 1097 if ((int)status >= 0) 1098 { 1099 if (type == CV_8UC1) 1100 status = ippiMarkSpeckles_8u_C1IR(img.ptr<Ipp8u>(), (int)img.step, roisize, 1101 (Ipp8u)newVal, maxSpeckleSize, (Ipp8u)maxDiff, ippiNormL1, buffer); 1102 else 1103 status = ippiMarkSpeckles_16s_C1IR(img.ptr<Ipp16s>(), (int)img.step, roisize, 1104 (Ipp16s)newVal, maxSpeckleSize, (Ipp16s)maxDiff, ippiNormL1, buffer); 1105 } 1106 1107 if (status >= 0) 1108 { 1109 CV_IMPL_ADD(CV_IMPL_IPP); 1110 return; 1111 } 1112 setIppErrorStatus(); 1113 } 1114 } 1115 #endif 1116 1117 if (type == CV_8UC1) 1118 filterSpecklesImpl<uchar>(img, newVal, maxSpeckleSize, maxDiff, _buf); 1119 else 1120 filterSpecklesImpl<short>(img, newVal, maxSpeckleSize, maxDiff, _buf); 1121 } 1122 1123 void cv::validateDisparity( InputOutputArray _disp, InputArray _cost, int minDisparity, 1124 int numberOfDisparities, int disp12MaxDiff ) 1125 { 1126 Mat disp = _disp.getMat(), cost = _cost.getMat(); 1127 int cols = disp.cols, rows = disp.rows; 1128 int minD = minDisparity, maxD = minDisparity + numberOfDisparities; 1129 int x, minX1 = std::max(maxD, 0), maxX1 = cols + std::min(minD, 0); 1130 AutoBuffer<int> _disp2buf(cols*2); 1131 int* disp2buf = _disp2buf; 1132 int* disp2cost = disp2buf + cols; 1133 const int DISP_SHIFT = 4, DISP_SCALE = 1 << DISP_SHIFT; 1134 int INVALID_DISP = minD - 1, INVALID_DISP_SCALED = INVALID_DISP*DISP_SCALE; 1135 int costType = cost.type(); 1136 1137 disp12MaxDiff *= DISP_SCALE; 1138 1139 CV_Assert( numberOfDisparities > 0 && disp.type() == CV_16S && 1140 (costType == CV_16S || costType == CV_32S) && 1141 disp.size() == cost.size() ); 1142 1143 for( int y = 0; y < rows; y++ ) 1144 { 1145 short* dptr = disp.ptr<short>(y); 1146 1147 for( x = 0; x < cols; x++ ) 1148 { 1149 disp2buf[x] = INVALID_DISP_SCALED; 1150 disp2cost[x] = INT_MAX; 1151 } 1152 1153 if( costType == CV_16S ) 1154 { 1155 const short* cptr = cost.ptr<short>(y); 1156 1157 for( x = minX1; x < maxX1; x++ ) 1158 { 1159 int d = dptr[x], c = cptr[x]; 1160 int x2 = x - ((d + DISP_SCALE/2) >> DISP_SHIFT); 1161 1162 if( disp2cost[x2] > c ) 1163 { 1164 disp2cost[x2] = c; 1165 disp2buf[x2] = d; 1166 } 1167 } 1168 } 1169 else 1170 { 1171 const int* cptr = cost.ptr<int>(y); 1172 1173 for( x = minX1; x < maxX1; x++ ) 1174 { 1175 int d = dptr[x], c = cptr[x]; 1176 int x2 = x - ((d + DISP_SCALE/2) >> DISP_SHIFT); 1177 1178 if( disp2cost[x2] < c ) 1179 { 1180 disp2cost[x2] = c; 1181 disp2buf[x2] = d; 1182 } 1183 } 1184 } 1185 1186 for( x = minX1; x < maxX1; x++ ) 1187 { 1188 // we round the computed disparity both towards -inf and +inf and check 1189 // if either of the corresponding disparities in disp2 is consistent. 1190 // This is to give the computed disparity a chance to look valid if it is. 1191 int d = dptr[x]; 1192 if( d == INVALID_DISP_SCALED ) 1193 continue; 1194 int d0 = d >> DISP_SHIFT; 1195 int d1 = (d + DISP_SCALE-1) >> DISP_SHIFT; 1196 int x0 = x - d0, x1 = x - d1; 1197 if( (0 <= x0 && x0 < cols && disp2buf[x0] > INVALID_DISP_SCALED && std::abs(disp2buf[x0] - d) > disp12MaxDiff) && 1198 (0 <= x1 && x1 < cols && disp2buf[x1] > INVALID_DISP_SCALED && std::abs(disp2buf[x1] - d) > disp12MaxDiff) ) 1199 dptr[x] = (short)INVALID_DISP_SCALED; 1200 } 1201 } 1202 } 1203