1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15 // Third party copyrights are property of their respective owners. 16 // 17 // Redistribution and use in source and binary forms, with or without modification, 18 // are permitted provided that the following conditions are met: 19 // 20 // * Redistribution's of source code must retain the above copyright notice, 21 // this list of conditions and the following disclaimer. 22 // 23 // * Redistribution's in binary form must reproduce the above copyright notice, 24 // this list of conditions and the following disclaimer in the documentation 25 // and/or other materials provided with the distribution. 26 // 27 // * The name of the copyright holders may not be used to endorse or promote products 28 // derived from this software without specific prior written permission. 29 // 30 // This software is provided by the copyright holders and contributors "as is" and 31 // any express or implied warranties, including, but not limited to, the implied 32 // warranties of merchantability and fitness for a particular purpose are disclaimed. 33 // In no event shall the Intel Corporation or contributors be liable for any direct, 34 // indirect, incidental, special, exemplary, or consequential damages 35 // (including, but not limited to, procurement of substitute goods or services; 36 // loss of use, data, or profits; or business interruption) however caused 37 // and on any theory of liability, whether in contract, strict liability, 38 // or tort (including negligence or otherwise) arising in any way out of 39 // the use of this software, even if advised of the possibility of such damage. 40 // 41 //M*/ 42 43 #include "precomp.hpp" 44 45 using namespace cv; 46 using namespace cv::cuda; 47 48 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) 49 50 void cv::cuda::StereoConstantSpaceBP::estimateRecommendedParams(int, int, int&, int&, int&, int&) { throw_no_cuda(); } 51 52 Ptr<cuda::StereoConstantSpaceBP> cv::cuda::createStereoConstantSpaceBP(int, int, int, int, int) { throw_no_cuda(); return Ptr<cuda::StereoConstantSpaceBP>(); } 53 54 #else /* !defined (HAVE_CUDA) */ 55 56 #include "cuda/stereocsbp.hpp" 57 58 namespace 59 { 60 class StereoCSBPImpl : public cuda::StereoConstantSpaceBP 61 { 62 public: 63 StereoCSBPImpl(int ndisp, int iters, int levels, int nr_plane, int msg_type); 64 65 void compute(InputArray left, InputArray right, OutputArray disparity); 66 void compute(InputArray left, InputArray right, OutputArray disparity, Stream& stream); 67 void compute(InputArray data, OutputArray disparity, Stream& stream); 68 69 int getMinDisparity() const { return min_disp_th_; } 70 void setMinDisparity(int minDisparity) { min_disp_th_ = minDisparity; } 71 72 int getNumDisparities() const { return ndisp_; } 73 void setNumDisparities(int numDisparities) { ndisp_ = numDisparities; } 74 75 int getBlockSize() const { return 0; } 76 void setBlockSize(int /*blockSize*/) {} 77 78 int getSpeckleWindowSize() const { return 0; } 79 void setSpeckleWindowSize(int /*speckleWindowSize*/) {} 80 81 int getSpeckleRange() const { return 0; } 82 void setSpeckleRange(int /*speckleRange*/) {} 83 84 int getDisp12MaxDiff() const { return 0; } 85 void setDisp12MaxDiff(int /*disp12MaxDiff*/) {} 86 87 int getNumIters() const { return iters_; } 88 void setNumIters(int iters) { iters_ = iters; } 89 90 int getNumLevels() const { return levels_; } 91 void setNumLevels(int levels) { levels_ = levels; } 92 93 double getMaxDataTerm() const { return max_data_term_; } 94 void setMaxDataTerm(double max_data_term) { max_data_term_ = (float) max_data_term; } 95 96 double getDataWeight() const { return data_weight_; } 97 void setDataWeight(double data_weight) { data_weight_ = (float) data_weight; } 98 99 double getMaxDiscTerm() const { return max_disc_term_; } 100 void setMaxDiscTerm(double max_disc_term) { max_disc_term_ = (float) max_disc_term; } 101 102 double getDiscSingleJump() const { return disc_single_jump_; } 103 void setDiscSingleJump(double disc_single_jump) { disc_single_jump_ = (float) disc_single_jump; } 104 105 int getMsgType() const { return msg_type_; } 106 void setMsgType(int msg_type) { msg_type_ = msg_type; } 107 108 int getNrPlane() const { return nr_plane_; } 109 void setNrPlane(int nr_plane) { nr_plane_ = nr_plane; } 110 111 bool getUseLocalInitDataCost() const { return use_local_init_data_cost_; } 112 void setUseLocalInitDataCost(bool use_local_init_data_cost) { use_local_init_data_cost_ = use_local_init_data_cost; } 113 114 private: 115 int min_disp_th_; 116 int ndisp_; 117 int iters_; 118 int levels_; 119 float max_data_term_; 120 float data_weight_; 121 float max_disc_term_; 122 float disc_single_jump_; 123 int msg_type_; 124 int nr_plane_; 125 bool use_local_init_data_cost_; 126 127 GpuMat mbuf_; 128 GpuMat temp_; 129 GpuMat outBuf_; 130 }; 131 132 const float DEFAULT_MAX_DATA_TERM = 30.0f; 133 const float DEFAULT_DATA_WEIGHT = 1.0f; 134 const float DEFAULT_MAX_DISC_TERM = 160.0f; 135 const float DEFAULT_DISC_SINGLE_JUMP = 10.0f; 136 137 StereoCSBPImpl::StereoCSBPImpl(int ndisp, int iters, int levels, int nr_plane, int msg_type) : 138 min_disp_th_(0), ndisp_(ndisp), iters_(iters), levels_(levels), 139 max_data_term_(DEFAULT_MAX_DATA_TERM), data_weight_(DEFAULT_DATA_WEIGHT), 140 max_disc_term_(DEFAULT_MAX_DISC_TERM), disc_single_jump_(DEFAULT_DISC_SINGLE_JUMP), 141 msg_type_(msg_type), nr_plane_(nr_plane), use_local_init_data_cost_(true) 142 { 143 } 144 145 void StereoCSBPImpl::compute(InputArray left, InputArray right, OutputArray disparity) 146 { 147 compute(left, right, disparity, Stream::Null()); 148 } 149 150 void StereoCSBPImpl::compute(InputArray _left, InputArray _right, OutputArray disp, Stream& _stream) 151 { 152 using namespace cv::cuda::device::stereocsbp; 153 154 CV_Assert( msg_type_ == CV_32F || msg_type_ == CV_16S ); 155 CV_Assert( 0 < ndisp_ && 0 < iters_ && 0 < levels_ && 0 < nr_plane_ && levels_ <= 8 ); 156 157 GpuMat left = _left.getGpuMat(); 158 GpuMat right = _right.getGpuMat(); 159 160 CV_Assert( left.type() == CV_8UC1 || left.type() == CV_8UC3 || left.type() == CV_8UC4 ); 161 CV_Assert( left.size() == right.size() && left.type() == right.type() ); 162 163 cudaStream_t stream = StreamAccessor::getStream(_stream); 164 165 //////////////////////////////////////////////////////////////////////////////////////////// 166 // Init 167 168 int rows = left.rows; 169 int cols = left.cols; 170 171 levels_ = std::min(levels_, int(log((double)ndisp_) / log(2.0))); 172 173 // compute sizes 174 AutoBuffer<int> buf(levels_ * 3); 175 int* cols_pyr = buf; 176 int* rows_pyr = cols_pyr + levels_; 177 int* nr_plane_pyr = rows_pyr + levels_; 178 179 cols_pyr[0] = cols; 180 rows_pyr[0] = rows; 181 nr_plane_pyr[0] = nr_plane_; 182 183 for (int i = 1; i < levels_; i++) 184 { 185 cols_pyr[i] = cols_pyr[i-1] / 2; 186 rows_pyr[i] = rows_pyr[i-1] / 2; 187 nr_plane_pyr[i] = nr_plane_pyr[i-1] * 2; 188 } 189 190 GpuMat u[2], d[2], l[2], r[2], disp_selected_pyr[2], data_cost, data_cost_selected; 191 192 //allocate buffers 193 int buffers_count = 10; // (up + down + left + right + disp_selected_pyr) * 2 194 buffers_count += 2; // data_cost has twice more rows than other buffers, what's why +2, not +1; 195 buffers_count += 1; // data_cost_selected 196 mbuf_.create(rows * nr_plane_ * buffers_count, cols, msg_type_); 197 198 data_cost = mbuf_.rowRange(0, rows * nr_plane_ * 2); 199 data_cost_selected = mbuf_.rowRange(data_cost.rows, data_cost.rows + rows * nr_plane_); 200 201 for(int k = 0; k < 2; ++k) // in/out 202 { 203 GpuMat sub1 = mbuf_.rowRange(data_cost.rows + data_cost_selected.rows, mbuf_.rows); 204 GpuMat sub2 = sub1.rowRange((k+0)*sub1.rows/2, (k+1)*sub1.rows/2); 205 206 GpuMat *buf_ptrs[] = { &u[k], &d[k], &l[k], &r[k], &disp_selected_pyr[k] }; 207 for(int _r = 0; _r < 5; ++_r) 208 { 209 *buf_ptrs[_r] = sub2.rowRange(_r * sub2.rows/5, (_r+1) * sub2.rows/5); 210 CV_DbgAssert( buf_ptrs[_r]->cols == cols && buf_ptrs[_r]->rows == rows * nr_plane_ ); 211 } 212 }; 213 214 size_t elem_step = mbuf_.step / mbuf_.elemSize(); 215 216 Size temp_size = data_cost.size(); 217 if ((size_t)temp_size.area() < elem_step * rows_pyr[levels_ - 1] * ndisp_) 218 temp_size = Size(static_cast<int>(elem_step), rows_pyr[levels_ - 1] * ndisp_); 219 220 temp_.create(temp_size, msg_type_); 221 222 //////////////////////////////////////////////////////////////////////////// 223 // Compute 224 225 l[0].setTo(0, _stream); 226 d[0].setTo(0, _stream); 227 r[0].setTo(0, _stream); 228 u[0].setTo(0, _stream); 229 230 l[1].setTo(0, _stream); 231 d[1].setTo(0, _stream); 232 r[1].setTo(0, _stream); 233 u[1].setTo(0, _stream); 234 235 data_cost.setTo(0, _stream); 236 data_cost_selected.setTo(0, _stream); 237 238 int cur_idx = 0; 239 240 if (msg_type_ == CV_32F) 241 { 242 for (int i = levels_ - 1; i >= 0; i--) 243 { 244 if (i == levels_ - 1) 245 { 246 init_data_cost(left.ptr<uchar>(), right.ptr<uchar>(), temp_.ptr<uchar>(), left.step, left.rows, left.cols, disp_selected_pyr[cur_idx].ptr<float>(), data_cost_selected.ptr<float>(), 247 elem_step, rows_pyr[i], cols_pyr[i], i, nr_plane_pyr[i], ndisp_, left.channels(), data_weight_, max_data_term_, min_disp_th_, use_local_init_data_cost_, stream); 248 } 249 else 250 { 251 compute_data_cost(left.ptr<uchar>(), right.ptr<uchar>(), left.step, disp_selected_pyr[cur_idx].ptr<float>(), data_cost.ptr<float>(), elem_step, 252 left.rows, left.cols, rows_pyr[i], cols_pyr[i], rows_pyr[i+1], i, nr_plane_pyr[i+1], left.channels(), data_weight_, max_data_term_, min_disp_th_, stream); 253 254 int new_idx = (cur_idx + 1) & 1; 255 256 init_message(temp_.ptr<uchar>(), 257 u[new_idx].ptr<float>(), d[new_idx].ptr<float>(), l[new_idx].ptr<float>(), r[new_idx].ptr<float>(), 258 u[cur_idx].ptr<float>(), d[cur_idx].ptr<float>(), l[cur_idx].ptr<float>(), r[cur_idx].ptr<float>(), 259 disp_selected_pyr[new_idx].ptr<float>(), disp_selected_pyr[cur_idx].ptr<float>(), 260 data_cost_selected.ptr<float>(), data_cost.ptr<float>(), elem_step, rows_pyr[i], 261 cols_pyr[i], nr_plane_pyr[i], rows_pyr[i+1], cols_pyr[i+1], nr_plane_pyr[i+1], stream); 262 263 cur_idx = new_idx; 264 } 265 266 calc_all_iterations(temp_.ptr<uchar>(), u[cur_idx].ptr<float>(), d[cur_idx].ptr<float>(), l[cur_idx].ptr<float>(), r[cur_idx].ptr<float>(), 267 data_cost_selected.ptr<float>(), disp_selected_pyr[cur_idx].ptr<float>(), elem_step, 268 rows_pyr[i], cols_pyr[i], nr_plane_pyr[i], iters_, max_disc_term_, disc_single_jump_, stream); 269 } 270 } 271 else 272 { 273 for (int i = levels_ - 1; i >= 0; i--) 274 { 275 if (i == levels_ - 1) 276 { 277 init_data_cost(left.ptr<uchar>(), right.ptr<uchar>(), temp_.ptr<uchar>(), left.step, left.rows, left.cols, disp_selected_pyr[cur_idx].ptr<short>(), data_cost_selected.ptr<short>(), 278 elem_step, rows_pyr[i], cols_pyr[i], i, nr_plane_pyr[i], ndisp_, left.channels(), data_weight_, max_data_term_, min_disp_th_, use_local_init_data_cost_, stream); 279 } 280 else 281 { 282 compute_data_cost(left.ptr<uchar>(), right.ptr<uchar>(), left.step, disp_selected_pyr[cur_idx].ptr<short>(), data_cost.ptr<short>(), elem_step, 283 left.rows, left.cols, rows_pyr[i], cols_pyr[i], rows_pyr[i+1], i, nr_plane_pyr[i+1], left.channels(), data_weight_, max_data_term_, min_disp_th_, stream); 284 285 int new_idx = (cur_idx + 1) & 1; 286 287 init_message(temp_.ptr<uchar>(), 288 u[new_idx].ptr<short>(), d[new_idx].ptr<short>(), l[new_idx].ptr<short>(), r[new_idx].ptr<short>(), 289 u[cur_idx].ptr<short>(), d[cur_idx].ptr<short>(), l[cur_idx].ptr<short>(), r[cur_idx].ptr<short>(), 290 disp_selected_pyr[new_idx].ptr<short>(), disp_selected_pyr[cur_idx].ptr<short>(), 291 data_cost_selected.ptr<short>(), data_cost.ptr<short>(), elem_step, rows_pyr[i], 292 cols_pyr[i], nr_plane_pyr[i], rows_pyr[i+1], cols_pyr[i+1], nr_plane_pyr[i+1], stream); 293 294 cur_idx = new_idx; 295 } 296 297 calc_all_iterations(temp_.ptr<uchar>(), u[cur_idx].ptr<short>(), d[cur_idx].ptr<short>(), l[cur_idx].ptr<short>(), r[cur_idx].ptr<short>(), 298 data_cost_selected.ptr<short>(), disp_selected_pyr[cur_idx].ptr<short>(), elem_step, 299 rows_pyr[i], cols_pyr[i], nr_plane_pyr[i], iters_, max_disc_term_, disc_single_jump_, stream); 300 } 301 } 302 303 const int dtype = disp.fixedType() ? disp.type() : CV_16SC1; 304 305 disp.create(rows, cols, dtype); 306 GpuMat out = disp.getGpuMat(); 307 308 if (dtype != CV_16SC1) 309 { 310 outBuf_.create(rows, cols, CV_16SC1); 311 out = outBuf_; 312 } 313 314 out.setTo(0, _stream); 315 316 if (msg_type_ == CV_32F) 317 { 318 compute_disp(u[cur_idx].ptr<float>(), d[cur_idx].ptr<float>(), l[cur_idx].ptr<float>(), r[cur_idx].ptr<float>(), 319 data_cost_selected.ptr<float>(), disp_selected_pyr[cur_idx].ptr<float>(), elem_step, out, nr_plane_pyr[0], stream); 320 } 321 else 322 { 323 compute_disp(u[cur_idx].ptr<short>(), d[cur_idx].ptr<short>(), l[cur_idx].ptr<short>(), r[cur_idx].ptr<short>(), 324 data_cost_selected.ptr<short>(), disp_selected_pyr[cur_idx].ptr<short>(), elem_step, out, nr_plane_pyr[0], stream); 325 } 326 327 if (dtype != CV_16SC1) 328 out.convertTo(disp, dtype, _stream); 329 } 330 331 void StereoCSBPImpl::compute(InputArray /*data*/, OutputArray /*disparity*/, Stream& /*stream*/) 332 { 333 CV_Error(Error::StsNotImplemented, "Not implemented"); 334 } 335 } 336 337 Ptr<cuda::StereoConstantSpaceBP> cv::cuda::createStereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane, int msg_type) 338 { 339 return makePtr<StereoCSBPImpl>(ndisp, iters, levels, nr_plane, msg_type); 340 } 341 342 void cv::cuda::StereoConstantSpaceBP::estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels, int& nr_plane) 343 { 344 ndisp = (int) ((float) width / 3.14f); 345 if ((ndisp & 1) != 0) 346 ndisp++; 347 348 int mm = std::max(width, height); 349 iters = mm / 100 + ((mm > 1200)? - 4 : 4); 350 351 levels = (int)::log(static_cast<double>(mm)) * 2 / 3; 352 if (levels == 0) levels++; 353 354 nr_plane = (int) ((float) ndisp / std::pow(2.0, levels + 1)); 355 } 356 357 #endif /* !defined (HAVE_CUDA) */ 358