1 /* 2 * 3 * select3obj.cpp With a calibration chessboard on a table, mark an object in a 3D box and 4 * track that object in all subseqent frames as long as the camera can see 5 * the chessboard. Also segments the object using the box projection. This 6 * program is useful for collecting large datasets of many views of an object 7 * on a table. 8 * 9 */ 10 11 #include "opencv2/core.hpp" 12 #include <opencv2/core/utility.hpp> 13 #include "opencv2/imgproc.hpp" 14 #include "opencv2/calib3d.hpp" 15 #include "opencv2/imgcodecs.hpp" 16 #include "opencv2/videoio.hpp" 17 #include "opencv2/highgui.hpp" 18 19 #include <ctype.h> 20 #include <stdio.h> 21 #include <stdlib.h> 22 23 using namespace std; 24 using namespace cv; 25 26 const char* helphelp = 27 "\nThis program's purpose is to collect data sets of an object and its segmentation mask.\n" 28 "\n" 29 "It shows how to use a calibrated camera together with a calibration pattern to\n" 30 "compute the homography of the plane the calibration pattern is on. It also shows grabCut\n" 31 "segmentation etc.\n" 32 "\n" 33 "select3dobj -w <board_width> -h <board_height> [-s <square_size>]\n" 34 " -i <camera_intrinsics_filename> -o <output_prefix> [video_filename/cameraId]\n" 35 "\n" 36 " -w <board_width> Number of chessboard corners wide\n" 37 " -h <board_height> Number of chessboard corners width\n" 38 " [-s <square_size>] Optional measure of chessboard squares in meters\n" 39 " -i <camera_intrinsics_filename> Camera matrix .yml file from calibration.cpp\n" 40 " -o <output_prefix> Prefix the output segmentation images with this\n" 41 " [video_filename/cameraId] If present, read from that video file or that ID\n" 42 "\n" 43 "Using a camera's intrinsics (from calibrating a camera -- see calibration.cpp) and an\n" 44 "image of the object sitting on a planar surface with a calibration pattern of\n" 45 "(board_width x board_height) on the surface, we draw a 3D box aroung the object. From\n" 46 "then on, we can move a camera and as long as it sees the chessboard calibration pattern,\n" 47 "it will store a mask of where the object is. We get succesive images using <output_prefix>\n" 48 "of the segmentation mask containing the object. This makes creating training sets easy.\n" 49 "It is best of the chessboard is odd x even in dimensions to avoid amiguous poses.\n" 50 "\n" 51 "The actions one can use while the program is running are:\n" 52 "\n" 53 " Select object as 3D box with the mouse.\n" 54 " First draw one line on the plane to outline the projection of that object on the plane\n" 55 " Then extend that line into a box to encompass the projection of that object onto the plane\n" 56 " The use the mouse again to extend the box upwards from the plane to encase the object.\n" 57 " Then use the following commands\n" 58 " ESC - Reset the selection\n" 59 " SPACE - Skip the frame; move to the next frame (not in video mode)\n" 60 " ENTER - Confirm the selection. Grab next object in video mode.\n" 61 " q - Exit the program\n" 62 "\n\n"; 63 64 // static void help() 65 // { 66 // puts(helphelp); 67 // } 68 69 70 struct MouseEvent 71 { 72 MouseEvent() { event = -1; buttonState = 0; } 73 Point pt; 74 int event; 75 int buttonState; 76 }; 77 78 static void onMouse(int event, int x, int y, int flags, void* userdata) 79 { 80 MouseEvent* data = (MouseEvent*)userdata; 81 data->event = event; 82 data->pt = Point(x,y); 83 data->buttonState = flags; 84 } 85 86 static bool readCameraMatrix(const string& filename, 87 Mat& cameraMatrix, Mat& distCoeffs, 88 Size& calibratedImageSize ) 89 { 90 FileStorage fs(filename, FileStorage::READ); 91 fs["image_width"] >> calibratedImageSize.width; 92 fs["image_height"] >> calibratedImageSize.height; 93 fs["distortion_coefficients"] >> distCoeffs; 94 fs["camera_matrix"] >> cameraMatrix; 95 96 if( distCoeffs.type() != CV_64F ) 97 distCoeffs = Mat_<double>(distCoeffs); 98 if( cameraMatrix.type() != CV_64F ) 99 cameraMatrix = Mat_<double>(cameraMatrix); 100 101 return true; 102 } 103 104 static void calcChessboardCorners(Size boardSize, float squareSize, vector<Point3f>& corners) 105 { 106 corners.resize(0); 107 108 for( int i = 0; i < boardSize.height; i++ ) 109 for( int j = 0; j < boardSize.width; j++ ) 110 corners.push_back(Point3f(float(j*squareSize), 111 float(i*squareSize), 0)); 112 } 113 114 115 static Point3f image2plane(Point2f imgpt, const Mat& R, const Mat& tvec, 116 const Mat& cameraMatrix, double Z) 117 { 118 Mat R1 = R.clone(); 119 R1.col(2) = R1.col(2)*Z + tvec; 120 Mat_<double> v = (cameraMatrix*R1).inv()*(Mat_<double>(3,1) << imgpt.x, imgpt.y, 1); 121 double iw = fabs(v(2,0)) > DBL_EPSILON ? 1./v(2,0) : 0; 122 return Point3f((float)(v(0,0)*iw), (float)(v(1,0)*iw), (float)Z); 123 } 124 125 126 static Rect extract3DBox(const Mat& frame, Mat& shownFrame, Mat& selectedObjFrame, 127 const Mat& cameraMatrix, const Mat& rvec, const Mat& tvec, 128 const vector<Point3f>& box, int nobjpt, bool runExtraSegmentation) 129 { 130 selectedObjFrame = Mat::zeros(frame.size(), frame.type()); 131 if( nobjpt == 0 ) 132 return Rect(); 133 vector<Point3f> objpt; 134 vector<Point2f> imgpt; 135 136 objpt.push_back(box[0]); 137 if( nobjpt > 1 ) 138 objpt.push_back(box[1]); 139 if( nobjpt > 2 ) 140 { 141 objpt.push_back(box[2]); 142 objpt.push_back(objpt[2] - objpt[1] + objpt[0]); 143 } 144 if( nobjpt > 3 ) 145 for( int i = 0; i < 4; i++ ) 146 objpt.push_back(Point3f(objpt[i].x, objpt[i].y, box[3].z)); 147 148 projectPoints(Mat(objpt), rvec, tvec, cameraMatrix, Mat(), imgpt); 149 150 if( !shownFrame.empty() ) 151 { 152 if( nobjpt == 1 ) 153 circle(shownFrame, imgpt[0], 3, Scalar(0,255,0), -1, LINE_AA); 154 else if( nobjpt == 2 ) 155 { 156 circle(shownFrame, imgpt[0], 3, Scalar(0,255,0), -1, LINE_AA); 157 circle(shownFrame, imgpt[1], 3, Scalar(0,255,0), -1, LINE_AA); 158 line(shownFrame, imgpt[0], imgpt[1], Scalar(0,255,0), 3, LINE_AA); 159 } 160 else if( nobjpt == 3 ) 161 for( int i = 0; i < 4; i++ ) 162 { 163 circle(shownFrame, imgpt[i], 3, Scalar(0,255,0), -1, LINE_AA); 164 line(shownFrame, imgpt[i], imgpt[(i+1)%4], Scalar(0,255,0), 3, LINE_AA); 165 } 166 else 167 for( int i = 0; i < 8; i++ ) 168 { 169 circle(shownFrame, imgpt[i], 3, Scalar(0,255,0), -1, LINE_AA); 170 line(shownFrame, imgpt[i], imgpt[(i+1)%4 + (i/4)*4], Scalar(0,255,0), 3, LINE_AA); 171 line(shownFrame, imgpt[i], imgpt[i%4], Scalar(0,255,0), 3, LINE_AA); 172 } 173 } 174 175 if( nobjpt <= 2 ) 176 return Rect(); 177 vector<Point> hull; 178 convexHull(Mat_<Point>(Mat(imgpt)), hull); 179 Mat selectedObjMask = Mat::zeros(frame.size(), CV_8U); 180 fillConvexPoly(selectedObjMask, &hull[0], (int)hull.size(), Scalar::all(255), 8, 0); 181 Rect roi = boundingRect(Mat(hull)) & Rect(Point(), frame.size()); 182 183 if( runExtraSegmentation ) 184 { 185 selectedObjMask = Scalar::all(GC_BGD); 186 fillConvexPoly(selectedObjMask, &hull[0], (int)hull.size(), Scalar::all(GC_PR_FGD), 8, 0); 187 Mat bgdModel, fgdModel; 188 grabCut(frame, selectedObjMask, roi, bgdModel, fgdModel, 189 3, GC_INIT_WITH_RECT + GC_INIT_WITH_MASK); 190 bitwise_and(selectedObjMask, Scalar::all(1), selectedObjMask); 191 } 192 193 frame.copyTo(selectedObjFrame, selectedObjMask); 194 return roi; 195 } 196 197 198 static int select3DBox(const string& windowname, const string& selWinName, const Mat& frame, 199 const Mat& cameraMatrix, const Mat& rvec, const Mat& tvec, 200 vector<Point3f>& box) 201 { 202 const float eps = 1e-3f; 203 MouseEvent mouse; 204 205 setMouseCallback(windowname, onMouse, &mouse); 206 vector<Point3f> tempobj(8); 207 vector<Point2f> imgpt(4), tempimg(8); 208 vector<Point> temphull; 209 int nobjpt = 0; 210 Mat R, selectedObjMask, selectedObjFrame, shownFrame; 211 Rodrigues(rvec, R); 212 box.resize(4); 213 214 for(;;) 215 { 216 float Z = 0.f; 217 bool dragging = (mouse.buttonState & EVENT_FLAG_LBUTTON) != 0; 218 int npt = nobjpt; 219 220 if( (mouse.event == EVENT_LBUTTONDOWN || 221 mouse.event == EVENT_LBUTTONUP || 222 dragging) && nobjpt < 4 ) 223 { 224 Point2f m = mouse.pt; 225 226 if( nobjpt < 2 ) 227 imgpt[npt] = m; 228 else 229 { 230 tempobj.resize(1); 231 int nearestIdx = npt-1; 232 if( nobjpt == 3 ) 233 { 234 nearestIdx = 0; 235 for( int i = 1; i < npt; i++ ) 236 if( norm(m - imgpt[i]) < norm(m - imgpt[nearestIdx]) ) 237 nearestIdx = i; 238 } 239 240 if( npt == 2 ) 241 { 242 float dx = box[1].x - box[0].x, dy = box[1].y - box[0].y; 243 float len = 1.f/std::sqrt(dx*dx+dy*dy); 244 tempobj[0] = Point3f(dy*len + box[nearestIdx].x, 245 -dx*len + box[nearestIdx].y, 0.f); 246 } 247 else 248 tempobj[0] = Point3f(box[nearestIdx].x, box[nearestIdx].y, 1.f); 249 250 projectPoints(Mat(tempobj), rvec, tvec, cameraMatrix, Mat(), tempimg); 251 252 Point2f a = imgpt[nearestIdx], b = tempimg[0], d1 = b - a, d2 = m - a; 253 float n1 = (float)norm(d1), n2 = (float)norm(d2); 254 if( n1*n2 < eps ) 255 imgpt[npt] = a; 256 else 257 { 258 Z = d1.dot(d2)/(n1*n1); 259 imgpt[npt] = d1*Z + a; 260 } 261 } 262 box[npt] = image2plane(imgpt[npt], R, tvec, cameraMatrix, npt<3 ? 0 : Z); 263 264 if( (npt == 0 && mouse.event == EVENT_LBUTTONDOWN) || 265 (npt > 0 && norm(box[npt] - box[npt-1]) > eps && 266 mouse.event == EVENT_LBUTTONUP) ) 267 { 268 nobjpt++; 269 if( nobjpt < 4 ) 270 { 271 imgpt[nobjpt] = imgpt[nobjpt-1]; 272 box[nobjpt] = box[nobjpt-1]; 273 } 274 } 275 276 // reset the event 277 mouse.event = -1; 278 //mouse.buttonState = 0; 279 npt++; 280 } 281 282 frame.copyTo(shownFrame); 283 extract3DBox(frame, shownFrame, selectedObjFrame, 284 cameraMatrix, rvec, tvec, box, npt, false); 285 imshow(windowname, shownFrame); 286 imshow(selWinName, selectedObjFrame); 287 288 int c = waitKey(30); 289 if( (c & 255) == 27 ) 290 { 291 nobjpt = 0; 292 } 293 if( c == 'q' || c == 'Q' || c == ' ' ) 294 { 295 box.clear(); 296 return c == ' ' ? -1 : -100; 297 } 298 if( (c == '\r' || c == '\n') && nobjpt == 4 && box[3].z != 0 ) 299 return 1; 300 } 301 } 302 303 304 static bool readModelViews( const string& filename, vector<Point3f>& box, 305 vector<string>& imagelist, 306 vector<Rect>& roiList, vector<Vec6f>& poseList ) 307 { 308 imagelist.resize(0); 309 roiList.resize(0); 310 poseList.resize(0); 311 box.resize(0); 312 313 FileStorage fs(filename, FileStorage::READ); 314 if( !fs.isOpened() ) 315 return false; 316 fs["box"] >> box; 317 318 FileNode all = fs["views"]; 319 if( all.type() != FileNode::SEQ ) 320 return false; 321 FileNodeIterator it = all.begin(), it_end = all.end(); 322 323 for(; it != it_end; ++it) 324 { 325 FileNode n = *it; 326 imagelist.push_back((string)n["image"]); 327 FileNode nr = n["rect"]; 328 roiList.push_back(Rect((int)nr[0], (int)nr[1], (int)nr[2], (int)nr[3])); 329 FileNode np = n["pose"]; 330 poseList.push_back(Vec6f((float)np[0], (float)np[1], (float)np[2], 331 (float)np[3], (float)np[4], (float)np[5])); 332 } 333 334 return true; 335 } 336 337 338 static bool writeModelViews(const string& filename, const vector<Point3f>& box, 339 const vector<string>& imagelist, 340 const vector<Rect>& roiList, 341 const vector<Vec6f>& poseList) 342 { 343 FileStorage fs(filename, FileStorage::WRITE); 344 if( !fs.isOpened() ) 345 return false; 346 347 fs << "box" << "[:"; 348 fs << box << "]" << "views" << "["; 349 350 size_t i, nviews = imagelist.size(); 351 352 CV_Assert( nviews == roiList.size() && nviews == poseList.size() ); 353 354 for( i = 0; i < nviews; i++ ) 355 { 356 Rect r = roiList[i]; 357 Vec6f p = poseList[i]; 358 359 fs << "{" << "image" << imagelist[i] << 360 "roi" << "[:" << r.x << r.y << r.width << r.height << "]" << 361 "pose" << "[:" << p[0] << p[1] << p[2] << p[3] << p[4] << p[5] << "]" << "}"; 362 } 363 fs << "]"; 364 365 return true; 366 } 367 368 369 static bool readStringList( const string& filename, vector<string>& l ) 370 { 371 l.resize(0); 372 FileStorage fs(filename, FileStorage::READ); 373 if( !fs.isOpened() ) 374 return false; 375 FileNode n = fs.getFirstTopLevelNode(); 376 if( n.type() != FileNode::SEQ ) 377 return false; 378 FileNodeIterator it = n.begin(), it_end = n.end(); 379 for( ; it != it_end; ++it ) 380 l.push_back((string)*it); 381 return true; 382 } 383 384 385 int main(int argc, char** argv) 386 { 387 const char* help = "Usage: select3dobj -w <board_width> -h <board_height> [-s <square_size>]\n" 388 "\t-i <intrinsics_filename> -o <output_prefix> [video_filename/cameraId]\n"; 389 const char* screen_help = 390 "Actions: \n" 391 "\tSelect object as 3D box with the mouse. That's it\n" 392 "\tESC - Reset the selection\n" 393 "\tSPACE - Skip the frame; move to the next frame (not in video mode)\n" 394 "\tENTER - Confirm the selection. Grab next object in video mode.\n" 395 "\tq - Exit the program\n"; 396 397 if(argc < 5) 398 { 399 puts(helphelp); 400 puts(help); 401 return 0; 402 } 403 const char* intrinsicsFilename = 0; 404 const char* outprefix = 0; 405 const char* inputName = 0; 406 int cameraId = 0; 407 Size boardSize; 408 double squareSize = 1; 409 vector<string> imageList; 410 411 for( int i = 1; i < argc; i++ ) 412 { 413 if( strcmp(argv[i], "-i") == 0 ) 414 intrinsicsFilename = argv[++i]; 415 else if( strcmp(argv[i], "-o") == 0 ) 416 outprefix = argv[++i]; 417 else if( strcmp(argv[i], "-w") == 0 ) 418 { 419 if(sscanf(argv[++i], "%d", &boardSize.width) != 1 || boardSize.width <= 0) 420 { 421 printf("Incorrect -w parameter (must be a positive integer)\n"); 422 puts(help); 423 return 0; 424 } 425 } 426 else if( strcmp(argv[i], "-h") == 0 ) 427 { 428 if(sscanf(argv[++i], "%d", &boardSize.height) != 1 || boardSize.height <= 0) 429 { 430 printf("Incorrect -h parameter (must be a positive integer)\n"); 431 puts(help); 432 return 0; 433 } 434 } 435 else if( strcmp(argv[i], "-s") == 0 ) 436 { 437 if(sscanf(argv[++i], "%lf", &squareSize) != 1 || squareSize <= 0) 438 { 439 printf("Incorrect -w parameter (must be a positive real number)\n"); 440 puts(help); 441 return 0; 442 } 443 } 444 else if( argv[i][0] != '-' ) 445 { 446 if( isdigit(argv[i][0])) 447 sscanf(argv[i], "%d", &cameraId); 448 else 449 inputName = argv[i]; 450 } 451 else 452 { 453 printf("Incorrect option\n"); 454 puts(help); 455 return 0; 456 } 457 } 458 459 if( !intrinsicsFilename || !outprefix || 460 boardSize.width <= 0 || boardSize.height <= 0 ) 461 { 462 printf("Some of the required parameters are missing\n"); 463 puts(help); 464 return 0; 465 } 466 467 Mat cameraMatrix, distCoeffs; 468 Size calibratedImageSize; 469 readCameraMatrix(intrinsicsFilename, cameraMatrix, distCoeffs, calibratedImageSize ); 470 471 VideoCapture capture; 472 if( inputName ) 473 { 474 if( !readStringList(inputName, imageList) && 475 !capture.open(inputName)) 476 { 477 fprintf( stderr, "The input file could not be opened\n" ); 478 return -1; 479 } 480 } 481 else 482 capture.open(cameraId); 483 484 if( !capture.isOpened() && imageList.empty() ) 485 return fprintf( stderr, "Could not initialize video capture\n" ), -2; 486 487 const char* outbarename = 0; 488 { 489 outbarename = strrchr(outprefix, '/'); 490 const char* tmp = strrchr(outprefix, '\\'); 491 char cmd[1000]; 492 sprintf(cmd, "mkdir %s", outprefix); 493 if( tmp && tmp > outbarename ) 494 outbarename = tmp; 495 if( outbarename ) 496 { 497 cmd[6 + outbarename - outprefix] = '\0'; 498 int result = system(cmd); 499 CV_Assert(result == 0); 500 outbarename++; 501 } 502 else 503 outbarename = outprefix; 504 } 505 506 Mat frame, shownFrame, selectedObjFrame, mapxy; 507 508 namedWindow("View", 1); 509 namedWindow("Selected Object", 1); 510 setMouseCallback("View", onMouse, 0); 511 bool boardFound = false; 512 513 string indexFilename = format("%s_index.yml", outprefix); 514 515 vector<string> capturedImgList; 516 vector<Rect> roiList; 517 vector<Vec6f> poseList; 518 vector<Point3f> box, boardPoints; 519 520 readModelViews(indexFilename, box, capturedImgList, roiList, poseList); 521 calcChessboardCorners(boardSize, (float)squareSize, boardPoints); 522 int frameIdx = 0; 523 bool grabNext = !imageList.empty(); 524 525 puts(screen_help); 526 527 for(int i = 0;;i++) 528 { 529 Mat frame0; 530 if( !imageList.empty() ) 531 { 532 if( i < (int)imageList.size() ) 533 frame0 = imread(string(imageList[i]), 1); 534 } 535 else 536 capture >> frame0; 537 if( frame0.empty() ) 538 break; 539 if( frame.empty() ) 540 { 541 if( frame0.size() != calibratedImageSize ) 542 { 543 double sx = (double)frame0.cols/calibratedImageSize.width; 544 double sy = (double)frame0.rows/calibratedImageSize.height; 545 546 // adjust the camera matrix for the new resolution 547 cameraMatrix.at<double>(0,0) *= sx; 548 cameraMatrix.at<double>(0,2) *= sx; 549 cameraMatrix.at<double>(1,1) *= sy; 550 cameraMatrix.at<double>(1,2) *= sy; 551 } 552 Mat dummy; 553 initUndistortRectifyMap(cameraMatrix, distCoeffs, Mat(), 554 cameraMatrix, frame0.size(), 555 CV_32FC2, mapxy, dummy ); 556 distCoeffs = Mat::zeros(5, 1, CV_64F); 557 } 558 remap(frame0, frame, mapxy, Mat(), INTER_LINEAR); 559 vector<Point2f> foundBoardCorners; 560 boardFound = findChessboardCorners(frame, boardSize, foundBoardCorners); 561 562 Mat rvec, tvec; 563 if( boardFound ) 564 solvePnP(Mat(boardPoints), Mat(foundBoardCorners), cameraMatrix, 565 distCoeffs, rvec, tvec, false); 566 567 frame.copyTo(shownFrame); 568 drawChessboardCorners(shownFrame, boardSize, Mat(foundBoardCorners), boardFound); 569 selectedObjFrame = Mat::zeros(frame.size(), frame.type()); 570 571 if( boardFound && grabNext ) 572 { 573 if( box.empty() ) 574 { 575 int code = select3DBox("View", "Selected Object", frame, 576 cameraMatrix, rvec, tvec, box); 577 if( code == -100 ) 578 break; 579 } 580 581 if( !box.empty() ) 582 { 583 Rect r = extract3DBox(frame, shownFrame, selectedObjFrame, 584 cameraMatrix, rvec, tvec, box, 4, true); 585 if( r.area() ) 586 { 587 const int maxFrameIdx = 10000; 588 char path[1000]; 589 for(;frameIdx < maxFrameIdx;frameIdx++) 590 { 591 sprintf(path, "%s%04d.jpg", outprefix, frameIdx); 592 FILE* f = fopen(path, "rb"); 593 if( !f ) 594 break; 595 fclose(f); 596 } 597 if( frameIdx == maxFrameIdx ) 598 { 599 printf("Can not save the image as %s<...>.jpg", outprefix); 600 break; 601 } 602 imwrite(path, selectedObjFrame(r)); 603 604 capturedImgList.push_back(string(path)); 605 roiList.push_back(r); 606 607 float p[6]; 608 Mat RV(3, 1, CV_32F, p), TV(3, 1, CV_32F, p+3); 609 rvec.convertTo(RV, RV.type()); 610 tvec.convertTo(TV, TV.type()); 611 poseList.push_back(Vec6f(p[0], p[1], p[2], p[3], p[4], p[5])); 612 } 613 } 614 grabNext = !imageList.empty(); 615 } 616 617 imshow("View", shownFrame); 618 imshow("Selected Object", selectedObjFrame); 619 int c = waitKey(imageList.empty() && !box.empty() ? 30 : 300); 620 if( c == 'q' || c == 'Q' ) 621 break; 622 if( c == '\r' || c == '\n' ) 623 grabNext = true; 624 } 625 626 writeModelViews(indexFilename, box, capturedImgList, roiList, poseList); 627 return 0; 628 } 629