// libjingle
// Copyright 2010 Google Inc.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//  1. Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//  2. Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//  3. The name of the author may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Implementation file of class VideoCapturer.
27 28 #include "talk/media/base/videocapturer.h" 29 30 #include <algorithm> 31 32 #if !defined(DISABLE_YUV) 33 #include "libyuv/scale_argb.h" 34 #endif 35 #include "talk/base/common.h" 36 #include "talk/base/logging.h" 37 #include "talk/base/systeminfo.h" 38 #include "talk/media/base/videoprocessor.h" 39 40 #if defined(HAVE_WEBRTC_VIDEO) 41 #include "talk/media/webrtc/webrtcvideoframe.h" 42 #endif // HAVE_WEBRTC_VIDEO 43 44 45 namespace cricket { 46 47 namespace { 48 49 // TODO(thorcarpenter): This is a BIG hack to flush the system with black 50 // frames. Frontends should coordinate to update the video state of a muted 51 // user. When all frontends to this consider removing the black frame business. 52 const int kNumBlackFramesOnMute = 30; 53 54 // MessageHandler constants. 55 enum { 56 MSG_DO_PAUSE = 0, 57 MSG_DO_UNPAUSE, 58 MSG_STATE_CHANGE 59 }; 60 61 static const int64 kMaxDistance = ~(static_cast<int64>(1) << 63); 62 static const int kYU12Penalty = 16; // Needs to be higher than MJPG index. 63 static const int kDefaultScreencastFps = 5; 64 typedef talk_base::TypedMessageData<CaptureState> StateChangeParams; 65 66 } // namespace 67 68 ///////////////////////////////////////////////////////////////////// 69 // Implementation of struct CapturedFrame 70 ///////////////////////////////////////////////////////////////////// 71 CapturedFrame::CapturedFrame() 72 : width(0), 73 height(0), 74 fourcc(0), 75 pixel_width(0), 76 pixel_height(0), 77 elapsed_time(0), 78 time_stamp(0), 79 data_size(0), 80 rotation(0), 81 data(NULL) {} 82 83 // TODO(fbarchard): Remove this function once lmimediaengine stops using it. 
84 bool CapturedFrame::GetDataSize(uint32* size) const { 85 if (!size || data_size == CapturedFrame::kUnknownDataSize) { 86 return false; 87 } 88 *size = data_size; 89 return true; 90 } 91 92 ///////////////////////////////////////////////////////////////////// 93 // Implementation of class VideoCapturer 94 ///////////////////////////////////////////////////////////////////// 95 VideoCapturer::VideoCapturer() : thread_(talk_base::Thread::Current()) { 96 Construct(); 97 } 98 99 VideoCapturer::VideoCapturer(talk_base::Thread* thread) : thread_(thread) { 100 Construct(); 101 } 102 103 void VideoCapturer::Construct() { 104 ClearAspectRatio(); 105 enable_camera_list_ = false; 106 capture_state_ = CS_STOPPED; 107 SignalFrameCaptured.connect(this, &VideoCapturer::OnFrameCaptured); 108 scaled_width_ = 0; 109 scaled_height_ = 0; 110 muted_ = false; 111 black_frame_count_down_ = kNumBlackFramesOnMute; 112 } 113 114 const std::vector<VideoFormat>* VideoCapturer::GetSupportedFormats() const { 115 return &filtered_supported_formats_; 116 } 117 118 bool VideoCapturer::StartCapturing(const VideoFormat& capture_format) { 119 CaptureState result = Start(capture_format); 120 const bool success = (result == CS_RUNNING) || (result == CS_STARTING); 121 if (!success) { 122 return false; 123 } 124 if (result == CS_RUNNING) { 125 SetCaptureState(result); 126 } 127 return true; 128 } 129 130 void VideoCapturer::UpdateAspectRatio(int ratio_w, int ratio_h) { 131 if (ratio_w == 0 || ratio_h == 0) { 132 LOG(LS_WARNING) << "UpdateAspectRatio ignored invalid ratio: " 133 << ratio_w << "x" << ratio_h; 134 return; 135 } 136 ratio_w_ = ratio_w; 137 ratio_h_ = ratio_h; 138 } 139 140 void VideoCapturer::ClearAspectRatio() { 141 ratio_w_ = 0; 142 ratio_h_ = 0; 143 } 144 145 // Override this to have more control of how your device is started/stopped. 
146 bool VideoCapturer::Pause(bool pause) { 147 if (pause) { 148 if (capture_state() == CS_PAUSED) { 149 return true; 150 } 151 bool is_running = capture_state() == CS_STARTING || 152 capture_state() == CS_RUNNING; 153 if (!is_running) { 154 LOG(LS_ERROR) << "Cannot pause a stopped camera."; 155 return false; 156 } 157 LOG(LS_INFO) << "Pausing a camera."; 158 talk_base::scoped_ptr<VideoFormat> capture_format_when_paused( 159 capture_format_ ? new VideoFormat(*capture_format_) : NULL); 160 Stop(); 161 SetCaptureState(CS_PAUSED); 162 // If you override this function be sure to restore the capture format 163 // after calling Stop(). 164 SetCaptureFormat(capture_format_when_paused.get()); 165 } else { // Unpause. 166 if (capture_state() != CS_PAUSED) { 167 LOG(LS_WARNING) << "Cannot unpause a camera that hasn't been paused."; 168 return false; 169 } 170 if (!capture_format_) { 171 LOG(LS_ERROR) << "Missing capture_format_, cannot unpause a camera."; 172 return false; 173 } 174 if (muted_) { 175 LOG(LS_WARNING) << "Camera cannot be unpaused while muted."; 176 return false; 177 } 178 LOG(LS_INFO) << "Unpausing a camera."; 179 if (!Start(*capture_format_)) { 180 LOG(LS_ERROR) << "Camera failed to start when unpausing."; 181 return false; 182 } 183 } 184 return true; 185 } 186 187 bool VideoCapturer::Restart(const VideoFormat& capture_format) { 188 if (!IsRunning()) { 189 return StartCapturing(capture_format); 190 } 191 192 if (GetCaptureFormat() != NULL && *GetCaptureFormat() == capture_format) { 193 // The reqested format is the same; nothing to do. 194 return true; 195 } 196 197 Stop(); 198 return StartCapturing(capture_format); 199 } 200 201 bool VideoCapturer::MuteToBlackThenPause(bool muted) { 202 if (muted == IsMuted()) { 203 return true; 204 } 205 206 LOG(LS_INFO) << (muted ? "Muting" : "Unmuting") << " this video capturer."; 207 muted_ = muted; // Do this before calling Pause(). 208 if (muted) { 209 // Reset black frame count down. 
210 black_frame_count_down_ = kNumBlackFramesOnMute; 211 // Following frames will be overritten with black, then the camera will be 212 // paused. 213 return true; 214 } 215 // Start the camera. 216 thread_->Clear(this, MSG_DO_PAUSE); 217 return Pause(false); 218 } 219 220 void VideoCapturer::SetSupportedFormats( 221 const std::vector<VideoFormat>& formats) { 222 supported_formats_ = formats; 223 UpdateFilteredSupportedFormats(); 224 } 225 226 bool VideoCapturer::GetBestCaptureFormat(const VideoFormat& format, 227 VideoFormat* best_format) { 228 // TODO(fbarchard): Directly support max_format. 229 UpdateFilteredSupportedFormats(); 230 const std::vector<VideoFormat>* supported_formats = GetSupportedFormats(); 231 232 if (supported_formats->empty()) { 233 return false; 234 } 235 LOG(LS_INFO) << " Capture Requested " << format.ToString(); 236 int64 best_distance = kMaxDistance; 237 std::vector<VideoFormat>::const_iterator best = supported_formats->end(); 238 std::vector<VideoFormat>::const_iterator i; 239 for (i = supported_formats->begin(); i != supported_formats->end(); ++i) { 240 int64 distance = GetFormatDistance(format, *i); 241 // TODO(fbarchard): Reduce to LS_VERBOSE if/when camera capture is 242 // relatively bug free. 
243 LOG(LS_INFO) << " Supported " << i->ToString() << " distance " << distance; 244 if (distance < best_distance) { 245 best_distance = distance; 246 best = i; 247 } 248 } 249 if (supported_formats->end() == best) { 250 LOG(LS_ERROR) << " No acceptable camera format found"; 251 return false; 252 } 253 254 if (best_format) { 255 best_format->width = best->width; 256 best_format->height = best->height; 257 best_format->fourcc = best->fourcc; 258 best_format->interval = talk_base::_max(format.interval, best->interval); 259 LOG(LS_INFO) << " Best " << best_format->ToString() << " Interval " 260 << best_format->interval << " distance " << best_distance; 261 } 262 return true; 263 } 264 265 void VideoCapturer::AddVideoProcessor(VideoProcessor* video_processor) { 266 talk_base::CritScope cs(&crit_); 267 ASSERT(std::find(video_processors_.begin(), video_processors_.end(), 268 video_processor) == video_processors_.end()); 269 video_processors_.push_back(video_processor); 270 } 271 272 bool VideoCapturer::RemoveVideoProcessor(VideoProcessor* video_processor) { 273 talk_base::CritScope cs(&crit_); 274 VideoProcessors::iterator found = std::find( 275 video_processors_.begin(), video_processors_.end(), video_processor); 276 if (found == video_processors_.end()) { 277 return false; 278 } 279 video_processors_.erase(found); 280 return true; 281 } 282 283 void VideoCapturer::ConstrainSupportedFormats(const VideoFormat& max_format) { 284 max_format_.reset(new VideoFormat(max_format)); 285 LOG(LS_VERBOSE) << " ConstrainSupportedFormats " << max_format.ToString(); 286 UpdateFilteredSupportedFormats(); 287 } 288 289 std::string VideoCapturer::ToString(const CapturedFrame* captured_frame) const { 290 std::string fourcc_name = GetFourccName(captured_frame->fourcc) + " "; 291 for (std::string::const_iterator i = fourcc_name.begin(); 292 i < fourcc_name.end(); ++i) { 293 // Test character is printable; Avoid isprint() which asserts on negatives. 
294 if (*i < 32 || *i >= 127) { 295 fourcc_name = ""; 296 break; 297 } 298 } 299 300 std::ostringstream ss; 301 ss << fourcc_name << captured_frame->width << "x" << captured_frame->height 302 << "x" << VideoFormat::IntervalToFps(captured_frame->elapsed_time); 303 return ss.str(); 304 } 305 306 void VideoCapturer::OnFrameCaptured(VideoCapturer*, 307 const CapturedFrame* captured_frame) { 308 if (muted_) { 309 if (black_frame_count_down_ == 0) { 310 thread_->Post(this, MSG_DO_PAUSE, NULL); 311 } else { 312 --black_frame_count_down_; 313 } 314 } 315 316 if (SignalVideoFrame.is_empty()) { 317 return; 318 } 319 #if defined(HAVE_WEBRTC_VIDEO) 320 #define VIDEO_FRAME_NAME WebRtcVideoFrame 321 #endif 322 #if defined(VIDEO_FRAME_NAME) 323 #if !defined(DISABLE_YUV) 324 if (IsScreencast()) { 325 int scaled_width, scaled_height; 326 int desired_screencast_fps = capture_format_.get() ? 327 VideoFormat::IntervalToFps(capture_format_->interval) : 328 kDefaultScreencastFps; 329 ComputeScale(captured_frame->width, captured_frame->height, 330 desired_screencast_fps, &scaled_width, &scaled_height); 331 332 if (scaled_width != scaled_width_ || scaled_height != scaled_height_) { 333 LOG(LS_VERBOSE) << "Scaling Screencast from " 334 << captured_frame->width << "x" 335 << captured_frame->height << " to " 336 << scaled_width << "x" << scaled_height; 337 scaled_width_ = scaled_width; 338 scaled_height_ = scaled_height; 339 } 340 if (FOURCC_ARGB == captured_frame->fourcc && 341 (scaled_width != captured_frame->height || 342 scaled_height != captured_frame->height)) { 343 CapturedFrame* scaled_frame = const_cast<CapturedFrame*>(captured_frame); 344 // Compute new width such that width * height is less than maximum but 345 // maintains original captured frame aspect ratio. 346 // Round down width to multiple of 4 so odd width won't round up beyond 347 // maximum, and so chroma channel is even width to simplify spatial 348 // resampling. 
349 libyuv::ARGBScale(reinterpret_cast<const uint8*>(captured_frame->data), 350 captured_frame->width * 4, captured_frame->width, 351 captured_frame->height, 352 reinterpret_cast<uint8*>(scaled_frame->data), 353 scaled_width * 4, scaled_width, scaled_height, 354 libyuv::kFilterBilinear); 355 scaled_frame->width = scaled_width; 356 scaled_frame->height = scaled_height; 357 scaled_frame->data_size = scaled_width * 4 * scaled_height; 358 } 359 } 360 #endif // !DISABLE_YUV 361 // Size to crop captured frame to. This adjusts the captured frames 362 // aspect ratio to match the final view aspect ratio, considering pixel 363 // aspect ratio and rotation. The final size may be scaled down by video 364 // adapter to better match ratio_w_ x ratio_h_. 365 // Note that abs() of frame height is passed in, because source may be 366 // inverted, but output will be positive. 367 int desired_width = captured_frame->width; 368 int desired_height = captured_frame->height; 369 370 // TODO(fbarchard): Improve logic to pad or crop. 371 // MJPG can crop vertically, but not horizontally. This logic disables crop. 372 // Alternatively we could pad the image with black, or implement a 2 step 373 // crop. 374 bool can_crop = true; 375 if (captured_frame->fourcc == FOURCC_MJPG) { 376 float cam_aspect = static_cast<float>(captured_frame->width) / 377 static_cast<float>(captured_frame->height); 378 float view_aspect = static_cast<float>(ratio_w_) / 379 static_cast<float>(ratio_h_); 380 can_crop = cam_aspect <= view_aspect; 381 } 382 if (can_crop && !IsScreencast()) { 383 // TODO(ronghuawu): The capturer should always produce the native 384 // resolution and the cropping should be done in downstream code. 
385 ComputeCrop(ratio_w_, ratio_h_, captured_frame->width, 386 abs(captured_frame->height), captured_frame->pixel_width, 387 captured_frame->pixel_height, captured_frame->rotation, 388 &desired_width, &desired_height); 389 } 390 391 VIDEO_FRAME_NAME i420_frame; 392 if (!i420_frame.Init(captured_frame, desired_width, desired_height)) { 393 // TODO(fbarchard): LOG more information about captured frame attributes. 394 LOG(LS_ERROR) << "Couldn't convert to I420! " 395 << "From " << ToString(captured_frame) << " To " 396 << desired_width << " x " << desired_height; 397 return; 398 } 399 if (!muted_ && !ApplyProcessors(&i420_frame)) { 400 // Processor dropped the frame. 401 return; 402 } 403 if (muted_) { 404 i420_frame.SetToBlack(); 405 } 406 SignalVideoFrame(this, &i420_frame); 407 #endif // VIDEO_FRAME_NAME 408 } 409 410 void VideoCapturer::SetCaptureState(CaptureState state) { 411 if (state == capture_state_) { 412 // Don't trigger a state changed callback if the state hasn't changed. 413 return; 414 } 415 StateChangeParams* state_params = new StateChangeParams(state); 416 capture_state_ = state; 417 thread_->Post(this, MSG_STATE_CHANGE, state_params); 418 } 419 420 void VideoCapturer::OnMessage(talk_base::Message* message) { 421 switch (message->message_id) { 422 case MSG_STATE_CHANGE: { 423 talk_base::scoped_ptr<StateChangeParams> p( 424 static_cast<StateChangeParams*>(message->pdata)); 425 SignalStateChange(this, p->data()); 426 break; 427 } 428 case MSG_DO_PAUSE: { 429 Pause(true); 430 break; 431 } 432 case MSG_DO_UNPAUSE: { 433 Pause(false); 434 break; 435 } 436 default: { 437 ASSERT(false); 438 } 439 } 440 } 441 442 // Get the distance between the supported and desired formats. 443 // Prioritization is done according to this algorithm: 444 // 1) Width closeness. If not same, we prefer wider. 445 // 2) Height closeness. If not same, we prefer higher. 446 // 3) Framerate closeness. If not same, we prefer faster. 447 // 4) Compression. 
If desired format has a specific fourcc, we need exact match; 448 // otherwise, we use preference. 449 int64 VideoCapturer::GetFormatDistance(const VideoFormat& desired, 450 const VideoFormat& supported) { 451 int64 distance = kMaxDistance; 452 453 // Check fourcc. 454 uint32 supported_fourcc = CanonicalFourCC(supported.fourcc); 455 int64 delta_fourcc = kMaxDistance; 456 if (FOURCC_ANY == desired.fourcc) { 457 // Any fourcc is OK for the desired. Use preference to find best fourcc. 458 std::vector<uint32> preferred_fourccs; 459 if (!GetPreferredFourccs(&preferred_fourccs)) { 460 return distance; 461 } 462 463 for (size_t i = 0; i < preferred_fourccs.size(); ++i) { 464 if (supported_fourcc == CanonicalFourCC(preferred_fourccs[i])) { 465 delta_fourcc = i; 466 #ifdef LINUX 467 // For HD avoid YU12 which is a software conversion and has 2 bugs 468 // b/7326348 b/6960899. Reenable when fixed. 469 if (supported.height >= 720 && (supported_fourcc == FOURCC_YU12 || 470 supported_fourcc == FOURCC_YV12)) { 471 delta_fourcc += kYU12Penalty; 472 } 473 #endif 474 break; 475 } 476 } 477 } else if (supported_fourcc == CanonicalFourCC(desired.fourcc)) { 478 delta_fourcc = 0; // Need exact match. 479 } 480 481 if (kMaxDistance == delta_fourcc) { 482 // Failed to match fourcc. 483 return distance; 484 } 485 486 // Check resolution and fps. 487 int desired_width = desired.width; 488 int desired_height = desired.height; 489 int64 delta_w = supported.width - desired_width; 490 int64 supported_fps = VideoFormat::IntervalToFps(supported.interval); 491 int64 delta_fps = 492 supported_fps - VideoFormat::IntervalToFps(desired.interval); 493 // Check height of supported height compared to height we would like it to be. 494 int64 aspect_h = 495 desired_width ? supported.width * desired_height / desired_width 496 : desired_height; 497 int64 delta_h = supported.height - aspect_h; 498 499 distance = 0; 500 // Set high penalty if the supported format is lower than the desired format. 
501 // 3x means we would prefer down to down to 3/4, than up to double. 502 // But we'd prefer up to double than down to 1/2. This is conservative, 503 // strongly avoiding going down in resolution, similar to 504 // the old method, but not completely ruling it out in extreme situations. 505 // It also ignores framerate, which is often very low at high resolutions. 506 // TODO(fbarchard): Improve logic to use weighted factors. 507 static const int kDownPenalty = -3; 508 if (delta_w < 0) { 509 delta_w = delta_w * kDownPenalty; 510 } 511 if (delta_h < 0) { 512 delta_h = delta_h * kDownPenalty; 513 } 514 // Require camera fps to be at least 80% of what is requested if resolution 515 // matches. 516 // Require camera fps to be at least 96% of what is requested, or higher, 517 // if resolution differs. 96% allows for slight variations in fps. e.g. 29.97 518 if (delta_fps < 0) { 519 int64 min_desirable_fps = delta_w ? 520 VideoFormat::IntervalToFps(desired.interval) * 29 / 30 : 521 VideoFormat::IntervalToFps(desired.interval) * 24 / 30; 522 delta_fps = -delta_fps; 523 if (supported_fps < min_desirable_fps) { 524 distance |= static_cast<int64>(1) << 62; 525 } else { 526 distance |= static_cast<int64>(1) << 15; 527 } 528 } 529 530 // 12 bits for width and height and 8 bits for fps and fourcc. 
531 distance |= 532 (delta_w << 28) | (delta_h << 16) | (delta_fps << 8) | delta_fourcc; 533 534 return distance; 535 } 536 537 bool VideoCapturer::ApplyProcessors(VideoFrame* video_frame) { 538 bool drop_frame = false; 539 talk_base::CritScope cs(&crit_); 540 for (VideoProcessors::iterator iter = video_processors_.begin(); 541 iter != video_processors_.end(); ++iter) { 542 (*iter)->OnFrame(kDummyVideoSsrc, video_frame, &drop_frame); 543 if (drop_frame) { 544 return false; 545 } 546 } 547 return true; 548 } 549 550 void VideoCapturer::UpdateFilteredSupportedFormats() { 551 filtered_supported_formats_.clear(); 552 filtered_supported_formats_ = supported_formats_; 553 if (!max_format_) { 554 return; 555 } 556 std::vector<VideoFormat>::iterator iter = filtered_supported_formats_.begin(); 557 while (iter != filtered_supported_formats_.end()) { 558 if (ShouldFilterFormat(*iter)) { 559 iter = filtered_supported_formats_.erase(iter); 560 } else { 561 ++iter; 562 } 563 } 564 if (filtered_supported_formats_.empty()) { 565 // The device only captures at resolutions higher than |max_format_| this 566 // indicates that |max_format_| should be ignored as it is better to capture 567 // at too high a resolution than to not capture at all. 568 filtered_supported_formats_ = supported_formats_; 569 } 570 } 571 572 bool VideoCapturer::ShouldFilterFormat(const VideoFormat& format) const { 573 if (!enable_camera_list_) { 574 return false; 575 } 576 return format.width > max_format_->width || 577 format.height > max_format_->height; 578 } 579 580 } // namespace cricket 581