// libjingle
// Copyright 2010 Google Inc.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright notice,
//    this list of conditions and the following disclaimer in the documentation
//    and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 // 26 // Declaration of abstract class VideoCapturer 27 28 #ifndef TALK_MEDIA_BASE_VIDEOCAPTURER_H_ 29 #define TALK_MEDIA_BASE_VIDEOCAPTURER_H_ 30 31 #include <string> 32 #include <vector> 33 34 #include "talk/media/base/mediachannel.h" 35 #include "talk/media/base/videoadapter.h" 36 #include "talk/media/base/videocommon.h" 37 #include "talk/media/base/videoframefactory.h" 38 #include "talk/media/devices/devicemanager.h" 39 #include "webrtc/base/basictypes.h" 40 #include "webrtc/base/criticalsection.h" 41 #include "webrtc/base/messagehandler.h" 42 #include "webrtc/base/rollingaccumulator.h" 43 #include "webrtc/base/scoped_ptr.h" 44 #include "webrtc/base/sigslot.h" 45 #include "webrtc/base/thread.h" 46 #include "webrtc/base/timing.h" 47 48 49 namespace cricket { 50 51 class VideoProcessor; 52 53 // Current state of the capturer. 54 // TODO(hellner): CS_NO_DEVICE is an error code not a capture state. Separate 55 // error codes and states. 56 enum CaptureState { 57 CS_STOPPED, // The capturer has been stopped or hasn't started yet. 58 CS_STARTING, // The capturer is in the process of starting. Note, it may 59 // still fail to start. 60 CS_RUNNING, // The capturer has been started successfully and is now 61 // capturing. 62 CS_PAUSED, // The capturer has been paused. 63 CS_FAILED, // The capturer failed to start. 64 CS_NO_DEVICE, // The capturer has no device and consequently failed to start. 65 }; 66 67 class VideoFrame; 68 69 struct CapturedFrame { 70 static const uint32 kFrameHeaderSize = 40; // Size from width to data_size. 71 static const uint32 kUnknownDataSize = 0xFFFFFFFF; 72 73 CapturedFrame(); 74 75 // Get the number of bytes of the frame data. If data_size is known, return 76 // it directly. Otherwise, calculate the size based on width, height, and 77 // fourcc. Return true if succeeded. 78 bool GetDataSize(uint32* size) const; 79 80 // The width and height of the captured frame could be different from those 81 // of VideoFormat. 
Once the first frame is captured, the width, height, 82 // fourcc, pixel_width, and pixel_height should keep the same over frames. 83 int width; // in number of pixels 84 int height; // in number of pixels 85 uint32 fourcc; // compression 86 uint32 pixel_width; // width of a pixel, default is 1 87 uint32 pixel_height; // height of a pixel, default is 1 88 int64 elapsed_time; // elapsed time since the creation of the frame 89 // source (that is, the camera), in nanoseconds. 90 int64 time_stamp; // timestamp of when the frame was captured, in unix 91 // time with nanosecond units. 92 uint32 data_size; // number of bytes of the frame data 93 int rotation; // rotation in degrees of the frame (0, 90, 180, 270) 94 void* data; // pointer to the frame data. This object allocates the 95 // memory or points to an existing memory. 96 97 private: 98 DISALLOW_COPY_AND_ASSIGN(CapturedFrame); 99 }; 100 101 // VideoCapturer is an abstract class that defines the interfaces for video 102 // capturing. The subclasses implement the video capturer for various types of 103 // capturers and various platforms. 104 // 105 // The captured frames may need to be adapted (for example, cropping). 106 // Video adaptation is built into and enabled by default. After a frame has 107 // been captured from the device, it is sent to the video adapter, then video 108 // processors, then out to the encoder. 109 // 110 // Programming model: 111 // Create an object of a subclass of VideoCapturer 112 // Initialize 113 // SignalStateChange.connect() 114 // SignalFrameCaptured.connect() 115 // Find the capture format for Start() by either calling GetSupportedFormats() 116 // and selecting one of the supported or calling GetBestCaptureFormat(). 
117 // video_adapter()->OnOutputFormatRequest(desired_encoding_format) 118 // Start() 119 // GetCaptureFormat() optionally 120 // Stop() 121 // 122 // Assumption: 123 // The Start() and Stop() methods are called by a single thread (E.g., the 124 // media engine thread). Hence, the VideoCapture subclasses dont need to be 125 // thread safe. 126 // 127 class VideoCapturer 128 : public sigslot::has_slots<>, 129 public rtc::MessageHandler { 130 public: 131 typedef std::vector<VideoProcessor*> VideoProcessors; 132 133 // All signals are marshalled to |thread| or the creating thread if 134 // none is provided. 135 VideoCapturer(); 136 explicit VideoCapturer(rtc::Thread* thread); 137 virtual ~VideoCapturer() {} 138 139 // Gets the id of the underlying device, which is available after the capturer 140 // is initialized. Can be used to determine if two capturers reference the 141 // same device. 142 const std::string& GetId() const { return id_; } 143 144 // Get the capture formats supported by the video capturer. The supported 145 // formats are non empty after the device has been opened successfully. 146 const std::vector<VideoFormat>* GetSupportedFormats() const; 147 148 // Get the best capture format for the desired format. The best format is the 149 // same as one of the supported formats except that the frame interval may be 150 // different. If the application asks for 16x9 and the camera does not support 151 // 16x9 HD or the application asks for 16x10, we find the closest 4x3 and then 152 // crop; Otherwise, we find what the application asks for. Note that we assume 153 // that for HD, the desired format is always 16x9. The subclasses can override 154 // the default implementation. 155 // Parameters 156 // desired: the input desired format. If desired.fourcc is not kAnyFourcc, 157 // the best capture format has the exactly same fourcc. Otherwise, 158 // the best capture format uses a fourcc in GetPreferredFourccs(). 
159 // best_format: the output of the best capture format. 160 // Return false if there is no such a best format, that is, the desired format 161 // is not supported. 162 virtual bool GetBestCaptureFormat(const VideoFormat& desired, 163 VideoFormat* best_format); 164 165 // TODO(hellner): deprecate (make private) the Start API in favor of this one. 166 // Also remove CS_STARTING as it is implied by the return 167 // value of StartCapturing(). 168 bool StartCapturing(const VideoFormat& capture_format); 169 // Start the video capturer with the specified capture format. 170 // Parameter 171 // capture_format: The caller got this parameter by either calling 172 // GetSupportedFormats() and selecting one of the supported 173 // or calling GetBestCaptureFormat(). 174 // Return 175 // CS_STARTING: The capturer is trying to start. Success or failure will 176 // be notified via the |SignalStateChange| callback. 177 // CS_RUNNING: if the capturer is started and capturing. 178 // CS_PAUSED: Will never be returned. 179 // CS_FAILED: if the capturer failes to start.. 180 // CS_NO_DEVICE: if the capturer has no device and fails to start. 181 virtual CaptureState Start(const VideoFormat& capture_format) = 0; 182 // Sets the desired aspect ratio. If the capturer is capturing at another 183 // aspect ratio it will crop the width or the height so that asked for 184 // aspect ratio is acheived. Note that ratio_w and ratio_h do not need to be 185 // relatively prime. 186 void UpdateAspectRatio(int ratio_w, int ratio_h); 187 void ClearAspectRatio(); 188 189 // Get the current capture format, which is set by the Start() call. 190 // Note that the width and height of the captured frames may differ from the 191 // capture format. For example, the capture format is HD but the captured 192 // frames may be smaller than HD. 193 const VideoFormat* GetCaptureFormat() const { 194 return capture_format_.get(); 195 } 196 197 // Pause the video capturer. 
198 virtual bool Pause(bool paused); 199 // Stop the video capturer. 200 virtual void Stop() = 0; 201 // Check if the video capturer is running. 202 virtual bool IsRunning() = 0; 203 // Restart the video capturer with the new |capture_format|. 204 // Default implementation stops and starts the capturer. 205 virtual bool Restart(const VideoFormat& capture_format); 206 // TODO(thorcarpenter): This behavior of keeping the camera open just to emit 207 // black frames is a total hack and should be fixed. 208 // When muting, produce black frames then pause the camera. 209 // When unmuting, start the camera. Camera starts unmuted. 210 virtual bool MuteToBlackThenPause(bool muted); 211 virtual bool IsMuted() const { 212 return muted_; 213 } 214 CaptureState capture_state() const { 215 return capture_state_; 216 } 217 218 // Adds a video processor that will be applied on VideoFrames returned by 219 // |SignalVideoFrame|. Multiple video processors can be added. The video 220 // processors will be applied in the order they were added. 221 void AddVideoProcessor(VideoProcessor* video_processor); 222 // Removes the |video_processor| from the list of video processors or 223 // returns false. 224 bool RemoveVideoProcessor(VideoProcessor* video_processor); 225 226 // Returns true if the capturer is screencasting. This can be used to 227 // implement screencast specific behavior. 228 virtual bool IsScreencast() const = 0; 229 230 // Caps the VideoCapturer's format according to max_format. It can e.g. be 231 // used to prevent cameras from capturing at a resolution or framerate that 232 // the capturer is capable of but not performing satisfactorily at. 233 // The capping is an upper bound for each component of the capturing format. 234 // The fourcc component is ignored. 
235 void ConstrainSupportedFormats(const VideoFormat& max_format); 236 237 void set_enable_camera_list(bool enable_camera_list) { 238 enable_camera_list_ = enable_camera_list; 239 } 240 bool enable_camera_list() { 241 return enable_camera_list_; 242 } 243 244 // Enable scaling to ensure square pixels. 245 void set_square_pixel_aspect_ratio(bool square_pixel_aspect_ratio) { 246 square_pixel_aspect_ratio_ = square_pixel_aspect_ratio; 247 } 248 bool square_pixel_aspect_ratio() { 249 return square_pixel_aspect_ratio_; 250 } 251 252 // Signal all capture state changes that are not a direct result of calling 253 // Start(). 254 sigslot::signal2<VideoCapturer*, CaptureState> SignalStateChange; 255 // Frame callbacks are multithreaded to allow disconnect and connect to be 256 // called concurrently. It also ensures that it is safe to call disconnect 257 // at any time which is needed since the signal may be called from an 258 // unmarshalled thread owned by the VideoCapturer. 259 // Signal the captured frame to downstream. 260 sigslot::signal2<VideoCapturer*, const CapturedFrame*, 261 sigslot::multi_threaded_local> SignalFrameCaptured; 262 // Signal the captured and possibly adapted frame to downstream consumers 263 // such as the encoder. 264 sigslot::signal2<VideoCapturer*, const VideoFrame*, 265 sigslot::multi_threaded_local> SignalVideoFrame; 266 267 const VideoProcessors& video_processors() const { return video_processors_; } 268 269 // If 'screencast_max_pixels' is set greater than zero, screencasts will be 270 // scaled to be no larger than this value. 271 // If set to zero, the max pixels will be limited to 272 // Retina MacBookPro 15" resolution of 2880 x 1800. 273 // For high fps, maximum pixels limit is set based on common 24" monitor 274 // resolution of 2048 x 1280. 
275 int screencast_max_pixels() const { return screencast_max_pixels_; } 276 void set_screencast_max_pixels(int p) { 277 screencast_max_pixels_ = rtc::_max(0, p); 278 } 279 280 // If true, run video adaptation. By default, video adaptation is enabled 281 // and users must call video_adapter()->OnOutputFormatRequest() 282 // to receive frames. 283 bool enable_video_adapter() const { return enable_video_adapter_; } 284 void set_enable_video_adapter(bool enable_video_adapter) { 285 enable_video_adapter_ = enable_video_adapter; 286 } 287 288 CoordinatedVideoAdapter* video_adapter() { return &video_adapter_; } 289 const CoordinatedVideoAdapter* video_adapter() const { 290 return &video_adapter_; 291 } 292 293 // Takes ownership. 294 void set_frame_factory(VideoFrameFactory* frame_factory) { 295 frame_factory_.reset(frame_factory); 296 } 297 298 // Gets statistics for tracked variables recorded since the last call to 299 // GetStats. Note that calling GetStats resets any gathered data so it 300 // should be called only periodically to log statistics. 301 void GetStats(VariableInfo<int>* adapt_drop_stats, 302 VariableInfo<int>* effect_drop_stats, 303 VariableInfo<double>* frame_time_stats, 304 VideoFormat* last_captured_frame_format); 305 306 protected: 307 // Callback attached to SignalFrameCaptured where SignalVideoFrames is called. 308 void OnFrameCaptured(VideoCapturer* video_capturer, 309 const CapturedFrame* captured_frame); 310 void SetCaptureState(CaptureState state); 311 312 // Marshals SignalStateChange onto thread_. 313 void OnMessage(rtc::Message* message); 314 315 // subclasses override this virtual method to provide a vector of fourccs, in 316 // order of preference, that are expected by the media engine. 
317 virtual bool GetPreferredFourccs(std::vector<uint32>* fourccs) = 0; 318 319 // mutators to set private attributes 320 void SetId(const std::string& id) { 321 id_ = id; 322 } 323 324 void SetCaptureFormat(const VideoFormat* format) { 325 capture_format_.reset(format ? new VideoFormat(*format) : NULL); 326 if (capture_format_) { 327 ASSERT(capture_format_->interval > 0 && 328 "Capture format expected to have positive interval."); 329 // Video adapter really only cares about capture format interval. 330 video_adapter_.SetInputFormat(*capture_format_); 331 } 332 } 333 334 void SetSupportedFormats(const std::vector<VideoFormat>& formats); 335 VideoFrameFactory* frame_factory() { return frame_factory_.get(); } 336 337 private: 338 void Construct(); 339 // Get the distance between the desired format and the supported format. 340 // Return the max distance if they mismatch. See the implementation for 341 // details. 342 int64 GetFormatDistance(const VideoFormat& desired, 343 const VideoFormat& supported); 344 345 // Convert captured frame to readable string for LOG messages. 346 std::string ToString(const CapturedFrame* frame) const; 347 348 // Applies all registered processors. If any of the processors signal that 349 // the frame should be dropped the return value will be false. Note that 350 // this frame should be dropped as it has not applied all processors. 351 bool ApplyProcessors(VideoFrame* video_frame); 352 353 // Updates filtered_supported_formats_ so that it contains the formats in 354 // supported_formats_ that fulfill all applied restrictions. 355 void UpdateFilteredSupportedFormats(); 356 // Returns true if format doesn't fulfill all applied restrictions. 357 bool ShouldFilterFormat(const VideoFormat& format) const; 358 359 void UpdateStats(const CapturedFrame* captured_frame); 360 361 // Helper function to save statistics on the current data from a 362 // RollingAccumulator into stats. 
363 template<class T> 364 static void GetVariableSnapshot( 365 const rtc::RollingAccumulator<T>& data, 366 VariableInfo<T>* stats); 367 368 rtc::Thread* thread_; 369 std::string id_; 370 CaptureState capture_state_; 371 rtc::scoped_ptr<VideoFrameFactory> frame_factory_; 372 rtc::scoped_ptr<VideoFormat> capture_format_; 373 std::vector<VideoFormat> supported_formats_; 374 rtc::scoped_ptr<VideoFormat> max_format_; 375 std::vector<VideoFormat> filtered_supported_formats_; 376 377 int ratio_w_; // View resolution. e.g. 1280 x 720. 378 int ratio_h_; 379 bool enable_camera_list_; 380 bool square_pixel_aspect_ratio_; // Enable scaling to square pixels. 381 int scaled_width_; // Current output size from ComputeScale. 382 int scaled_height_; 383 int screencast_max_pixels_; // Downscale screencasts further if requested. 384 bool muted_; 385 int black_frame_count_down_; 386 387 bool enable_video_adapter_; 388 CoordinatedVideoAdapter video_adapter_; 389 390 rtc::Timing frame_length_time_reporter_; 391 rtc::CriticalSection frame_stats_crit_; 392 393 int adapt_frame_drops_; 394 rtc::RollingAccumulator<int> adapt_frame_drops_data_; 395 int effect_frame_drops_; 396 rtc::RollingAccumulator<int> effect_frame_drops_data_; 397 double previous_frame_time_; 398 rtc::RollingAccumulator<double> frame_time_data_; 399 // The captured frame format before potential adapation. 400 VideoFormat last_captured_frame_format_; 401 402 rtc::CriticalSection crit_; 403 VideoProcessors video_processors_; 404 405 DISALLOW_COPY_AND_ASSIGN(VideoCapturer); 406 }; 407 408 } // namespace cricket 409 410 #endif // TALK_MEDIA_BASE_VIDEOCAPTURER_H_ 411