// libjingle
// Copyright 2010 Google Inc.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//  1. Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//  2. Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//  3. The name of the author may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Declaration of abstract class VideoCapturer

#ifndef TALK_MEDIA_BASE_VIDEOCAPTURER_H_
#define TALK_MEDIA_BASE_VIDEOCAPTURER_H_

#include <string>
#include <vector>

#include "talk/media/base/mediachannel.h"
#include "talk/media/base/videoadapter.h"
#include "talk/media/base/videocommon.h"
#include "talk/media/base/videoframefactory.h"
#include "talk/media/devices/devicemanager.h"
#include "webrtc/base/basictypes.h"
#include "webrtc/base/criticalsection.h"
#include "webrtc/base/messagehandler.h"
#include "webrtc/base/rollingaccumulator.h"
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/base/sigslot.h"
#include "webrtc/base/thread.h"
#include "webrtc/base/timing.h"


namespace cricket {

class VideoProcessor;

// Current state of the capturer.
// TODO(hellner): CS_NO_DEVICE is an error code, not a capture state. Separate
//                error codes and states.
enum CaptureState {
  CS_STOPPED,    // The capturer has been stopped or hasn't started yet.
  CS_STARTING,   // The capturer is in the process of starting. Note, it may
                 // still fail to start.
  CS_RUNNING,    // The capturer has been started successfully and is now
                 // capturing.
  CS_PAUSED,     // The capturer has been paused.
  CS_FAILED,     // The capturer failed to start.
  CS_NO_DEVICE,  // The capturer has no device and consequently failed to start.
};

class VideoFrame;

struct CapturedFrame {
  static const uint32 kFrameHeaderSize = 40;  // Size from width to data_size.
  static const uint32 kUnknownDataSize = 0xFFFFFFFF;

  CapturedFrame();

  // Get the number of bytes of the frame data. If data_size is known, return
  // it directly. Otherwise, calculate the size based on width, height, and
  // fourcc. Returns true on success.
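  //
  // For example (illustrative, assuming an I420 frame, whose data size for
  // even dimensions is width * height * 3 / 2): a 640x480 I420 frame occupies
  // 640 * 480 * 3 / 2 = 460800 bytes.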
  bool GetDataSize(uint32* size) const;

  // The width and height of the captured frame could be different from those
  // of VideoFormat. Once the first frame is captured, the width, height,
  // fourcc, pixel_width, and pixel_height should remain the same across
  // frames.
  int    width;         // in number of pixels
  int    height;        // in number of pixels
  uint32 fourcc;        // compression
  uint32 pixel_width;   // width of a pixel, default is 1
  uint32 pixel_height;  // height of a pixel, default is 1
  int64  elapsed_time;  // elapsed time since the creation of the frame
                        // source (that is, the camera), in nanoseconds.
  int64  time_stamp;    // timestamp of when the frame was captured, in Unix
                        // time with nanosecond units.
  uint32 data_size;     // number of bytes of the frame data
  int    rotation;      // rotation in degrees of the frame (0, 90, 180, 270)
  void*  data;          // pointer to the frame data. This object either
                        // allocates the memory or points to existing memory.

 private:
  DISALLOW_COPY_AND_ASSIGN(CapturedFrame);
};

// VideoCapturer is an abstract class that defines the interfaces for video
// capturing. The subclasses implement the video capturer for various types of
// capturers and various platforms.
//
// The captured frames may need to be adapted (for example, cropping).
// Video adaptation is built in and enabled by default. After a frame has
// been captured from the device, it is sent to the video adapter, then video
// processors, then out to the encoder.
//
// Programming model:
//   Create an object of a subclass of VideoCapturer
//   Initialize
//   SignalStateChange.connect()
//   SignalFrameCaptured.connect()
//   Find the capture format for Start() by either calling
//   GetSupportedFormats() and selecting one of the supported formats, or by
//   calling GetBestCaptureFormat(). See the illustrative sketch below.
//   video_adapter()->OnOutputFormatRequest(desired_encoding_format)
//   Start()
//   GetCaptureFormat() optionally
//   Stop()
//
// Assumption:
//   The Start() and Stop() methods are called by a single thread (e.g., the
//   media engine thread). Hence, the VideoCapturer subclasses don't need to be
//   thread-safe.
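//
// Illustrative sketch of the programming model above. FooCapturer, listener,
// and the Listener::On* methods are hypothetical names used only for this
// example; the format values are placeholders:
//
//   FooCapturer capturer;  // some concrete subclass of VideoCapturer
//   capturer.SignalStateChange.connect(&listener, &Listener::OnStateChange);
//   capturer.SignalFrameCaptured.connect(&listener,
//                                        &Listener::OnFrameCaptured);
//   VideoFormat desired(640, 480, VideoFormat::FpsToInterval(30), FOURCC_ANY);
//   VideoFormat best;
//   if (capturer.GetBestCaptureFormat(desired, &best)) {
//     capturer.video_adapter()->OnOutputFormatRequest(best);
//     capturer.Start(best);
//   }
//   ...
//   capturer.Stop();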
//
class VideoCapturer
    : public sigslot::has_slots<>,
      public rtc::MessageHandler {
 public:
  typedef std::vector<VideoProcessor*> VideoProcessors;

  // All signals are marshalled to |thread| or the creating thread if
  // none is provided.
  VideoCapturer();
  explicit VideoCapturer(rtc::Thread* thread);
  virtual ~VideoCapturer() {}

  // Gets the id of the underlying device, which is available after the capturer
  // is initialized. Can be used to determine if two capturers reference the
  // same device.
  const std::string& GetId() const { return id_; }

  // Get the capture formats supported by the video capturer. The supported
  // formats are non-empty after the device has been opened successfully.
  const std::vector<VideoFormat>* GetSupportedFormats() const;

  // Get the best capture format for the desired format. The best format is the
  // same as one of the supported formats except that the frame interval may be
  // different. If the application asks for 16x9 and the camera does not support
  // 16x9 HD or the application asks for 16x10, we find the closest 4x3 and then
  // crop; otherwise, we find what the application asks for. Note that we assume
  // that for HD, the desired format is always 16x9. The subclasses can override
  // the default implementation.
  // Parameters
  //   desired: the input desired format. If desired.fourcc is not kAnyFourcc,
  //            the best capture format has exactly the same fourcc. Otherwise,
  //            the best capture format uses a fourcc in GetPreferredFourccs().
  //   best_format: the output of the best capture format.
  // Returns false if there is no such best format, that is, if the desired
  // format is not supported.
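  //
  // For example (an illustrative sketch only; |capturer| is a hypothetical
  // pointer to a concrete subclass, and FOURCC_ANY is used as the "any fourcc"
  // value), asking for the supported format closest to 1280x720 at 30 fps:
  //   VideoFormat desired(1280, 720, VideoFormat::FpsToInterval(30),
  //                       FOURCC_ANY);
  //   VideoFormat best;
  //   if (capturer->GetBestCaptureFormat(desired, &best)) {
  //     // |best| is a supported format; its fourcc comes from
  //     // GetPreferredFourccs() because |desired| did not pin a fourcc.
  //   }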
  virtual bool GetBestCaptureFormat(const VideoFormat& desired,
                                    VideoFormat* best_format);

  // TODO(hellner): deprecate (make private) the Start API in favor of this one.
  //                Also remove CS_STARTING as it is implied by the return
  //                value of StartCapturing().
  bool StartCapturing(const VideoFormat& capture_format);
  // Start the video capturer with the specified capture format.
  // Parameter
  //   capture_format: The caller got this parameter by either calling
  //                   GetSupportedFormats() and selecting one of the supported
  //                   formats, or by calling GetBestCaptureFormat().
  // Return
  //   CS_STARTING:  The capturer is trying to start. Success or failure will
  //                 be notified via the |SignalStateChange| callback.
  //   CS_RUNNING:   if the capturer is started and capturing.
  //   CS_PAUSED:    Will never be returned.
  //   CS_FAILED:    if the capturer fails to start.
  //   CS_NO_DEVICE: if the capturer has no device and fails to start.
  virtual CaptureState Start(const VideoFormat& capture_format) = 0;
  // Sets the desired aspect ratio. If the capturer is capturing at another
  // aspect ratio it will crop the width or the height so that the requested
  // aspect ratio is achieved. Note that ratio_w and ratio_h do not need to be
  // relatively prime.
  void UpdateAspectRatio(int ratio_w, int ratio_h);
  void ClearAspectRatio();

  // Get the current capture format, which is set by the Start() call.
  // Note that the width and height of the captured frames may differ from the
  // capture format. For example, the capture format is HD but the captured
  // frames may be smaller than HD.
  const VideoFormat* GetCaptureFormat() const {
    return capture_format_.get();
  }

  // Pause the video capturer.
  virtual bool Pause(bool paused);
  // Stop the video capturer.
  virtual void Stop() = 0;
  // Check if the video capturer is running.
  virtual bool IsRunning() = 0;
  // Restart the video capturer with the new |capture_format|.
  // Default implementation stops and starts the capturer.
  virtual bool Restart(const VideoFormat& capture_format);
  // TODO(thorcarpenter): This behavior of keeping the camera open just to emit
  // black frames is a total hack and should be fixed.
  // When muting, produce black frames then pause the camera.
  // When unmuting, start the camera. Camera starts unmuted.
  virtual bool MuteToBlackThenPause(bool muted);
  virtual bool IsMuted() const {
    return muted_;
  }
  CaptureState capture_state() const {
    return capture_state_;
  }

  // Adds a video processor that will be applied on VideoFrames returned by
  // |SignalVideoFrame|. Multiple video processors can be added. The video
  // processors will be applied in the order they were added.
  void AddVideoProcessor(VideoProcessor* video_processor);
  // Removes |video_processor| from the list of video processors.
  // Returns false if |video_processor| was not found.
  bool RemoveVideoProcessor(VideoProcessor* video_processor);

  // Returns true if the capturer is screencasting. This can be used to
  // implement screencast specific behavior.
  virtual bool IsScreencast() const = 0;

  // Caps the VideoCapturer's format according to max_format. It can be used,
  // for example, to prevent cameras from capturing at a resolution or frame
  // rate that the capturer is capable of but does not perform satisfactorily
  // at. The capping is an upper bound for each component of the capturing
  // format. The fourcc component is ignored.
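  //
  // For example (an illustrative sketch; |capturer| is a hypothetical pointer
  // to a concrete subclass), to cap capture at 1280x720 and 30 fps regardless
  // of what the device offers:
  //   capturer->ConstrainSupportedFormats(
  //       VideoFormat(1280, 720, VideoFormat::FpsToInterval(30), FOURCC_ANY));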
  void ConstrainSupportedFormats(const VideoFormat& max_format);

  void set_enable_camera_list(bool enable_camera_list) {
    enable_camera_list_ = enable_camera_list;
  }
  bool enable_camera_list() {
    return enable_camera_list_;
  }

  // Enable scaling to ensure square pixels.
  void set_square_pixel_aspect_ratio(bool square_pixel_aspect_ratio) {
    square_pixel_aspect_ratio_ = square_pixel_aspect_ratio;
  }
  bool square_pixel_aspect_ratio() {
    return square_pixel_aspect_ratio_;
  }

  // Signal all capture state changes that are not a direct result of calling
  // Start().
  sigslot::signal2<VideoCapturer*, CaptureState> SignalStateChange;
  // Frame callbacks are multithreaded to allow disconnect and connect to be
  // called concurrently. This also ensures that it is safe to call disconnect
  // at any time, which is needed since the signal may be called from an
  // unmarshalled thread owned by the VideoCapturer.
  // Signal the captured frame to downstream.
  sigslot::signal2<VideoCapturer*, const CapturedFrame*,
                   sigslot::multi_threaded_local> SignalFrameCaptured;
  // Signal the captured and possibly adapted frame to downstream consumers
  // such as the encoder.
  sigslot::signal2<VideoCapturer*, const VideoFrame*,
                   sigslot::multi_threaded_local> SignalVideoFrame;

  const VideoProcessors& video_processors() const { return video_processors_; }

  // If 'screencast_max_pixels' is set greater than zero, screencasts will be
  // scaled to be no larger than this value.
  // If set to zero, the max pixels will be limited to the
  // Retina MacBookPro 15" resolution of 2880 x 1800.
  // For high fps, the maximum pixel limit is based on the common 24" monitor
  // resolution of 2048 x 1280.
  int screencast_max_pixels() const { return screencast_max_pixels_; }
  void set_screencast_max_pixels(int p) {
    screencast_max_pixels_ = rtc::_max(0, p);
  }

  // If true, run video adaptation. By default, video adaptation is enabled
  // and users must call video_adapter()->OnOutputFormatRequest()
  // to receive frames.
  bool enable_video_adapter() const { return enable_video_adapter_; }
  void set_enable_video_adapter(bool enable_video_adapter) {
    enable_video_adapter_ = enable_video_adapter;
  }

  CoordinatedVideoAdapter* video_adapter() { return &video_adapter_; }
  const CoordinatedVideoAdapter* video_adapter() const {
    return &video_adapter_;
  }
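
  // For example (an illustrative sketch; |capturer| is a hypothetical pointer
  // to a concrete subclass, and the format values are placeholders), asking
  // the adapter for VGA output, as described for enable_video_adapter() above:
  //   capturer->video_adapter()->OnOutputFormatRequest(
  //       VideoFormat(640, 480, VideoFormat::FpsToInterval(30), FOURCC_I420));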

  // Takes ownership.
  void set_frame_factory(VideoFrameFactory* frame_factory) {
    frame_factory_.reset(frame_factory);
  }

  // Gets statistics for tracked variables recorded since the last call to
  // GetStats. Note that calling GetStats resets any gathered data, so it
  // should be called only periodically to log statistics.
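  //
  // For example (an illustrative sketch; |capturer| is a hypothetical pointer
  // to a concrete subclass, and the VariableInfo field name is assumed),
  // periodic logging might look like:
  //   VariableInfo<int> adapt_drops, effect_drops;
  //   VariableInfo<double> frame_times;
  //   VideoFormat last_format;
  //   capturer->GetStats(&adapt_drops, &effect_drops, &frame_times,
  //                      &last_format);
  //   LOG(LS_INFO) << "Mean frame time: " << frame_times.mean;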
  void GetStats(VariableInfo<int>* adapt_drop_stats,
                VariableInfo<int>* effect_drop_stats,
                VariableInfo<double>* frame_time_stats,
                VideoFormat* last_captured_frame_format);

 protected:
  // Callback attached to SignalFrameCaptured where SignalVideoFrame is called.
  void OnFrameCaptured(VideoCapturer* video_capturer,
                       const CapturedFrame* captured_frame);
  void SetCaptureState(CaptureState state);

  // Marshals SignalStateChange onto thread_.
  void OnMessage(rtc::Message* message);

  // Subclasses override this virtual method to provide a vector of fourccs, in
  // order of preference, that are expected by the media engine.
  virtual bool GetPreferredFourccs(std::vector<uint32>* fourccs) = 0;

  // Mutators to set private attributes.
  void SetId(const std::string& id) {
    id_ = id;
  }

  void SetCaptureFormat(const VideoFormat* format) {
    capture_format_.reset(format ? new VideoFormat(*format) : NULL);
    if (capture_format_) {
      ASSERT(capture_format_->interval > 0 &&
             "Capture format expected to have positive interval.");
      // Video adapter really only cares about capture format interval.
      video_adapter_.SetInputFormat(*capture_format_);
    }
  }

  void SetSupportedFormats(const std::vector<VideoFormat>& formats);
  VideoFrameFactory* frame_factory() { return frame_factory_.get(); }

 private:
  void Construct();
  // Get the distance between the desired format and the supported format.
  // Return the max distance if they mismatch. See the implementation for
  // details.
  int64 GetFormatDistance(const VideoFormat& desired,
                          const VideoFormat& supported);

  // Convert captured frame to readable string for LOG messages.
  std::string ToString(const CapturedFrame* frame) const;

  // Applies all registered processors. If any of the processors signals that
  // the frame should be dropped, the return value will be false. Note that in
  // this case the frame should be dropped, as not all processors have been
  // applied to it.
  bool ApplyProcessors(VideoFrame* video_frame);

  // Updates filtered_supported_formats_ so that it contains the formats in
  // supported_formats_ that fulfill all applied restrictions.
  void UpdateFilteredSupportedFormats();
  // Returns true if format doesn't fulfill all applied restrictions.
  bool ShouldFilterFormat(const VideoFormat& format) const;

  void UpdateStats(const CapturedFrame* captured_frame);

  // Helper function to save statistics on the current data from a
  // RollingAccumulator into stats.
  template<class T>
  static void GetVariableSnapshot(
      const rtc::RollingAccumulator<T>& data,
      VariableInfo<T>* stats);

  rtc::Thread* thread_;
  std::string id_;
  CaptureState capture_state_;
  rtc::scoped_ptr<VideoFrameFactory> frame_factory_;
  rtc::scoped_ptr<VideoFormat> capture_format_;
  std::vector<VideoFormat> supported_formats_;
  rtc::scoped_ptr<VideoFormat> max_format_;
  std::vector<VideoFormat> filtered_supported_formats_;

  int ratio_w_;  // View resolution. e.g. 1280 x 720.
  int ratio_h_;
  bool enable_camera_list_;
  bool square_pixel_aspect_ratio_;  // Enable scaling to square pixels.
  int scaled_width_;  // Current output size from ComputeScale.
  int scaled_height_;
  int screencast_max_pixels_;  // Downscale screencasts further if requested.
  bool muted_;
  int black_frame_count_down_;

  bool enable_video_adapter_;
  CoordinatedVideoAdapter video_adapter_;

  rtc::Timing frame_length_time_reporter_;
  rtc::CriticalSection frame_stats_crit_;

  int adapt_frame_drops_;
  rtc::RollingAccumulator<int> adapt_frame_drops_data_;
  int effect_frame_drops_;
  rtc::RollingAccumulator<int> effect_frame_drops_data_;
  double previous_frame_time_;
  rtc::RollingAccumulator<double> frame_time_data_;
  // The captured frame format before potential adaptation.
  VideoFormat last_captured_frame_format_;

  rtc::CriticalSection crit_;
  VideoProcessors video_processors_;

  DISALLOW_COPY_AND_ASSIGN(VideoCapturer);
};

}  // namespace cricket

#endif  // TALK_MEDIA_BASE_VIDEOCAPTURER_H_