Home | History | Annotate | Download | only in media
      1 /*
      2  * libjingle
      3  * Copyright 2011 Google Inc.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions are met:
      7  *
      8  *  1. Redistributions of source code must retain the above copyright notice,
      9  *     this list of conditions and the following disclaimer.
     10  *  2. Redistributions in binary form must reproduce the above copyright notice,
     11  *     this list of conditions and the following disclaimer in the documentation
     12  *     and/or other materials provided with the distribution.
     13  *  3. The name of the author may not be used to endorse or promote products
     14  *     derived from this software without specific prior written permission.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
     17  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
     18  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
     19  * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     20  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     21  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
     22  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
     23  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
     24  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
     25  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26  */
     27 
     28 #include "talk/session/media/currentspeakermonitor.h"
     29 
     30 #include "talk/media/base/streamparams.h"
     31 #include "talk/session/media/audiomonitor.h"
     32 #include "webrtc/base/logging.h"
     33 
     34 namespace cricket {
     35 
     36 namespace {
     37 const int kMaxAudioLevel = 9;
     38 // To avoid overswitching, we disable switching for a period of time after a
     39 // switch is done.
     40 const int kDefaultMinTimeBetweenSwitches = 1000;
     41 }
     42 
     43 CurrentSpeakerMonitor::CurrentSpeakerMonitor(
     44     AudioSourceContext* audio_source_context)
     45     : started_(false),
     46       audio_source_context_(audio_source_context),
     47       current_speaker_ssrc_(0),
     48       earliest_permitted_switch_time_(0),
     49       min_time_between_switches_(kDefaultMinTimeBetweenSwitches) {}
     50 
     51 CurrentSpeakerMonitor::~CurrentSpeakerMonitor() {
     52   Stop();
     53 }
     54 
     55 void CurrentSpeakerMonitor::Start() {
     56   if (!started_) {
     57     audio_source_context_->SignalAudioMonitor.connect(
     58         this, &CurrentSpeakerMonitor::OnAudioMonitor);
     59     audio_source_context_->SignalMediaStreamsUpdate.connect(
     60         this, &CurrentSpeakerMonitor::OnMediaStreamsUpdate);
     61     audio_source_context_->SignalMediaStreamsReset.connect(
     62         this, &CurrentSpeakerMonitor::OnMediaStreamsReset);
     63 
     64     started_ = true;
     65   }
     66 }
     67 
     68 void CurrentSpeakerMonitor::Stop() {
     69   if (started_) {
     70     audio_source_context_->SignalAudioMonitor.disconnect(this);
     71     audio_source_context_->SignalMediaStreamsUpdate.disconnect(this);
     72 
     73     started_ = false;
     74     ssrc_to_speaking_state_map_.clear();
     75     current_speaker_ssrc_ = 0;
     76     earliest_permitted_switch_time_ = 0;
     77   }
     78 }
     79 
     80 void CurrentSpeakerMonitor::set_min_time_between_switches(
     81     uint32_t min_time_between_switches) {
     82   min_time_between_switches_ = min_time_between_switches;
     83 }
     84 
     85 void CurrentSpeakerMonitor::OnAudioMonitor(
     86     AudioSourceContext* audio_source_context, const AudioInfo& info) {
     87   std::map<uint32_t, int> active_ssrc_to_level_map;
     88   cricket::AudioInfo::StreamList::const_iterator stream_list_it;
     89   for (stream_list_it = info.active_streams.begin();
     90        stream_list_it != info.active_streams.end(); ++stream_list_it) {
     91     uint32_t ssrc = stream_list_it->first;
     92     active_ssrc_to_level_map[ssrc] = stream_list_it->second;
     93 
     94     // It's possible we haven't yet added this source to our map.  If so,
     95     // add it now with a "not speaking" state.
     96     if (ssrc_to_speaking_state_map_.find(ssrc) ==
     97         ssrc_to_speaking_state_map_.end()) {
     98       ssrc_to_speaking_state_map_[ssrc] = SS_NOT_SPEAKING;
     99     }
    100   }
    101 
    102   int max_level = 0;
    103   uint32_t loudest_speaker_ssrc = 0;
    104 
    105   // Update the speaking states of all participants based on the new audio
    106   // level information.  Also retain loudest speaker.
    107   std::map<uint32_t, SpeakingState>::iterator state_it;
    108   for (state_it = ssrc_to_speaking_state_map_.begin();
    109        state_it != ssrc_to_speaking_state_map_.end(); ++state_it) {
    110     bool is_previous_speaker = current_speaker_ssrc_ == state_it->first;
    111 
    112     // This uses a state machine in order to gradually identify
    113     // members as having started or stopped speaking. Matches the
    114     // algorithm used by the hangouts js code.
    115 
    116     std::map<uint32_t, int>::const_iterator level_it =
    117         active_ssrc_to_level_map.find(state_it->first);
    118     // Note that the stream map only contains streams with non-zero audio
    119     // levels.
    120     int level = (level_it != active_ssrc_to_level_map.end()) ?
    121         level_it->second : 0;
    122     switch (state_it->second) {
    123       case SS_NOT_SPEAKING:
    124         if (level > 0) {
    125           // Reset level because we don't think they're really speaking.
    126           level = 0;
    127           state_it->second = SS_MIGHT_BE_SPEAKING;
    128         } else {
    129           // State unchanged.
    130         }
    131         break;
    132       case SS_MIGHT_BE_SPEAKING:
    133         if (level > 0) {
    134           state_it->second = SS_SPEAKING;
    135         } else {
    136           state_it->second = SS_NOT_SPEAKING;
    137         }
    138         break;
    139       case SS_SPEAKING:
    140         if (level > 0) {
    141           // State unchanged.
    142         } else {
    143           state_it->second = SS_WAS_SPEAKING_RECENTLY1;
    144           if (is_previous_speaker) {
    145             // Assume this is an inter-word silence and assign him the highest
    146             // volume.
    147             level = kMaxAudioLevel;
    148           }
    149         }
    150         break;
    151       case SS_WAS_SPEAKING_RECENTLY1:
    152         if (level > 0) {
    153           state_it->second = SS_SPEAKING;
    154         } else {
    155           state_it->second = SS_WAS_SPEAKING_RECENTLY2;
    156           if (is_previous_speaker) {
    157             // Assume this is an inter-word silence and assign him the highest
    158             // volume.
    159             level = kMaxAudioLevel;
    160           }
    161         }
    162         break;
    163       case SS_WAS_SPEAKING_RECENTLY2:
    164         if (level > 0) {
    165           state_it->second = SS_SPEAKING;
    166         } else {
    167           state_it->second = SS_NOT_SPEAKING;
    168         }
    169         break;
    170     }
    171 
    172     if (level > max_level) {
    173       loudest_speaker_ssrc = state_it->first;
    174       max_level = level;
    175     } else if (level > 0 && level == max_level && is_previous_speaker) {
    176       // Favor continuity of loudest speakers if audio levels are equal.
    177       loudest_speaker_ssrc = state_it->first;
    178     }
    179   }
    180 
    181   // We avoid over-switching by disabling switching for a period of time after
    182   // a switch is done.
    183   uint32_t now = rtc::Time();
    184   if (earliest_permitted_switch_time_ <= now &&
    185       current_speaker_ssrc_ != loudest_speaker_ssrc) {
    186     current_speaker_ssrc_ = loudest_speaker_ssrc;
    187     LOG(LS_INFO) << "Current speaker changed to " << current_speaker_ssrc_;
    188     earliest_permitted_switch_time_ = now + min_time_between_switches_;
    189     SignalUpdate(this, current_speaker_ssrc_);
    190   }
    191 }
    192 
    193 void CurrentSpeakerMonitor::OnMediaStreamsUpdate(
    194     AudioSourceContext* audio_source_context,
    195     const MediaStreams& added,
    196     const MediaStreams& removed) {
    197   if (audio_source_context == audio_source_context_) {
    198     // Update the speaking state map based on added and removed streams.
    199     for (std::vector<cricket::StreamParams>::const_iterator
    200            it = removed.audio().begin(); it != removed.audio().end(); ++it) {
    201       ssrc_to_speaking_state_map_.erase(it->first_ssrc());
    202     }
    203 
    204     for (std::vector<cricket::StreamParams>::const_iterator
    205            it = added.audio().begin(); it != added.audio().end(); ++it) {
    206       ssrc_to_speaking_state_map_[it->first_ssrc()] = SS_NOT_SPEAKING;
    207     }
    208   }
    209 }
    210 
    211 void CurrentSpeakerMonitor::OnMediaStreamsReset(
    212     AudioSourceContext* audio_source_context) {
    213   if (audio_source_context == audio_source_context_) {
    214     ssrc_to_speaking_state_map_.clear();
    215   }
    216 }
    217 
    218 }  // namespace cricket
    219