1 /* 2 * libjingle 3 * Copyright 2011 Google Inc. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright notice, 9 * this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright notice, 11 * this list of conditions and the following disclaimer in the documentation 12 * and/or other materials provided with the distribution. 13 * 3. The name of the author may not be used to endorse or promote products 14 * derived from this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED 17 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 18 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO 19 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 22 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 24 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 25 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include "talk/session/media/currentspeakermonitor.h" 29 30 #include "talk/media/base/streamparams.h" 31 #include "talk/session/media/audiomonitor.h" 32 #include "webrtc/base/logging.h" 33 34 namespace cricket { 35 36 namespace { 37 const int kMaxAudioLevel = 9; 38 // To avoid overswitching, we disable switching for a period of time after a 39 // switch is done. 40 const int kDefaultMinTimeBetweenSwitches = 1000; 41 } 42 43 CurrentSpeakerMonitor::CurrentSpeakerMonitor( 44 AudioSourceContext* audio_source_context) 45 : started_(false), 46 audio_source_context_(audio_source_context), 47 current_speaker_ssrc_(0), 48 earliest_permitted_switch_time_(0), 49 min_time_between_switches_(kDefaultMinTimeBetweenSwitches) {} 50 51 CurrentSpeakerMonitor::~CurrentSpeakerMonitor() { 52 Stop(); 53 } 54 55 void CurrentSpeakerMonitor::Start() { 56 if (!started_) { 57 audio_source_context_->SignalAudioMonitor.connect( 58 this, &CurrentSpeakerMonitor::OnAudioMonitor); 59 audio_source_context_->SignalMediaStreamsUpdate.connect( 60 this, &CurrentSpeakerMonitor::OnMediaStreamsUpdate); 61 audio_source_context_->SignalMediaStreamsReset.connect( 62 this, &CurrentSpeakerMonitor::OnMediaStreamsReset); 63 64 started_ = true; 65 } 66 } 67 68 void CurrentSpeakerMonitor::Stop() { 69 if (started_) { 70 audio_source_context_->SignalAudioMonitor.disconnect(this); 71 audio_source_context_->SignalMediaStreamsUpdate.disconnect(this); 72 73 started_ = false; 74 ssrc_to_speaking_state_map_.clear(); 75 current_speaker_ssrc_ = 0; 76 earliest_permitted_switch_time_ = 0; 77 } 78 } 79 80 void CurrentSpeakerMonitor::set_min_time_between_switches( 81 uint32_t min_time_between_switches) { 82 min_time_between_switches_ = min_time_between_switches; 83 } 84 85 void CurrentSpeakerMonitor::OnAudioMonitor( 86 AudioSourceContext* audio_source_context, const AudioInfo& info) { 87 std::map<uint32_t, int> active_ssrc_to_level_map; 88 cricket::AudioInfo::StreamList::const_iterator stream_list_it; 89 for (stream_list_it = info.active_streams.begin(); 90 stream_list_it != info.active_streams.end(); ++stream_list_it) { 91 uint32_t ssrc = stream_list_it->first; 92 active_ssrc_to_level_map[ssrc] = stream_list_it->second; 93 94 // It's possible we haven't yet added this source to our map. If so, 95 // add it now with a "not speaking" state. 96 if (ssrc_to_speaking_state_map_.find(ssrc) == 97 ssrc_to_speaking_state_map_.end()) { 98 ssrc_to_speaking_state_map_[ssrc] = SS_NOT_SPEAKING; 99 } 100 } 101 102 int max_level = 0; 103 uint32_t loudest_speaker_ssrc = 0; 104 105 // Update the speaking states of all participants based on the new audio 106 // level information. Also retain loudest speaker. 107 std::map<uint32_t, SpeakingState>::iterator state_it; 108 for (state_it = ssrc_to_speaking_state_map_.begin(); 109 state_it != ssrc_to_speaking_state_map_.end(); ++state_it) { 110 bool is_previous_speaker = current_speaker_ssrc_ == state_it->first; 111 112 // This uses a state machine in order to gradually identify 113 // members as having started or stopped speaking. Matches the 114 // algorithm used by the hangouts js code. 115 116 std::map<uint32_t, int>::const_iterator level_it = 117 active_ssrc_to_level_map.find(state_it->first); 118 // Note that the stream map only contains streams with non-zero audio 119 // levels. 120 int level = (level_it != active_ssrc_to_level_map.end()) ? 121 level_it->second : 0; 122 switch (state_it->second) { 123 case SS_NOT_SPEAKING: 124 if (level > 0) { 125 // Reset level because we don't think they're really speaking. 126 level = 0; 127 state_it->second = SS_MIGHT_BE_SPEAKING; 128 } else { 129 // State unchanged. 130 } 131 break; 132 case SS_MIGHT_BE_SPEAKING: 133 if (level > 0) { 134 state_it->second = SS_SPEAKING; 135 } else { 136 state_it->second = SS_NOT_SPEAKING; 137 } 138 break; 139 case SS_SPEAKING: 140 if (level > 0) { 141 // State unchanged. 142 } else { 143 state_it->second = SS_WAS_SPEAKING_RECENTLY1; 144 if (is_previous_speaker) { 145 // Assume this is an inter-word silence and assign him the highest 146 // volume. 147 level = kMaxAudioLevel; 148 } 149 } 150 break; 151 case SS_WAS_SPEAKING_RECENTLY1: 152 if (level > 0) { 153 state_it->second = SS_SPEAKING; 154 } else { 155 state_it->second = SS_WAS_SPEAKING_RECENTLY2; 156 if (is_previous_speaker) { 157 // Assume this is an inter-word silence and assign him the highest 158 // volume. 159 level = kMaxAudioLevel; 160 } 161 } 162 break; 163 case SS_WAS_SPEAKING_RECENTLY2: 164 if (level > 0) { 165 state_it->second = SS_SPEAKING; 166 } else { 167 state_it->second = SS_NOT_SPEAKING; 168 } 169 break; 170 } 171 172 if (level > max_level) { 173 loudest_speaker_ssrc = state_it->first; 174 max_level = level; 175 } else if (level > 0 && level == max_level && is_previous_speaker) { 176 // Favor continuity of loudest speakers if audio levels are equal. 177 loudest_speaker_ssrc = state_it->first; 178 } 179 } 180 181 // We avoid over-switching by disabling switching for a period of time after 182 // a switch is done. 183 uint32_t now = rtc::Time(); 184 if (earliest_permitted_switch_time_ <= now && 185 current_speaker_ssrc_ != loudest_speaker_ssrc) { 186 current_speaker_ssrc_ = loudest_speaker_ssrc; 187 LOG(LS_INFO) << "Current speaker changed to " << current_speaker_ssrc_; 188 earliest_permitted_switch_time_ = now + min_time_between_switches_; 189 SignalUpdate(this, current_speaker_ssrc_); 190 } 191 } 192 193 void CurrentSpeakerMonitor::OnMediaStreamsUpdate( 194 AudioSourceContext* audio_source_context, 195 const MediaStreams& added, 196 const MediaStreams& removed) { 197 if (audio_source_context == audio_source_context_) { 198 // Update the speaking state map based on added and removed streams. 199 for (std::vector<cricket::StreamParams>::const_iterator 200 it = removed.audio().begin(); it != removed.audio().end(); ++it) { 201 ssrc_to_speaking_state_map_.erase(it->first_ssrc()); 202 } 203 204 for (std::vector<cricket::StreamParams>::const_iterator 205 it = added.audio().begin(); it != added.audio().end(); ++it) { 206 ssrc_to_speaking_state_map_[it->first_ssrc()] = SS_NOT_SPEAKING; 207 } 208 } 209 } 210 211 void CurrentSpeakerMonitor::OnMediaStreamsReset( 212 AudioSourceContext* audio_source_context) { 213 if (audio_source_context == audio_source_context_) { 214 ssrc_to_speaking_state_map_.clear(); 215 } 216 } 217 218 } // namespace cricket 219