1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "content/browser/speech/speech_input_manager.h" 6 7 #include <map> 8 #include <string> 9 10 #include "base/lazy_instance.h" 11 #include "base/memory/ref_counted.h" 12 #include "base/synchronization/lock.h" 13 #include "base/threading/thread_restrictions.h" 14 #include "base/utf_string_conversions.h" 15 #include "chrome/browser/browser_process.h" 16 #include "chrome/browser/platform_util.h" 17 #include "chrome/browser/prefs/pref_service.h" 18 #include "chrome/browser/speech/speech_input_bubble_controller.h" 19 #include "chrome/browser/tab_contents/tab_util.h" 20 #include "chrome/common/chrome_switches.h" 21 #include "chrome/common/pref_names.h" 22 #include "content/browser/browser_thread.h" 23 #include "content/browser/speech/speech_recognizer.h" 24 #include "grit/generated_resources.h" 25 #include "media/audio/audio_manager.h" 26 #include "ui/base/l10n/l10n_util.h" 27 28 #if defined(OS_WIN) 29 #include "chrome/installer/util/wmi.h" 30 #endif 31 32 namespace speech_input { 33 34 namespace { 35 36 // Asynchronously fetches the PC and audio hardware/driver info if 37 // the user has opted into UMA. This information is sent with speech input 38 // requests to the server for identifying and improving quality issues with 39 // specific device configurations. 40 class OptionalRequestInfo 41 : public base::RefCountedThreadSafe<OptionalRequestInfo> { 42 public: 43 OptionalRequestInfo() : can_report_metrics_(false) {} 44 45 void Refresh() { 46 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 47 // UMA opt-in can be checked only from the UI thread, so switch to that. 48 BrowserThread::PostTask(BrowserThread::UI, FROM_HERE, 49 NewRunnableMethod(this, 50 &OptionalRequestInfo::CheckUMAAndGetHardwareInfo)); 51 } 52 53 void CheckUMAAndGetHardwareInfo() { 54 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 55 if (g_browser_process->local_state()->GetBoolean( 56 prefs::kMetricsReportingEnabled)) { 57 // Access potentially slow OS calls from the FILE thread. 58 BrowserThread::PostTask(BrowserThread::FILE, FROM_HERE, 59 NewRunnableMethod(this, &OptionalRequestInfo::GetHardwareInfo)); 60 } 61 } 62 63 void GetHardwareInfo() { 64 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE)); 65 base::AutoLock lock(lock_); 66 can_report_metrics_ = true; 67 #if defined(OS_WIN) 68 value_ = UTF16ToUTF8( 69 installer::WMIComputerSystem::GetModel() + L"|" + 70 AudioManager::GetAudioManager()->GetAudioInputDeviceModel()); 71 #else // defined(OS_WIN) 72 value_ = UTF16ToUTF8( 73 AudioManager::GetAudioManager()->GetAudioInputDeviceModel()); 74 #endif // defined(OS_WIN) 75 } 76 77 std::string value() { 78 base::AutoLock lock(lock_); 79 return value_; 80 } 81 82 bool can_report_metrics() { 83 base::AutoLock lock(lock_); 84 return can_report_metrics_; 85 } 86 87 private: 88 base::Lock lock_; 89 std::string value_; 90 bool can_report_metrics_; 91 92 DISALLOW_COPY_AND_ASSIGN(OptionalRequestInfo); 93 }; 94 95 class SpeechInputManagerImpl : public SpeechInputManager, 96 public SpeechInputBubbleControllerDelegate, 97 public SpeechRecognizerDelegate { 98 public: 99 // SpeechInputManager methods. 100 virtual void StartRecognition(SpeechInputManagerDelegate* delegate, 101 int caller_id, 102 int render_process_id, 103 int render_view_id, 104 const gfx::Rect& element_rect, 105 const std::string& language, 106 const std::string& grammar, 107 const std::string& origin_url); 108 virtual void CancelRecognition(int caller_id); 109 virtual void StopRecording(int caller_id); 110 virtual void CancelAllRequestsWithDelegate( 111 SpeechInputManagerDelegate* delegate); 112 113 // SpeechRecognizer::Delegate methods. 114 virtual void DidStartReceivingAudio(int caller_id); 115 virtual void SetRecognitionResult(int caller_id, 116 bool error, 117 const SpeechInputResultArray& result); 118 virtual void DidCompleteRecording(int caller_id); 119 virtual void DidCompleteRecognition(int caller_id); 120 virtual void OnRecognizerError(int caller_id, 121 SpeechRecognizer::ErrorCode error); 122 virtual void DidCompleteEnvironmentEstimation(int caller_id); 123 virtual void SetInputVolume(int caller_id, float volume, float noise_volume); 124 125 // SpeechInputBubbleController::Delegate methods. 126 virtual void InfoBubbleButtonClicked(int caller_id, 127 SpeechInputBubble::Button button); 128 virtual void InfoBubbleFocusChanged(int caller_id); 129 130 private: 131 struct SpeechInputRequest { 132 SpeechInputManagerDelegate* delegate; 133 scoped_refptr<SpeechRecognizer> recognizer; 134 bool is_active; // Set to true when recording or recognition is going on. 135 }; 136 137 // Private constructor to enforce singleton. 138 friend struct base::DefaultLazyInstanceTraits<SpeechInputManagerImpl>; 139 SpeechInputManagerImpl(); 140 virtual ~SpeechInputManagerImpl(); 141 142 bool HasPendingRequest(int caller_id) const; 143 SpeechInputManagerDelegate* GetDelegate(int caller_id) const; 144 145 void CancelRecognitionAndInformDelegate(int caller_id); 146 147 // Starts/restarts recognition for an existing request. 148 void StartRecognitionForRequest(int caller_id); 149 150 typedef std::map<int, SpeechInputRequest> SpeechRecognizerMap; 151 SpeechRecognizerMap requests_; 152 int recording_caller_id_; 153 scoped_refptr<SpeechInputBubbleController> bubble_controller_; 154 scoped_refptr<OptionalRequestInfo> optional_request_info_; 155 }; 156 157 base::LazyInstance<SpeechInputManagerImpl> g_speech_input_manager_impl( 158 base::LINKER_INITIALIZED); 159 160 } // namespace 161 162 SpeechInputManager* SpeechInputManager::Get() { 163 return g_speech_input_manager_impl.Pointer(); 164 } 165 166 void SpeechInputManager::ShowAudioInputSettings() { 167 // Since AudioManager::ShowAudioInputSettings can potentially launch external 168 // processes, do that in the FILE thread to not block the calling threads. 169 if (!BrowserThread::CurrentlyOn(BrowserThread::FILE)) { 170 BrowserThread::PostTask( 171 BrowserThread::FILE, FROM_HERE, 172 NewRunnableFunction(&SpeechInputManager::ShowAudioInputSettings)); 173 return; 174 } 175 176 DCHECK(AudioManager::GetAudioManager()->CanShowAudioInputSettings()); 177 if (AudioManager::GetAudioManager()->CanShowAudioInputSettings()) 178 AudioManager::GetAudioManager()->ShowAudioInputSettings(); 179 } 180 181 SpeechInputManagerImpl::SpeechInputManagerImpl() 182 : recording_caller_id_(0), 183 bubble_controller_(new SpeechInputBubbleController( 184 ALLOW_THIS_IN_INITIALIZER_LIST(this))) { 185 } 186 187 SpeechInputManagerImpl::~SpeechInputManagerImpl() { 188 while (requests_.begin() != requests_.end()) 189 CancelRecognition(requests_.begin()->first); 190 } 191 192 bool SpeechInputManagerImpl::HasPendingRequest(int caller_id) const { 193 return requests_.find(caller_id) != requests_.end(); 194 } 195 196 SpeechInputManagerDelegate* SpeechInputManagerImpl::GetDelegate( 197 int caller_id) const { 198 return requests_.find(caller_id)->second.delegate; 199 } 200 201 void SpeechInputManagerImpl::StartRecognition( 202 SpeechInputManagerDelegate* delegate, 203 int caller_id, 204 int render_process_id, 205 int render_view_id, 206 const gfx::Rect& element_rect, 207 const std::string& language, 208 const std::string& grammar, 209 const std::string& origin_url) { 210 DCHECK(!HasPendingRequest(caller_id)); 211 212 bubble_controller_->CreateBubble(caller_id, render_process_id, render_view_id, 213 element_rect); 214 215 if (!optional_request_info_.get()) { 216 optional_request_info_ = new OptionalRequestInfo(); 217 // Since hardware info is optional with speech input requests, we start an 218 // asynchronous fetch here and move on with recording audio. This first 219 // speech input request would send an empty string for hardware info and 220 // subsequent requests may have the hardware info available if the fetch 221 // completed before them. This way we don't end up stalling the user with 222 // a long wait and disk seeks when they click on a UI element and start 223 // speaking. 224 optional_request_info_->Refresh(); 225 } 226 227 SpeechInputRequest* request = &requests_[caller_id]; 228 request->delegate = delegate; 229 request->recognizer = new SpeechRecognizer( 230 this, caller_id, language, grammar, optional_request_info_->value(), 231 optional_request_info_->can_report_metrics() ? origin_url : ""); 232 request->is_active = false; 233 234 StartRecognitionForRequest(caller_id); 235 } 236 237 void SpeechInputManagerImpl::StartRecognitionForRequest(int caller_id) { 238 DCHECK(HasPendingRequest(caller_id)); 239 240 // If we are currently recording audio for another caller, abort that cleanly. 241 if (recording_caller_id_) 242 CancelRecognitionAndInformDelegate(recording_caller_id_); 243 244 if (!AudioManager::GetAudioManager()->HasAudioInputDevices()) { 245 bubble_controller_->SetBubbleMessage( 246 caller_id, l10n_util::GetStringUTF16(IDS_SPEECH_INPUT_NO_MIC)); 247 } else { 248 recording_caller_id_ = caller_id; 249 requests_[caller_id].is_active = true; 250 requests_[caller_id].recognizer->StartRecording(); 251 bubble_controller_->SetBubbleWarmUpMode(caller_id); 252 } 253 } 254 255 void SpeechInputManagerImpl::CancelRecognition(int caller_id) { 256 DCHECK(HasPendingRequest(caller_id)); 257 if (requests_[caller_id].is_active) 258 requests_[caller_id].recognizer->CancelRecognition(); 259 requests_.erase(caller_id); 260 if (recording_caller_id_ == caller_id) 261 recording_caller_id_ = 0; 262 bubble_controller_->CloseBubble(caller_id); 263 } 264 265 void SpeechInputManagerImpl::CancelAllRequestsWithDelegate( 266 SpeechInputManagerDelegate* delegate) { 267 SpeechRecognizerMap::iterator it = requests_.begin(); 268 while (it != requests_.end()) { 269 if (it->second.delegate == delegate) { 270 CancelRecognition(it->first); 271 // This map will have very few elements so it is simpler to restart. 272 it = requests_.begin(); 273 } else { 274 ++it; 275 } 276 } 277 } 278 279 void SpeechInputManagerImpl::StopRecording(int caller_id) { 280 DCHECK(HasPendingRequest(caller_id)); 281 requests_[caller_id].recognizer->StopRecording(); 282 } 283 284 void SpeechInputManagerImpl::SetRecognitionResult( 285 int caller_id, bool error, const SpeechInputResultArray& result) { 286 DCHECK(HasPendingRequest(caller_id)); 287 GetDelegate(caller_id)->SetRecognitionResult(caller_id, result); 288 } 289 290 void SpeechInputManagerImpl::DidCompleteRecording(int caller_id) { 291 DCHECK(recording_caller_id_ == caller_id); 292 DCHECK(HasPendingRequest(caller_id)); 293 recording_caller_id_ = 0; 294 GetDelegate(caller_id)->DidCompleteRecording(caller_id); 295 bubble_controller_->SetBubbleRecognizingMode(caller_id); 296 } 297 298 void SpeechInputManagerImpl::DidCompleteRecognition(int caller_id) { 299 GetDelegate(caller_id)->DidCompleteRecognition(caller_id); 300 requests_.erase(caller_id); 301 bubble_controller_->CloseBubble(caller_id); 302 } 303 304 void SpeechInputManagerImpl::OnRecognizerError( 305 int caller_id, SpeechRecognizer::ErrorCode error) { 306 if (caller_id == recording_caller_id_) 307 recording_caller_id_ = 0; 308 309 requests_[caller_id].is_active = false; 310 311 struct ErrorMessageMapEntry { 312 SpeechRecognizer::ErrorCode error; 313 int message_id; 314 }; 315 ErrorMessageMapEntry error_message_map[] = { 316 { 317 SpeechRecognizer::RECOGNIZER_ERROR_CAPTURE, IDS_SPEECH_INPUT_MIC_ERROR 318 }, { 319 SpeechRecognizer::RECOGNIZER_ERROR_NO_SPEECH, IDS_SPEECH_INPUT_NO_SPEECH 320 }, { 321 SpeechRecognizer::RECOGNIZER_ERROR_NO_RESULTS, IDS_SPEECH_INPUT_NO_RESULTS 322 }, { 323 SpeechRecognizer::RECOGNIZER_ERROR_NETWORK, IDS_SPEECH_INPUT_NET_ERROR 324 } 325 }; 326 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(error_message_map); ++i) { 327 if (error_message_map[i].error == error) { 328 bubble_controller_->SetBubbleMessage( 329 caller_id, 330 l10n_util::GetStringUTF16(error_message_map[i].message_id)); 331 return; 332 } 333 } 334 335 NOTREACHED() << "unknown error " << error; 336 } 337 338 void SpeechInputManagerImpl::DidStartReceivingAudio(int caller_id) { 339 DCHECK(HasPendingRequest(caller_id)); 340 DCHECK(recording_caller_id_ == caller_id); 341 bubble_controller_->SetBubbleRecordingMode(caller_id); 342 } 343 344 void SpeechInputManagerImpl::DidCompleteEnvironmentEstimation(int caller_id) { 345 DCHECK(HasPendingRequest(caller_id)); 346 DCHECK(recording_caller_id_ == caller_id); 347 } 348 349 void SpeechInputManagerImpl::SetInputVolume(int caller_id, float volume, 350 float noise_volume) { 351 DCHECK(HasPendingRequest(caller_id)); 352 DCHECK_EQ(recording_caller_id_, caller_id); 353 354 bubble_controller_->SetBubbleInputVolume(caller_id, volume, noise_volume); 355 } 356 357 void SpeechInputManagerImpl::CancelRecognitionAndInformDelegate(int caller_id) { 358 SpeechInputManagerDelegate* cur_delegate = GetDelegate(caller_id); 359 CancelRecognition(caller_id); 360 cur_delegate->DidCompleteRecording(caller_id); 361 cur_delegate->DidCompleteRecognition(caller_id); 362 } 363 364 void SpeechInputManagerImpl::InfoBubbleButtonClicked( 365 int caller_id, SpeechInputBubble::Button button) { 366 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 367 // Ignore if the caller id was not in our active recognizers list because the 368 // user might have clicked more than once, or recognition could have been 369 // cancelled due to other reasons before the user click was processed. 370 if (!HasPendingRequest(caller_id)) 371 return; 372 373 if (button == SpeechInputBubble::BUTTON_CANCEL) { 374 CancelRecognitionAndInformDelegate(caller_id); 375 } else if (button == SpeechInputBubble::BUTTON_TRY_AGAIN) { 376 StartRecognitionForRequest(caller_id); 377 } 378 } 379 380 void SpeechInputManagerImpl::InfoBubbleFocusChanged(int caller_id) { 381 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 382 // Ignore if the caller id was not in our active recognizers list because the 383 // user might have clicked more than once, or recognition could have been 384 // ended due to other reasons before the user click was processed. 385 if (HasPendingRequest(caller_id)) { 386 // If this is an ongoing recording or if we were displaying an error message 387 // to the user, abort it since user has switched focus. Otherwise 388 // recognition has started and keep that going so user can start speaking to 389 // another element while this gets the results in parallel. 390 if (recording_caller_id_ == caller_id || !requests_[caller_id].is_active) { 391 CancelRecognitionAndInformDelegate(caller_id); 392 } 393 } 394 } 395 396 } // namespace speech_input 397