1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/speech/chrome_speech_recognition_manager_delegate.h" 6 7 #include <set> 8 #include <string> 9 10 #include "base/bind.h" 11 #include "base/prefs/pref_service.h" 12 #include "base/strings/utf_string_conversions.h" 13 #include "base/synchronization/lock.h" 14 #include "base/threading/thread_restrictions.h" 15 #include "chrome/browser/browser_process.h" 16 #include "chrome/browser/extensions/extension_service.h" 17 #include "chrome/browser/profiles/profile_manager.h" 18 #include "chrome/browser/tab_contents/tab_util.h" 19 #include "chrome/common/pref_names.h" 20 #include "chrome/common/url_constants.h" 21 #include "content/public/browser/browser_thread.h" 22 #include "content/public/browser/notification_registrar.h" 23 #include "content/public/browser/notification_source.h" 24 #include "content/public/browser/notification_types.h" 25 #include "content/public/browser/render_process_host.h" 26 #include "content/public/browser/render_view_host.h" 27 #include "content/public/browser/resource_context.h" 28 #include "content/public/browser/speech_recognition_manager.h" 29 #include "content/public/browser/speech_recognition_session_config.h" 30 #include "content/public/browser/speech_recognition_session_context.h" 31 #include "content/public/browser/web_contents.h" 32 #include "content/public/common/speech_recognition_error.h" 33 #include "content/public/common/speech_recognition_result.h" 34 #include "extensions/browser/view_type_utils.h" 35 #include "net/url_request/url_request_context_getter.h" 36 37 #if defined(OS_WIN) 38 #include "chrome/installer/util/wmi.h" 39 #endif 40 41 using content::BrowserThread; 42 using content::SpeechRecognitionManager; 43 using content::WebContents; 44 45 namespace speech { 46 47 namespace { 48 49 void TabClosedCallbackOnIOThread(int render_process_id, int render_view_id) { 50 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 51 52 SpeechRecognitionManager* manager = SpeechRecognitionManager::GetInstance(); 53 // |manager| becomes NULL if a browser shutdown happens between the post of 54 // this task (from the UI thread) and this call (on the IO thread). In this 55 // case we just return. 56 if (!manager) 57 return; 58 59 manager->AbortAllSessionsForRenderView(render_process_id, render_view_id); 60 } 61 62 } // namespace 63 64 65 // Asynchronously fetches the PC and audio hardware/driver info if 66 // the user has opted into UMA. This information is sent with speech input 67 // requests to the server for identifying and improving quality issues with 68 // specific device configurations. 69 class ChromeSpeechRecognitionManagerDelegate::OptionalRequestInfo 70 : public base::RefCountedThreadSafe<OptionalRequestInfo> { 71 public: 72 OptionalRequestInfo() : can_report_metrics_(false) { 73 } 74 75 void Refresh() { 76 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 77 // UMA opt-in can be checked only from the UI thread, so switch to that. 78 BrowserThread::PostTask(BrowserThread::UI, FROM_HERE, 79 base::Bind(&OptionalRequestInfo::CheckUMAAndGetHardwareInfo, this)); 80 } 81 82 void CheckUMAAndGetHardwareInfo() { 83 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 84 // prefs::kMetricsReportingEnabled is not registered for OS_CHROMEOS. 85 #if !defined(OS_CHROMEOS) 86 if (g_browser_process->local_state()->GetBoolean( 87 prefs::kMetricsReportingEnabled)) { 88 // Access potentially slow OS calls from the FILE thread. 89 BrowserThread::PostTask(BrowserThread::FILE, FROM_HERE, 90 base::Bind(&OptionalRequestInfo::GetHardwareInfo, this)); 91 } 92 #endif 93 } 94 95 void GetHardwareInfo() { 96 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE)); 97 base::AutoLock lock(lock_); 98 can_report_metrics_ = true; 99 base::string16 device_model = 100 SpeechRecognitionManager::GetInstance()->GetAudioInputDeviceModel(); 101 #if defined(OS_WIN) 102 value_ = UTF16ToUTF8( 103 installer::WMIComputerSystem::GetModel() + L"|" + device_model); 104 #else // defined(OS_WIN) 105 value_ = UTF16ToUTF8(device_model); 106 #endif // defined(OS_WIN) 107 } 108 109 std::string value() { 110 base::AutoLock lock(lock_); 111 return value_; 112 } 113 114 bool can_report_metrics() { 115 base::AutoLock lock(lock_); 116 return can_report_metrics_; 117 } 118 119 private: 120 friend class base::RefCountedThreadSafe<OptionalRequestInfo>; 121 122 ~OptionalRequestInfo() {} 123 124 base::Lock lock_; 125 std::string value_; 126 bool can_report_metrics_; 127 128 DISALLOW_COPY_AND_ASSIGN(OptionalRequestInfo); 129 }; 130 131 // Simple utility to get notified when a WebContent (a tab or an extension's 132 // background page) is closed or crashes. The callback will always be called on 133 // the UI thread. 134 // There is no restriction on the constructor, however this class must be 135 // destroyed on the UI thread, due to the NotificationRegistrar dependency. 136 class ChromeSpeechRecognitionManagerDelegate::TabWatcher 137 : public base::RefCountedThreadSafe<TabWatcher>, 138 public content::NotificationObserver { 139 public: 140 typedef base::Callback<void(int render_process_id, int render_view_id)> 141 TabClosedCallback; 142 143 explicit TabWatcher(TabClosedCallback tab_closed_callback) 144 : tab_closed_callback_(tab_closed_callback) { 145 } 146 147 // Starts monitoring the WebContents corresponding to the given 148 // |render_process_id|, |render_view_id| pair, invoking |tab_closed_callback_| 149 // if closed/unloaded. 150 void Watch(int render_process_id, int render_view_id) { 151 if (!BrowserThread::CurrentlyOn(BrowserThread::UI)) { 152 BrowserThread::PostTask(BrowserThread::UI, FROM_HERE, base::Bind( 153 &TabWatcher::Watch, this, render_process_id, render_view_id)); 154 return; 155 } 156 157 WebContents* web_contents = tab_util::GetWebContentsByID(render_process_id, 158 render_view_id); 159 // Sessions initiated by speech input extension APIs will end up in a NULL 160 // WebContent here, but they are properly managed by the 161 // chrome::SpeechInputExtensionManager. However, sessions initiated within a 162 // extension using the (new) speech JS APIs, will be properly handled here. 163 // TODO(primiano) turn this line into a DCHECK once speech input extension 164 // API is deprecated. 165 if (!web_contents) 166 return; 167 168 // Avoid multiple registrations on |registrar_| for the same |web_contents|. 169 if (FindWebContents(web_contents) != registered_web_contents_.end()) { 170 return; 171 } 172 registered_web_contents_.push_back( 173 WebContentsInfo(web_contents, render_process_id, render_view_id)); 174 175 // Lazy initialize the registrar. 176 if (!registrar_.get()) 177 registrar_.reset(new content::NotificationRegistrar()); 178 179 registrar_->Add(this, 180 content::NOTIFICATION_RENDER_VIEW_HOST_CHANGED, 181 content::Source<WebContents>(web_contents)); 182 registrar_->Add(this, 183 content::NOTIFICATION_WEB_CONTENTS_DISCONNECTED, 184 content::Source<WebContents>(web_contents)); 185 } 186 187 // content::NotificationObserver implementation. 188 virtual void Observe(int type, 189 const content::NotificationSource& source, 190 const content::NotificationDetails& details) OVERRIDE { 191 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 192 DCHECK(type == content::NOTIFICATION_WEB_CONTENTS_DISCONNECTED || 193 type == content::NOTIFICATION_RENDER_VIEW_HOST_CHANGED); 194 195 WebContents* web_contents = content::Source<WebContents>(source).ptr(); 196 std::vector<WebContentsInfo>::iterator iter = FindWebContents(web_contents); 197 DCHECK(iter != registered_web_contents_.end()); 198 int render_process_id = iter->render_process_id; 199 int render_view_id = iter->render_view_id; 200 registered_web_contents_.erase(iter); 201 202 registrar_->Remove(this, 203 content::NOTIFICATION_RENDER_VIEW_HOST_CHANGED, 204 content::Source<WebContents>(web_contents)); 205 registrar_->Remove(this, 206 content::NOTIFICATION_WEB_CONTENTS_DISCONNECTED, 207 content::Source<WebContents>(web_contents)); 208 209 tab_closed_callback_.Run(render_process_id, render_view_id); 210 } 211 212 private: 213 struct WebContentsInfo { 214 WebContentsInfo(content::WebContents* web_contents, 215 int render_process_id, 216 int render_view_id) 217 : web_contents(web_contents), 218 render_process_id(render_process_id), 219 render_view_id(render_view_id) {} 220 221 ~WebContentsInfo() {} 222 223 content::WebContents* web_contents; 224 int render_process_id; 225 int render_view_id; 226 }; 227 228 friend class base::RefCountedThreadSafe<TabWatcher>; 229 230 virtual ~TabWatcher() { 231 // Must be destroyed on the UI thread due to |registrar_| non thread-safety. 232 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 233 } 234 235 // Helper function to find the iterator in |registered_web_contents_| which 236 // contains |web_contents|. 237 std::vector<WebContentsInfo>::iterator FindWebContents( 238 content::WebContents* web_contents) { 239 for (std::vector<WebContentsInfo>::iterator i( 240 registered_web_contents_.begin()); 241 i != registered_web_contents_.end(); ++i) { 242 if (i->web_contents == web_contents) 243 return i; 244 } 245 246 return registered_web_contents_.end(); 247 } 248 249 // Lazy-initialized and used on the UI thread to handle web contents 250 // notifications (tab closing). 251 scoped_ptr<content::NotificationRegistrar> registrar_; 252 253 // Keeps track of which WebContent(s) have been registered, in order to avoid 254 // double registrations on |registrar_| and to pass the correct render 255 // process id and render view id to |tab_closed_callback_| after the process 256 // has gone away. 257 std::vector<WebContentsInfo> registered_web_contents_; 258 259 // Callback used to notify, on the thread specified by |callback_thread_| the 260 // closure of a registered tab. 261 TabClosedCallback tab_closed_callback_; 262 263 DISALLOW_COPY_AND_ASSIGN(TabWatcher); 264 }; 265 266 ChromeSpeechRecognitionManagerDelegate 267 ::ChromeSpeechRecognitionManagerDelegate() { 268 } 269 270 ChromeSpeechRecognitionManagerDelegate 271 ::~ChromeSpeechRecognitionManagerDelegate() { 272 } 273 274 void ChromeSpeechRecognitionManagerDelegate::TabClosedCallback( 275 int render_process_id, int render_view_id) { 276 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 277 278 // Tell the S.R. Manager (which lives on the IO thread) to abort all the 279 // sessions for the given renderer view. 280 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, base::Bind( 281 &TabClosedCallbackOnIOThread, render_process_id, render_view_id)); 282 } 283 284 void ChromeSpeechRecognitionManagerDelegate::OnRecognitionStart( 285 int session_id) { 286 const content::SpeechRecognitionSessionContext& context = 287 SpeechRecognitionManager::GetInstance()->GetSessionContext(session_id); 288 289 // Register callback to auto abort session on tab closure. 290 // |tab_watcher_| is lazyly istantiated on the first call. 291 if (!tab_watcher_.get()) { 292 tab_watcher_ = new TabWatcher( 293 base::Bind(&ChromeSpeechRecognitionManagerDelegate::TabClosedCallback, 294 base::Unretained(this))); 295 } 296 tab_watcher_->Watch(context.render_process_id, context.render_view_id); 297 } 298 299 void ChromeSpeechRecognitionManagerDelegate::OnAudioStart(int session_id) { 300 } 301 302 void ChromeSpeechRecognitionManagerDelegate::OnEnvironmentEstimationComplete( 303 int session_id) { 304 } 305 306 void ChromeSpeechRecognitionManagerDelegate::OnSoundStart(int session_id) { 307 } 308 309 void ChromeSpeechRecognitionManagerDelegate::OnSoundEnd(int session_id) { 310 } 311 312 void ChromeSpeechRecognitionManagerDelegate::OnAudioEnd(int session_id) { 313 } 314 315 void ChromeSpeechRecognitionManagerDelegate::OnRecognitionResults( 316 int session_id, const content::SpeechRecognitionResults& result) { 317 } 318 319 void ChromeSpeechRecognitionManagerDelegate::OnRecognitionError( 320 int session_id, const content::SpeechRecognitionError& error) { 321 } 322 323 void ChromeSpeechRecognitionManagerDelegate::OnAudioLevelsChange( 324 int session_id, float volume, float noise_volume) { 325 } 326 327 void ChromeSpeechRecognitionManagerDelegate::OnRecognitionEnd(int session_id) { 328 } 329 330 void ChromeSpeechRecognitionManagerDelegate::GetDiagnosticInformation( 331 bool* can_report_metrics, 332 std::string* hardware_info) { 333 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 334 if (!optional_request_info_.get()) { 335 optional_request_info_ = new OptionalRequestInfo(); 336 // Since hardware info is optional with speech input requests, we start an 337 // asynchronous fetch here and move on with recording audio. This first 338 // speech input request would send an empty string for hardware info and 339 // subsequent requests may have the hardware info available if the fetch 340 // completed before them. This way we don't end up stalling the user with 341 // a long wait and disk seeks when they click on a UI element and start 342 // speaking. 343 optional_request_info_->Refresh(); 344 } 345 *can_report_metrics = optional_request_info_->can_report_metrics(); 346 *hardware_info = optional_request_info_->value(); 347 } 348 349 void ChromeSpeechRecognitionManagerDelegate::CheckRecognitionIsAllowed( 350 int session_id, 351 base::Callback<void(bool ask_user, bool is_allowed)> callback) { 352 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 353 354 const content::SpeechRecognitionSessionContext& context = 355 SpeechRecognitionManager::GetInstance()->GetSessionContext(session_id); 356 357 // Make sure that initiators (extensions/web pages) properly set the 358 // |render_process_id| field, which is needed later to retrieve the profile. 359 DCHECK_NE(context.render_process_id, 0); 360 361 int render_process_id = context.render_process_id; 362 int render_view_id = context.render_view_id; 363 if (context.embedder_render_process_id) { 364 // If this is a request originated from a guest, we need to re-route the 365 // permission check through the embedder (app). 366 render_process_id = context.embedder_render_process_id; 367 render_view_id = context.embedder_render_view_id; 368 } 369 370 // Check that the render view type is appropriate, and whether or not we 371 // need to request permission from the user. 372 BrowserThread::PostTask(BrowserThread::UI, FROM_HERE, 373 base::Bind(&CheckRenderViewType, 374 callback, 375 render_process_id, 376 render_view_id, 377 !context.requested_by_page_element)); 378 } 379 380 content::SpeechRecognitionEventListener* 381 ChromeSpeechRecognitionManagerDelegate::GetEventListener() { 382 return this; 383 } 384 385 bool ChromeSpeechRecognitionManagerDelegate::FilterProfanities( 386 int render_process_id) { 387 content::RenderProcessHost* rph = 388 content::RenderProcessHost::FromID(render_process_id); 389 if (!rph) // Guard against race conditions on RPH lifetime. 390 return true; 391 392 return Profile::FromBrowserContext(rph->GetBrowserContext())->GetPrefs()-> 393 GetBoolean(prefs::kSpeechRecognitionFilterProfanities); 394 } 395 396 // static. 397 void ChromeSpeechRecognitionManagerDelegate::CheckRenderViewType( 398 base::Callback<void(bool ask_user, bool is_allowed)> callback, 399 int render_process_id, 400 int render_view_id, 401 bool js_api) { 402 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 403 const content::RenderViewHost* render_view_host = 404 content::RenderViewHost::FromID(render_process_id, render_view_id); 405 406 bool allowed = false; 407 bool check_permission = false; 408 409 if (!render_view_host) { 410 if (!js_api) { 411 // If there is no render view, we cannot show the speech bubble, so this 412 // is not allowed. 413 allowed = false; 414 check_permission = false; 415 } else { 416 // This happens for extensions. Manifest should be checked for permission. 417 allowed = true; 418 check_permission = false; 419 } 420 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, 421 base::Bind(callback, check_permission, allowed)); 422 return; 423 } 424 425 WebContents* web_contents = WebContents::FromRenderViewHost(render_view_host); 426 427 // chrome://app-list/ uses speech recognition. 428 if (web_contents->GetCommittedWebUI() && 429 web_contents->GetLastCommittedURL().spec() == 430 chrome::kChromeUIAppListStartPageURL) { 431 allowed = true; 432 check_permission = false; 433 } 434 435 extensions::ViewType view_type = extensions::GetViewType(web_contents); 436 437 // TODO(kalman): Also enable speech bubble for extension popups 438 // (VIEW_TYPE_EXTENSION_POPUP) once popup-like control UI works properly in 439 // extensions: http://crbug.com/163851. 440 // Right now the extension popup closes and dismisses immediately on user 441 // click. 442 if (view_type == extensions::VIEW_TYPE_TAB_CONTENTS || 443 view_type == extensions::VIEW_TYPE_APP_SHELL || 444 view_type == extensions::VIEW_TYPE_VIRTUAL_KEYBOARD || 445 // Only allow requests through JavaScript API (|js_api| = true). 446 // Requests originating from html element (|js_api| = false) would want 447 // to show bubble which isn't quite intuitive from a background page. Also 448 // see todo above about issues with rendering such bubbles from extension 449 // popups. 450 (view_type == extensions::VIEW_TYPE_EXTENSION_BACKGROUND_PAGE && 451 js_api)) { 452 // If it is a tab, we can show the speech input bubble or check for 453 // permission. For apps, this means manifest would be checked for 454 // permission. 455 456 allowed = true; 457 if (js_api) 458 check_permission = true; 459 } 460 461 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, 462 base::Bind(callback, check_permission, allowed)); 463 } 464 465 } // namespace speech 466