Home | History | Annotate | Download | only in speech
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/speech/chrome_speech_recognition_manager_delegate.h"
      6 
      7 #include <set>
      8 #include <string>
      9 
     10 #include "base/bind.h"
     11 #include "base/prefs/pref_service.h"
     12 #include "base/strings/utf_string_conversions.h"
     13 #include "base/synchronization/lock.h"
     14 #include "base/threading/thread_restrictions.h"
     15 #include "chrome/browser/browser_process.h"
     16 #include "chrome/browser/profiles/profile_manager.h"
     17 #include "chrome/browser/tab_contents/tab_util.h"
     18 #include "chrome/common/pref_names.h"
     19 #include "chrome/common/url_constants.h"
     20 #include "content/public/browser/browser_thread.h"
     21 #include "content/public/browser/notification_registrar.h"
     22 #include "content/public/browser/notification_source.h"
     23 #include "content/public/browser/notification_types.h"
     24 #include "content/public/browser/render_process_host.h"
     25 #include "content/public/browser/render_view_host.h"
     26 #include "content/public/browser/resource_context.h"
     27 #include "content/public/browser/speech_recognition_manager.h"
     28 #include "content/public/browser/speech_recognition_session_config.h"
     29 #include "content/public/browser/speech_recognition_session_context.h"
     30 #include "content/public/browser/web_contents.h"
     31 #include "content/public/common/speech_recognition_error.h"
     32 #include "content/public/common/speech_recognition_result.h"
     33 #include "net/url_request/url_request_context_getter.h"
     34 
     35 #if defined(OS_WIN)
     36 #include "chrome/installer/util/wmi.h"
     37 #endif
     38 
     39 #if defined(ENABLE_EXTENSIONS)
     40 #include "chrome/browser/extensions/extension_service.h"
     41 #include "extensions/browser/view_type_utils.h"
     42 #endif
     43 
     44 using content::BrowserThread;
     45 using content::SpeechRecognitionManager;
     46 using content::WebContents;
     47 
     48 namespace speech {
     49 
     50 namespace {
     51 
     52 void TabClosedCallbackOnIOThread(int render_process_id, int render_view_id) {
     53   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
     54 
     55   SpeechRecognitionManager* manager = SpeechRecognitionManager::GetInstance();
     56   // |manager| becomes NULL if a browser shutdown happens between the post of
     57   // this task (from the UI thread) and this call (on the IO thread). In this
     58   // case we just return.
     59   if (!manager)
     60     return;
     61 
     62   manager->AbortAllSessionsForRenderView(render_process_id, render_view_id);
     63 }
     64 
     65 }  // namespace
     66 
     67 
     68 // Asynchronously fetches the PC and audio hardware/driver info if
     69 // the user has opted into UMA. This information is sent with speech input
     70 // requests to the server for identifying and improving quality issues with
     71 // specific device configurations.
     72 class ChromeSpeechRecognitionManagerDelegate::OptionalRequestInfo
     73     : public base::RefCountedThreadSafe<OptionalRequestInfo> {
     74  public:
     75   OptionalRequestInfo() : can_report_metrics_(false) {
     76   }
     77 
     78   void Refresh() {
     79     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
     80     // UMA opt-in can be checked only from the UI thread, so switch to that.
     81     BrowserThread::PostTask(BrowserThread::UI, FROM_HERE,
     82         base::Bind(&OptionalRequestInfo::CheckUMAAndGetHardwareInfo, this));
     83   }
     84 
     85   void CheckUMAAndGetHardwareInfo() {
     86     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
     87     // prefs::kMetricsReportingEnabled is not registered for OS_CHROMEOS.
     88 #if !defined(OS_CHROMEOS)
     89     if (g_browser_process->local_state()->GetBoolean(
     90         prefs::kMetricsReportingEnabled)) {
     91       // Access potentially slow OS calls from the FILE thread.
     92       BrowserThread::PostTask(BrowserThread::FILE, FROM_HERE,
     93           base::Bind(&OptionalRequestInfo::GetHardwareInfo, this));
     94     }
     95 #endif
     96   }
     97 
     98   void GetHardwareInfo() {
     99     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
    100     base::AutoLock lock(lock_);
    101     can_report_metrics_ = true;
    102     base::string16 device_model =
    103         SpeechRecognitionManager::GetInstance()->GetAudioInputDeviceModel();
    104 #if defined(OS_WIN)
    105     value_ = base::UTF16ToUTF8(
    106         installer::WMIComputerSystem::GetModel() + L"|" + device_model);
    107 #else  // defined(OS_WIN)
    108     value_ = base::UTF16ToUTF8(device_model);
    109 #endif  // defined(OS_WIN)
    110   }
    111 
    112   std::string value() {
    113     base::AutoLock lock(lock_);
    114     return value_;
    115   }
    116 
    117   bool can_report_metrics() {
    118     base::AutoLock lock(lock_);
    119     return can_report_metrics_;
    120   }
    121 
    122  private:
    123   friend class base::RefCountedThreadSafe<OptionalRequestInfo>;
    124 
    125   ~OptionalRequestInfo() {}
    126 
    127   base::Lock lock_;
    128   std::string value_;
    129   bool can_report_metrics_;
    130 
    131   DISALLOW_COPY_AND_ASSIGN(OptionalRequestInfo);
    132 };
    133 
    134 // Simple utility to get notified when a WebContent (a tab or an extension's
    135 // background page) is closed or crashes. The callback will always be called on
    136 // the UI thread.
    137 // There is no restriction on the constructor, however this class must be
    138 // destroyed on the UI thread, due to the NotificationRegistrar dependency.
    139 class ChromeSpeechRecognitionManagerDelegate::TabWatcher
    140     : public base::RefCountedThreadSafe<TabWatcher>,
    141       public content::NotificationObserver {
    142  public:
    143   typedef base::Callback<void(int render_process_id, int render_view_id)>
    144       TabClosedCallback;
    145 
    146   explicit TabWatcher(TabClosedCallback tab_closed_callback)
    147       : tab_closed_callback_(tab_closed_callback) {
    148   }
    149 
    150   // Starts monitoring the WebContents corresponding to the given
    151   // |render_process_id|, |render_view_id| pair, invoking |tab_closed_callback_|
    152   // if closed/unloaded.
    153   void Watch(int render_process_id, int render_view_id) {
    154     if (!BrowserThread::CurrentlyOn(BrowserThread::UI)) {
    155       BrowserThread::PostTask(BrowserThread::UI, FROM_HERE, base::Bind(
    156           &TabWatcher::Watch, this, render_process_id, render_view_id));
    157       return;
    158     }
    159 
    160     WebContents* web_contents = tab_util::GetWebContentsByID(render_process_id,
    161                                                              render_view_id);
    162     // Sessions initiated by speech input extension APIs will end up in a NULL
    163     // WebContent here, but they are properly managed by the
    164     // chrome::SpeechInputExtensionManager. However, sessions initiated within a
    165     // extension using the (new) speech JS APIs, will be properly handled here.
    166     // TODO(primiano) turn this line into a DCHECK once speech input extension
    167     // API is deprecated.
    168     if (!web_contents)
    169       return;
    170 
    171     // Avoid multiple registrations on |registrar_| for the same |web_contents|.
    172     if (FindWebContents(web_contents) !=  registered_web_contents_.end()) {
    173       return;
    174     }
    175     registered_web_contents_.push_back(
    176         WebContentsInfo(web_contents, render_process_id, render_view_id));
    177 
    178     // Lazy initialize the registrar.
    179     if (!registrar_.get())
    180       registrar_.reset(new content::NotificationRegistrar());
    181 
    182     registrar_->Add(this,
    183                     content::NOTIFICATION_RENDER_VIEW_HOST_CHANGED,
    184                     content::Source<WebContents>(web_contents));
    185     registrar_->Add(this,
    186                     content::NOTIFICATION_WEB_CONTENTS_DISCONNECTED,
    187                     content::Source<WebContents>(web_contents));
    188   }
    189 
    190   // content::NotificationObserver implementation.
    191   virtual void Observe(int type,
    192                        const content::NotificationSource& source,
    193                        const content::NotificationDetails& details) OVERRIDE {
    194     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    195     DCHECK(type == content::NOTIFICATION_WEB_CONTENTS_DISCONNECTED ||
    196            type == content::NOTIFICATION_RENDER_VIEW_HOST_CHANGED);
    197 
    198     WebContents* web_contents = content::Source<WebContents>(source).ptr();
    199     std::vector<WebContentsInfo>::iterator iter = FindWebContents(web_contents);
    200     DCHECK(iter != registered_web_contents_.end());
    201     int render_process_id = iter->render_process_id;
    202     int render_view_id = iter->render_view_id;
    203     registered_web_contents_.erase(iter);
    204 
    205     registrar_->Remove(this,
    206                        content::NOTIFICATION_RENDER_VIEW_HOST_CHANGED,
    207                        content::Source<WebContents>(web_contents));
    208     registrar_->Remove(this,
    209                        content::NOTIFICATION_WEB_CONTENTS_DISCONNECTED,
    210                        content::Source<WebContents>(web_contents));
    211 
    212     tab_closed_callback_.Run(render_process_id, render_view_id);
    213   }
    214 
    215  private:
    216   struct WebContentsInfo {
    217     WebContentsInfo(content::WebContents* web_contents,
    218                     int render_process_id,
    219                     int render_view_id)
    220         : web_contents(web_contents),
    221           render_process_id(render_process_id),
    222           render_view_id(render_view_id) {}
    223 
    224     ~WebContentsInfo() {}
    225 
    226     content::WebContents* web_contents;
    227     int render_process_id;
    228     int render_view_id;
    229   };
    230 
    231   friend class base::RefCountedThreadSafe<TabWatcher>;
    232 
    233   virtual ~TabWatcher() {
    234     // Must be destroyed on the UI thread due to |registrar_| non thread-safety.
    235     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    236   }
    237 
    238   // Helper function to find the iterator in |registered_web_contents_| which
    239   // contains |web_contents|.
    240   std::vector<WebContentsInfo>::iterator FindWebContents(
    241       content::WebContents* web_contents) {
    242     for (std::vector<WebContentsInfo>::iterator i(
    243          registered_web_contents_.begin());
    244          i != registered_web_contents_.end(); ++i) {
    245       if (i->web_contents == web_contents)
    246         return i;
    247     }
    248 
    249     return registered_web_contents_.end();
    250   }
    251 
    252   // Lazy-initialized and used on the UI thread to handle web contents
    253   // notifications (tab closing).
    254   scoped_ptr<content::NotificationRegistrar> registrar_;
    255 
    256   // Keeps track of which WebContent(s) have been registered, in order to avoid
    257   // double registrations on |registrar_| and to pass the correct render
    258   // process id and render view id to |tab_closed_callback_| after the process
    259   // has gone away.
    260   std::vector<WebContentsInfo> registered_web_contents_;
    261 
    262   // Callback used to notify, on the thread specified by |callback_thread_| the
    263   // closure of a registered tab.
    264   TabClosedCallback tab_closed_callback_;
    265 
    266   DISALLOW_COPY_AND_ASSIGN(TabWatcher);
    267 };
    268 
    269 ChromeSpeechRecognitionManagerDelegate
    270 ::ChromeSpeechRecognitionManagerDelegate() {
    271 }
    272 
    273 ChromeSpeechRecognitionManagerDelegate
    274 ::~ChromeSpeechRecognitionManagerDelegate() {
    275 }
    276 
    277 void ChromeSpeechRecognitionManagerDelegate::TabClosedCallback(
    278     int render_process_id, int render_view_id) {
    279   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    280 
    281   // Tell the S.R. Manager (which lives on the IO thread) to abort all the
    282   // sessions for the given renderer view.
    283   BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, base::Bind(
    284       &TabClosedCallbackOnIOThread, render_process_id, render_view_id));
    285 }
    286 
    287 void ChromeSpeechRecognitionManagerDelegate::OnRecognitionStart(
    288     int session_id) {
    289   const content::SpeechRecognitionSessionContext& context =
    290       SpeechRecognitionManager::GetInstance()->GetSessionContext(session_id);
    291 
    292   // Register callback to auto abort session on tab closure.
    293   // |tab_watcher_| is lazyly istantiated on the first call.
    294   if (!tab_watcher_.get()) {
    295     tab_watcher_ = new TabWatcher(
    296         base::Bind(&ChromeSpeechRecognitionManagerDelegate::TabClosedCallback,
    297                    base::Unretained(this)));
    298   }
    299   tab_watcher_->Watch(context.render_process_id, context.render_view_id);
    300 }
    301 
    302 void ChromeSpeechRecognitionManagerDelegate::OnAudioStart(int session_id) {
    303 }
    304 
    305 void ChromeSpeechRecognitionManagerDelegate::OnEnvironmentEstimationComplete(
    306     int session_id) {
    307 }
    308 
    309 void ChromeSpeechRecognitionManagerDelegate::OnSoundStart(int session_id) {
    310 }
    311 
    312 void ChromeSpeechRecognitionManagerDelegate::OnSoundEnd(int session_id) {
    313 }
    314 
    315 void ChromeSpeechRecognitionManagerDelegate::OnAudioEnd(int session_id) {
    316 }
    317 
    318 void ChromeSpeechRecognitionManagerDelegate::OnRecognitionResults(
    319     int session_id, const content::SpeechRecognitionResults& result) {
    320 }
    321 
    322 void ChromeSpeechRecognitionManagerDelegate::OnRecognitionError(
    323     int session_id, const content::SpeechRecognitionError& error) {
    324 }
    325 
    326 void ChromeSpeechRecognitionManagerDelegate::OnAudioLevelsChange(
    327     int session_id, float volume, float noise_volume) {
    328 }
    329 
    330 void ChromeSpeechRecognitionManagerDelegate::OnRecognitionEnd(int session_id) {
    331 }
    332 
    333 void ChromeSpeechRecognitionManagerDelegate::GetDiagnosticInformation(
    334     bool* can_report_metrics,
    335     std::string* hardware_info) {
    336   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    337   if (!optional_request_info_.get()) {
    338     optional_request_info_ = new OptionalRequestInfo();
    339     // Since hardware info is optional with speech input requests, we start an
    340     // asynchronous fetch here and move on with recording audio. This first
    341     // speech input request would send an empty string for hardware info and
    342     // subsequent requests may have the hardware info available if the fetch
    343     // completed before them. This way we don't end up stalling the user with
    344     // a long wait and disk seeks when they click on a UI element and start
    345     // speaking.
    346     optional_request_info_->Refresh();
    347   }
    348   *can_report_metrics = optional_request_info_->can_report_metrics();
    349   *hardware_info = optional_request_info_->value();
    350 }
    351 
    352 void ChromeSpeechRecognitionManagerDelegate::CheckRecognitionIsAllowed(
    353     int session_id,
    354     base::Callback<void(bool ask_user, bool is_allowed)> callback) {
    355   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    356 
    357   const content::SpeechRecognitionSessionContext& context =
    358       SpeechRecognitionManager::GetInstance()->GetSessionContext(session_id);
    359 
    360   // Make sure that initiators (extensions/web pages) properly set the
    361   // |render_process_id| field, which is needed later to retrieve the profile.
    362   DCHECK_NE(context.render_process_id, 0);
    363 
    364   int render_process_id = context.render_process_id;
    365   int render_view_id = context.render_view_id;
    366   if (context.embedder_render_process_id) {
    367     // If this is a request originated from a guest, we need to re-route the
    368     // permission check through the embedder (app).
    369     render_process_id = context.embedder_render_process_id;
    370     render_view_id = context.embedder_render_view_id;
    371   }
    372 
    373   // Check that the render view type is appropriate, and whether or not we
    374   // need to request permission from the user.
    375   BrowserThread::PostTask(BrowserThread::UI, FROM_HERE,
    376                           base::Bind(&CheckRenderViewType,
    377                                      callback,
    378                                      render_process_id,
    379                                      render_view_id));
    380 }
    381 
    382 content::SpeechRecognitionEventListener*
    383 ChromeSpeechRecognitionManagerDelegate::GetEventListener() {
    384   return this;
    385 }
    386 
    387 bool ChromeSpeechRecognitionManagerDelegate::FilterProfanities(
    388     int render_process_id) {
    389   content::RenderProcessHost* rph =
    390       content::RenderProcessHost::FromID(render_process_id);
    391   if (!rph)  // Guard against race conditions on RPH lifetime.
    392     return true;
    393 
    394   return Profile::FromBrowserContext(rph->GetBrowserContext())->GetPrefs()->
    395       GetBoolean(prefs::kSpeechRecognitionFilterProfanities);
    396 }
    397 
    398 // static.
    399 void ChromeSpeechRecognitionManagerDelegate::CheckRenderViewType(
    400     base::Callback<void(bool ask_user, bool is_allowed)> callback,
    401     int render_process_id,
    402     int render_view_id) {
    403   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    404   const content::RenderViewHost* render_view_host =
    405       content::RenderViewHost::FromID(render_process_id, render_view_id);
    406 
    407   bool allowed = false;
    408   bool check_permission = false;
    409 
    410   if (!render_view_host) {
    411     // This happens for extensions. Manifest should be checked for permission.
    412     allowed = true;
    413     check_permission = false;
    414     BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
    415                             base::Bind(callback, check_permission, allowed));
    416     return;
    417   }
    418 
    419   WebContents* web_contents = WebContents::FromRenderViewHost(render_view_host);
    420 
    421   // chrome://app-list/ uses speech recognition.
    422   if (web_contents->GetCommittedWebUI() &&
    423       web_contents->GetLastCommittedURL().spec() ==
    424       chrome::kChromeUIAppListStartPageURL) {
    425     allowed = true;
    426     check_permission = false;
    427   }
    428 
    429 #if defined(ENABLE_EXTENSIONS)
    430   extensions::ViewType view_type = extensions::GetViewType(web_contents);
    431 
    432   if (view_type == extensions::VIEW_TYPE_TAB_CONTENTS ||
    433       view_type == extensions::VIEW_TYPE_APP_WINDOW ||
    434       view_type == extensions::VIEW_TYPE_LAUNCHER_PAGE ||
    435       view_type == extensions::VIEW_TYPE_VIRTUAL_KEYBOARD ||
    436       view_type == extensions::VIEW_TYPE_EXTENSION_BACKGROUND_PAGE) {
    437     // If it is a tab, we can check for permission. For apps, this means
    438     // manifest would be checked for permission.
    439     allowed = true;
    440     check_permission = true;
    441   }
    442 #else
    443   // Otherwise this should be a regular tab contents.
    444   allowed = true;
    445   check_permission = true;
    446 #endif
    447 
    448   BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
    449                           base::Bind(callback, check_permission, allowed));
    450 }
    451 
    452 }  // namespace speech
    453