Home | History | Annotate | Download | only in speech
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "content/browser/speech/speech_input_manager.h"
      6 
      7 #include <map>
      8 #include <string>
      9 
     10 #include "base/lazy_instance.h"
     11 #include "base/memory/ref_counted.h"
     12 #include "base/synchronization/lock.h"
     13 #include "base/threading/thread_restrictions.h"
     14 #include "base/utf_string_conversions.h"
     15 #include "chrome/browser/browser_process.h"
     16 #include "chrome/browser/platform_util.h"
     17 #include "chrome/browser/prefs/pref_service.h"
     18 #include "chrome/browser/speech/speech_input_bubble_controller.h"
     19 #include "chrome/browser/tab_contents/tab_util.h"
     20 #include "chrome/common/chrome_switches.h"
     21 #include "chrome/common/pref_names.h"
     22 #include "content/browser/browser_thread.h"
     23 #include "content/browser/speech/speech_recognizer.h"
     24 #include "grit/generated_resources.h"
     25 #include "media/audio/audio_manager.h"
     26 #include "ui/base/l10n/l10n_util.h"
     27 
     28 #if defined(OS_WIN)
     29 #include "chrome/installer/util/wmi.h"
     30 #endif
     31 
     32 namespace speech_input {
     33 
     34 namespace {
     35 
     36 // Asynchronously fetches the PC and audio hardware/driver info if
     37 // the user has opted into UMA. This information is sent with speech input
     38 // requests to the server for identifying and improving quality issues with
     39 // specific device configurations.
     40 class OptionalRequestInfo
     41     : public base::RefCountedThreadSafe<OptionalRequestInfo> {
     42  public:
     43   OptionalRequestInfo() : can_report_metrics_(false) {}
     44 
     45   void Refresh() {
     46     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
     47     // UMA opt-in can be checked only from the UI thread, so switch to that.
     48     BrowserThread::PostTask(BrowserThread::UI, FROM_HERE,
     49         NewRunnableMethod(this,
     50                           &OptionalRequestInfo::CheckUMAAndGetHardwareInfo));
     51   }
     52 
     53   void CheckUMAAndGetHardwareInfo() {
     54     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
     55     if (g_browser_process->local_state()->GetBoolean(
     56         prefs::kMetricsReportingEnabled)) {
     57       // Access potentially slow OS calls from the FILE thread.
     58       BrowserThread::PostTask(BrowserThread::FILE, FROM_HERE,
     59           NewRunnableMethod(this, &OptionalRequestInfo::GetHardwareInfo));
     60     }
     61   }
     62 
     63   void GetHardwareInfo() {
     64     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
     65     base::AutoLock lock(lock_);
     66     can_report_metrics_ = true;
     67 #if defined(OS_WIN)
     68     value_ = UTF16ToUTF8(
     69         installer::WMIComputerSystem::GetModel() + L"|" +
     70         AudioManager::GetAudioManager()->GetAudioInputDeviceModel());
     71 #else  // defined(OS_WIN)
     72     value_ = UTF16ToUTF8(
     73         AudioManager::GetAudioManager()->GetAudioInputDeviceModel());
     74 #endif  // defined(OS_WIN)
     75   }
     76 
     77   std::string value() {
     78     base::AutoLock lock(lock_);
     79     return value_;
     80   }
     81 
     82   bool can_report_metrics() {
     83     base::AutoLock lock(lock_);
     84     return can_report_metrics_;
     85   }
     86 
     87  private:
     88   base::Lock lock_;
     89   std::string value_;
     90   bool can_report_metrics_;
     91 
     92   DISALLOW_COPY_AND_ASSIGN(OptionalRequestInfo);
     93 };
     94 
     95 class SpeechInputManagerImpl : public SpeechInputManager,
     96                                public SpeechInputBubbleControllerDelegate,
     97                                public SpeechRecognizerDelegate {
     98  public:
     99   // SpeechInputManager methods.
    100   virtual void StartRecognition(SpeechInputManagerDelegate* delegate,
    101                                 int caller_id,
    102                                 int render_process_id,
    103                                 int render_view_id,
    104                                 const gfx::Rect& element_rect,
    105                                 const std::string& language,
    106                                 const std::string& grammar,
    107                                 const std::string& origin_url);
    108   virtual void CancelRecognition(int caller_id);
    109   virtual void StopRecording(int caller_id);
    110   virtual void CancelAllRequestsWithDelegate(
    111       SpeechInputManagerDelegate* delegate);
    112 
    113   // SpeechRecognizer::Delegate methods.
    114   virtual void DidStartReceivingAudio(int caller_id);
    115   virtual void SetRecognitionResult(int caller_id,
    116                                     bool error,
    117                                     const SpeechInputResultArray& result);
    118   virtual void DidCompleteRecording(int caller_id);
    119   virtual void DidCompleteRecognition(int caller_id);
    120   virtual void OnRecognizerError(int caller_id,
    121                                  SpeechRecognizer::ErrorCode error);
    122   virtual void DidCompleteEnvironmentEstimation(int caller_id);
    123   virtual void SetInputVolume(int caller_id, float volume, float noise_volume);
    124 
    125   // SpeechInputBubbleController::Delegate methods.
    126   virtual void InfoBubbleButtonClicked(int caller_id,
    127                                        SpeechInputBubble::Button button);
    128   virtual void InfoBubbleFocusChanged(int caller_id);
    129 
    130  private:
    131   struct SpeechInputRequest {
    132     SpeechInputManagerDelegate* delegate;
    133     scoped_refptr<SpeechRecognizer> recognizer;
    134     bool is_active;  // Set to true when recording or recognition is going on.
    135   };
    136 
    137   // Private constructor to enforce singleton.
    138   friend struct base::DefaultLazyInstanceTraits<SpeechInputManagerImpl>;
    139   SpeechInputManagerImpl();
    140   virtual ~SpeechInputManagerImpl();
    141 
    142   bool HasPendingRequest(int caller_id) const;
    143   SpeechInputManagerDelegate* GetDelegate(int caller_id) const;
    144 
    145   void CancelRecognitionAndInformDelegate(int caller_id);
    146 
    147   // Starts/restarts recognition for an existing request.
    148   void StartRecognitionForRequest(int caller_id);
    149 
    150   typedef std::map<int, SpeechInputRequest> SpeechRecognizerMap;
    151   SpeechRecognizerMap requests_;
    152   int recording_caller_id_;
    153   scoped_refptr<SpeechInputBubbleController> bubble_controller_;
    154   scoped_refptr<OptionalRequestInfo> optional_request_info_;
    155 };
    156 
// Process-wide SpeechInputManagerImpl instance, handed out by
// SpeechInputManager::Get(). The object is reached through the LazyInstance's
// Pointer() accessor.
base::LazyInstance<SpeechInputManagerImpl> g_speech_input_manager_impl(
    base::LINKER_INITIALIZED);
    159 
    160 }  // namespace
    161 
    162 SpeechInputManager* SpeechInputManager::Get() {
    163   return g_speech_input_manager_impl.Pointer();
    164 }
    165 
    166 void SpeechInputManager::ShowAudioInputSettings() {
    167   // Since AudioManager::ShowAudioInputSettings can potentially launch external
    168   // processes, do that in the FILE thread to not block the calling threads.
    169   if (!BrowserThread::CurrentlyOn(BrowserThread::FILE)) {
    170     BrowserThread::PostTask(
    171         BrowserThread::FILE, FROM_HERE,
    172         NewRunnableFunction(&SpeechInputManager::ShowAudioInputSettings));
    173     return;
    174   }
    175 
    176   DCHECK(AudioManager::GetAudioManager()->CanShowAudioInputSettings());
    177   if (AudioManager::GetAudioManager()->CanShowAudioInputSettings())
    178     AudioManager::GetAudioManager()->ShowAudioInputSettings();
    179 }
    180 
    181 SpeechInputManagerImpl::SpeechInputManagerImpl()
    182     : recording_caller_id_(0),
    183       bubble_controller_(new SpeechInputBubbleController(
    184           ALLOW_THIS_IN_INITIALIZER_LIST(this))) {
    185 }
    186 
    187 SpeechInputManagerImpl::~SpeechInputManagerImpl() {
    188   while (requests_.begin() != requests_.end())
    189     CancelRecognition(requests_.begin()->first);
    190 }
    191 
    192 bool SpeechInputManagerImpl::HasPendingRequest(int caller_id) const {
    193   return requests_.find(caller_id) != requests_.end();
    194 }
    195 
    196 SpeechInputManagerDelegate* SpeechInputManagerImpl::GetDelegate(
    197     int caller_id) const {
    198   return requests_.find(caller_id)->second.delegate;
    199 }
    200 
    201 void SpeechInputManagerImpl::StartRecognition(
    202     SpeechInputManagerDelegate* delegate,
    203     int caller_id,
    204     int render_process_id,
    205     int render_view_id,
    206     const gfx::Rect& element_rect,
    207     const std::string& language,
    208     const std::string& grammar,
    209     const std::string& origin_url) {
    210   DCHECK(!HasPendingRequest(caller_id));
    211 
    212   bubble_controller_->CreateBubble(caller_id, render_process_id, render_view_id,
    213                                    element_rect);
    214 
    215   if (!optional_request_info_.get()) {
    216     optional_request_info_ = new OptionalRequestInfo();
    217     // Since hardware info is optional with speech input requests, we start an
    218     // asynchronous fetch here and move on with recording audio. This first
    219     // speech input request would send an empty string for hardware info and
    220     // subsequent requests may have the hardware info available if the fetch
    221     // completed before them. This way we don't end up stalling the user with
    222     // a long wait and disk seeks when they click on a UI element and start
    223     // speaking.
    224     optional_request_info_->Refresh();
    225   }
    226 
    227   SpeechInputRequest* request = &requests_[caller_id];
    228   request->delegate = delegate;
    229   request->recognizer = new SpeechRecognizer(
    230       this, caller_id, language, grammar, optional_request_info_->value(),
    231       optional_request_info_->can_report_metrics() ? origin_url : "");
    232   request->is_active = false;
    233 
    234   StartRecognitionForRequest(caller_id);
    235 }
    236 
    237 void SpeechInputManagerImpl::StartRecognitionForRequest(int caller_id) {
    238   DCHECK(HasPendingRequest(caller_id));
    239 
    240   // If we are currently recording audio for another caller, abort that cleanly.
    241   if (recording_caller_id_)
    242     CancelRecognitionAndInformDelegate(recording_caller_id_);
    243 
    244   if (!AudioManager::GetAudioManager()->HasAudioInputDevices()) {
    245     bubble_controller_->SetBubbleMessage(
    246         caller_id, l10n_util::GetStringUTF16(IDS_SPEECH_INPUT_NO_MIC));
    247   } else {
    248     recording_caller_id_ = caller_id;
    249     requests_[caller_id].is_active = true;
    250     requests_[caller_id].recognizer->StartRecording();
    251     bubble_controller_->SetBubbleWarmUpMode(caller_id);
    252   }
    253 }
    254 
    255 void SpeechInputManagerImpl::CancelRecognition(int caller_id) {
    256   DCHECK(HasPendingRequest(caller_id));
    257   if (requests_[caller_id].is_active)
    258     requests_[caller_id].recognizer->CancelRecognition();
    259   requests_.erase(caller_id);
    260   if (recording_caller_id_ == caller_id)
    261     recording_caller_id_ = 0;
    262   bubble_controller_->CloseBubble(caller_id);
    263 }
    264 
    265 void SpeechInputManagerImpl::CancelAllRequestsWithDelegate(
    266     SpeechInputManagerDelegate* delegate) {
    267   SpeechRecognizerMap::iterator it = requests_.begin();
    268   while (it != requests_.end()) {
    269     if (it->second.delegate == delegate) {
    270       CancelRecognition(it->first);
    271       // This map will have very few elements so it is simpler to restart.
    272       it = requests_.begin();
    273     } else {
    274       ++it;
    275     }
    276   }
    277 }
    278 
    279 void SpeechInputManagerImpl::StopRecording(int caller_id) {
    280   DCHECK(HasPendingRequest(caller_id));
    281   requests_[caller_id].recognizer->StopRecording();
    282 }
    283 
    284 void SpeechInputManagerImpl::SetRecognitionResult(
    285     int caller_id, bool error, const SpeechInputResultArray& result) {
    286   DCHECK(HasPendingRequest(caller_id));
    287   GetDelegate(caller_id)->SetRecognitionResult(caller_id, result);
    288 }
    289 
    290 void SpeechInputManagerImpl::DidCompleteRecording(int caller_id) {
    291   DCHECK(recording_caller_id_ == caller_id);
    292   DCHECK(HasPendingRequest(caller_id));
    293   recording_caller_id_ = 0;
    294   GetDelegate(caller_id)->DidCompleteRecording(caller_id);
    295   bubble_controller_->SetBubbleRecognizingMode(caller_id);
    296 }
    297 
    298 void SpeechInputManagerImpl::DidCompleteRecognition(int caller_id) {
    299   GetDelegate(caller_id)->DidCompleteRecognition(caller_id);
    300   requests_.erase(caller_id);
    301   bubble_controller_->CloseBubble(caller_id);
    302 }
    303 
    304 void SpeechInputManagerImpl::OnRecognizerError(
    305     int caller_id, SpeechRecognizer::ErrorCode error) {
    306   if (caller_id == recording_caller_id_)
    307     recording_caller_id_ = 0;
    308 
    309   requests_[caller_id].is_active = false;
    310 
    311   struct ErrorMessageMapEntry {
    312     SpeechRecognizer::ErrorCode error;
    313     int message_id;
    314   };
    315   ErrorMessageMapEntry error_message_map[] = {
    316     {
    317       SpeechRecognizer::RECOGNIZER_ERROR_CAPTURE, IDS_SPEECH_INPUT_MIC_ERROR
    318     }, {
    319       SpeechRecognizer::RECOGNIZER_ERROR_NO_SPEECH, IDS_SPEECH_INPUT_NO_SPEECH
    320     }, {
    321       SpeechRecognizer::RECOGNIZER_ERROR_NO_RESULTS, IDS_SPEECH_INPUT_NO_RESULTS
    322     }, {
    323       SpeechRecognizer::RECOGNIZER_ERROR_NETWORK, IDS_SPEECH_INPUT_NET_ERROR
    324     }
    325   };
    326   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(error_message_map); ++i) {
    327     if (error_message_map[i].error == error) {
    328       bubble_controller_->SetBubbleMessage(
    329           caller_id,
    330           l10n_util::GetStringUTF16(error_message_map[i].message_id));
    331       return;
    332     }
    333   }
    334 
    335   NOTREACHED() << "unknown error " << error;
    336 }
    337 
    338 void SpeechInputManagerImpl::DidStartReceivingAudio(int caller_id) {
    339   DCHECK(HasPendingRequest(caller_id));
    340   DCHECK(recording_caller_id_ == caller_id);
    341   bubble_controller_->SetBubbleRecordingMode(caller_id);
    342 }
    343 
    344 void SpeechInputManagerImpl::DidCompleteEnvironmentEstimation(int caller_id) {
    345   DCHECK(HasPendingRequest(caller_id));
    346   DCHECK(recording_caller_id_ == caller_id);
    347 }
    348 
    349 void SpeechInputManagerImpl::SetInputVolume(int caller_id, float volume,
    350                                             float noise_volume) {
    351   DCHECK(HasPendingRequest(caller_id));
    352   DCHECK_EQ(recording_caller_id_, caller_id);
    353 
    354   bubble_controller_->SetBubbleInputVolume(caller_id, volume, noise_volume);
    355 }
    356 
    357 void SpeechInputManagerImpl::CancelRecognitionAndInformDelegate(int caller_id) {
    358   SpeechInputManagerDelegate* cur_delegate = GetDelegate(caller_id);
    359   CancelRecognition(caller_id);
    360   cur_delegate->DidCompleteRecording(caller_id);
    361   cur_delegate->DidCompleteRecognition(caller_id);
    362 }
    363 
    364 void SpeechInputManagerImpl::InfoBubbleButtonClicked(
    365     int caller_id, SpeechInputBubble::Button button) {
    366   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    367   // Ignore if the caller id was not in our active recognizers list because the
    368   // user might have clicked more than once, or recognition could have been
    369   // cancelled due to other reasons before the user click was processed.
    370   if (!HasPendingRequest(caller_id))
    371     return;
    372 
    373   if (button == SpeechInputBubble::BUTTON_CANCEL) {
    374     CancelRecognitionAndInformDelegate(caller_id);
    375   } else if (button == SpeechInputBubble::BUTTON_TRY_AGAIN) {
    376     StartRecognitionForRequest(caller_id);
    377   }
    378 }
    379 
    380 void SpeechInputManagerImpl::InfoBubbleFocusChanged(int caller_id) {
    381   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    382   // Ignore if the caller id was not in our active recognizers list because the
    383   // user might have clicked more than once, or recognition could have been
    384   // ended due to other reasons before the user click was processed.
    385   if (HasPendingRequest(caller_id)) {
    386     // If this is an ongoing recording or if we were displaying an error message
    387     // to the user, abort it since user has switched focus. Otherwise
    388     // recognition has started and keep that going so user can start speaking to
    389     // another element while this gets the results in parallel.
    390     if (recording_caller_id_ == caller_id || !requests_[caller_id].is_active) {
    391       CancelRecognitionAndInformDelegate(caller_id);
    392     }
    393   }
    394 }
    395 
    396 }  // namespace speech_input
    397