Home | History | Annotate | Download | only in speech
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_ENGINE_H_
      6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_ENGINE_H_
      7 
      8 #include <string>
      9 
     10 #include "base/basictypes.h"
     11 #include "content/common/content_export.h"
     12 #include "content/public/common/speech_recognition_grammar.h"
     13 #include "content/public/common/speech_recognition_result.h"
     14 
     15 namespace content {
     16 
     17 class AudioChunk;
     18 struct SpeechRecognitionError;
     19 
     20 // This interface models the basic contract that a speech recognition engine,
     21 // either working locally or relying on a remote web-service, must obey.
     22 // The expected call sequence for exported methods is:
     23 // StartRecognition      Mandatory at beginning of SR.
     24 //   TakeAudioChunk      For every audio chunk pushed.
     25 //   AudioChunksEnded    Finalize the audio stream (omitted in case of errors).
     26 // EndRecognition        Mandatory at end of SR (even on errors).
     27 // No delegate callbacks are allowed before StartRecognition or after
     28 // EndRecognition. If a recognition was started, the caller can free the
     29 // SpeechRecognitionEngine only after calling EndRecognition.
     30 class SpeechRecognitionEngine {
     31  public:
     32   // Interface for receiving callbacks from this object.
     33   class Delegate {
     34    public:
     35     // Called whenever a result is retrieved. It might be issued several times,
     36     // (e.g., in the case of continuous speech recognition engine
     37     // implementations).
     38     virtual void OnSpeechRecognitionEngineResults(
     39         const SpeechRecognitionResults& results) = 0;
     40     virtual void OnSpeechRecognitionEngineError(
     41         const SpeechRecognitionError& error) = 0;
     42 
     43    protected:
     44     virtual ~Delegate() {}
     45   };
     46 
     47   // Remote engine configuration.
     48   struct CONTENT_EXPORT Config {
     49     Config();
     50     ~Config();
     51 
     52     std::string language;
     53     SpeechRecognitionGrammarArray grammars;
     54     bool filter_profanities;
     55     bool continuous;
     56     bool interim_results;
     57     uint32 max_hypotheses;
     58     std::string hardware_info;
     59     std::string origin_url;
     60     int audio_sample_rate;
     61     int audio_num_bits_per_sample;
     62   };
     63 
     64   virtual ~SpeechRecognitionEngine() {}
     65 
     66   // Set/change the recognition engine configuration. It is not allowed to call
     67   // this function while a recognition is ongoing.
     68   virtual void SetConfig(const Config& config) = 0;
     69 
     70   // Called when the speech recognition begins, before any TakeAudioChunk call.
     71   virtual void StartRecognition() = 0;
     72 
     73   // End any recognition activity and don't make any further callback.
     74   // Must be always called to close the corresponding StartRecognition call,
     75   // even in case of errors.
     76   // No further TakeAudioChunk/AudioChunksEnded calls are allowed after this.
     77   virtual void EndRecognition() = 0;
     78 
     79   // Push a chunk of uncompressed audio data, where the chunk length agrees with
     80   // GetDesiredAudioChunkDurationMs().
     81   virtual void TakeAudioChunk(const AudioChunk& data) = 0;
     82 
     83   // Notifies the engine that audio capture has completed and no more chunks
     84   // will be pushed. The engine, however, can still provide further results
     85   // using the audio chunks collected so far.
     86   virtual void AudioChunksEnded() = 0;
     87 
     88   // Checks wheter recognition of pushed audio data is pending.
     89   virtual bool IsRecognitionPending() const = 0;
     90 
     91   // Retrieves the desired duration, in milliseconds, of pushed AudioChunk(s).
     92   virtual int GetDesiredAudioChunkDurationMs() const = 0;
     93 
     94   // set_delegate detached from constructor for lazy dependency injection.
     95   void set_delegate(Delegate* delegate) { delegate_ = delegate; }
     96 
     97  protected:
     98   Delegate* delegate() const { return delegate_; }
     99 
    100  private:
    101   Delegate* delegate_;
    102 };
    103 
// These typedefs work around an issue with certain versions of Visual Studio,
// which get confused between multiple nested Delegate classes and report a
// C2500 error. They give the nested types unambiguous namespace-level names.
typedef SpeechRecognitionEngine::Delegate SpeechRecognitionEngineDelegate;
typedef SpeechRecognitionEngine::Config SpeechRecognitionEngineConfig;
    109 
    110 }  // namespace content
    111 
    112 #endif  // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_ENGINE_H_
    113