Home | History | Annotate | Download | only in speech_commands
      1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #ifndef TENSORFLOW_EXAMPLES_SPEECH_COMMANDS_RECOGNIZE_COMMANDS_H_
     17 #define TENSORFLOW_EXAMPLES_SPEECH_COMMANDS_RECOGNIZE_COMMANDS_H_
     18 
     19 #include <deque>
     20 #include <unordered_set>
     21 #include <vector>
     22 
     23 #include "tensorflow/core/framework/tensor.h"
     24 #include "tensorflow/core/platform/types.h"
     25 
     26 namespace tensorflow {
     27 
     28 // This class is designed to apply a very primitive decoding model on top of the
     29 // instantaneous results from running an audio recognition model on a single
     30 // window of samples. It applies smoothing over time so that noisy individual
     31 // label scores are averaged, increasing the confidence that apparent matches
     32 // are real.
     33 // To use it, you should create a class object with the configuration you
     34 // want, and then feed results from running a TensorFlow model into the
     35 // processing method. The timestamp for each subsequent call should be
     36 // increasing from the previous, since the class is designed to process a stream
     37 // of data over time.
     38 class RecognizeCommands {
     39  public:
          // Constructs a recognizer with the given smoothing configuration. Only the
          // declaration lives in this header; the definition is elsewhere.
     40   // labels should be a list of the strings associated with each one-hot score.
     41   // The window duration controls the smoothing. Longer durations will give a
     42   // higher confidence that the results are correct, but may miss some commands.
     43   // The detection threshold has a similar effect, with high values increasing
     44   // the precision at the cost of recall. The minimum count controls how many
     45   // results need to be in the averaging window before it's seen as a reliable
     46   // average. This prevents erroneous results when the averaging window is
     47   // initially being populated for example. The suppression argument disables
     48   // further recognitions for a set time after one has been triggered, which can
     49   // help reduce spurious recognitions.
          // Defaults, as given in the signature below: a 1000 ms averaging window, a
          // detection threshold of 0.2, 500 ms of suppression, and a minimum count
          // of 3. Only `labels` is required.
     50   explicit RecognizeCommands(const std::vector<string>& labels,
     51                              int32 average_window_duration_ms = 1000,
     52                              float detection_threshold = 0.2,
     53                              int32 suppression_ms = 500,
     54                              int32 minimum_count = 3);
     55 
     56   // Call this with the results of running a model on sample data.
          //   latest_results:  the model output for the newest window of samples.
          //                    NOTE(review): presumably one score per entry in
          //                    `labels` -- confirm shape/dtype against the .cc file.
          //   current_time_ms: timestamp of this call in milliseconds; per the class
          //                    comment it should increase from one call to the next.
          //   found_command, score, is_new_command: output pointers.
          //                    NOTE(review): judging by the names these receive the
          //                    recognized label, its score, and whether it is a
          //                    newly triggered command -- confirm against the
          //                    implementation, including whether null is allowed.
          // Returns a Status; the conditions that produce a non-OK status are
          // decided by the implementation, which is not visible in this header.
     57   Status ProcessLatestResults(const Tensor& latest_results,
     58                               const int64 current_time_ms,
     59                               string* found_command, float* score,
     60                               bool* is_new_command);
     61 
     62  private:
     63   // Configuration
          // These mirror the constructor parameters of the same names (presumably
          // stored by the constructor -- confirm in the .cc file).
     64   std::vector<string> labels_;
     65   int32 average_window_duration_ms_;
     66   float detection_threshold_;
     67   int32 suppression_ms_;
     68   int32 minimum_count_;
     69 
     70   // Working variables
          // State carried between calls. previous_results_ holds (int64, Tensor)
          // pairs; NOTE(review): presumably (timestamp in ms, model results) for the
          // current averaging window -- confirm. The remaining fields look like the
          // most recent top label, when it was seen, and a cached label count;
          // verify their exact meaning against the implementation.
     71   std::deque<std::pair<int64, Tensor>> previous_results_;
     72   string previous_top_label_;
     73   int64 labels_count_;
     74   int64 previous_top_label_time_;
     75 };
     76 
     77 }  // namespace tensorflow
     78 
     79 #endif  // TENSORFLOW_EXAMPLES_SPEECH_COMMANDS_RECOGNIZE_COMMANDS_H_
     80