Home | History | Annotate | Download | only in memory_image
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef LIBTEXTCLASSIFIER_COMMON_MEMORY_IMAGE_IN_MEMORY_MODEL_DATA_H_
     18 #define LIBTEXTCLASSIFIER_COMMON_MEMORY_IMAGE_IN_MEMORY_MODEL_DATA_H_
     19 
     20 #include "common/memory_image/data-store.h"
     21 #include "common/task-spec.pb.h"
     22 #include "util/strings/stringpiece.h"
     23 
     24 namespace libtextclassifier {
     25 namespace nlp_core {
     26 
     27 // In-memory representation of data for a Saft model.  Provides access to a
     28 // TaskSpec object (produced by the "spec" stage of the Saft training model) and
     29 // to the bytes of the TaskInputs mentioned in that spec (all these bytes are in
     30 // memory, no file I/O required).
     31 //
      32 // Technically, an InMemoryModelData wraps a DataStore that maps the special
      33 // string kTaskSpecDataStoreEntryName to the binary serialization of a TaskSpec.
      34 // For each TaskInput (of the TaskSpec) with a file_pattern that starts with
     35 // kFilePatternPrefix (see below), the same DataStore maps file_pattern to some
     36 // content bytes.  This way, it is possible to have all TaskInputs in memory,
     37 // while still allowing classic, on-disk TaskInputs.
      38 class InMemoryModelData {
      39  public:
      40   // Name for the DataStore entry that stores the serialized TaskSpec for the
      41   // entire model.
           // Declaration only; the definition is not visible in this header.
      42   static const char kTaskSpecDataStoreEntryName[];
      43 
      44   // Prefix for TaskInput::Part::file_pattern values, to distinguish those
      45   // "files" (whose content bytes are stored in the DataStore) from other files.
           // Declaration only; the definition is not visible in this header.
      46   static const char kFilePatternPrefix[];
      47 
      48   // Constructs an InMemoryModelData based on a chunk of bytes.  Those bytes
      49   // should have been produced by a DataStoreBuilder.
           //
           // The bytes are handed directly to the underlying DataStore.
           // NOTE(review): StringPiece is presumably a non-owning view; if DataStore
           // does not copy, the bytes must outlive this object -- confirm against
           // the DataStore implementation.
      50   explicit InMemoryModelData(StringPiece bytes) : data_store_(bytes) {}
      51 
      52   // Fills *task_spec with a TaskSpec similar to the one used by
      53   // DataStoreBuilder (when building the bytes used to construct this
      54   // InMemoryModelData) except that each file name
      55   // (TaskInput::Part::file_pattern) is replaced with a name that can be used to
      56   // retrieve the corresponding file content bytes via GetBytesForInputFile().
      57   //
      58   // Returns true on success, false otherwise.
      59   bool GetTaskSpec(TaskSpec *task_spec) const;
      60 
      61   // Gets content bytes for a file.  The file_name argument should be the
      62   // file_pattern for a TaskInput from the TaskSpec (see GetTaskSpec()).
      63   // Returns a StringPiece indicating a memory area with the content bytes.  On
      64   // error, returns StringPiece(nullptr, 0).
      65   StringPiece GetBytesForInputFile(const std::string &file_name) const;
      66 
      67  private:
           // Underlying store, initialized from the bytes passed to the constructor.
           // Declared const, so it is never reassigned after construction.
      68   const memory_image::DataStore data_store_;
      69 };
     70 
     71 }  // namespace nlp_core
     72 }  // namespace libtextclassifier
     73 
     74 #endif  // LIBTEXTCLASSIFIER_COMMON_MEMORY_IMAGE_IN_MEMORY_MODEL_DATA_H_
     75