Home | History | Annotate | Download | only in common
      1 // Copyright (C) 2017 The Android Open Source Project
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //      http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 // Protos for performing inference with an EmbeddingNetwork.
     16 
     17 syntax = "proto2";
     18 option optimize_for = LITE_RUNTIME;
     19 
     20 package libtextclassifier.nlp_core;
     21 
     22 // Wrapper for storing a matrix of parameters. These are stored in row-major
     23 // order.
// Wrapper for storing a matrix of parameters. These are stored in row-major
// order.
message MatrixParams {
  optional int32 rows = 1;  // # of rows in the matrix
  optional int32 cols = 2;  // # of columns in the matrix

  // Non-quantized matrix entries, in row-major order (presumably rows * cols
  // entries when populated — verify against the reader).  Packed encoding
  // keeps the repeated field compact on the wire.
  repeated float value = 3 [packed = true];

  // Whether the matrix is quantized.  When true, the entries are carried by
  // bytes_for_quantized_values / bytes_for_col_scales below instead of the
  // "value" field.
  optional bool is_quantized = 4 [default = false];

  // Bytes for all quantized values.  Each value (see "repeated float value"
  // field) is quantized to an uint8 (1 byte) value, and all these bytes are
  // concatenated into the string from this field.
  optional bytes bytes_for_quantized_values = 7;

  // Bytes for all scale factors for dequantizing the values.  The quantization
  // process generates a float16 scale factor for each column.  The 2 bytes for
  // each such float16 are put in little-endian order (least significant byte
  // first) and next all these pairs of bytes are concatenated into the string
  // from this field.
  optional bytes bytes_for_col_scales = 8;

  // Field numbers 5 and 6 belonged to since-deleted fields; keeping them
  // reserved prevents accidental reuse with different semantics.
  reserved 5, 6;
}
     48 
     49 // Stores all parameters for a given EmbeddingNetwork. This can either be a
     50 // EmbeddingNetwork or a PrecomputedEmbeddingNetwork: for precomputed networks,
     51 // the embedding weights are actually the activations of the first hidden layer
     52 // *before* the bias is added and the non-linear transform is applied.
     53 //
     54 // Thus, for PrecomputedEmbeddingNetwork storage, hidden layers are stored
     55 // starting from the second hidden layer, while biases are stored for every
     56 // hidden layer.
// Stores all parameters for a given EmbeddingNetwork. This can either be a
// EmbeddingNetwork or a PrecomputedEmbeddingNetwork: for precomputed networks,
// the embedding weights are actually the activations of the first hidden layer
// *before* the bias is added and the non-linear transform is applied.
//
// Thus, for PrecomputedEmbeddingNetwork storage, hidden layers are stored
// starting from the second hidden layer, while biases are stored for every
// hidden layer.
message EmbeddingNetworkProto {
  // Embeddings and hidden layers. Note that if is_precomputed == true, then the
  // embeddings should store the activations of the first hidden layer, so we
  // must have hidden_bias_size() == hidden_size() + 1 (we store weights for
  // the first hidden layer bias, but not the layer itself.)
  repeated MatrixParams embeddings = 1;
  repeated MatrixParams hidden = 2;
  repeated MatrixParams hidden_bias = 3;

  // Final (softmax) layer of the network: weight matrix and bias vector.
  optional MatrixParams softmax = 4;
  optional MatrixParams softmax_bias = 5;

  // Element i of the repeated field below indicates number of features that use
  // the i-th embedding space.
  repeated int32 embedding_num_features = 7;

  // Whether or not this is intended to store a precomputed network.
  optional bool is_precomputed = 11 [default = false];

  // True if this EmbeddingNetworkProto can be used for inference with no
  // additional matrix transposition.
  //
  // Given an EmbeddingNetworkProto produced by a Neurosis training pipeline, we
  // have to transpose a few matrices (e.g., the embedding matrices) before we
  // can perform inference.  When we do so, we negate this flag.  Note: we don't
  // simply set this to true: transposing twice takes us to the original state.
  optional bool is_transposed = 12 [default = false];

  // Allow extensions.
  extensions 100 to max;

  // Field numbers 6, 8, 9, and 10 belonged to since-deleted fields; keeping
  // them reserved prevents accidental reuse with different semantics.
  reserved 6, 8, 9, 10;
}
     91