Home | History | Annotate | Download | only in fel
      1 /*
      2  * Copyright (C) 2018 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 // Feature extraction language (FEL) parser.
     18 //
     19 // BNF grammar for FEL:
     20 //
     21 // <feature model> ::= { <feature extractor> }
     22 //
     23 // <feature extractor> ::= <extractor spec> |
     24 //                         <extractor spec> '.' <feature extractor> |
     25 //                         <extractor spec> '{' { <feature extractor> } '}'
     26 //
     27 // <extractor spec> ::= <extractor type>
     28 //                      [ '(' <parameter list> ')' ]
     29 //                      [ ':' <extractor name> ]
     30 //
// <parameter list> ::= ( <parameter> | <argument> ) { ',' <parameter> }
     32 //
     33 // <parameter> ::= <parameter name> '=' <parameter value>
     34 //
     35 // <extractor type> ::= NAME
     36 // <extractor name> ::= NAME | STRING
     37 // <argument> ::= NUMBER
     38 // <parameter name> ::= NAME
     39 // <parameter value> ::= NUMBER | STRING | NAME
     40 
     41 #ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEL_PARSER_H_
     42 #define NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEL_PARSER_H_
     43 
     44 #include <string>
     45 
     46 #include "lang_id/common/fel/feature-descriptors.h"
     47 #include "lang_id/common/lite_base/logging.h"
     48 
     49 namespace libtextclassifier3 {
     50 namespace mobile {
     51 
     52 class FELParser {
     53  public:
     54   // Parses fml specification into feature extractor descriptor.
     55   // Returns true on success, false on error (e.g., syntax errors).
     56   bool Parse(const string &source, FeatureExtractorDescriptor *result);
     57 
     58  private:
     59   // Initializes the parser with the source text.
     60   // Returns true on success, false on syntax error.
     61   bool Initialize(const string &source);
     62 
     63   // Outputs an error message, with context info.
     64   void ReportError(const string &error_message);
     65 
     66   // Moves to the next input character.
     67   void Next();
     68 
     69   // Moves to the next input item.  Sets item_text_ and item_type_ accordingly.
     70   // Returns true on success, false on syntax error.
     71   bool NextItem();
     72 
     73   // Parses a feature descriptor.
     74   // Returns true on success, false on syntax error.
     75   bool ParseFeature(FeatureFunctionDescriptor *result);
     76 
     77   // Parses a parameter specification.
     78   // Returns true on success, false on syntax error.
     79   bool ParseParameter(FeatureFunctionDescriptor *result);
     80 
     81   // Returns true if end of source input has been reached.
     82   bool eos() const { return current_ >= source_.end(); }
     83 
     84   // Returns current character.  Other methods should access the current
     85   // character through this method (instead of using *current_ directly): this
     86   // method performs extra safety checks.
     87   //
     88   // In case of an unsafe access, returns '\0'.
     89   char CurrentChar() const {
     90     if ((current_ >= source_.begin()) && (current_ < source_.end())) {
     91       return *current_;
     92     } else {
     93       SAFTM_LOG(ERROR) << "Unsafe char read";
     94       return '\0';
     95     }
     96   }
     97 
     98   // Item types.
     99   enum ItemTypes {
    100     END = 0,
    101     NAME = -1,
    102     NUMBER = -2,
    103     STRING = -3,
    104   };
    105 
    106   // Source text.
    107   string source_;
    108 
    109   // Current input position.
    110   string::iterator current_;
    111 
    112   // Line number for current input position.
    113   int line_number_;
    114 
    115   // Start position for current item.
    116   string::iterator item_start_;
    117 
    118   // Start position for current line.
    119   string::iterator line_start_;
    120 
    121   // Line number for current item.
    122   int item_line_number_;
    123 
    124   // Item type for current item. If this is positive it is interpreted as a
    125   // character. If it is negative it is interpreted as an item type.
    126   int item_type_;
    127 
    128   // Text for current item.
    129   string item_text_;
    130 };
    131 
    132 }  // namespace mobile
    133 }  // namespace nlp_saft
    134 
    135 #endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEL_PARSER_H_
    136