Home | History | Annotate | Download | only in fel
      1 /*
      2  * Copyright (C) 2018 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "lang_id/common/fel/fel-parser.h"
     18 
     19 #include <ctype.h>
     20 #include <string>
     21 
     22 #include "lang_id/common/lite_base/logging.h"
     23 #include "lang_id/common/lite_strings/numbers.h"
     24 
     25 namespace libtextclassifier3 {
     26 namespace mobile {
     27 
     28 namespace {
     29 inline bool IsValidCharAtStartOfIdentifier(char c) {
     30   return isalpha(c) || (c == '_') || (c == '/');
     31 }
     32 
     33 // Returns true iff character c can appear inside an identifier.
     34 inline bool IsValidCharInsideIdentifier(char c) {
     35   return isalnum(c) || (c == '_') || (c == '-') || (c == '/');
     36 }
     37 
     38 // Returns true iff character c can appear at the beginning of a number.
     39 inline bool IsValidCharAtStartOfNumber(char c) {
     40   return isdigit(c) || (c == '+') || (c == '-');
     41 }
     42 
     43 // Returns true iff character c can appear inside a number.
     44 inline bool IsValidCharInsideNumber(char c) {
     45   return isdigit(c) || (c == '.');
     46 }
     47 }  // namespace
     48 
     49 bool FELParser::Initialize(const string &source) {
     50   // Initialize parser state.
     51   source_ = source;
     52   current_ = source_.begin();
     53   item_start_ = line_start_ = current_;
     54   line_number_ = item_line_number_ = 1;
     55 
     56   // Read first input item.
     57   return NextItem();
     58 }
     59 
     60 void FELParser::ReportError(const string &error_message) {
     61   const int position = item_start_ - line_start_ + 1;
     62   const string line(line_start_, current_);
     63 
     64   SAFTM_LOG(ERROR) << "Error in feature model, line " << item_line_number_
     65                    << ", position " << position << ": " << error_message
     66                    << "\n    " << line << " <--HERE";
     67 }
     68 
     69 void FELParser::Next() {
     70   // Move to the next input character. If we are at a line break update line
     71   // number and line start position.
     72   if (CurrentChar() == '\n') {
     73     ++line_number_;
     74     ++current_;
     75     line_start_ = current_;
     76   } else {
     77     ++current_;
     78   }
     79 }
     80 
     81 bool FELParser::NextItem() {
     82   // Skip white space and comments.
     83   while (!eos()) {
     84     if (CurrentChar() == '#') {
     85       // Skip comment.
     86       while (!eos() && CurrentChar() != '\n') Next();
     87     } else if (isspace(CurrentChar())) {
     88       // Skip whitespace.
     89       while (!eos() && isspace(CurrentChar())) Next();
     90     } else {
     91       break;
     92     }
     93   }
     94 
     95   // Record start position for next item.
     96   item_start_ = current_;
     97   item_line_number_ = line_number_;
     98 
     99   // Check for end of input.
    100   if (eos()) {
    101     item_type_ = END;
    102     return true;
    103   }
    104 
    105   // Parse number.
    106   if (IsValidCharAtStartOfNumber(CurrentChar())) {
    107     string::iterator start = current_;
    108     Next();
    109     while (!eos() && IsValidCharInsideNumber(CurrentChar())) Next();
    110     item_text_.assign(start, current_);
    111     item_type_ = NUMBER;
    112     return true;
    113   }
    114 
    115   // Parse string.
    116   if (CurrentChar() == '"') {
    117     Next();
    118     string::iterator start = current_;
    119     while (CurrentChar() != '"') {
    120       if (eos()) {
    121         ReportError("Unterminated string");
    122         return false;
    123       }
    124       Next();
    125     }
    126     item_text_.assign(start, current_);
    127     item_type_ = STRING;
    128     Next();
    129     return true;
    130   }
    131 
    132   // Parse identifier name.
    133   if (IsValidCharAtStartOfIdentifier(CurrentChar())) {
    134     string::iterator start = current_;
    135     while (!eos() && IsValidCharInsideIdentifier(CurrentChar())) {
    136       Next();
    137     }
    138     item_text_.assign(start, current_);
    139     item_type_ = NAME;
    140     return true;
    141   }
    142 
    143   // Single character item.
    144   item_type_ = CurrentChar();
    145   Next();
    146   return true;
    147 }
    148 
    149 bool FELParser::Parse(const string &source,
    150                       FeatureExtractorDescriptor *result) {
    151   // Initialize parser.
    152   if (!Initialize(source)) {
    153     return false;
    154   }
    155 
    156   while (item_type_ != END) {
    157     // Current item should be a feature name.
    158     if (item_type_ != NAME) {
    159       ReportError("Feature type name expected");
    160       return false;
    161     }
    162     string name = item_text_;
    163     if (!NextItem()) {
    164       return false;
    165     }
    166 
    167     if (item_type_ == '=') {
    168       ReportError("Invalid syntax: feature expected");
    169       return false;
    170     } else {
    171       // Parse feature.
    172       FeatureFunctionDescriptor *descriptor = result->add_feature();
    173       descriptor->set_type(name);
    174       if (!ParseFeature(descriptor)) {
    175         return false;
    176       }
    177     }
    178   }
    179 
    180   return true;
    181 }
    182 
    183 bool FELParser::ParseFeature(FeatureFunctionDescriptor *result) {
    184   // Parse argument and parameters.
    185   if (item_type_ == '(') {
    186     if (!NextItem()) return false;
    187     if (!ParseParameter(result)) return false;
    188     while (item_type_ == ',') {
    189       if (!NextItem()) return false;
    190       if (!ParseParameter(result)) return false;
    191     }
    192 
    193     if (item_type_ != ')') {
    194       ReportError(") expected");
    195       return false;
    196     }
    197     if (!NextItem()) return false;
    198   }
    199 
    200   // Parse feature name.
    201   if (item_type_ == ':') {
    202     if (!NextItem()) return false;
    203     if (item_type_ != NAME && item_type_ != STRING) {
    204       ReportError("Feature name expected");
    205       return false;
    206     }
    207     string name = item_text_;
    208     if (!NextItem()) return false;
    209 
    210     // Set feature name.
    211     result->set_name(name);
    212   }
    213 
    214   // Parse sub-features.
    215   if (item_type_ == '.') {
    216     // Parse dotted sub-feature.
    217     if (!NextItem()) return false;
    218     if (item_type_ != NAME) {
    219       ReportError("Feature type name expected");
    220       return false;
    221     }
    222     string type = item_text_;
    223     if (!NextItem()) return false;
    224 
    225     // Parse sub-feature.
    226     FeatureFunctionDescriptor *subfeature = result->add_feature();
    227     subfeature->set_type(type);
    228     if (!ParseFeature(subfeature)) return false;
    229   } else if (item_type_ == '{') {
    230     // Parse sub-feature block.
    231     if (!NextItem()) return false;
    232     while (item_type_ != '}') {
    233       if (item_type_ != NAME) {
    234         ReportError("Feature type name expected");
    235         return false;
    236       }
    237       string type = item_text_;
    238       if (!NextItem()) return false;
    239 
    240       // Parse sub-feature.
    241       FeatureFunctionDescriptor *subfeature = result->add_feature();
    242       subfeature->set_type(type);
    243       if (!ParseFeature(subfeature)) return false;
    244     }
    245     if (!NextItem()) return false;
    246   }
    247   return true;
    248 }
    249 
    250 bool FELParser::ParseParameter(FeatureFunctionDescriptor *result) {
    251   if (item_type_ == NUMBER) {
    252     int argument;
    253     if (!LiteAtoi(item_text_, &argument)) {
    254       ReportError("Unable to parse number");
    255       return false;
    256     }
    257     if (!NextItem()) return false;
    258 
    259     // Set default argument for feature.
    260     result->set_argument(argument);
    261   } else if (item_type_ == NAME) {
    262     string name = item_text_;
    263     if (!NextItem()) return false;
    264     if (item_type_ != '=') {
    265       ReportError("= expected");
    266       return false;
    267     }
    268     if (!NextItem()) return false;
    269     if (item_type_ >= END) {
    270       ReportError("Parameter value expected");
    271       return false;
    272     }
    273     string value = item_text_;
    274     if (!NextItem()) return false;
    275 
    276     // Add parameter to feature.
    277     Parameter *parameter;
    278     parameter = result->add_parameter();
    279     parameter->set_name(name);
    280     parameter->set_value(value);
    281   } else {
    282     ReportError("Syntax error in parameter list");
    283     return false;
    284   }
    285   return true;
    286 }
    287 
    288 }  // namespace mobile
    289 }  // namespace libtextclassifier3
    290