Home | History | Annotate | Download | only in common
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "common/fml-parser.h"
     18 
     19 #include <ctype.h>
     20 #include <string>
     21 
     22 #include "util/base/logging.h"
     23 #include "util/strings/numbers.h"
     24 
     25 namespace libtextclassifier {
     26 namespace nlp_core {
     27 
     28 namespace {
     29 inline bool IsValidCharAtStartOfIdentifier(char c) {
     30   return isalpha(c) || (c == '_') || (c == '/');
     31 }
     32 
     33 // Returns true iff character c can appear inside an identifier.
     34 inline bool IsValidCharInsideIdentifier(char c) {
     35   return isalnum(c) || (c == '_') || (c == '-') || (c == '/');
     36 }
     37 
     38 // Returns true iff character c can appear at the beginning of a number.
     39 inline bool IsValidCharAtStartOfNumber(char c) {
     40   return isdigit(c) || (c == '+') || (c == '-');
     41 }
     42 
     43 // Returns true iff character c can appear inside a number.
     44 inline bool IsValidCharInsideNumber(char c) {
     45   return isdigit(c) || (c == '.');
     46 }
     47 }  // namespace
     48 
     49 bool FMLParser::Initialize(const std::string &source) {
     50   // Initialize parser state.
     51   source_ = source;
     52   current_ = source_.begin();
     53   item_start_ = line_start_ = current_;
     54   line_number_ = item_line_number_ = 1;
     55 
     56   // Read first input item.
     57   return NextItem();
     58 }
     59 
     60 void FMLParser::ReportError(const std::string &error_message) {
     61   const int position = item_start_ - line_start_ + 1;
     62   const std::string line(line_start_, current_);
     63 
     64   TC_LOG(ERROR) << "Error in feature model, line " << item_line_number_
     65                 << ", position " << position << ": " << error_message
     66                 << "\n    " << line << " <--HERE";
     67 }
     68 
     69 void FMLParser::Next() {
     70   // Move to the next input character. If we are at a line break update line
     71   // number and line start position.
     72   if (CurrentChar() == '\n') {
     73     ++line_number_;
     74     ++current_;
     75     line_start_ = current_;
     76   } else {
     77     ++current_;
     78   }
     79 }
     80 
     81 bool FMLParser::NextItem() {
     82   // Skip white space and comments.
     83   while (!eos()) {
     84     if (CurrentChar() == '#') {
     85       // Skip comment.
     86       while (!eos() && CurrentChar() != '\n') Next();
     87     } else if (isspace(CurrentChar())) {
     88       // Skip whitespace.
     89       while (!eos() && isspace(CurrentChar())) Next();
     90     } else {
     91       break;
     92     }
     93   }
     94 
     95   // Record start position for next item.
     96   item_start_ = current_;
     97   item_line_number_ = line_number_;
     98 
     99   // Check for end of input.
    100   if (eos()) {
    101     item_type_ = END;
    102     return true;
    103   }
    104 
    105   // Parse number.
    106   if (IsValidCharAtStartOfNumber(CurrentChar())) {
    107     std::string::iterator start = current_;
    108     Next();
    109     while (!eos() && IsValidCharInsideNumber(CurrentChar())) Next();
    110     item_text_.assign(start, current_);
    111     item_type_ = NUMBER;
    112     return true;
    113   }
    114 
    115   // Parse std::string.
    116   if (CurrentChar() == '"') {
    117     Next();
    118     std::string::iterator start = current_;
    119     while (CurrentChar() != '"') {
    120       if (eos()) {
    121         ReportError("Unterminated string");
    122         return false;
    123       }
    124       Next();
    125     }
    126     item_text_.assign(start, current_);
    127     item_type_ = STRING;
    128     Next();
    129     return true;
    130   }
    131 
    132   // Parse identifier name.
    133   if (IsValidCharAtStartOfIdentifier(CurrentChar())) {
    134     std::string::iterator start = current_;
    135     while (!eos() && IsValidCharInsideIdentifier(CurrentChar())) {
    136       Next();
    137     }
    138     item_text_.assign(start, current_);
    139     item_type_ = NAME;
    140     return true;
    141   }
    142 
    143   // Single character item.
    144   item_type_ = CurrentChar();
    145   Next();
    146   return true;
    147 }
    148 
    149 bool FMLParser::Parse(const std::string &source,
    150                       FeatureExtractorDescriptor *result) {
    151   // Initialize parser.
    152   if (!Initialize(source)) {
    153     return false;
    154   }
    155 
    156   while (item_type_ != END) {
    157     // Current item should be a feature name.
    158     if (item_type_ != NAME) {
    159       ReportError("Feature type name expected");
    160       return false;
    161     }
    162     std::string name = item_text_;
    163     if (!NextItem()) {
    164       return false;
    165     }
    166 
    167     // Parse feature.
    168     FeatureFunctionDescriptor *descriptor = result->add_feature();
    169     descriptor->set_type(name);
    170     if (!ParseFeature(descriptor)) {
    171       return false;
    172     }
    173   }
    174 
    175   return true;
    176 }
    177 
    178 bool FMLParser::ParseFeature(FeatureFunctionDescriptor *result) {
    179   // Parse argument and parameters.
    180   if (item_type_ == '(') {
    181     if (!NextItem()) return false;
    182     if (!ParseParameter(result)) return false;
    183     while (item_type_ == ',') {
    184       if (!NextItem()) return false;
    185       if (!ParseParameter(result)) return false;
    186     }
    187 
    188     if (item_type_ != ')') {
    189       ReportError(") expected");
    190       return false;
    191     }
    192     if (!NextItem()) return false;
    193   }
    194 
    195   // Parse feature name.
    196   if (item_type_ == ':') {
    197     if (!NextItem()) return false;
    198     if (item_type_ != NAME && item_type_ != STRING) {
    199       ReportError("Feature name expected");
    200       return false;
    201     }
    202     std::string name = item_text_;
    203     if (!NextItem()) return false;
    204 
    205     // Set feature name.
    206     result->set_name(name);
    207   }
    208 
    209   // Parse sub-features.
    210   if (item_type_ == '.') {
    211     // Parse dotted sub-feature.
    212     if (!NextItem()) return false;
    213     if (item_type_ != NAME) {
    214       ReportError("Feature type name expected");
    215       return false;
    216     }
    217     std::string type = item_text_;
    218     if (!NextItem()) return false;
    219 
    220     // Parse sub-feature.
    221     FeatureFunctionDescriptor *subfeature = result->add_feature();
    222     subfeature->set_type(type);
    223     if (!ParseFeature(subfeature)) return false;
    224   } else if (item_type_ == '{') {
    225     // Parse sub-feature block.
    226     if (!NextItem()) return false;
    227     while (item_type_ != '}') {
    228       if (item_type_ != NAME) {
    229         ReportError("Feature type name expected");
    230         return false;
    231       }
    232       std::string type = item_text_;
    233       if (!NextItem()) return false;
    234 
    235       // Parse sub-feature.
    236       FeatureFunctionDescriptor *subfeature = result->add_feature();
    237       subfeature->set_type(type);
    238       if (!ParseFeature(subfeature)) return false;
    239     }
    240     if (!NextItem()) return false;
    241   }
    242   return true;
    243 }
    244 
    245 bool FMLParser::ParseParameter(FeatureFunctionDescriptor *result) {
    246   if (item_type_ == NUMBER) {
    247     int32 argument;
    248     if (!ParseInt32(item_text_.c_str(), &argument)) {
    249       ReportError("Unable to parse number");
    250       return false;
    251     }
    252     if (!NextItem()) return false;
    253 
    254     // Set default argument for feature.
    255     result->set_argument(argument);
    256   } else if (item_type_ == NAME) {
    257     std::string name = item_text_;
    258     if (!NextItem()) return false;
    259     if (item_type_ != '=') {
    260       ReportError("= expected");
    261       return false;
    262     }
    263     if (!NextItem()) return false;
    264     if (item_type_ >= END) {
    265       ReportError("Parameter value expected");
    266       return false;
    267     }
    268     std::string value = item_text_;
    269     if (!NextItem()) return false;
    270 
    271     // Add parameter to feature.
    272     Parameter *parameter;
    273     parameter = result->add_parameter();
    274     parameter->set_name(name);
    275     parameter->set_value(value);
    276   } else {
    277     ReportError("Syntax error in parameter list");
    278     return false;
    279   }
    280   return true;
    281 }
    282 
    283 void ToFMLFunction(const FeatureFunctionDescriptor &function,
    284                    std::string *output) {
    285   output->append(function.type());
    286   if (function.argument() != 0 || function.parameter_size() > 0) {
    287     output->append("(");
    288     bool first = true;
    289     if (function.argument() != 0) {
    290       output->append(IntToString(function.argument()));
    291       first = false;
    292     }
    293     for (int i = 0; i < function.parameter_size(); ++i) {
    294       if (!first) output->append(",");
    295       output->append(function.parameter(i).name());
    296       output->append("=");
    297       output->append("\"");
    298       output->append(function.parameter(i).value());
    299       output->append("\"");
    300       first = false;
    301     }
    302     output->append(")");
    303   }
    304 }
    305 
    306 void ToFML(const FeatureFunctionDescriptor &function, std::string *output) {
    307   ToFMLFunction(function, output);
    308   if (function.feature_size() == 1) {
    309     output->append(".");
    310     ToFML(function.feature(0), output);
    311   } else if (function.feature_size() > 1) {
    312     output->append(" { ");
    313     for (int i = 0; i < function.feature_size(); ++i) {
    314       if (i > 0) output->append(" ");
    315       ToFML(function.feature(i), output);
    316     }
    317     output->append(" } ");
    318   }
    319 }
    320 
    321 void ToFML(const FeatureExtractorDescriptor &extractor, std::string *output) {
    322   for (int i = 0; i < extractor.feature_size(); ++i) {
    323     ToFML(extractor.feature(i), output);
    324     output->append("\n");
    325   }
    326 }
    327 
    328 }  // namespace nlp_core
    329 }  // namespace libtextclassifier
    330