1 /* 2 * Copyright (C) 2018 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "lang_id/common/fel/fel-parser.h" 18 19 #include <ctype.h> 20 #include <string> 21 22 #include "lang_id/common/lite_base/logging.h" 23 #include "lang_id/common/lite_strings/numbers.h" 24 25 namespace libtextclassifier3 { 26 namespace mobile { 27 28 namespace { 29 inline bool IsValidCharAtStartOfIdentifier(char c) { 30 return isalpha(c) || (c == '_') || (c == '/'); 31 } 32 33 // Returns true iff character c can appear inside an identifier. 34 inline bool IsValidCharInsideIdentifier(char c) { 35 return isalnum(c) || (c == '_') || (c == '-') || (c == '/'); 36 } 37 38 // Returns true iff character c can appear at the beginning of a number. 39 inline bool IsValidCharAtStartOfNumber(char c) { 40 return isdigit(c) || (c == '+') || (c == '-'); 41 } 42 43 // Returns true iff character c can appear inside a number. 44 inline bool IsValidCharInsideNumber(char c) { 45 return isdigit(c) || (c == '.'); 46 } 47 } // namespace 48 49 bool FELParser::Initialize(const string &source) { 50 // Initialize parser state. 51 source_ = source; 52 current_ = source_.begin(); 53 item_start_ = line_start_ = current_; 54 line_number_ = item_line_number_ = 1; 55 56 // Read first input item. 57 return NextItem(); 58 } 59 60 void FELParser::ReportError(const string &error_message) { 61 const int position = item_start_ - line_start_ + 1; 62 const string line(line_start_, current_); 63 64 SAFTM_LOG(ERROR) << "Error in feature model, line " << item_line_number_ 65 << ", position " << position << ": " << error_message 66 << "\n " << line << " <--HERE"; 67 } 68 69 void FELParser::Next() { 70 // Move to the next input character. If we are at a line break update line 71 // number and line start position. 72 if (CurrentChar() == '\n') { 73 ++line_number_; 74 ++current_; 75 line_start_ = current_; 76 } else { 77 ++current_; 78 } 79 } 80 81 bool FELParser::NextItem() { 82 // Skip white space and comments. 83 while (!eos()) { 84 if (CurrentChar() == '#') { 85 // Skip comment. 86 while (!eos() && CurrentChar() != '\n') Next(); 87 } else if (isspace(CurrentChar())) { 88 // Skip whitespace. 89 while (!eos() && isspace(CurrentChar())) Next(); 90 } else { 91 break; 92 } 93 } 94 95 // Record start position for next item. 96 item_start_ = current_; 97 item_line_number_ = line_number_; 98 99 // Check for end of input. 100 if (eos()) { 101 item_type_ = END; 102 return true; 103 } 104 105 // Parse number. 106 if (IsValidCharAtStartOfNumber(CurrentChar())) { 107 string::iterator start = current_; 108 Next(); 109 while (!eos() && IsValidCharInsideNumber(CurrentChar())) Next(); 110 item_text_.assign(start, current_); 111 item_type_ = NUMBER; 112 return true; 113 } 114 115 // Parse string. 116 if (CurrentChar() == '"') { 117 Next(); 118 string::iterator start = current_; 119 while (CurrentChar() != '"') { 120 if (eos()) { 121 ReportError("Unterminated string"); 122 return false; 123 } 124 Next(); 125 } 126 item_text_.assign(start, current_); 127 item_type_ = STRING; 128 Next(); 129 return true; 130 } 131 132 // Parse identifier name. 133 if (IsValidCharAtStartOfIdentifier(CurrentChar())) { 134 string::iterator start = current_; 135 while (!eos() && IsValidCharInsideIdentifier(CurrentChar())) { 136 Next(); 137 } 138 item_text_.assign(start, current_); 139 item_type_ = NAME; 140 return true; 141 } 142 143 // Single character item. 144 item_type_ = CurrentChar(); 145 Next(); 146 return true; 147 } 148 149 bool FELParser::Parse(const string &source, 150 FeatureExtractorDescriptor *result) { 151 // Initialize parser. 152 if (!Initialize(source)) { 153 return false; 154 } 155 156 while (item_type_ != END) { 157 // Current item should be a feature name. 158 if (item_type_ != NAME) { 159 ReportError("Feature type name expected"); 160 return false; 161 } 162 string name = item_text_; 163 if (!NextItem()) { 164 return false; 165 } 166 167 if (item_type_ == '=') { 168 ReportError("Invalid syntax: feature expected"); 169 return false; 170 } else { 171 // Parse feature. 172 FeatureFunctionDescriptor *descriptor = result->add_feature(); 173 descriptor->set_type(name); 174 if (!ParseFeature(descriptor)) { 175 return false; 176 } 177 } 178 } 179 180 return true; 181 } 182 183 bool FELParser::ParseFeature(FeatureFunctionDescriptor *result) { 184 // Parse argument and parameters. 185 if (item_type_ == '(') { 186 if (!NextItem()) return false; 187 if (!ParseParameter(result)) return false; 188 while (item_type_ == ',') { 189 if (!NextItem()) return false; 190 if (!ParseParameter(result)) return false; 191 } 192 193 if (item_type_ != ')') { 194 ReportError(") expected"); 195 return false; 196 } 197 if (!NextItem()) return false; 198 } 199 200 // Parse feature name. 201 if (item_type_ == ':') { 202 if (!NextItem()) return false; 203 if (item_type_ != NAME && item_type_ != STRING) { 204 ReportError("Feature name expected"); 205 return false; 206 } 207 string name = item_text_; 208 if (!NextItem()) return false; 209 210 // Set feature name. 211 result->set_name(name); 212 } 213 214 // Parse sub-features. 215 if (item_type_ == '.') { 216 // Parse dotted sub-feature. 217 if (!NextItem()) return false; 218 if (item_type_ != NAME) { 219 ReportError("Feature type name expected"); 220 return false; 221 } 222 string type = item_text_; 223 if (!NextItem()) return false; 224 225 // Parse sub-feature. 226 FeatureFunctionDescriptor *subfeature = result->add_feature(); 227 subfeature->set_type(type); 228 if (!ParseFeature(subfeature)) return false; 229 } else if (item_type_ == '{') { 230 // Parse sub-feature block. 231 if (!NextItem()) return false; 232 while (item_type_ != '}') { 233 if (item_type_ != NAME) { 234 ReportError("Feature type name expected"); 235 return false; 236 } 237 string type = item_text_; 238 if (!NextItem()) return false; 239 240 // Parse sub-feature. 241 FeatureFunctionDescriptor *subfeature = result->add_feature(); 242 subfeature->set_type(type); 243 if (!ParseFeature(subfeature)) return false; 244 } 245 if (!NextItem()) return false; 246 } 247 return true; 248 } 249 250 bool FELParser::ParseParameter(FeatureFunctionDescriptor *result) { 251 if (item_type_ == NUMBER) { 252 int argument; 253 if (!LiteAtoi(item_text_, &argument)) { 254 ReportError("Unable to parse number"); 255 return false; 256 } 257 if (!NextItem()) return false; 258 259 // Set default argument for feature. 260 result->set_argument(argument); 261 } else if (item_type_ == NAME) { 262 string name = item_text_; 263 if (!NextItem()) return false; 264 if (item_type_ != '=') { 265 ReportError("= expected"); 266 return false; 267 } 268 if (!NextItem()) return false; 269 if (item_type_ >= END) { 270 ReportError("Parameter value expected"); 271 return false; 272 } 273 string value = item_text_; 274 if (!NextItem()) return false; 275 276 // Add parameter to feature. 277 Parameter *parameter; 278 parameter = result->add_parameter(); 279 parameter->set_name(name); 280 parameter->set_value(value); 281 } else { 282 ReportError("Syntax error in parameter list"); 283 return false; 284 } 285 return true; 286 } 287 288 } // namespace mobile 289 } // namespace nlp_saft 290