1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LIBTEXTCLASSIFIER_DATETIME_PARSER_H_ 18 #define LIBTEXTCLASSIFIER_DATETIME_PARSER_H_ 19 20 #include <memory> 21 #include <string> 22 #include <unordered_map> 23 #include <unordered_set> 24 #include <vector> 25 26 #include "datetime/extractor.h" 27 #include "model_generated.h" 28 #include "types.h" 29 #include "util/base/integral_types.h" 30 #include "util/calendar/calendar.h" 31 #include "util/utf8/unilib.h" 32 #include "zlib-utils.h" 33 34 namespace libtextclassifier2 { 35 36 // Parses datetime expressions in the input and resolves them to actual absolute 37 // time. 38 class DatetimeParser { 39 public: 40 static std::unique_ptr<DatetimeParser> Instance( 41 const DatetimeModel* model, const UniLib& unilib, 42 ZlibDecompressor* decompressor); 43 44 // Parses the dates in 'input' and fills result. Makes sure that the results 45 // do not overlap. 46 // If 'anchor_start_end' is true the extracted results need to start at the 47 // beginning of 'input' and end at the end of it. 48 bool Parse(const std::string& input, int64 reference_time_ms_utc, 49 const std::string& reference_timezone, const std::string& locales, 50 ModeFlag mode, bool anchor_start_end, 51 std::vector<DatetimeParseResultSpan>* results) const; 52 53 // Same as above but takes UnicodeText. 54 bool Parse(const UnicodeText& input, int64 reference_time_ms_utc, 55 const std::string& reference_timezone, const std::string& locales, 56 ModeFlag mode, bool anchor_start_end, 57 std::vector<DatetimeParseResultSpan>* results) const; 58 59 protected: 60 DatetimeParser(const DatetimeModel* model, const UniLib& unilib, 61 ZlibDecompressor* decompressor); 62 63 // Returns a list of locale ids for given locale spec string (comma-separated 64 // locale names). Assigns the first parsed locale to reference_locale. 65 std::vector<int> ParseAndExpandLocales(const std::string& locales, 66 std::string* reference_locale) const; 67 68 // Helper function that finds datetime spans, only using the rules associated 69 // with the given locales. 70 bool FindSpansUsingLocales( 71 const std::vector<int>& locale_ids, const UnicodeText& input, 72 const int64 reference_time_ms_utc, const std::string& reference_timezone, 73 ModeFlag mode, bool anchor_start_end, const std::string& reference_locale, 74 std::unordered_set<int>* executed_rules, 75 std::vector<DatetimeParseResultSpan>* found_spans) const; 76 77 bool ParseWithRule(const CompiledRule& rule, const UnicodeText& input, 78 int64 reference_time_ms_utc, 79 const std::string& reference_timezone, 80 const std::string& reference_locale, const int locale_id, 81 bool anchor_start_end, 82 std::vector<DatetimeParseResultSpan>* result) const; 83 84 // Converts the current match in 'matcher' into DatetimeParseResult. 85 bool ExtractDatetime(const CompiledRule& rule, 86 const UniLib::RegexMatcher& matcher, 87 int64 reference_time_ms_utc, 88 const std::string& reference_timezone, 89 const std::string& reference_locale, int locale_id, 90 DatetimeParseResult* result, 91 CodepointSpan* result_span) const; 92 93 // Parse and extract information from current match in 'matcher'. 94 bool HandleParseMatch(const CompiledRule& rule, 95 const UniLib::RegexMatcher& matcher, 96 int64 reference_time_ms_utc, 97 const std::string& reference_timezone, 98 const std::string& reference_locale, int locale_id, 99 std::vector<DatetimeParseResultSpan>* result) const; 100 101 private: 102 bool initialized_; 103 const UniLib& unilib_; 104 std::vector<CompiledRule> rules_; 105 std::unordered_map<int, std::vector<int>> locale_to_rules_; 106 std::vector<std::unique_ptr<const UniLib::RegexPattern>> extractor_rules_; 107 std::unordered_map<DatetimeExtractorType, std::unordered_map<int, int>> 108 type_and_locale_to_extractor_rule_; 109 std::unordered_map<std::string, int> locale_string_to_id_; 110 std::vector<int> default_locale_ids_; 111 CalendarLib calendar_lib_; 112 bool use_extractors_for_locating_; 113 }; 114 115 } // namespace libtextclassifier2 116 117 #endif // LIBTEXTCLASSIFIER_DATETIME_PARSER_H_ 118