Home | History | Annotate | Download | only in datetime
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef LIBTEXTCLASSIFIER_DATETIME_PARSER_H_
     18 #define LIBTEXTCLASSIFIER_DATETIME_PARSER_H_
     19 
     20 #include <memory>
     21 #include <string>
     22 #include <unordered_map>
     23 #include <unordered_set>
     24 #include <vector>
     25 
     26 #include "datetime/extractor.h"
     27 #include "model_generated.h"
     28 #include "types.h"
     29 #include "util/base/integral_types.h"
     30 #include "util/calendar/calendar.h"
     31 #include "util/utf8/unilib.h"
     32 #include "zlib-utils.h"
     33 
     34 namespace libtextclassifier2 {
     35 
     36 // Parses datetime expressions in the input and resolves them to actual absolute
     37 // time.
     38 class DatetimeParser {
     39  public:
          // Factory for DatetimeParser. The constructor is protected, so code
          // outside this class (and its subclasses) obtains a parser through here.
          // NOTE(review): presumably returns nullptr when setting up from 'model'
          // fails (see initialized_) -- confirm against the .cc before relying on it.
     40   static std::unique_ptr<DatetimeParser> Instance(
     41       const DatetimeModel* model, const UniLib& unilib,
     42       ZlibDecompressor* decompressor);
     43 
     44   // Parses the dates in 'input' and fills result. Makes sure that the results
     45   // do not overlap.
     46   // If 'anchor_start_end' is true the extracted results need to start at the
     47   // beginning of 'input' and end at the end of it.
          // NOTE(review): 'locales' is presumably the same comma-separated locale
          // spec string that ParseAndExpandLocales() accepts -- confirm.
          // NOTE(review): the meaning of the bool return value (presumably
          // success/failure) is not visible in this header -- confirm in the .cc.
     48   bool Parse(const std::string& input, int64 reference_time_ms_utc,
     49              const std::string& reference_timezone, const std::string& locales,
     50              ModeFlag mode, bool anchor_start_end,
     51              std::vector<DatetimeParseResultSpan>* results) const;
     52 
     53   // Same as above but takes UnicodeText.
     54   bool Parse(const UnicodeText& input, int64 reference_time_ms_utc,
     55              const std::string& reference_timezone, const std::string& locales,
     56              ModeFlag mode, bool anchor_start_end,
     57              std::vector<DatetimeParseResultSpan>* results) const;
     58 
     59  protected:
          // Takes the same arguments as Instance(). Being protected, it is only
          // callable from this class and from subclasses.
     60   DatetimeParser(const DatetimeModel* model, const UniLib& unilib,
     61                  ZlibDecompressor* decompressor);
     62 
     63   // Returns a list of locale ids for given locale spec string (comma-separated
     64   // locale names). Assigns the first parsed locale to reference_locale.
     65   std::vector<int> ParseAndExpandLocales(const std::string& locales,
     66                                          std::string* reference_locale) const;
     67 
     68   // Helper function that finds datetime spans, only using the rules associated
     69   // with the given locales.
          // 'executed_rules' and 'found_spans' are non-const pointers, i.e. the call
          // may update both. NOTE(review): 'executed_rules' presumably records the
          // ids of rules already run so they are not repeated -- confirm.
     70   bool FindSpansUsingLocales(
     71       const std::vector<int>& locale_ids, const UnicodeText& input,
     72       const int64 reference_time_ms_utc, const std::string& reference_timezone,
     73       ModeFlag mode, bool anchor_start_end, const std::string& reference_locale,
     74       std::unordered_set<int>* executed_rules,
     75       std::vector<DatetimeParseResultSpan>* found_spans) const;
     76 
          // Runs a single compiled rule against 'input'. NOTE(review): presumably
          // adds the spans it finds to 'result' -- confirm in the .cc.
     77   bool ParseWithRule(const CompiledRule& rule, const UnicodeText& input,
     78                      int64 reference_time_ms_utc,
     79                      const std::string& reference_timezone,
     80                      const std::string& reference_locale, const int locale_id,
     81                      bool anchor_start_end,
     82                      std::vector<DatetimeParseResultSpan>* result) const;
     83 
     84   // Converts the current match in 'matcher' into DatetimeParseResult.
          // The converted value is delivered through 'result'. NOTE(review):
          // 'result_span' presumably receives the codepoint range of the match --
          // confirm.
     85   bool ExtractDatetime(const CompiledRule& rule,
     86                        const UniLib::RegexMatcher& matcher,
     87                        int64 reference_time_ms_utc,
     88                        const std::string& reference_timezone,
     89                        const std::string& reference_locale, int locale_id,
     90                        DatetimeParseResult* result,
     91                        CodepointSpan* result_span) const;
     92 
     93   // Parse and extract information from current match in 'matcher'.
     94   bool HandleParseMatch(const CompiledRule& rule,
     95                         const UniLib::RegexMatcher& matcher,
     96                         int64 reference_time_ms_utc,
     97                         const std::string& reference_timezone,
     98                         const std::string& reference_locale, int locale_id,
     99                         std::vector<DatetimeParseResultSpan>* result) const;
    100 
    101  private:
          // NOTE: data members are initialized in declaration order, so reordering
          // them can change what the constructor in the .cc may rely on.

          // NOTE(review): presumably set by the constructor to report whether setup
          // succeeded -- confirm.
    102   bool initialized_;
          // Held by reference, not owned. NOTE(review): presumably bound to the
          // constructor's 'unilib' argument, which would then have to outlive this
          // object -- confirm.
    103   const UniLib& unilib_;
          // Compiled datetime rules.
    104   std::vector<CompiledRule> rules_;
          // NOTE(review): presumably locale id -> indices into rules_ -- confirm.
    105   std::unordered_map<int, std::vector<int>> locale_to_rules_;
          // Regex patterns owned by this object.
    106   std::vector<std::unique_ptr<const UniLib::RegexPattern>> extractor_rules_;
          // Two-level lookup keyed by extractor type, then by an int. NOTE(review):
          // presumably locale id -> index into extractor_rules_ -- confirm.
    107   std::unordered_map<DatetimeExtractorType, std::unordered_map<int, int>>
    108       type_and_locale_to_extractor_rule_;
          // NOTE(review): presumably locale name -> locale id, as used by
          // ParseAndExpandLocales() -- confirm.
    109   std::unordered_map<std::string, int> locale_string_to_id_;
          // NOTE(review): presumably the locale ids used when the caller's locale
          // spec yields none -- confirm.
    110   std::vector<int> default_locale_ids_;
    111   CalendarLib calendar_lib_;
          // NOTE(review): meaning not visible in this header; see the .cc.
    112   bool use_extractors_for_locating_;
    113 };
    114 
    115 }  // namespace libtextclassifier2
    116 
    117 #endif  // LIBTEXTCLASSIFIER_DATETIME_PARSER_H_
    118