Home | History | Annotate | Download | only in convert_dict
      1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef CHROME_TOOLS_CONVERT_DICT_AFF_READER_H__
      6 #define CHROME_TOOLS_CONVERT_DICT_AFF_READER_H__
      7 
      8 #include <map>
      9 #include <stdio.h>
     10 #include <string>
     11 #include <vector>
     12 
     namespace base {
     class FilePath;  // Forward declaration; only used by reference below.
     }  // namespace base

     namespace convert_dict {

     // Reader for a dictionary .aff (affix) file. Construct it with the path of
     // the file, call Read(), and then query the parsed data through the getters.
     //
     // The class holds a raw FILE* and declares its own destructor, so it is
     // deliberately non-copyable (see the private section).
     class AffReader {
      public:
       explicit AffReader(const base::FilePath& path);
       ~AffReader();

       // Reads the .aff file. The other accessors document that they must only be
       // used after this has been called.
       // NOTE(review): presumably returns true on success -- confirm in the .cc.
       bool Read();

       // Returns whether this file uses indexed affixes, or, on false, whether the
       // rule string will be specified literally in the .dic file. This must be
       // called after Read().
       bool has_indexed_affixes() const { return has_indexed_affixes_; }

       // Returns a string representing the encoding of the dictionary. This will
       // default to ISO-8859-1 if the .aff file does not specify it. The returned
       // pointer refers to storage inside this object.
       const char* encoding() const { return encoding_.c_str(); }

       // Converts the given string from the file encoding to UTF-8, returning true
       // on success.
       bool EncodingToUTF8(const std::string& encoded, std::string* utf8) const;

       // Adds a new affix string, returning the index. If it already exists, returns
       // the index of the existing one.
       // NOTE(review): the original comment was cut off mid-sentence ("This is used
       // to convert .dic files which list the"); presumably it refers to .dic files
       // that list the rule string literally after each word (see
       // has_indexed_affixes()) -- confirm against the caller.
       // You must not call this until after Read().
       int GetAFIndexForAFString(const std::string& af_string);

       // Getters for the computed data.
       const std::string& comments() const { return intro_comment_; }
       const std::vector<std::string>& affix_rules() const { return affix_rules_; }
       const std::vector< std::pair<std::string, std::string> >&
           replacements() const {
         return replacements_;
       }
       const std::vector<std::string>& other_commands() const {
         return other_commands_;
       }

       // Returns the affix groups ("AF" lines) for this file. The indices into this
       // are 1-based, but we don't use the 0th item, so lookups will have to
       // subtract one to get the index. This is how hunspell stores this data.
       std::vector<std::string> GetAffixGroups() const;

      private:
       // Command-specific handlers. These are given the string following the
       // command. The input rule may be modified arbitrarily by the function.
       int AddAffixGroup(std::string* rule);  // Returns the new affix group ID.
       void AddAffix(std::string* rule);  // SFX/PFX
       void AddReplacement(std::string* rule);
       // void HandleFlag(std::string* rule);

       // Used to handle "other" commands. The "raw" just saves the line as-is.
       // The "encoded" version converts the line to UTF-8 and saves it.
       void HandleRawCommand(const std::string& line);
       void HandleEncodedCommand(const std::string& line);

       // Handle of the .aff file being read.
       // NOTE(review): presumably opened by the constructor and closed by the
       // destructor -- confirm in the .cc.
       FILE* file_;

       // Comments from the beginning of the file. This is everything before the
       // first command. We want to store this since it often contains the copyright
       // information.
       std::string intro_comment_;

       // Encoding of the source words.
       std::string encoding_;

       // Affix rules. These are populated by "AF" commands. The .dic file can refer
       // to these by index. They are indexed by their string value (the list of
       // characters representing rules), and map to the numeric affix IDs.
       //
       // These can also be added using GetAFIndexForAFString.
       std::map<std::string, int> affix_groups_;

       // True when the affixes were specified in the .aff file using indices. The
       // dictionary reader uses this to see how it should treat the stuff after the
       // word on each line.
       bool has_indexed_affixes_;

       // SFX and PFX commands. This is a list of each of those lines in the order
       // they appear in the file. They have been re-encoded.
       std::vector<std::string> affix_rules_;

       // Replacement commands. The first string is a possible input, and the second
       // is the replacement.
       std::vector< std::pair<std::string, std::string> > replacements_;

       // All other commands.
       std::vector<std::string> other_commands_;

       // Copying is disallowed: a copy would duplicate the raw |file_| handle, so
       // two objects would end up responsible for the same FILE*. These are
       // declared but intentionally never defined.
       AffReader(const AffReader&);
       AffReader& operator=(const AffReader&);
     };

     }  // namespace convert_dict
    109 
    110 #endif  // CHROME_TOOLS_CONVERT_DICT_AFF_READER_H__
    111