// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CHROME_TOOLS_CONVERT_DICT_AFF_READER_H_
#define CHROME_TOOLS_CONVERT_DICT_AFF_READER_H_

#include <stdio.h>

#include <map>
#include <string>
#include <utility>
#include <vector>

namespace base {
class FilePath;
}

namespace convert_dict {

// Reads a hunspell-style .aff file and exposes the data collected from it:
// the leading comment block, the encoding, the affix groups ("AF" lines), the
// SFX/PFX rules, the replacement pairs and all remaining commands.
//
// Typical use: construct with the path of the .aff file, call Read(), then
// query the getters below.
//
// NOTE(review): this class holds a raw FILE* and declares a destructor, yet it
// is implicitly copyable. If the destructor closes |file_| (not visible from
// this header), copying an instance would be unsafe -- confirm, and consider
// disallowing copy and assignment.
class AffReader {
 public:
  explicit AffReader(const base::FilePath& path);
  ~AffReader();

  // Reads the file. The bool result presumably reports success -- confirm
  // against the implementation.
  bool Read();

  // Returns whether this file uses indexed affixes, or, on false, whether the
  // rule string will be specified literally in the .dic file. This must be
  // called after Read().
  bool has_indexed_affixes() const { return has_indexed_affixes_; }

  // Returns a string representing the encoding of the dictionary. This will
  // default to ISO-8859-1 if the .aff file does not specify it.
  const char* encoding() const { return encoding_.c_str(); }

  // Converts the given string from the file encoding to UTF-8, returning true
  // on success.
  bool EncodingToUTF8(const std::string& encoded, std::string* utf8) const;

  // Adds a new affix string, returning the index. If it already exists, returns
  // the index of the existing one. This is used to convert .dic files which
  // list the affix rule string literally rather than by index.
  // NOTE(review): the original sentence was cut off after "list the"; the
  // completion above is inferred from has_indexed_affixes() -- confirm.
  //
  // You must not call this until after Read().
  int GetAFIndexForAFString(const std::string& af_string);

  // Getters for the computed data.
  const std::string& comments() const { return intro_comment_; }
  const std::vector<std::string>& affix_rules() const { return affix_rules_; }
  const std::vector< std::pair<std::string, std::string> >&
      replacements() const {
    return replacements_;
  }
  const std::vector<std::string>& other_commands() const {
    return other_commands_;
  }

  // Returns the affix groups ("AF" lines) for this file. The indices into this
  // are 1-based, but we don't use the 0th item, so lookups will have to
  // subtract one to get the index. This is how hunspell stores this data.
  std::vector<std::string> GetAffixGroups() const;

 private:
  // Command-specific handlers. These are given the string following the
  // command. The input rule may be modified arbitrarily by the function.
  int AddAffixGroup(std::string* rule);  // Returns the new affix group ID.
  void AddAffix(std::string* rule);  // SFX/PFX
  void AddReplacement(std::string* rule);
  // void HandleFlag(std::string* rule);

  // Used to handle "other" commands. The "raw" just saves the line as-is.
  // The "encoded" version converts the line to UTF-8 and saves it.
  void HandleRawCommand(const std::string& line);
  void HandleEncodedCommand(const std::string& line);

  // Handle of the .aff file being read.
  FILE* file_;

  // Comments from the beginning of the file. This is everything before the
  // first command. We want to store this since it often contains the copyright
  // information.
  std::string intro_comment_;

  // Encoding of the source words.
  std::string encoding_;

  // Affix rules. These are populated by "AF" commands. The .dic file can refer
  // to these by index. They are indexed by their string value (the list of
  // characters representing rules), and map to the numeric affix IDs.
  //
  // These can also be added using GetAFIndexForAFString.
  std::map<std::string, int> affix_groups_;

  // True when the affixes were specified in the .aff file using indices. The
  // dictionary reader uses this to see how it should treat the stuff after the
  // word on each line.
  bool has_indexed_affixes_;

  // SFX and PFX commands. This is a list of each of those lines in the order
  // they appear in the file. They have been re-encoded.
  std::vector<std::string> affix_rules_;

  // Replacement commands. The first string is a possible input, and the second
  // is the replacement.
  std::vector< std::pair<std::string, std::string> > replacements_;

  // All other commands.
  std::vector<std::string> other_commands_;
};

}  // namespace convert_dict

#endif  // CHROME_TOOLS_CONVERT_DICT_AFF_READER_H_