1 /* 2 ********************************************************************** 3 * Copyright (C) 1999-2007, International Business Machines Corporation 4 * and others. All Rights Reserved. 5 ********************************************************************** 6 * Date Name Description 7 * 11/17/99 aliu Creation. 8 ********************************************************************** 9 */ 10 #ifndef RBT_SET_H 11 #define RBT_SET_H 12 13 #include "unicode/utypes.h" 14 15 #if !UCONFIG_NO_TRANSLITERATION 16 17 #include "unicode/uobject.h" 18 #include "unicode/utrans.h" 19 #include "uvector.h" 20 21 U_NAMESPACE_BEGIN 22 23 class Replaceable; 24 class TransliterationRule; 25 class TransliterationRuleData; 26 class UnicodeFilter; 27 class UnicodeString; 28 class UnicodeSet; 29 30 /** 31 * A set of rules for a <code>RuleBasedTransliterator</code>. 32 * @author Alan Liu 33 */ 34 class TransliterationRuleSet : public UMemory { 35 /** 36 * Vector of rules, in the order added. This is used while the 37 * rule set is getting built. After that, freeze() reorders and 38 * indexes the rules into rules[]. Any given rule is stored once 39 * in ruleVector, and one or more times in rules[]. ruleVector 40 * owns and deletes the rules. 41 */ 42 UVector* ruleVector; 43 44 /** 45 * Sorted and indexed table of rules. This is created by freeze() 46 * from the rules in ruleVector. It contains alias pointers to 47 * the rules in ruleVector. It is zero before freeze() is called 48 * and non-zero thereafter. 49 */ 50 TransliterationRule** rules; 51 52 /** 53 * Index table. For text having a first character c, compute x = c&0xFF. 54 * Now use rules[index[x]..index[x+1]-1]. This index table is created by 55 * freeze(). Before freeze() is called it contains garbage. 56 */ 57 int32_t index[257]; 58 59 /** 60 * Length of the longest preceding context 61 */ 62 int32_t maxContextLength; 63 64 public: 65 66 /** 67 * Construct a new empty rule set. 68 * @param status Output parameter filled in with success or failure status. 69 */ 70 TransliterationRuleSet(UErrorCode& status); 71 72 /** 73 * Copy constructor. 74 */ 75 TransliterationRuleSet(const TransliterationRuleSet&); 76 77 /** 78 * Destructor. 79 */ 80 virtual ~TransliterationRuleSet(); 81 82 /** 83 * Change the data object that this rule belongs to. Used 84 * internally by the TransliterationRuleData copy constructor. 85 * @param data the new data value to be set. 86 */ 87 void setData(const TransliterationRuleData* data); 88 89 /** 90 * Return the maximum context length. 91 * @return the length of the longest preceding context. 92 */ 93 virtual int32_t getMaximumContextLength(void) const; 94 95 /** 96 * Add a rule to this set. Rules are added in order, and order is 97 * significant. The last call to this method must be followed by 98 * a call to <code>freeze()</code> before the rule set is used. 99 * This method must <em>not</em> be called after freeze() has been 100 * called. 101 * 102 * @param adoptedRule the rule to add 103 */ 104 virtual void addRule(TransliterationRule* adoptedRule, 105 UErrorCode& status); 106 107 /** 108 * Check this for masked rules and index it to optimize performance. 109 * The sequence of operations is: (1) add rules to a set using 110 * <code>addRule()</code>; (2) freeze the set using 111 * <code>freeze()</code>; (3) use the rule set. If 112 * <code>addRule()</code> is called after calling this method, it 113 * invalidates this object, and this method must be called again. 114 * That is, <code>freeze()</code> may be called multiple times, 115 * although for optimal performance it shouldn't be. 116 * @param parseError A pointer to UParseError to receive information about errors 117 * occurred. 118 * @param status Output parameter filled in with success or failure status. 119 */ 120 virtual void freeze(UParseError& parseError, UErrorCode& status); 121 122 /** 123 * Transliterate the given text with the given UTransPosition 124 * indices. Return TRUE if the transliteration should continue 125 * or FALSE if it should halt (because of a U_PARTIAL_MATCH match). 126 * Note that FALSE is only ever returned if isIncremental is TRUE. 127 * @param text the text to be transliterated 128 * @param index the position indices, which will be updated 129 * @param isIncremental if TRUE, assume new text may be inserted 130 * at index.limit, and return FALSE if thre is a partial match. 131 * @return TRUE unless a U_PARTIAL_MATCH has been obtained, 132 * indicating that transliteration should stop until more text 133 * arrives. 134 */ 135 UBool transliterate(Replaceable& text, 136 UTransPosition& index, 137 UBool isIncremental); 138 139 /** 140 * Create rule strings that represents this rule set. 141 * @param result string to receive the rule strings. Current 142 * contents will be deleted. 143 * @param escapeUnprintable True, will escape the unprintable characters 144 * @return A reference to 'result'. 145 */ 146 virtual UnicodeString& toRules(UnicodeString& result, 147 UBool escapeUnprintable) const; 148 149 /** 150 * Return the set of all characters that may be modified 151 * (getTarget=false) or emitted (getTarget=true) by this set. 152 */ 153 UnicodeSet& getSourceTargetSet(UnicodeSet& result, 154 UBool getTarget) const; 155 156 private: 157 158 TransliterationRuleSet &operator=(const TransliterationRuleSet &other); // forbid copying of this class 159 }; 160 161 U_NAMESPACE_END 162 163 #endif /* #if !UCONFIG_NO_TRANSLITERATION */ 164 165 #endif 166