1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ********************************************************************** 5 * Copyright (C) 1999-2007, International Business Machines Corporation 6 * and others. All Rights Reserved. 7 ********************************************************************** 8 * Date Name Description 9 * 11/17/99 aliu Creation. 10 ********************************************************************** 11 */ 12 #ifndef RBT_SET_H 13 #define RBT_SET_H 14 15 #include "unicode/utypes.h" 16 17 #if !UCONFIG_NO_TRANSLITERATION 18 19 #include "unicode/uobject.h" 20 #include "unicode/utrans.h" 21 #include "uvector.h" 22 23 U_NAMESPACE_BEGIN 24 25 class Replaceable; 26 class TransliterationRule; 27 class TransliterationRuleData; 28 class UnicodeFilter; 29 class UnicodeString; 30 class UnicodeSet; 31 32 /** 33 * A set of rules for a <code>RuleBasedTransliterator</code>. 34 * @author Alan Liu 35 */ 36 class TransliterationRuleSet : public UMemory { 37 /** 38 * Vector of rules, in the order added. This is used while the 39 * rule set is getting built. After that, freeze() reorders and 40 * indexes the rules into rules[]. Any given rule is stored once 41 * in ruleVector, and one or more times in rules[]. ruleVector 42 * owns and deletes the rules. 43 */ 44 UVector* ruleVector; 45 46 /** 47 * Sorted and indexed table of rules. This is created by freeze() 48 * from the rules in ruleVector. It contains alias pointers to 49 * the rules in ruleVector. It is zero before freeze() is called 50 * and non-zero thereafter. 51 */ 52 TransliterationRule** rules; 53 54 /** 55 * Index table. For text having a first character c, compute x = c&0xFF. 56 * Now use rules[index[x]..index[x+1]-1]. This index table is created by 57 * freeze(). Before freeze() is called it contains garbage. 58 */ 59 int32_t index[257]; 60 61 /** 62 * Length of the longest preceding context 63 */ 64 int32_t maxContextLength; 65 66 public: 67 68 /** 69 * Construct a new empty rule set. 70 * @param status Output parameter filled in with success or failure status. 71 */ 72 TransliterationRuleSet(UErrorCode& status); 73 74 /** 75 * Copy constructor. 76 */ 77 TransliterationRuleSet(const TransliterationRuleSet&); 78 79 /** 80 * Destructor. 81 */ 82 virtual ~TransliterationRuleSet(); 83 84 /** 85 * Change the data object that this rule belongs to. Used 86 * internally by the TransliterationRuleData copy constructor. 87 * @param data the new data value to be set. 88 */ 89 void setData(const TransliterationRuleData* data); 90 91 /** 92 * Return the maximum context length. 93 * @return the length of the longest preceding context. 94 */ 95 virtual int32_t getMaximumContextLength(void) const; 96 97 /** 98 * Add a rule to this set. Rules are added in order, and order is 99 * significant. The last call to this method must be followed by 100 * a call to <code>freeze()</code> before the rule set is used. 101 * This method must <em>not</em> be called after freeze() has been 102 * called. 103 * 104 * @param adoptedRule the rule to add 105 */ 106 virtual void addRule(TransliterationRule* adoptedRule, 107 UErrorCode& status); 108 109 /** 110 * Check this for masked rules and index it to optimize performance. 111 * The sequence of operations is: (1) add rules to a set using 112 * <code>addRule()</code>; (2) freeze the set using 113 * <code>freeze()</code>; (3) use the rule set. If 114 * <code>addRule()</code> is called after calling this method, it 115 * invalidates this object, and this method must be called again. 116 * That is, <code>freeze()</code> may be called multiple times, 117 * although for optimal performance it shouldn't be. 118 * @param parseError A pointer to UParseError to receive information about errors 119 * occurred. 120 * @param status Output parameter filled in with success or failure status. 121 */ 122 virtual void freeze(UParseError& parseError, UErrorCode& status); 123 124 /** 125 * Transliterate the given text with the given UTransPosition 126 * indices. Return TRUE if the transliteration should continue 127 * or FALSE if it should halt (because of a U_PARTIAL_MATCH match). 128 * Note that FALSE is only ever returned if isIncremental is TRUE. 129 * @param text the text to be transliterated 130 * @param index the position indices, which will be updated 131 * @param isIncremental if TRUE, assume new text may be inserted 132 * at index.limit, and return FALSE if thre is a partial match. 133 * @return TRUE unless a U_PARTIAL_MATCH has been obtained, 134 * indicating that transliteration should stop until more text 135 * arrives. 136 */ 137 UBool transliterate(Replaceable& text, 138 UTransPosition& index, 139 UBool isIncremental); 140 141 /** 142 * Create rule strings that represents this rule set. 143 * @param result string to receive the rule strings. Current 144 * contents will be deleted. 145 * @param escapeUnprintable True, will escape the unprintable characters 146 * @return A reference to 'result'. 147 */ 148 virtual UnicodeString& toRules(UnicodeString& result, 149 UBool escapeUnprintable) const; 150 151 /** 152 * Return the set of all characters that may be modified 153 * (getTarget=false) or emitted (getTarget=true) by this set. 154 */ 155 UnicodeSet& getSourceTargetSet(UnicodeSet& result, 156 UBool getTarget) const; 157 158 private: 159 160 TransliterationRuleSet &operator=(const TransliterationRuleSet &other); // forbid copying of this class 161 }; 162 163 U_NAMESPACE_END 164 165 #endif /* #if !UCONFIG_NO_TRANSLITERATION */ 166 167 #endif 168