1 /* 2 * Copyright (C) 2001-2005, International Business Machines Corporation and others. All Rights Reserved. 3 ********************************************************************** 4 * Date Name Description 5 * 07/18/01 aliu Creation. 6 ********************************************************************** 7 */ 8 #ifndef UNIMATCH_H 9 #define UNIMATCH_H 10 11 #include "unicode/utypes.h" 12 13 /** 14 * \file 15 * \brief C++ API: Unicode Matcher 16 */ 17 18 19 U_NAMESPACE_BEGIN 20 21 class Replaceable; 22 class UnicodeString; 23 class UnicodeSet; 24 25 /** 26 * Constants returned by <code>UnicodeMatcher::matches()</code> 27 * indicating the degree of match. 28 * @stable ICU 2.4 29 */ 30 enum UMatchDegree { 31 /** 32 * Constant returned by <code>matches()</code> indicating a 33 * mismatch between the text and this matcher. The text contains 34 * a character which does not match, or the text does not contain 35 * all desired characters for a non-incremental match. 36 * @stable ICU 2.4 37 */ 38 U_MISMATCH, 39 40 /** 41 * Constant returned by <code>matches()</code> indicating a 42 * partial match between the text and this matcher. This value is 43 * only returned for incremental match operations. All characters 44 * of the text match, but more characters are required for a 45 * complete match. Alternatively, for variable-length matchers, 46 * all characters of the text match, and if more characters were 47 * supplied at limit, they might also match. 48 * @stable ICU 2.4 49 */ 50 U_PARTIAL_MATCH, 51 52 /** 53 * Constant returned by <code>matches()</code> indicating a 54 * complete match between the text and this matcher. For an 55 * incremental variable-length match, this value is returned if 56 * the given text matches, and it is known that additional 57 * characters would not alter the extent of the match. 58 * @stable ICU 2.4 59 */ 60 U_MATCH 61 }; 62 63 /** 64 * <code>UnicodeMatcher</code> defines a protocol for objects that can 65 * match a range of characters in a Replaceable string. 66 * @stable ICU 2.4 67 */ 68 class U_COMMON_API UnicodeMatcher /* not : public UObject because this is an interface/mixin class */ { 69 70 public: 71 /** 72 * Destructor. 73 * @stable ICU 2.4 74 */ 75 virtual ~UnicodeMatcher(); 76 77 /** 78 * Return a UMatchDegree value indicating the degree of match for 79 * the given text at the given offset. Zero, one, or more 80 * characters may be matched. 81 * 82 * Matching in the forward direction is indicated by limit > 83 * offset. Characters from offset forwards to limit-1 will be 84 * considered for matching. 85 * 86 * Matching in the reverse direction is indicated by limit < 87 * offset. Characters from offset backwards to limit+1 will be 88 * considered for matching. 89 * 90 * If limit == offset then the only match possible is a zero 91 * character match (which subclasses may implement if desired). 92 * 93 * As a side effect, advance the offset parameter to the limit of 94 * the matched substring. In the forward direction, this will be 95 * the index of the last matched character plus one. In the 96 * reverse direction, this will be the index of the last matched 97 * character minus one. 98 * 99 * <p>Note: This method is not const because some classes may 100 * modify their state as the result of a match. 101 * 102 * @param text the text to be matched 103 * @param offset on input, the index into text at which to begin 104 * matching. On output, the limit of the matched text. The 105 * number of matched characters is the output value of offset 106 * minus the input value. Offset should always point to the 107 * HIGH SURROGATE (leading code unit) of a pair of surrogates, 108 * both on entry and upon return. 109 * @param limit the limit index of text to be matched. Greater 110 * than offset for a forward direction match, less than offset for 111 * a backward direction match. The last character to be 112 * considered for matching will be text.charAt(limit-1) in the 113 * forward direction or text.charAt(limit+1) in the backward 114 * direction. 115 * @param incremental if TRUE, then assume further characters may 116 * be inserted at limit and check for partial matching. Otherwise 117 * assume the text as given is complete. 118 * @return a match degree value indicating a full match, a partial 119 * match, or a mismatch. If incremental is FALSE then 120 * U_PARTIAL_MATCH should never be returned. 121 * @stable ICU 2.4 122 */ 123 virtual UMatchDegree matches(const Replaceable& text, 124 int32_t& offset, 125 int32_t limit, 126 UBool incremental) = 0; 127 128 /** 129 * Returns a string representation of this matcher. If the result of 130 * calling this function is passed to the appropriate parser, it 131 * will produce another matcher that is equal to this one. 132 * @param result the string to receive the pattern. Previous 133 * contents will be deleted. 134 * @param escapeUnprintable if TRUE then convert unprintable 135 * character to their hex escape representations, \\uxxxx or 136 * \\Uxxxxxxxx. Unprintable characters are those other than 137 * U+000A, U+0020..U+007E. 138 * @stable ICU 2.4 139 */ 140 virtual UnicodeString& toPattern(UnicodeString& result, 141 UBool escapeUnprintable = FALSE) const = 0; 142 143 /** 144 * Returns TRUE if this matcher will match a character c, where c 145 * & 0xFF == v, at offset, in the forward direction (with limit > 146 * offset). This is used by <tt>RuleBasedTransliterator</tt> for 147 * indexing. 148 * @stable ICU 2.4 149 */ 150 virtual UBool matchesIndexValue(uint8_t v) const = 0; 151 152 /** 153 * Union the set of all characters that may be matched by this object 154 * into the given set. 155 * @param toUnionTo the set into which to union the source characters 156 * @stable ICU 2.4 157 */ 158 virtual void addMatchSetTo(UnicodeSet& toUnionTo) const = 0; 159 }; 160 161 U_NAMESPACE_END 162 163 #endif 164