Home | History | Annotate | Download | only in i18n
      1 /*
      2 **********************************************************************
      3 * Copyright (C) 1999-2007, International Business Machines Corporation
      4 * and others. All Rights Reserved.
      5 **********************************************************************
      6 *   Date        Name        Description
      7 *   11/17/99    aliu        Creation.
      8 **********************************************************************
      9 */
     10 #ifndef RBT_SET_H
     11 #define RBT_SET_H
     12 
     13 #include "unicode/utypes.h"
     14 
     15 #if !UCONFIG_NO_TRANSLITERATION
     16 
     17 #include "unicode/uobject.h"
     18 #include "unicode/utrans.h"
     19 #include "uvector.h"
     20 
     21 U_NAMESPACE_BEGIN
     22 
     23 class Replaceable;
     24 class TransliterationRule;
     25 class TransliterationRuleData;
     26 class UnicodeFilter;
     27 class UnicodeString;
     28 class UnicodeSet;
     29 
     30 /**
     31  * A set of rules for a <code>RuleBasedTransliterator</code>.
         *
         * Lifecycle, as described by the member documentation below: rules
         * are added in order with addRule(), the set is then checked and
         * indexed with freeze(), and only after that is it used (for example
         * via transliterate()).
         *
         * The copy constructor is public; the assignment operator is declared
         * private at the end of the class and is therefore not available to
         * callers.
     32  * @author Alan Liu
     33  */
     34 class TransliterationRuleSet : public UMemory {
     35     /**
     36      * Vector of rules, in the order added.  This is used while the
     37      * rule set is getting built.  After that, freeze() reorders and
     38      * indexes the rules into rules[].  Any given rule is stored once
     39      * in ruleVector, and one or more times in rules[].  ruleVector
     40      * owns and deletes the rules.
     41      */
     42     UVector* ruleVector;
     43 
     44     /**
     45      * Sorted and indexed table of rules.  This is created by freeze()
     46      * from the rules in ruleVector.  It contains alias pointers to
     47      * the rules in ruleVector.  It is zero before freeze() is called
     48      * and non-zero thereafter.
     49      */
     50     TransliterationRule** rules;
     51 
     52     /**
     53      * Index table.  For text having a first character c, compute x = c&0xFF.
     54      * Now use rules[index[x]..index[x+1]-1].  This index table is created by
     55      * freeze().  Before freeze() is called it contains garbage.
             *
             * x ranges over 0..0xFF, and the lookup reads index[x+1], so the
             * table needs 257 entries: one per possible x plus a final entry
             * that closes the last range.
     56      */
     57     int32_t index[257];
     58 
     59     /**
     60      * Length of the longest preceding context
     61      */
     62     int32_t maxContextLength;
     63 
     64 public:
     65 
     66     /**
     67      * Construct a new empty rule set.
     68      * @param status    Output parameter filled in with success or failure status.
     69      */
     70     TransliterationRuleSet(UErrorCode& status);
     71 
     72     /**
     73      * Copy constructor.
     74      */
     75     TransliterationRuleSet(const TransliterationRuleSet&);
     76 
     77     /**
     78      * Destructor.
     79      */
     80     virtual ~TransliterationRuleSet();
     81 
     82     /**
     83      * Change the data object that this rule belongs to.  Used
     84      * internally by the TransliterationRuleData copy constructor.
     85      * @param data    the new data value to be set.
     86      */
     87     void setData(const TransliterationRuleData* data);
     88 
     89     /**
     90      * Return the maximum context length.
     91      * @return the length of the longest preceding context.
     92      */
     93     virtual int32_t getMaximumContextLength(void) const;
     94 
     95     /**
     96      * Add a rule to this set.  Rules are added in order, and order is
     97      * significant.  The last call to this method must be followed by
     98      * a call to <code>freeze()</code> before the rule set is used.
     99      * This method must <em>not</em> be called after freeze() has been
    100      * called.
    101      *
             * NOTE(review): the freeze() documentation below says addRule()
             * may be called after freeze() as long as freeze() is called
             * again.  The two statements disagree; confirm which contract is
             * intended.
             *
    102      * @param adoptedRule the rule to add
             * @param status      error code parameter; documented for the
             *                    constructor and freeze() as an output filled
             *                    in with success or failure status, and
             *                    presumably used the same way here (confirm
             *                    against the implementation).
    103      */
    104     virtual void addRule(TransliterationRule* adoptedRule,
    105                          UErrorCode& status);
    106 
    107     /**
    108      * Check this for masked rules and index it to optimize performance.
    109      * The sequence of operations is: (1) add rules to a set using
    110      * <code>addRule()</code>; (2) freeze the set using
    111      * <code>freeze()</code>; (3) use the rule set.  If
    112      * <code>addRule()</code> is called after calling this method, it
    113      * invalidates this object, and this method must be called again.
    114      * That is, <code>freeze()</code> may be called multiple times,
    115      * although for optimal performance it shouldn't be.
    116      * @param parseError A reference to a UParseError that receives information
    117      *                   about any errors that occurred.
    118      * @param status     Output parameter filled in with success or failure status.
    119      */
    120     virtual void freeze(UParseError& parseError, UErrorCode& status);
    121 
    122     /**
    123      * Transliterate the given text with the given UTransPosition
    124      * indices.  Return TRUE if the transliteration should continue
    125      * or FALSE if it should halt (because of a U_PARTIAL_MATCH match).
    126      * Note that FALSE is only ever returned if isIncremental is TRUE.
    127      * @param text the text to be transliterated
    128      * @param index the position indices, which will be updated
    129      * @param isIncremental if TRUE, assume new text may be inserted
    130      * at index.limit, and return FALSE if there is a partial match.
    131      * @return TRUE unless a U_PARTIAL_MATCH has been obtained,
    132      * indicating that transliteration should stop until more text
    133      * arrives.
    134      */
    135     UBool transliterate(Replaceable& text,
    136                         UTransPosition& index,
    137                         UBool isIncremental);
    138 
    139     /**
    140      * Create rule strings that represents this rule set.
    141      * @param result string to receive the rule strings.  Current
    142      * contents will be deleted.
    143      * @param escapeUnprintable  True, will escape the unprintable characters
    144      * @return    A reference to 'result'.
    145      */
    146     virtual UnicodeString& toRules(UnicodeString& result,
    147                                    UBool escapeUnprintable) const;
    148 
    149     /**
    150      * Return the set of all characters that may be modified
    151      * (getTarget=false) or emitted (getTarget=true) by this set.
             * @param result    set that receives the characters.
             *                  NOTE(review): whether existing contents are
             *                  cleared first is not visible in this header;
             *                  confirm against the implementation.
             * @param getTarget if false, collect the characters that may be
             *                  modified; if true, collect the characters
             *                  that may be emitted.
             * @return a UnicodeSet reference; presumably 'result', as with
             *         toRules() -- confirm against the implementation.
    152      */
    153     UnicodeSet& getSourceTargetSet(UnicodeSet& result,
    154                    UBool getTarget) const;
    155 
    156 private:
    157 
    158     TransliterationRuleSet &operator=(const TransliterationRuleSet &other); // forbid assignment of this class (the copy constructor above remains public)
    159 };
    160 
    161 U_NAMESPACE_END
    162 
    163 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
    164 
    165 #endif
    166