Home | History | Annotate | Download | only in i18n
      1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 * Copyright (C) 1999-2007, International Business Machines Corporation
      6 * and others. All Rights Reserved.
      7 **********************************************************************
      8 *   Date        Name        Description
      9 *   11/17/99    aliu        Creation.
     10 **********************************************************************
     11 */
     12 #ifndef RBT_SET_H
     13 #define RBT_SET_H
     14 
     15 #include "unicode/utypes.h"
     16 
     17 #if !UCONFIG_NO_TRANSLITERATION
     18 
     19 #include "unicode/uobject.h"
     20 #include "unicode/utrans.h"
     21 #include "uvector.h"
     22 
     23 U_NAMESPACE_BEGIN
     24 
     25 class Replaceable;              // taken by reference in transliterate()
     26 class TransliterationRule;      // held by pointer (rules[], addRule())
     27 class TransliterationRuleData;  // taken by pointer in setData()
     28 class UnicodeFilter;            // NOTE(review): not referenced in this header -- possibly vestigial, confirm
     29 class UnicodeString;            // taken/returned by reference in toRules()
     30 class UnicodeSet;               // taken/returned by reference in getSourceTargetSet()
     31 
     32 /**
     33  * A set of rules for a <code>RuleBasedTransliterator</code>: add rules with addRule(), call freeze(), then use the set.
     34  * @author Alan Liu
     35  */
     36 class TransliterationRuleSet : public UMemory {
     37     /**
     38      * Vector of rules, in the order added.  This is used while the
     39      * rule set is getting built.  After that, freeze() reorders and
     40      * indexes the rules into rules[].  Any given rule is stored once
     41      * in ruleVector, and one or more times in rules[].  ruleVector
     42      * owns and deletes the rules.
     43      */
     44     UVector* ruleVector;
     45 
     46     /**
     47      * Sorted and indexed table of rules.  This is created by freeze()
     48      * from the rules in ruleVector.  It contains alias (non-owning) pointers
     49      * to the rules in ruleVector.  It is zero before freeze() is called
     50      * and non-zero thereafter.
     51      */
     52     TransliterationRule** rules;
     53 
     54     /**
     55      * Index table.  For text having a first character c, compute x = c&0xFF.
     56      * Now use rules[index[x]..index[x+1]-1]; the 257th entry keeps index[x+1] valid for x = 0xFF.
     57      * This index table is created by freeze().  Before freeze() is called it contains garbage.
     58      */
     59     int32_t index[257];
     60 
     61     /**
     62      * Length of the longest preceding context.
     63      */
     64     int32_t maxContextLength;
     65 
     66 public:
     67 
     68     /**
     69      * Construct a new empty rule set.
     70      * @param status    Output parameter filled in with success or failure status.
     71      */
     72     TransliterationRuleSet(UErrorCode& status);
     73 
     74     /**
     75      * Copy constructor.  Copy assignment, by contrast, is declared private below.
     76      */
     77     TransliterationRuleSet(const TransliterationRuleSet&);
     78 
     79     /**
     80      * Destructor.
     81      */
     82     virtual ~TransliterationRuleSet();
     83 
     84     /**
     85      * Change the data object that this rule belongs to.  Used
     86      * internally by the TransliterationRuleData copy constructor.
     87      * @param data    the new data value to be set.
     88      */
     89     void setData(const TransliterationRuleData* data);
     90 
     91     /**
     92      * Return the maximum context length.
     93      * @return the length of the longest preceding context.
     94      */
     95     virtual int32_t getMaximumContextLength(void) const;
     96 
     97     /**
     98      * Add a rule to this set.  Rules are added in order, and order is
     99      * significant.  The last call to this method must be followed by
    100      * a call to <code>freeze()</code> before the rule set is used.
    101      * This method must <em>not</em> be called after freeze() has been called.
    102      *
    103      * @param adoptedRule the rule to add; per the ruleVector notes, the set owns and deletes its rules.
    104      * @param status      Output parameter filled in with success or failure status.
    105      */
    106     virtual void addRule(TransliterationRule* adoptedRule,
    107                          UErrorCode& status);
    108 
    109     /**
    110      * Check this for masked rules and index it to optimize performance.
    111      * The sequence of operations is: (1) add rules to a set using
    112      * <code>addRule()</code>; (2) freeze the set using
    113      * <code>freeze()</code>; (3) use the rule set.  If
    114      * <code>addRule()</code> is called after calling this method, it
    115      * invalidates this object, and this method must be called again.
    116      * That is, <code>freeze()</code> may be called multiple times,
    117      * although for optimal performance it shouldn't be.  NOTE(review): addRule()'s doc forbids calls after freeze() -- reconcile.
    118      * @param parseError Reference to a UParseError that receives information about
    119      *                   any errors that occurred.
    120      * @param status     Output parameter filled in with success or failure status.
    121      */
    122     virtual void freeze(UParseError& parseError, UErrorCode& status);
    123 
    124     /**
    125      * Transliterate the given text with the given UTransPosition
    126      * indices.  Return TRUE if the transliteration should continue
    127      * or FALSE if it should halt (because of a U_PARTIAL_MATCH match).
    128      * Note that FALSE is only ever returned if isIncremental is TRUE.
    129      * @param text the text to be transliterated
    130      * @param index the position indices, which will be updated
    131      * @param isIncremental if TRUE, assume new text may be inserted
    132      * at index.limit, and return FALSE if there is a partial match.
    133      * @return TRUE unless a U_PARTIAL_MATCH has been obtained,
    134      * indicating that transliteration should stop until more text
    135      * arrives.
    136      */
    137     UBool transliterate(Replaceable& text,
    138                         UTransPosition& index,
    139                         UBool isIncremental);
    140 
    141     /**
    142      * Create rule strings that represent this rule set.
    143      * @param result string to receive the rule strings.  Current
    144      * contents will be deleted.
    145      * @param escapeUnprintable  If TRUE, unprintable characters are escaped.
    146      * @return    A reference to 'result'.
    147      */
    148     virtual UnicodeString& toRules(UnicodeString& result,
    149                                    UBool escapeUnprintable) const;
    150 
    151     /**
    152      * Return the set of all characters that may be modified
    153      * (getTarget=false) or emitted (getTarget=true) by this set.  NOTE(review): presumably filled into and returned as 'result' -- confirm.
    154      */
    155     UnicodeSet& getSourceTargetSet(UnicodeSet& result,
    156                    UBool getTarget) const;
    157 
    158 private:
    159 
    160     TransliterationRuleSet &operator=(const TransliterationRuleSet &other); // forbid assignment (declared private; the copy constructor above remains public)
    161 };
    162 
    163 U_NAMESPACE_END
    164 
    165 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
    166 
    167 #endif
    168