Home | History | Annotate | Download | only in i18n
      1 /*
      2 **********************************************************************
      3 * Copyright (C) 1999-2007, International Business Machines Corporation
      4 * and others. All Rights Reserved.
      5 **********************************************************************
      6 *   Date        Name        Description
      7 *   11/17/99    aliu        Creation.
      8 **********************************************************************
      9 */
     10 #ifndef RBT_DATA_H
     11 #define RBT_DATA_H
     12 
     13 #include "unicode/utypes.h"
     14 #include "unicode/uclean.h"
     15 
     16 #if !UCONFIG_NO_TRANSLITERATION
     17 
     18 #include "unicode/uobject.h"
     19 #include "rbt_set.h"
     20 #include "hash.h"
     21 
     22 U_NAMESPACE_BEGIN
     23 
     24 class UnicodeFunctor;
     25 class UnicodeMatcher;
     26 class UnicodeReplacer;
     27 
     28 /**
     29  * The rule data for a RuleBasedTransliterators.  RBT objects hold
     30  * a const pointer to a TRD object that they do not own.  TRD objects
     31  * are essentially the parsed rules in compact, usable form.  The
     32  * TRD objects themselves are held for the life of the process in
     33  * a static cache owned by Transliterator.
     34  *
     35  * This class' API is a little asymmetric.  There is a method to
     36  * define a variable, but no way to define a set.  This is because the
     37  * sets are defined by the parser in a UVector, and the vector is
     38  * copied into a fixed-size array here.  Once this is done, no new
     39  * sets may be defined.  In practice, there is no need to do so, since
     40  * generating the data and using it are discrete phases.  When there
     41  * is a need to access the set data during the parse phase, another
     42  * data structure handles this.  See the parsing code for more
     43  * details.
     44  */
     45 class TransliterationRuleData : public UMemory {
     46 
     47 public:
     48 
     49     // PUBLIC DATA MEMBERS
     50 
     51     /**
     52      * Rule table.  May be empty.
     53      */
     54     TransliterationRuleSet ruleSet;
     55 
     56     /**
     57      * Map variable name (String) to variable (UnicodeString).  A variable name
     58      * corresponds to zero or more characters, stored in a UnicodeString in
     59      * this hash.  One or more of these chars may also correspond to a
     60      * UnicodeMatcher, in which case the character in the UnicodeString in this hash is
     61      * a stand-in: it is an index for a secondary lookup in
     62      * data.variables.  The stand-in also represents the UnicodeMatcher in
     63      * the stored rules.
     64      */
     65     Hashtable variableNames;
     66 
     67     /**
     68      * Map category variable (UChar) to set (UnicodeFunctor).
     69      * Variables that correspond to a set of characters are mapped
     70      * from variable name to a stand-in character in data.variableNames.
     71      * The stand-in then serves as a key in this hash to lookup the
     72      * actual UnicodeFunctor object.  In addition, the stand-in is
     73      * stored in the rule text to represent the set of characters.
     74      * variables[i] represents character (variablesBase + i).
     75      */
     76     UnicodeFunctor** variables;
     77 
     78     /**
     79      * Flag that indicates whether the variables are owned (if a single
     80      * call to Transliterator::createFromRules() produces a CompoundTransliterator
     81      * with more than one RuleBasedTransliterator as children, they all share
     82      * the same variables list, so only the first one is considered to own
     83      * the variables)
     84      */
     85     UBool variablesAreOwned;
     86 
     87     /**
     88      * The character that represents variables[0].  Characters
     89      * variablesBase through variablesBase +
     90      * variablesLength - 1 represent UnicodeFunctor objects.
     91      */
     92     UChar variablesBase;
     93 
     94     /**
     95      * The length of variables.
     96      */
     97     int32_t variablesLength;
     98 
     99 public:
    100 
    101     /**
    102      * Constructor
    103      * @param status Output param set to success/failure code on exit.
    104      */
    105     TransliterationRuleData(UErrorCode& status);
    106 
    107     /**
    108      * Copy Constructor
    109      */
    110     TransliterationRuleData(const TransliterationRuleData&);
    111 
    112     /**
    113      * destructor
    114      */
    115     ~TransliterationRuleData();
    116 
    117     /**
    118      * Given a stand-in character, return the UnicodeFunctor that it
    119      * represents, or NULL if it doesn't represent anything.
    120      * @param standIn    the given stand-in character.
    121      * @return           the UnicodeFunctor that 'standIn' represents
    122      */
    123     UnicodeFunctor* lookup(UChar32 standIn) const;
    124 
    125     /**
    126      * Given a stand-in character, return the UnicodeMatcher that it
    127      * represents, or NULL if it doesn't represent anything or if it
    128      * represents something that is not a matcher.
    129      * @param standIn    the given stand-in character.
    130      * @return           return the UnicodeMatcher that 'standIn' represents
    131      */
    132     UnicodeMatcher* lookupMatcher(UChar32 standIn) const;
    133 
    134     /**
    135      * Given a stand-in character, return the UnicodeReplacer that it
    136      * represents, or NULL if it doesn't represent anything or if it
    137      * represents something that is not a replacer.
    138      * @param standIn    the given stand-in character.
    139      * @return           return the UnicodeReplacer that 'standIn' represents
    140      */
    141     UnicodeReplacer* lookupReplacer(UChar32 standIn) const;
    142 
    143 
    144 private:
    145     TransliterationRuleData &operator=(const TransliterationRuleData &other); // forbid copying of this class
    146 };
    147 
    148 U_NAMESPACE_END
    149 
    150 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
    151 
    152 #endif
    153