Home | History | Annotate | Download | only in i18n
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 * Copyright (C) 1999-2007, International Business Machines Corporation
      6 * and others. All Rights Reserved.
      7 **********************************************************************
      8 *   Date        Name        Description
      9 *   11/17/99    aliu        Creation.
     10 **********************************************************************
     11 */
     12 #ifndef RBT_DATA_H
     13 #define RBT_DATA_H
     14 
     15 #include "unicode/utypes.h"
     16 #include "unicode/uclean.h"
     17 
     18 #if !UCONFIG_NO_TRANSLITERATION
     19 
     20 #include "unicode/uobject.h"
     21 #include "rbt_set.h"
     22 #include "hash.h"
     23 
     24 U_NAMESPACE_BEGIN
     25 
     26 class UnicodeFunctor;
     27 class UnicodeMatcher;
     28 class UnicodeReplacer;
     29 
     30 /**
     31  * The rule data for a RuleBasedTransliterator.  RBT objects hold
     32  * a const pointer to a TRD object that they do not own.  TRD objects
     33  * are essentially the parsed rules in compact, usable form.  The
     34  * TRD objects themselves are held for the life of the process in
     35  * a static cache owned by Transliterator.
     36  *
     37  * This class' API is a little asymmetric.  There is a method to
     38  * define a variable, but no way to define a set.  This is because the
     39  * sets are defined by the parser in a UVector, and the vector is
     40  * copied into a fixed-size array here.  Once this is done, no new
     41  * sets may be defined.  In practice, there is no need to do so, since
     42  * generating the data and using it are discrete phases.  When there
     43  * is a need to access the set data during the parse phase, another
     44  * data structure handles this.  See the parsing code for more
     45  * details.
     46  */
     47 class TransliterationRuleData : public UMemory { // rule set plus the variable tables its rules refer to
     48 
     49 public:
     50 
     51     // PUBLIC DATA MEMBERS
     52 
     53     /**
     54      * Rule table.  May be empty.
     55      */
     56     TransliterationRuleSet ruleSet;
     57 
     58     /**
     59      * Map variable name (String) to variable (UnicodeString).  A variable name
     60      * corresponds to zero or more characters, stored in a UnicodeString in
     61      * this hash.  One or more of these chars may also correspond to a
     62      * UnicodeMatcher, in which case the character in the UnicodeString in this hash is
     63      * a stand-in: it is an index for a secondary lookup in
     64      * the variables array.  The stand-in also represents the UnicodeMatcher in
     65      * the stored rules.
     66      */
     67     Hashtable variableNames;
     68 
     69     /**
     70      * Map category variable (UChar) to set (UnicodeFunctor).
     71      * Variables that correspond to a set of characters are mapped
     72      * from variable name to a stand-in character in variableNames.
     73      * The stand-in then serves as an index into this array to look up the
     74      * actual UnicodeFunctor object.  In addition, the stand-in is
     75      * stored in the rule text to represent the set of characters.
     76      * variables[i] represents character (variablesBase + i).
     77      */
     78     UnicodeFunctor** variables;
     79 
     80     /**
     81      * Flag that indicates whether the variables are owned (if a single
     82      * call to Transliterator::createFromRules() produces a CompoundTransliterator
     83      * with more than one RuleBasedTransliterator as children, they all share
     84      * the same variables list, so only the first one is considered to own
     85      * the variables).
     86      */
     87     UBool variablesAreOwned;
     88 
     89     /**
     90      * The character that represents variables[0].  Characters
     91      * variablesBase through variablesBase +
     92      * variablesLength - 1 represent UnicodeFunctor objects.
     93      */
     94     UChar variablesBase;
     95 
     96     /**
     97      * The number of entries in the variables array.
     98      */
     99     int32_t variablesLength;
    100 
    101 public:
    102 
    103     /**
    104      * Constructor.
    105      * @param status Output param set to success/failure code on exit.
    106      */
    107     TransliterationRuleData(UErrorCode& status);
    108 
    109     /**
    110      * Copy constructor.  NOTE(review): whether the variables array is cloned or shared is decided in the implementation file -- confirm there.
    111      */
    112     TransliterationRuleData(const TransliterationRuleData&);
    113 
    114     /**
    115      * Destructor.
    116      */
    117     ~TransliterationRuleData();
    118 
    119     /**
    120      * Given a stand-in character, return the UnicodeFunctor that it
    121      * represents, or NULL if it doesn't represent anything.
    122      * @param standIn    the given stand-in character.
    123      * @return           the UnicodeFunctor that 'standIn' represents
    124      */
    125     UnicodeFunctor* lookup(UChar32 standIn) const;
    126 
    127     /**
    128      * Given a stand-in character, return the UnicodeMatcher that it
    129      * represents, or NULL if it doesn't represent anything or if it
    130      * represents something that is not a matcher.
    131      * @param standIn    the given stand-in character.
    132      * @return           the UnicodeMatcher that 'standIn' represents
    133      */
    134     UnicodeMatcher* lookupMatcher(UChar32 standIn) const;
    135 
    136     /**
    137      * Given a stand-in character, return the UnicodeReplacer that it
    138      * represents, or NULL if it doesn't represent anything or if it
    139      * represents something that is not a replacer.
    140      * @param standIn    the given stand-in character.
    141      * @return           the UnicodeReplacer that 'standIn' represents
    142      */
    143     UnicodeReplacer* lookupReplacer(UChar32 standIn) const;
    144 
    145 
    146 private:
    147     TransliterationRuleData &operator=(const TransliterationRuleData &other); // private: forbids assignment from outside the class (copy construction stays public)
    148 };
    149 
    150 U_NAMESPACE_END
    151 
    152 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
    153 
    154 #endif
    155