Home | History | Annotate | Download | only in i18n
      1 /*
      2 **********************************************************************
      3 *   Copyright (c) 2001-2014, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 *   Date        Name        Description
      7 *   08/10/2001  aliu        Creation.
      8 **********************************************************************
      9 */
     10 #ifndef _TRANSREG_H
     11 #define _TRANSREG_H
     12 
     13 #include "unicode/utypes.h"
     14 
     15 #if !UCONFIG_NO_TRANSLITERATION
     16 
     17 #include "unicode/uobject.h"
     18 #include "unicode/translit.h"
     19 #include "hash.h"
     20 #include "uvector.h"
     21 
     22 U_NAMESPACE_BEGIN
     23 
     24 class TransliteratorEntry;
     25 class TransliteratorSpec;
     26 class UnicodeString;
     27 
     28 //------------------------------------------------------------------
     29 // TransliteratorAlias
     30 //------------------------------------------------------------------
     31 
     32 /**
     33  * A TransliteratorAlias object is returned by get() if the given ID
     34  * actually translates into something else.  The caller then invokes
     35  * the create() method on the alias to create the actual
     36  * transliterator, and deletes the alias.
     37  *
     38  * Why all the shenanigans?  To prevent circular calls between
     39  * the registry code and the transliterator code that would deadlock.
     40  */
     41 class TransliteratorAlias : public UMemory {
     42  public:
     43     /**
     44      * Construct a simple alias (type == SIMPLE)
     45      * @param aliasID the given id.
             * @param compoundFilter filter pointer.  NOTE(review): presumably
             *        stored in the compoundFilter member, which is marked as
             *        an alias (not owned) -- confirm against the implementation.
     46      */
     47     TransliteratorAlias(const UnicodeString& aliasID, const UnicodeSet* compoundFilter);
     48 
     49     /**
     50      * Construct a compound RBT alias (type == COMPOUND)
             * @param ID the ID.
             * @param idBlocks the ID blocks string.
             * @param adoptedTransliterators vector handed over by the caller.
             *        NOTE(review): presumably stored in the transes member,
             *        which is marked as owned -- confirm.
             * @param compoundFilter the compound filter; per the member
             *        comments below it is not owned by this object.
     51      */
     52     TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlocks,
     53                         UVector* adoptedTransliterators,
     54                         const UnicodeSet* compoundFilter);
     55 
     56     /**
     57      * Construct a rules alias (type == RULES)
             * @param theID the ID.
             * @param rules the rules string.
             * @param dir   the transliteration direction.
     58      */
     59     TransliteratorAlias(const UnicodeString& theID,
     60                         const UnicodeString& rules,
     61                         UTransDirection dir);
     62 
     63     ~TransliteratorAlias();
     64 
     65     /**
     66      * The whole point of create() is that the caller must invoke
     67      * it when the registry mutex is NOT held, to prevent deadlock.
     68      * It may only be called once.
     69      *
     70      * Note: Only call create() if isRuleBased() returns FALSE.
     71      *
     72      * This method must be called *outside* of the TransliteratorRegistry
     73      * mutex.
     74      */
     75     Transliterator* create(UParseError&, UErrorCode&);
     76 
     77     /**
     78      * Return TRUE if this alias is rule-based.  If so, the caller
     79      * must call parse() on it, then call TransliteratorRegistry::reget().
     80      */
     81     UBool isRuleBased() const;
     82 
     83     /**
     84      * If isRuleBased() returns TRUE, then the caller must call this
     85      * method, followed by TransliteratorRegistry::reget().  The latter
     86      * method must be called inside the TransliteratorRegistry mutex.
     87      *
     88      * Note: Only call parse() if isRuleBased() returns TRUE.
     89      *
     90      * This method must be called *outside* of the TransliteratorRegistry
     91      * mutex, because it can instantiate Transliterators embedded in
     92      * the rules via the "&Latin-Arabic()" syntax.
     93      */
     94     void parse(TransliteratorParser& parser,
     95                UParseError& pe, UErrorCode& ec) const;
     96 
     97  private:
     98     // We actually come in three flavors:
     99     // 1. Simple alias
    100     //    Here aliasID is the alias string.  Everything else is
    101     //    null, zero, empty.
    102     // 2. CompoundRBT
    103     //    Here ID is the ID, aliasID is the idBlock, trans is the
    104     //    contained RBT, and idSplitPoint is the offset in aliasID
    105     //    where the contained RBT goes.  compoundFilter is the
    106     //    compound filter, and it is _not_ owned.
    107     // 3. Rules
    108     //    Here ID is the ID, aliasID is the rules string.
    109     //    idSplitPoint is the UTransDirection.
            //
            // NOTE(review): the names aliasID, trans and idSplitPoint used above
            // do not match any member declared below.  They presumably correspond
            // to aliasesOrRules, transes and direction respectively -- confirm
            // against the implementation.  Also note that the simple-alias
            // constructor accepts a compoundFilter, so "everything else is null"
            // may no longer hold for that flavor.
    110     UnicodeString ID;
    111     UnicodeString aliasesOrRules;
    112     UVector* transes; // owned
    113     const UnicodeSet* compoundFilter; // alias
    114     UTransDirection direction;
    115     enum { SIMPLE, COMPOUND, RULES } type;
    116 
    117     TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class
    118     TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class
    119 };
    120 
    121 
    122 /**
    123  * A registry of system transliterators.  This is the data structure
    124  * that implements the mapping between transliterator IDs and the data
    125  * or function pointers used to create the corresponding
    126  * transliterators.  There is one instance of the registry that is
    127  * created statically.
    128  *
    129  * The registry consists of a dynamic component -- a hashtable -- and
    130  * a static component -- locale resource bundles.  The dynamic store
    131  * is semantically overlaid on the static store, so the static mapping
    132  * can be dynamically overridden.
    133  *
    134  * This is an internal class that is only used by Transliterator.
    135  * Transliterator maintains one static instance of this class and
    136  * delegates all registry-related operations to it.
    137  *
    138  * @author Alan Liu
    139  */
    140 class TransliteratorRegistry : public UMemory {
    141 
    142  public:
    143 
    144     /**
    145      * Constructor
    146      * @param status Output param set to success/failure code.
    147      */
    148     TransliteratorRegistry(UErrorCode& status);
    149 
    150     /**
    151      * Nonvirtual destructor -- this class is not subclassable.
    152      */
    153     ~TransliteratorRegistry();
    154 
    155     //------------------------------------------------------------------
    156     // Basic public API
    157     //------------------------------------------------------------------
    158 
    159     /**
    160      * Given a simple ID (forward direction, no inline filter, not
    161      * compound) attempt to instantiate it from the registry.  Return
    162      * 0 on failure.
    163      *
    164      * Return a non-NULL aliasReturn value if the ID points to an alias.
    165      * We cannot instantiate it ourselves because the alias may contain
    166      * filters or compounds, which we do not understand.  Caller should
    167      * make aliasReturn NULL before calling.
    168      * @param ID          the given ID
    169      * @param aliasReturn output param to receive TransliteratorAlias;
    170      *                    should be NULL on entry
    171      * @param status      Output param set to success/failure code.
    172      * @return            the instantiated transliterator, or 0 on
    173      *                    failure
    174      */
    175     Transliterator* get(const UnicodeString& ID,
    176                         TransliteratorAlias*& aliasReturn,
    177                         UErrorCode& status);
    178 
    179     /**
    180      * The caller must call this after calling get(), if [a] calling get()
    181      * returns an alias, and [b] the alias is rule based.  In that
    182      * situation the caller must call alias->parse() to do the parsing
    183      * OUTSIDE THE REGISTRY MUTEX, then call this method to retry
    184      * instantiating the transliterator.
    185      *
    186      * Note: Another alias might be returned by this method.
    187      *
    188      * This method (like all public methods of this class) must be called
    189      * from within the TransliteratorRegistry mutex.
    190      *
             * @param ID          the given ID
             * @param parser      NOTE(review): presumably the same parser that
             *                    was passed to alias->parse() -- confirm
    191      * @param aliasReturn output param to receive TransliteratorAlias;
    192      *                    should be NULL on entry
             * @param status      error code parameter
    193      */
    194     Transliterator* reget(const UnicodeString& ID,
    195                           TransliteratorParser& parser,
    196                           TransliteratorAlias*& aliasReturn,
    197                           UErrorCode& status);
    198 
    199     /**
    200      * Register a prototype (adopted).  This adds an entry to the
    201      * dynamic store, or replaces an existing entry.  Any entry in the
    202      * underlying static locale resource store is masked.
             * @param adoptedProto the prototype; adopted by the registry
             * @param visible      NOTE(review): presumably selects whether the
             *                     entry is also listed in availableIDs and
             *                     specDAG (see the comment on the registry
             *                     member) -- confirm
             * @param ec           error code parameter
    203      */
    204     void put(Transliterator* adoptedProto,
    205              UBool visible,
    206              UErrorCode& ec);
    207 
    208     /**
    209      * Register an ID and a factory function pointer.  This adds an
    210      * entry to the dynamic store, or replaces an existing entry.  Any
    211      * entry in the underlying static locale resource store is masked.
    212      */
    213     void put(const UnicodeString& ID,
    214              Transliterator::Factory factory,
    215              Transliterator::Token context,
    216              UBool visible,
    217              UErrorCode& ec);
    218 
    219     /**
    220      * Register an ID and a resource name.  This adds an entry to the
    221      * dynamic store, or replaces an existing entry.  Any entry in the
    222      * underlying static locale resource store is masked.
    223      */
    224     void put(const UnicodeString& ID,
    225              const UnicodeString& resourceName,
    226              UTransDirection dir,
    227              UBool readonlyResourceAlias,
    228              UBool visible,
    229              UErrorCode& ec);
    230 
    231     /**
    232      * Register an ID and an alias ID.  This adds an entry to the
    233      * dynamic store, or replaces an existing entry.  Any entry in the
    234      * underlying static locale resource store is masked.
    235      */
    236     void put(const UnicodeString& ID,
    237              const UnicodeString& alias,
    238              UBool readonlyAliasAlias,
    239              UBool visible,
    240              UErrorCode& ec);
    241 
    242     /**
    243      * Unregister an ID.  This removes an entry from the dynamic store
    244      * if there is one.  The static locale resource store is
    245      * unaffected.
    246      * @param ID    the given ID.
    247      */
    248     void remove(const UnicodeString& ID);
    249 
    250     //------------------------------------------------------------------
    251     // Public ID and spec management
    252     //------------------------------------------------------------------
    253 
    254     /**
    255      * Return a StringEnumeration over the IDs currently registered
    256      * with the system.
    257      * @internal
    258      */
    259     StringEnumeration* getAvailableIDs() const;
    260 
    261     /**
    262      * == OBSOLETE - remove in ICU 3.4 ==
    263      * Return the number of IDs currently registered with the system.
    264      * To retrieve the actual IDs, call getAvailableID(i) with
    265      * i from 0 to countAvailableIDs() - 1.
    266      * @return the number of IDs currently registered with the system.
    267      * @internal
    268      */
    269     int32_t countAvailableIDs(void) const;
    270 
    271     /**
    272      * == OBSOLETE - remove in ICU 3.4 ==
    273      * Return the index-th available ID.  index must be between 0
    274      * and countAvailableIDs() - 1, inclusive.  If index is out of
    275      * range, the result of getAvailableID(0) is returned.
    276      * @param index the given index.
    277      * @return the index-th available ID.  index must be between 0
    278      *         and countAvailableIDs() - 1, inclusive.  If index is out of
    279      *         range, the result of getAvailableID(0) is returned.
    280      * @internal
    281      */
    282     const UnicodeString& getAvailableID(int32_t index) const;
    283 
    284     /**
    285      * Return the number of registered source specifiers.
    286      * @return the number of registered source specifiers.
    287      */
    288     int32_t countAvailableSources(void) const;
    289 
    290     /**
    291      * Return a registered source specifier.
    292      * @param index which specifier to return, from 0 to n-1, where
    293      * n = countAvailableSources()
    294      * @param result fill-in parameter to receive the source specifier.
    295      * If index is out of range, result will be empty.
    296      * @return reference to result
    297      */
    298     UnicodeString& getAvailableSource(int32_t index,
    299                                       UnicodeString& result) const;
    300 
    301     /**
    302      * Return the number of registered target specifiers for a given
    303      * source specifier.
    304      * @param source the given source specifier.
    305      * @return the number of registered target specifiers for a given
    306      *         source specifier.
    307      */
    308     int32_t countAvailableTargets(const UnicodeString& source) const;
    309 
    310     /**
    311      * Return a registered target specifier for a given source.
    312      * @param index which specifier to return, from 0 to n-1, where
    313      * n = countAvailableTargets(source)
    314      * @param source the source specifier
    315      * @param result fill-in parameter to receive the target specifier.
    316      * If source is invalid or if index is out of range, result will
    317      * be empty.
    318      * @return reference to result
    319      */
    320     UnicodeString& getAvailableTarget(int32_t index,
    321                                       const UnicodeString& source,
    322                                       UnicodeString& result) const;
    323 
    324     /**
    325      * Return the number of registered variant specifiers for a given
    326      * source-target pair.  There is always at least one variant: If
    327      * just source-target is registered, then the single variant
    328      * NO_VARIANT is returned.  If source-target/variant is registered
    329      * then that variant is returned.
    330      * @param source the source specifiers
    331      * @param target the target specifiers
    332      * @return the number of registered variant specifiers for a given
    333      *         source-target pair.
    334      */
    335     int32_t countAvailableVariants(const UnicodeString& source,
    336                                    const UnicodeString& target) const;
    337 
    338     /**
    339      * Return a registered variant specifier for a given source-target
    340      * pair.  If NO_VARIANT is one of the variants, then it will be
    341      * at index 0.
    342      * @param index which specifier to return, from 0 to n-1, where
    343      * n = countAvailableVariants(source, target)
    344      * @param source the source specifier
    345      * @param target the target specifier
    346      * @param result fill-in parameter to receive the variant
    347      * specifier.  If source is invalid or if target is invalid or if
    348      * index is out of range, result will be empty.
    349      * @return reference to result
    350      */
    351     UnicodeString& getAvailableVariant(int32_t index,
    352                                        const UnicodeString& source,
    353                                        const UnicodeString& target,
    354                                        UnicodeString& result) const;
    355 
    356  private:
    357 
    358     //----------------------------------------------------------------
    359     // Private implementation
    360     //----------------------------------------------------------------
    361 
    362     TransliteratorEntry* find(const UnicodeString& ID);
    363 
    364     TransliteratorEntry* find(UnicodeString& source,
    365                 UnicodeString& target,
    366                 UnicodeString& variant);
    367 
    368     TransliteratorEntry* findInDynamicStore(const TransliteratorSpec& src,
    369                               const TransliteratorSpec& trg,
    370                               const UnicodeString& variant) const;
    371 
    372     TransliteratorEntry* findInStaticStore(const TransliteratorSpec& src,
    373                              const TransliteratorSpec& trg,
    374                              const UnicodeString& variant);
    375 
    376     static TransliteratorEntry* findInBundle(const TransliteratorSpec& specToOpen,
    377                                const TransliteratorSpec& specToFind,
    378                                const UnicodeString& variant,
    379                                UTransDirection direction);
    380 
    381     void registerEntry(const UnicodeString& source,
    382                        const UnicodeString& target,
    383                        const UnicodeString& variant,
    384                        TransliteratorEntry* adopted,
    385                        UBool visible);
    386 
    387     void registerEntry(const UnicodeString& ID,
    388                        TransliteratorEntry* adopted,
    389                        UBool visible);
    390 
    391     void registerEntry(const UnicodeString& ID,
    392                        const UnicodeString& source,
    393                        const UnicodeString& target,
    394                        const UnicodeString& variant,
    395                        TransliteratorEntry* adopted,
    396                        UBool visible);
    397 
    398     void registerSTV(const UnicodeString& source,
    399                      const UnicodeString& target,
    400                      const UnicodeString& variant);
    401 
    402     void removeSTV(const UnicodeString& source,
    403                    const UnicodeString& target,
    404                    const UnicodeString& variant);
    405 
    406     Transliterator* instantiateEntry(const UnicodeString& ID,
    407                                      TransliteratorEntry *entry,
    408                                      TransliteratorAlias*& aliasReturn,
    409                                      UErrorCode& status);
    410 
    411     /**
    412      * A StringEnumeration over the registered IDs in this object.
    413      */
    414     class Enumeration : public StringEnumeration {
    415     public:
    416         Enumeration(const TransliteratorRegistry& reg);
    417         virtual ~Enumeration();
    418         virtual int32_t count(UErrorCode& status) const;
    419         virtual const UnicodeString* snext(UErrorCode& status);
    420         virtual void reset(UErrorCode& status);
    421         static UClassID U_EXPORT2 getStaticClassID();
    422         virtual UClassID getDynamicClassID() const;
    423     private:
    424         int32_t index;
    425         const TransliteratorRegistry& reg; // held by reference, not copied; the registry must outlive this enumeration
    426     };
    427     friend class Enumeration;
    428 
    429  private:
    430 
    431     /**
    432      * Dynamic registry mapping full IDs to Entry objects.  This
    433      * contains both public and internal entities.  The visibility is
    434      * controlled by whether an entry is listed in availableIDs and
    435      * specDAG or not.
    436      */
    437     Hashtable registry;
    438 
    439     /**
    440      * DAG of visible IDs by spec.  Hashtable: source => (Hashtable:
    441      * target => (UVector: variant)) The UVector of variants is never
    442      * empty.  For a source-target with no variant, the special
    443      * variant NO_VARIANT (the empty string) is stored in slot zero of
    444      * the UVector.
    445      */
    446     Hashtable specDAG;
    447 
    448     /**
    449      * Vector of public full IDs.
    450      */
    451     UVector availableIDs;
    452 
    453     TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class
    454     TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class
    455 };
    456 
    457 U_NAMESPACE_END
    458 
        // NOTE(review): declared here but defined outside this header; presumably
        // the library-cleanup hook for the transliterator service -- confirm
        // against the implementation file.
    459 U_CFUNC UBool utrans_transliterator_cleanup(void);
    460 
    461 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
    462 
    463 #endif
    464 //eof
    465