Home | History | Annotate | Download | only in i18n
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 *   Copyright (c) 2001-2014, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 **********************************************************************
      8 *   Date        Name        Description
      9 *   08/10/2001  aliu        Creation.
     10 **********************************************************************
     11 */
     12 #ifndef _TRANSREG_H
     13 #define _TRANSREG_H
     14 
     15 #include "unicode/utypes.h"
     16 
     17 #if !UCONFIG_NO_TRANSLITERATION
     18 
     19 #include "unicode/uobject.h"
     20 #include "unicode/translit.h"
     21 #include "hash.h"
     22 #include "uvector.h"
     23 
     24 U_NAMESPACE_BEGIN
     25 
     26 class TransliteratorEntry;   // referenced in this header only through pointers
     27 class TransliteratorSpec;    // referenced in this header only through const references
     28 class UnicodeString;         // also held by value below, so the full definition presumably arrives via the includes above -- confirm
        // NOTE(review): TransliteratorParser is used further down (parse(), reget())
        // but is not forward-declared in this header; it presumably comes from an
        // included header or from the including translation unit -- confirm.
     29 
     30 //------------------------------------------------------------------
     31 // TransliteratorAlias
     32 //------------------------------------------------------------------
     33 
     34 /**
     35  * A TransliteratorAlias object is returned by get() if the given ID
     36  * actually translates into something else.  The caller then invokes
     37  * the create() method on the alias to create the actual
     38  * transliterator, and deletes the alias.
     39  *
     40  * Why all the shenanigans?  To prevent circular calls between
     41  * the registry code and the transliterator code, which could deadlock.
         *
         * Rule-based aliases (isRuleBased() returns TRUE) follow a different
         * path: the caller invokes parse() instead of create(), and then
         * TransliteratorRegistry::reget().
     42  */
     43 class TransliteratorAlias : public UMemory {
     44  public:
     45     /**
     46      * Construct a simple alias (type == SIMPLE)
     47      * @param aliasID the given id.
             * @param compoundFilter the compound filter.  The matching field
             *        is marked as an alias (not owned), so presumably the
             *        caller keeps ownership -- confirm in the implementation.
     48      */
     49     TransliteratorAlias(const UnicodeString& aliasID, const UnicodeSet* compoundFilter);
     50 
     51     /**
     52      * Construct a compound RBT alias (type == COMPOUND)
             * @param ID the ID of the compound transliterator.
             * @param idBlocks the ID block string.
             * @param adoptedTransliterators vector of contained transliterators;
             *        the parameter name and the "owned" transes field suggest
             *        that ownership passes to this object -- confirm in the
             *        implementation.
             * @param compoundFilter the compound filter; not owned.
     53      */
     54     TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlocks,
     55                         UVector* adoptedTransliterators,
     56                         const UnicodeSet* compoundFilter);
     57 
     58     /**
     59      * Construct a rules alias (type == RULES)
             * @param theID the ID.
             * @param rules the rules string.
             * @param dir the transliteration direction.
     60      */
     61     TransliteratorAlias(const UnicodeString& theID,
     62                         const UnicodeString& rules,
     63                         UTransDirection dir);
     64 
     65     ~TransliteratorAlias();
     66 
     67     /**
     68      * The whole point of create() is that the caller must invoke
     69      * it when the registry mutex is NOT held, to prevent deadlock.
     70      * It may only be called once.
     71      *
     72      * Note: Only call create() if isRuleBased() returns FALSE.
     73      *
     74      * This method must be called *outside* of the TransliteratorRegistry
     75      * mutex.
     76      */
     77     Transliterator* create(UParseError&, UErrorCode&);
     78 
     79     /**
     80      * Return TRUE if this alias is rule-based.  If so, the caller
     81      * must call parse() on it, then call TransliteratorRegistry::reget().
     82      */
     83     UBool isRuleBased() const;
     84 
     85     /**
     86      * If isRuleBased() returns TRUE, then the caller must call this
     87      * method, followed by TransliteratorRegistry::reget().  The latter
     88      * method must be called inside the TransliteratorRegistry mutex.
     89      *
     90      * Note: Only call parse() if isRuleBased() returns TRUE.
     91      *
     92      * This method must be called *outside* of the TransliteratorRegistry
     93      * mutex, because it can instantiate Transliterators embedded in
     94      * the rules via the "&Latin-Arabic()" syntax.
     95      */
     96     void parse(TransliteratorParser& parser,
     97                UParseError& pe, UErrorCode& ec) const;
     98 
     99  private:
    100     // We actually come in three flavors:
    101     // 1. Simple alias
    102     //    Here aliasID is the alias string.  Everything else is
    103     //    null, zero, empty.
    104     // 2. CompoundRBT
    105     //    Here ID is the ID, aliasID is the idBlock, trans is the
    106     //    contained RBT, and idSplitPoint is the offset in aliasID
    107     //    where the contained RBT goes.  compoundFilter is the
    108     //    compound filter, and it is _not_ owned.
    109     // 3. Rules
    110     //    Here ID is the ID, aliasID is the rules string.
    111     //    idSplitPoint is the UTransDirection.
            //
            // NOTE(review): the names aliasID, trans and idSplitPoint used above
            // do not match any field declared below.  They presumably refer to
            // aliasesOrRules, transes and direction respectively -- confirm
            // against the constructors in the implementation file.
    112     UnicodeString ID;
    113     UnicodeString aliasesOrRules;
    114     UVector* transes; // owned
    115     const UnicodeSet* compoundFilter; // alias
    116     UTransDirection direction;
    117     enum { SIMPLE, COMPOUND, RULES } type; // which of the three flavors this object is
    118 
    119     TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class
    120     TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class
    121 };
    122 
    123 
    124 /**
    125  * A registry of system transliterators.  This is the data structure
    126  * that implements the mapping between transliterator IDs and the data
    127  * or function pointers used to create the corresponding
    128  * transliterators.  There is one instance of the registry that is
    129  * created statically.
    130  *
    131  * The registry consists of a dynamic component -- a hashtable -- and
    132  * a static component -- locale resource bundles.  The dynamic store
    133  * is semantically overlaid on the static store, so the static mapping
    134  * can be dynamically overridden.
    135  *
    136  * This is an internal class that is only used by Transliterator.
    137  * Transliterator maintains one static instance of this class and
    138  * delegates all registry-related operations to it.
    139  *
         * All public methods of this class must be called from within the
         * TransliteratorRegistry mutex.  The class declares no lock of its
         * own, so holding that mutex is the caller's responsibility.
         *
    140  * @author Alan Liu
    141  */
    142 class TransliteratorRegistry : public UMemory {
    143 
    144  public:
    145 
    146     /**
    147      * Constructor
    148      * @param status Output param set to success/failure code.
    149      */
    150     TransliteratorRegistry(UErrorCode& status);
    151 
    152     /**
    153      * Nonvirtual destructor -- this class is not subclassable.
    154      */
    155     ~TransliteratorRegistry();
    156 
    157     //------------------------------------------------------------------
    158     // Basic public API
    159     //------------------------------------------------------------------
    160 
    161     /**
    162      * Given a simple ID (forward direction, no inline filter, not
    163      * compound) attempt to instantiate it from the registry.  Return
    164      * 0 on failure.
    165      *
    166      * Return a non-NULL aliasReturn value if the ID points to an alias.
    167      * We cannot instantiate it ourselves because the alias may contain
    168      * filters or compounds, which we do not understand.  Caller should
    169      * make aliasReturn NULL before calling.
    170      * @param ID          the given ID
    171      * @param aliasReturn output param to receive TransliteratorAlias;
    172      *                    should be NULL on entry
    173      * @param status      Output param set to success/failure code.
    174      * @return            the instantiated transliterator, or 0 on
    175      *                    failure
    176      */
    177     Transliterator* get(const UnicodeString& ID,
    178                         TransliteratorAlias*& aliasReturn,
    179                         UErrorCode& status);
    180 
    181     /**
    182      * The caller must call this after calling get(), if [a] calling get()
    183      * returns an alias, and [b] the alias is rule based.  In that
    184      * situation the caller must call alias->parse() to do the parsing
    185      * OUTSIDE THE REGISTRY MUTEX, then call this method to retry
    186      * instantiating the transliterator.
    187      *
    188      * Note: Another alias might be returned by this method.
    189      *
    190      * This method (like all public methods of this class) must be called
    191      * from within the TransliteratorRegistry mutex.
    192      *
             * @param ID          the ID to instantiate -- presumably the same ID
             *                    that was passed to get(); TODO confirm
             * @param parser      the parser -- presumably the one that was handed
             *                    to alias->parse(); TODO confirm
    193      * @param aliasReturn output param to receive TransliteratorAlias;
    194      *                    should be NULL on entry
             * @param status      Output param set to success/failure code.
    195      */
    196     Transliterator* reget(const UnicodeString& ID,
    197                           TransliteratorParser& parser,
    198                           TransliteratorAlias*& aliasReturn,
    199                           UErrorCode& status);
    200 
    201     /**
    202      * Register a prototype (adopted).  This adds an entry to the
    203      * dynamic store, or replaces an existing entry.  Any entry in the
    204      * underlying static locale resource store is masked.
    205      */
    206     void put(Transliterator* adoptedProto,
    207              UBool visible,
    208              UErrorCode& ec);
    209 
    210     /**
    211      * Register an ID and a factory function pointer.  This adds an
    212      * entry to the dynamic store, or replaces an existing entry.  Any
    213      * entry in the underlying static locale resource store is masked.
    214      */
    215     void put(const UnicodeString& ID,
    216              Transliterator::Factory factory,
    217              Transliterator::Token context,
    218              UBool visible,
    219              UErrorCode& ec);
    220 
    221     /**
    222      * Register an ID and a resource name.  This adds an entry to the
    223      * dynamic store, or replaces an existing entry.  Any entry in the
    224      * underlying static locale resource store is masked.
    225      */
    226     void put(const UnicodeString& ID,
    227              const UnicodeString& resourceName,
    228              UTransDirection dir,
    229              UBool readonlyResourceAlias,
    230              UBool visible,
    231              UErrorCode& ec);
    232 
    233     /**
    234      * Register an ID and an alias ID.  This adds an entry to the
    235      * dynamic store, or replaces an existing entry.  Any entry in the
    236      * underlying static locale resource store is masked.
    237      */
    238     void put(const UnicodeString& ID,
    239              const UnicodeString& alias,
    240              UBool readonlyAliasAlias,
    241              UBool visible,
    242              UErrorCode& ec);
    243 
    244     /**
    245      * Unregister an ID.  This removes an entry from the dynamic store
    246      * if there is one.  The static locale resource store is
    247      * unaffected.
    248      * @param ID    the given ID.
    249      */
    250     void remove(const UnicodeString& ID);
    251 
    252     //------------------------------------------------------------------
    253     // Public ID and spec management
    254     //------------------------------------------------------------------
    255 
    256     /**
    257      * Return a StringEnumeration over the IDs currently registered
    258      * with the system.
    259      * @internal
    260      */
    261     StringEnumeration* getAvailableIDs() const;
    262 
    263     /**
    264      * == OBSOLETE - remove in ICU 3.4 ==
    265      * Return the number of IDs currently registered with the system.
    266      * To retrieve the actual IDs, call getAvailableID(i) with
    267      * i from 0 to countAvailableIDs() - 1.
    268      * @return the number of IDs currently registered with the system.
    269      * @internal
    270      */
    271     int32_t countAvailableIDs(void) const;
    272 
    273     /**
    274      * == OBSOLETE - remove in ICU 3.4 ==
    275      * Return the index-th available ID.  index must be between 0
    276      * and countAvailableIDs() - 1, inclusive.  If index is out of
    277      * range, the result of getAvailableID(0) is returned.
    278      * @param index the given index.
    279      * @return the index-th available ID.  index must be between 0
    280      *         and countAvailableIDs() - 1, inclusive.  If index is out of
    281      *         range, the result of getAvailableID(0) is returned.
    282      * @internal
    283      */
    284     const UnicodeString& getAvailableID(int32_t index) const;
    285 
    286     /**
    287      * Return the number of registered source specifiers.
    288      * @return the number of registered source specifiers.
    289      */
    290     int32_t countAvailableSources(void) const;
    291 
    292     /**
    293      * Return a registered source specifier.
    294      * @param index which specifier to return, from 0 to n-1, where
    295      * n = countAvailableSources()
    296      * @param result fill-in parameter to receive the source specifier.
    297      * If index is out of range, result will be empty.
    298      * @return reference to result
    299      */
    300     UnicodeString& getAvailableSource(int32_t index,
    301                                       UnicodeString& result) const;
    302 
    303     /**
    304      * Return the number of registered target specifiers for a given
    305      * source specifier.
    306      * @param source the given source specifier.
    307      * @return the number of registered target specifiers for a given
    308      *         source specifier.
    309      */
    310     int32_t countAvailableTargets(const UnicodeString& source) const;
    311 
    312     /**
    313      * Return a registered target specifier for a given source.
    314      * @param index which specifier to return, from 0 to n-1, where
    315      * n = countAvailableTargets(source)
    316      * @param source the source specifier
    317      * @param result fill-in parameter to receive the target specifier.
    318      * If source is invalid or if index is out of range, result will
    319      * be empty.
    320      * @return reference to result
    321      */
    322     UnicodeString& getAvailableTarget(int32_t index,
    323                                       const UnicodeString& source,
    324                                       UnicodeString& result) const;
    325 
    326     /**
    327      * Return the number of registered variant specifiers for a given
    328      * source-target pair.  There is always at least one variant: If
    329      * just source-target is registered, then the single variant
    330      * NO_VARIANT is returned.  If source-target/variant is registered
    331      * then that variant is returned.
    332      * @param source the source specifier
    333      * @param target the target specifier
    334      * @return the number of registered variant specifiers for a given
    335      *         source-target pair.
    336      */
    337     int32_t countAvailableVariants(const UnicodeString& source,
    338                                    const UnicodeString& target) const;
    339 
    340     /**
    341      * Return a registered variant specifier for a given source-target
    342      * pair.  If NO_VARIANT is one of the variants, then it will be
    343      * at index 0.
    344      * @param index which specifier to return, from 0 to n-1, where
    345      * n = countAvailableVariants(source, target)
    346      * @param source the source specifier
    347      * @param target the target specifier
    348      * @param result fill-in parameter to receive the variant
    349      * specifier.  If source is invalid or if target is invalid or if
    350      * index is out of range, result will be empty.
    351      * @return reference to result
    352      */
    353     UnicodeString& getAvailableVariant(int32_t index,
    354                                        const UnicodeString& source,
    355                                        const UnicodeString& target,
    356                                        UnicodeString& result) const;
    357 
    358  private:
    359 
    360     //----------------------------------------------------------------
    361     // Private implementation
    362     //----------------------------------------------------------------
    363 
            // NOTE(review): none of the helpers below are documented in this
            // header and their definitions are not visible here.  The remarks
            // that follow are inferred from names and signatures only --
            // confirm against the implementation before relying on them.
            //
            // Lookup helpers.  Given the class comment (a dynamic hashtable
            // overlaid on static resource bundles), find() presumably consults
            // the dynamic store before the static store.  findInDynamicStore()
            // is const while findInStaticStore() is not, and findInBundle() is
            // static, so it has no access to instance state.
    364     TransliteratorEntry* find(const UnicodeString& ID);
    365 
    366     TransliteratorEntry* find(UnicodeString& source,
    367                 UnicodeString& target,
    368                 UnicodeString& variant);
    369 
    370     TransliteratorEntry* findInDynamicStore(const TransliteratorSpec& src,
    371                               const TransliteratorSpec& trg,
    372                               const UnicodeString& variant) const;
    373 
    374     TransliteratorEntry* findInStaticStore(const TransliteratorSpec& src,
    375                              const TransliteratorSpec& trg,
    376                              const UnicodeString& variant);
    377 
    378     static TransliteratorEntry* findInBundle(const TransliteratorSpec& specToOpen,
    379                                const TransliteratorSpec& specToFind,
    380                                const UnicodeString& variant,
    381                                UTransDirection direction);
    382 
            // Registration helpers.  The parameter name "adopted" suggests that
            // ownership of the entry passes to the registry -- TODO confirm.
    383     void registerEntry(const UnicodeString& source,
    384                        const UnicodeString& target,
    385                        const UnicodeString& variant,
    386                        TransliteratorEntry* adopted,
    387                        UBool visible);
    388 
    389     void registerEntry(const UnicodeString& ID,
    390                        TransliteratorEntry* adopted,
    391                        UBool visible);
    392 
    393     void registerEntry(const UnicodeString& ID,
    394                        const UnicodeString& source,
    395                        const UnicodeString& target,
    396                        const UnicodeString& variant,
    397                        TransliteratorEntry* adopted,
    398                        UBool visible);
    399 
            // Presumably add / remove a source-target-variant triple in
            // specDAG -- TODO confirm.
    400     void registerSTV(const UnicodeString& source,
    401                      const UnicodeString& target,
    402                      const UnicodeString& variant);
    403 
    404     void removeSTV(const UnicodeString& source,
    405                    const UnicodeString& target,
    406                    const UnicodeString& variant);
    407 
            // Takes the same ID / aliasReturn / status parameters as get() and
            // reget(), plus the entry to instantiate; presumably the shared
            // back end of both -- TODO confirm.
    408     Transliterator* instantiateEntry(const UnicodeString& ID,
    409                                      TransliteratorEntry *entry,
    410                                      TransliteratorAlias*& aliasReturn,
    411                                      UErrorCode& status);
    412 
    413     /**
    414      * A StringEnumeration over the registered IDs in this object.
    415      */
    416     class Enumeration : public StringEnumeration {
    417     public:
    418         Enumeration(const TransliteratorRegistry& reg);
    419         virtual ~Enumeration();
    420         virtual int32_t count(UErrorCode& status) const;
    421         virtual const UnicodeString* snext(UErrorCode& status);
    422         virtual void reset(UErrorCode& status);
    423         static UClassID U_EXPORT2 getStaticClassID();
    424         virtual UClassID getDynamicClassID() const;
    425     private:
    426         int32_t index; // presumably the current iteration position -- confirm
    427         const TransliteratorRegistry& reg; // held by reference, so the registry must outlive the enumeration
    428     };
    429     friend class Enumeration;
    430 
    431  private:
    432 
    433     /**
    434      * Dynamic registry mapping full IDs to Entry objects.  This
    435      * contains both public and internal entities.  The visibility is
    436      * controlled by whether an entry is listed in availableIDs and
    437      * specDAG or not.
    438      */
    439     Hashtable registry;
    440 
    441     /**
    442      * DAG of visible IDs by spec.  Hashtable: source => (Hashtable:
    443      * target => variant bitmask)
    444      */
    445     Hashtable specDAG;
    446 
    447     /**
    448      * Vector of all variant names
    449      */
    450     UVector variantList;
    451 
    452     /**
    453      * Vector of public full IDs.
    454      */
    455     UVector availableIDs;
    456 
    457     TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class
    458     TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class
    459 };
    460 
    461 U_NAMESPACE_END
    462 
    463 U_CFUNC UBool utrans_transliterator_cleanup(void);
        // NOTE(review): declared after U_NAMESPACE_END, i.e. outside the ICU C++
        // namespace block.  The definition is not in this header; by its name it
        // is presumably the cleanup hook that tears down the static transliterator
        // registry -- confirm against the implementation.
    464 
    465 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
    466 
    467 #endif
    468 //eof
    469