Home | History | Annotate | Download | only in i18n
      1 /*
      2 ******************************************************************************
      3 *
      4 *   Copyright (C) 2008-2009, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 ******************************************************************************
      8 *   file name:  uspoof_buildconf.h
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2009Jan05
     14 *   created by: Andy Heninger
     15 *
     16 *   Internal classes for compiling confusable data into its binary (runtime) form.
     17 */
     18 
     19 #ifndef __USPOOF_BUILDCONF_H__
     20 #define __USPOOF_BUILDCONF_H__
     21 
     22 #if !UCONFIG_NO_NORMALIZATION
     23 
     24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
     25 
     26 #include "uspoof_impl.h"
     27 
     28 // SPUString
     29 //              Holds a string that is the result of one of the mappings defined
     30 //              by the confusable mapping data (confusables.txt from Unicode.org)
     31 //              Instances of SPUString exist during the compilation process only.
     32 
     33 struct SPUString : public UMemory {   // One mapping-result string; instances exist only while the data is being compiled.
     34     UnicodeString  *fStr;             // The actual string.
     35     int32_t         fStrTableIndex;   // Index into the final runtime data for this string.
     36                                       //  (or, for length 1, the single string char itself,
     37                                       //   there being no string table entry for it.)
     38     SPUString(UnicodeString *s);      // NOTE(review): presumably stores s in fStr and takes ownership of it - confirm in the .cpp.
     39     ~SPUString();                     // NOTE(review): presumably deletes fStr - confirm in the .cpp.
     40 };
     41 
     42 
     43 //  String Pool   A utility class for holding the strings that are the result of
     44 //                the spoof mappings.  These strings will ultimately end up in the
     45 //                run-time String Table.
     46 //                This is sort of like a sorted set of strings, except that ICU's anemic
     47 //                built-in collections don't support those, so it is implemented with a
     48 //                combination of a uhash and a UVector.
     49 
     50 
     51 class SPUStringPool : public UMemory {   // Holds one SPUString per distinct string added via addString().
     52   public:
     53     SPUStringPool(UErrorCode &status);   // status: error code reference, passed in and possibly set by the constructor.
     54     ~SPUStringPool();
     55 
     56     // Add a string to the pool and return the pool's SPUString entry for it.
     57     // If the input parameter string is already in the table, the input parameter
     58     //  is deleted and the existing entry is returned, so callers must not use src afterwards in that case.
     59     SPUString *addString(UnicodeString *src, UErrorCode &status);   // NOTE(review): src is presumably adopted when it is new, too - confirm.
     60 
     61 
     62     // Get the n-th string in the collection; the ordering is the one last established by sort().
     63     SPUString *getByIndex(int32_t i);   // NOTE(review): valid i is presumably 0 .. size()-1 - confirm.
     64 
     65     // Sort the contents; affects the ordering of getByIndex().
     66     void sort(UErrorCode &status);      // NOTE(review): the sort key/order is not visible in this header.
     67 
     68     int32_t size();                     // NOTE(review): presumably the number of strings held in the pool - confirm.
     69 
     70   private:
     71     UVector     *fVec;    // Elements are SPUString *
     72     UHashtable  *fHash;   // Key: UnicodeString  Value: SPUString
     73 };
     74 
     75 
     76 // class ConfusabledataBuilder
     77 //     An instance of this class exists while the confusable data is being built from source.
     78 //     It encapsulates the intermediate data structures that are used for building.
     79 //     It exports one static function, to do a confusable data build.
     80 
     81 class ConfusabledataBuilder : public UMemory {   // Build-time state; the only non-private member is the static buildConfusableData().
     82   private:
     83     SpoofImpl  *fSpoofImpl;  // NOTE(review): presumably the SpoofImpl passed to the constructor, which receives the built data - confirm.
     84     UChar      *fInput;      // NOTE(review): presumably the confusables source text converted to UChars - confirm.
     85     UHashtable *fSLTable;    // fSLTable, fSATable, fMLTable and fMATable are the four mapping tables
     86     UHashtable *fSATable;    //   referred to in the comments below (SLTable, SATable, MATable, etc.).
     87     UHashtable *fMLTable;    // NOTE(review): key/value types of these tables are not visible in this header.
     88     UHashtable *fMATable;
     89     UnicodeSet *fKeySet;     // A set of all keys (UChar32s) that go into the four mapping tables.
     90 
     91     // The binary data is first assembled into the following four collections, then
     92     //   copied to its final raw-memory destination.
     93     UVector            *fKeyVec;
     94     UVector            *fValueVec;
     95     UnicodeString      *fStringTable;
     96     UVector            *fStringLengthsTable;
     97 
     98     SPUStringPool      *stringPool;     // Pool of mapping-result strings (see SPUStringPool). Note: no 'f' prefix, unlike the other members.
     99     URegularExpression *fParseLine;     // NOTE(review): presumably matches one line of the confusables source - confirm.
    100     URegularExpression *fParseHexNum;   // NOTE(review): presumably matches a hex code point within a line - confirm.
    101     int32_t             fLineNum;       // NOTE(review): presumably the current input line number, for error reporting - confirm.
    102 
    103     ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status);   // Private: not constructible from outside this class.
    104     ~ConfusabledataBuilder();
    105     void build(const char * confusables, int32_t confusablesLen, UErrorCode &status);   // confusables/confusablesLen: source text and its length.
    106 
    107     // Add an entry to the key and value tables being built
    108     //   input:  data from SLTable, MATable, etc.
    109     //   output: entry added to fKeyVec and fValueVec
    110     void addKeyEntry(UChar32     keyChar,     // The key character
    111                      UHashtable *table,       // The table, one of SATable, MATable, etc.
    112                      int32_t     tableFlag,   // One of USPOOF_SA_TABLE_FLAG, etc.
    113                      UErrorCode &status);
    114 
    115     // From an index into fKeyVec & fValueVec
    116     //   get a UnicodeString with the corresponding mapping.
    117     UnicodeString getMapping(int32_t key);    // Despite its name, 'key' is the index described above, not a key character.
    118 
    119     // Populate the final binary output data array with the compiled data.
    120     void outputData(UErrorCode &status);
    121 
    122   public:
    123     static void buildConfusableData(SpoofImpl *spImpl, const char * confusables,   // The one exported entry point: does a confusable data build.
    124         int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status);   // NOTE(review): errorType and pe look like error-detail outputs - confirm.
    125 };
    126 
    127 #endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
    128 #endif  // !UCONFIG_NO_NORMALIZATION
    129 #endif  // __USPOOF_BUILDCONF_H__
    130