1 /* 2 ****************************************************************************** 3 * 4 * Copyright (C) 2008-2009, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ****************************************************************************** 8 * file name: uspoof_buildconf.h 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2009Jan05 14 * created by: Andy Heninger 15 * 16 * Internal classes for compiling confusable data into its binary (runtime) form. 17 */ 18 19 #ifndef __USPOOF_BUILDCONF_H__ 20 #define __USPOOF_BUILDCONF_H__ 21 22 #if !UCONFIG_NO_NORMALIZATION 23 24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS 25 26 #include "uspoof_impl.h" 27 28 // SPUString 29 // Holds a string that is the result of one of the mappings defined 30 // by the confusable mapping data (confusables.txt from Unicode.org) 31 // Instances of SPUString exist during the compilation process only. 32 33 struct SPUString : public UMemory { 34 UnicodeString *fStr; // The actual string. 35 int32_t fStrTableIndex; // Index into the final runtime data for this string. 36 // (or, for length 1, the single string char itself, 37 // there being no string table entry for it.) 38 SPUString(UnicodeString *s); 39 ~SPUString(); 40 }; 41 42 43 // String Pool A utility class for holding the strings that are the result of 44 // the spoof mappings. These strings will utimately end up in the 45 // run-time String Table. 46 // This is sort of like a sorted set of strings, except that ICU's anemic 47 // built-in collections don't support those, so it is implemented with a 48 // combination of a uhash and a UVector. 49 50 51 class SPUStringPool : public UMemory { 52 public: 53 SPUStringPool(UErrorCode &status); 54 ~SPUStringPool(); 55 56 // Add a string. Return the string from the table. 57 // If the input parameter string is already in the table, delete the 58 // input parameter and return the existing string. 59 SPUString *addString(UnicodeString *src, UErrorCode &status); 60 61 62 // Get the n-th string in the collection. 63 SPUString *getByIndex(int32_t i); 64 65 // Sort the contents; affects the ordering of getByIndex(). 66 void sort(UErrorCode &status); 67 68 int32_t size(); 69 70 private: 71 UVector *fVec; // Elements are SPUString * 72 UHashtable *fHash; // Key: UnicodeString Value: SPUString 73 }; 74 75 76 // class ConfusabledataBuilder 77 // An instance of this class exists while the confusable data is being built from source. 78 // It encapsulates the intermediate data structures that are used for building. 79 // It exports one static function, to do a confusable data build. 80 81 class ConfusabledataBuilder : public UMemory { 82 private: 83 SpoofImpl *fSpoofImpl; 84 UChar *fInput; 85 UHashtable *fSLTable; 86 UHashtable *fSATable; 87 UHashtable *fMLTable; 88 UHashtable *fMATable; 89 UnicodeSet *fKeySet; // A set of all keys (UChar32s) that go into the four mapping tables. 90 91 // The binary data is first assembled into the following four collections, then 92 // copied to its final raw-memory destination. 93 UVector *fKeyVec; 94 UVector *fValueVec; 95 UnicodeString *fStringTable; 96 UVector *fStringLengthsTable; 97 98 SPUStringPool *stringPool; 99 URegularExpression *fParseLine; 100 URegularExpression *fParseHexNum; 101 int32_t fLineNum; 102 103 ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status); 104 ~ConfusabledataBuilder(); 105 void build(const char * confusables, int32_t confusablesLen, UErrorCode &status); 106 107 // Add an entry to the key and value tables being built 108 // input: data from SLTable, MATable, etc. 109 // outut: entry added to fKeyVec and fValueVec 110 void addKeyEntry(UChar32 keyChar, // The key character 111 UHashtable *table, // The table, one of SATable, MATable, etc. 112 int32_t tableFlag, // One of USPOOF_SA_TABLE_FLAG, etc. 113 UErrorCode &status); 114 115 // From an index into fKeyVec & fValueVec 116 // get a UnicodeString with the corresponding mapping. 117 UnicodeString getMapping(int32_t key); 118 119 // Populate the final binary output data array with the compiled data. 120 void outputData(UErrorCode &status); 121 122 public: 123 static void buildConfusableData(SpoofImpl *spImpl, const char * confusables, 124 int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status); 125 }; 126 127 #endif 128 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS 129 #endif // __USPOOF_BUILDCONF_H__ 130