Home | History | Annotate | Download | only in i18n
      1 /*
      2 ******************************************************************************
      3 *
      4 *   Copyright (C) 2008-2009, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 ******************************************************************************
      8 *   file name:  uspoof_buildconf.cpp
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2009Jan05  (refactoring earlier files)
     14 *   created by: Andy Heninger
     15 *
     16 *   Internal classes for compililing confusable data into its binary (runtime) form.
     17 */
     18 
     19 #include "unicode/utypes.h"
     20 #include "unicode/uspoof.h"
     21 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
     22 #if !UCONFIG_NO_NORMALIZATION
     23 
     24 #include "unicode/unorm.h"
     25 #include "unicode/uregex.h"
     26 #include "unicode/ustring.h"
     27 #include "cmemory.h"
     28 #include "uspoof_impl.h"
     29 #include "uhash.h"
     30 #include "uvector.h"
     31 #include "uassert.h"
     32 #include "uarrsort.h"
     33 #include "uspoof_buildconf.h"
     34 
     35 U_NAMESPACE_USE
     36 
     37 
     38 //---------------------------------------------------------------------
     39 //
     40 //  buildConfusableData   Compile the source confusable data, as defined by
     41 //                        the Unicode data file confusables.txt, into the binary
     42 //                        structures used by the confusable detector.
     43 //
     44 //                        The binary structures are described in uspoof_impl.h
     45 //
     46 //     1.  parse the data, building 4 hash tables, one each for the SL, SA, ML and MA
     47 //         tables.  Each maps from a UChar32 to a String.
     48 //
     49 //     2.  Sort all of the strings encountered by length, since they will need to
     50 //         be stored in that order in the final string table.
     51 //
     52 //     3.  Build a list of keys (UChar32s) from the four mapping tables.  Sort the
     53 //         list because that will be the ordering of our runtime table.
     54 //
     55 //     4.  Generate the run time string table.  This is generated before the key & value
     56 //         tables because we need the string indexes when building those tables.
     57 //
     58 //     5.  Build the run-time key and value tables.  These are parallel tables, and are built
     59 //         at the same time
     60 //
     61 
     62 SPUString::SPUString(UnicodeString *s) {
     63     fStr = s;
     64     fStrTableIndex = 0;
     65 }
     66 
     67 
     68 SPUString::~SPUString() {
     69     delete fStr;
     70 }
     71 
     72 
     73 SPUStringPool::SPUStringPool(UErrorCode &status) : fVec(NULL), fHash(NULL) {
     74     fVec = new UVector(status);
     75     fHash = uhash_open(uhash_hashUnicodeString,           // key hash function
     76                        uhash_compareUnicodeString,        // Key Comparator
     77                        NULL,                              // Value Comparator
     78                        &status);
     79 }
     80 
     81 
     82 SPUStringPool::~SPUStringPool() {
     83     int i;
     84     for (i=fVec->size()-1; i>=0; i--) {
     85         SPUString *s = static_cast<SPUString *>(fVec->elementAt(i));
     86         delete s;
     87     }
     88     delete fVec;
     89     uhash_close(fHash);
     90 }
     91 
     92 
     93 int32_t SPUStringPool::size() {
     94     return fVec->size();
     95 }
     96 
     97 SPUString *SPUStringPool::getByIndex(int32_t index) {
     98     SPUString *retString = (SPUString *)fVec->elementAt(index);
     99     return retString;
    100 }
    101 
    102 
    103 // Comparison function for ordering strings in the string pool.
    104 // Compare by length first, then, within a group of the same length,
    105 // by code point order.
    106 // Conforms to the type signature for a USortComparator in uvector.h
    107 
    108 static int8_t U_CALLCONV SPUStringCompare(UHashTok left, UHashTok right) {
    109     const SPUString *sL = static_cast<const SPUString *>(left.pointer);
    110     const SPUString *sR = static_cast<const SPUString *>(right.pointer);
    111     int32_t lenL = sL->fStr->length();
    112     int32_t lenR = sR->fStr->length();
    113     if (lenL < lenR) {
    114         return -1;
    115     } else if (lenL > lenR) {
    116         return 1;
    117     } else {
    118         return sL->fStr->compare(*(sR->fStr));
    119     }
    120 }
    121 
    122 void SPUStringPool::sort(UErrorCode &status) {
    123     fVec->sort(SPUStringCompare, status);
    124 }
    125 
    126 
    127 SPUString *SPUStringPool::addString(UnicodeString *src, UErrorCode &status) {
    128     SPUString *hashedString = static_cast<SPUString *>(uhash_get(fHash, src));
    129     if (hashedString != NULL) {
    130         delete src;
    131     } else {
    132         hashedString = new SPUString(src);
    133         uhash_put(fHash, src, hashedString, &status);
    134         fVec->addElement(hashedString, status);
    135     }
    136     return hashedString;
    137 }
    138 
    139 
    140 
    141 ConfusabledataBuilder::ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status) :
    142     fSpoofImpl(spImpl),
    143     fInput(NULL),
    144     fSLTable(NULL),
    145     fSATable(NULL),
    146     fMLTable(NULL),
    147     fMATable(NULL),
    148     fKeySet(NULL),
    149     fKeyVec(NULL),
    150     fValueVec(NULL),
    151     fStringTable(NULL),
    152     fStringLengthsTable(NULL),
    153     stringPool(NULL),
    154     fParseLine(NULL),
    155     fParseHexNum(NULL),
    156     fLineNum(0)
    157 {
    158     if (U_FAILURE(status)) {
    159         return;
    160     }
    161     fSLTable    = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &status);
    162     fSATable    = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &status);
    163     fMLTable    = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &status);
    164     fMATable    = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &status);
    165     fKeySet     = new UnicodeSet();
    166     fKeyVec     = new UVector(status);
    167     fValueVec   = new UVector(status);
    168     stringPool = new SPUStringPool(status);
    169 }
    170 
    171 
    172 ConfusabledataBuilder::~ConfusabledataBuilder() {
    173     uprv_free(fInput);
    174     uregex_close(fParseLine);
    175     uregex_close(fParseHexNum);
    176     uhash_close(fSLTable);
    177     uhash_close(fSATable);
    178     uhash_close(fMLTable);
    179     uhash_close(fMATable);
    180     delete fKeySet;
    181     delete fKeyVec;
    182     delete fStringTable;
    183     delete fStringLengthsTable;
    184     delete fValueVec;
    185     delete stringPool;
    186 }
    187 
    188 
    189 void ConfusabledataBuilder::buildConfusableData(SpoofImpl * spImpl, const char * confusables,
    190     int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status) {
    191 
    192     if (U_FAILURE(status)) {
    193         return;
    194     }
    195     ConfusabledataBuilder builder(spImpl, status);
    196     builder.build(confusables, confusablesLen, status);
    197     if (U_FAILURE(status) && errorType != NULL) {
    198         *errorType = USPOOF_SINGLE_SCRIPT_CONFUSABLE;
    199         pe->line = builder.fLineNum;
    200     }
    201 }
    202 
    203 
    204 void ConfusabledataBuilder::build(const char * confusables, int32_t confusablesLen,
    205                UErrorCode &status) {
    206 
    207     // Convert the user input data from UTF-8 to UChar (UTF-16)
    208     int32_t inputLen = 0;
    209     if (U_FAILURE(status)) {
    210         return;
    211     }
    212     u_strFromUTF8(NULL, 0, &inputLen, confusables, confusablesLen, &status);
    213     if (status != U_BUFFER_OVERFLOW_ERROR) {
    214         return;
    215     }
    216     status = U_ZERO_ERROR;
    217     fInput = static_cast<UChar *>(uprv_malloc((inputLen+1) * sizeof(UChar)));
    218     if (fInput == NULL) {
    219         status = U_MEMORY_ALLOCATION_ERROR;
    220     }
    221     u_strFromUTF8(fInput, inputLen+1, NULL, confusables, confusablesLen, &status);
    222 
    223 
    224     // Regular Expression to parse a line from Confusables.txt.  The expression will match
    225     // any line.  What was matched is determined by examining which capture groups have a match.
    226     //   Capture Group 1:  the source char
    227     //   Capture Group 2:  the replacement chars
    228     //   Capture Group 3-6  the table type, SL, SA, ML, or MA
    229     //   Capture Group 7:  A blank or comment only line.
    230     //   Capture Group 8:  A syntactically invalid line.  Anything that didn't match before.
    231     // Example Line from the confusables.txt source file:
    232     //   "1D702 ;	006E 0329 ;	SL	# MATHEMATICAL ITALIC SMALL ETA ... "
    233     fParseLine = uregex_openC(
    234         "(?m)^[ \\t]*([0-9A-Fa-f]+)[ \\t]+;"      // Match the source char
    235         "[ \\t]*([0-9A-Fa-f]+"                    // Match the replacement char(s)
    236            "(?:[ \\t]+[0-9A-Fa-f]+)*)[ \\t]*;"    //     (continued)
    237         "\\s*(?:(SL)|(SA)|(ML)|(MA))"             // Match the table type
    238         "[ \\t]*(?:#.*?)?$"                       // Match any trailing #comment
    239         "|^([ \\t]*(?:#.*?)?)$"       // OR match empty lines or lines with only a #comment
    240         "|^(.*?)$",                   // OR match any line, which catches illegal lines.
    241         0, NULL, &status);
    242 
    243     // Regular expression for parsing a hex number out of a space-separated list of them.
    244     //   Capture group 1 gets the number, with spaces removed.
    245     fParseHexNum = uregex_openC("\\s*([0-9A-F]+)", 0, NULL, &status);
    246 
    247     // Zap any Byte Order Mark at the start of input.  Changing it to a space is benign
    248     //   given the syntax of the input.
    249     if (*fInput == 0xfeff) {
    250         *fInput = 0x20;
    251     }
    252 
    253     // Parse the input, one line per iteration of this loop.
    254     uregex_setText(fParseLine, fInput, inputLen, &status);
    255     while (uregex_findNext(fParseLine, &status)) {
    256         fLineNum++;
    257         if (uregex_start(fParseLine, 7, &status) >= 0) {
    258             // this was a blank or comment line.
    259             continue;
    260         }
    261         if (uregex_start(fParseLine, 8, &status) >= 0) {
    262             // input file syntax error.
    263             status = U_PARSE_ERROR;
    264             return;
    265         }
    266 
    267         // We have a good input line.  Extract the key character and mapping string, and
    268         //    put them into the appropriate mapping table.
    269         UChar32 keyChar = SpoofImpl::ScanHex(fInput, uregex_start(fParseLine, 1, &status),
    270                           uregex_end(fParseLine, 1, &status), status);
    271 
    272         int32_t mapStringStart = uregex_start(fParseLine, 2, &status);
    273         int32_t mapStringLength = uregex_end(fParseLine, 2, &status) - mapStringStart;
    274         uregex_setText(fParseHexNum, &fInput[mapStringStart], mapStringLength, &status);
    275 
    276         UnicodeString  *mapString = new UnicodeString();
    277         if (mapString == NULL) {
    278             status = U_MEMORY_ALLOCATION_ERROR;
    279             return;
    280         }
    281         while (uregex_findNext(fParseHexNum, &status)) {
    282             UChar32 c = SpoofImpl::ScanHex(&fInput[mapStringStart], uregex_start(fParseHexNum, 1, &status),
    283                                  uregex_end(fParseHexNum, 1, &status), status);
    284             mapString->append(c);
    285         }
    286         U_ASSERT(mapString->length() >= 1);
    287 
    288         // Put the map (value) string into the string pool
    289         // This a little like a Java intern() - any duplicates will be eliminated.
    290         SPUString *smapString = stringPool->addString(mapString, status);
    291 
    292         // Add the UChar -> string mapping to the appropriate table.
    293         UHashtable *table = uregex_start(fParseLine, 3, &status) >= 0 ? fSLTable :
    294                             uregex_start(fParseLine, 4, &status) >= 0 ? fSATable :
    295                             uregex_start(fParseLine, 5, &status) >= 0 ? fMLTable :
    296                             uregex_start(fParseLine, 6, &status) >= 0 ? fMATable :
    297                             NULL;
    298         U_ASSERT(table != NULL);
    299         uhash_iput(table, keyChar, smapString, &status);
    300         fKeySet->add(keyChar);
    301         if (U_FAILURE(status)) {
    302             return;
    303         }
    304     }
    305 
    306     // Input data is now all parsed and collected.
    307     // Now create the run-time binary form of the data.
    308     //
    309     // This is done in two steps.  First the data is assembled into vectors and strings,
    310     //   for ease of construction, then the contents of these collections are dumped
    311     //   into the actual raw-bytes data storage.
    312 
    313     // Build up the string array, and record the index of each string therein
    314     //  in the (build time only) string pool.
    315     // Strings of length one are not entered into the strings array.
    316     // At the same time, build up the string lengths table, which records the
    317     // position in the string table of the first string of each length >= 4.
    318     // (Strings in the table are sorted by length)
    319     stringPool->sort(status);
    320     fStringTable = new UnicodeString();
    321     fStringLengthsTable = new UVector(status);
    322     int32_t previousStringLength = 0;
    323     int32_t previousStringIndex  = 0;
    324     int32_t poolSize = stringPool->size();
    325     int32_t i;
    326     for (i=0; i<poolSize; i++) {
    327         SPUString *s = stringPool->getByIndex(i);
    328         int32_t strLen = s->fStr->length();
    329         int32_t strIndex = fStringTable->length();
    330         U_ASSERT(strLen >= previousStringLength);
    331         if (strLen == 1) {
    332             // strings of length one do not get an entry in the string table.
    333             // Keep the single string character itself here, which is the same
    334             //  convention that is used in the final run-time string table index.
    335             s->fStrTableIndex = s->fStr->charAt(0);
    336         } else {
    337             if ((strLen > previousStringLength) && (previousStringLength >= 4)) {
    338                 fStringLengthsTable->addElement(previousStringIndex, status);
    339                 fStringLengthsTable->addElement(previousStringLength, status);
    340             }
    341             s->fStrTableIndex = strIndex;
    342             fStringTable->append(*(s->fStr));
    343         }
    344         previousStringLength = strLen;
    345         previousStringIndex  = strIndex;
    346     }
    347     // Make the final entry to the string lengths table.
    348     //   (it holds an entry for the _last_ string of each length, so adding the
    349     //    final one doesn't happen in the main loop because no longer string was encountered.)
    350     if (previousStringLength >= 4) {
    351         fStringLengthsTable->addElement(previousStringIndex, status);
    352         fStringLengthsTable->addElement(previousStringLength, status);
    353     }
    354 
    355     // Construct the compile-time Key and Value tables
    356     //
    357     // For each key code point, check which mapping tables it applies to,
    358     //   and create the final data for the key & value structures.
    359     //
    360     //   The four logical mapping tables are conflated into one combined table.
    361     //   If multiple logical tables have the same mapping for some key, they
    362     //     share a single entry in the combined table.
    363     //   If more than one mapping exists for the same key code point, multiple
    364     //     entries will be created in the table
    365 
    366     for (int32_t range=0; range<fKeySet->getRangeCount(); range++) {
    367         // It is an oddity of the UnicodeSet API that simply enumerating the contained
    368         //   code points requires a nested loop.
    369         for (UChar32 keyChar=fKeySet->getRangeStart(range);
    370                 keyChar <= fKeySet->getRangeEnd(range); keyChar++) {
    371             addKeyEntry(keyChar, fSLTable, USPOOF_SL_TABLE_FLAG, status);
    372             addKeyEntry(keyChar, fSATable, USPOOF_SA_TABLE_FLAG, status);
    373             addKeyEntry(keyChar, fMLTable, USPOOF_ML_TABLE_FLAG, status);
    374             addKeyEntry(keyChar, fMATable, USPOOF_MA_TABLE_FLAG, status);
    375         }
    376     }
    377 
    378     // Put the assembled data into the flat runtime array
    379     outputData(status);
    380 
    381     // All of the intermediate allocated data belongs to the ConfusabledataBuilder
    382     //  object  (this), and is deleted in the destructor.
    383     return;
    384 }
    385 
    386 //
    387 // outputData     The confusable data has been compiled and stored in intermediate
    388 //                collections and strings.  Copy it from there to the final flat
    389 //                binary array.
    390 //
    391 //                Note that as each section is added to the output data, the
    392 //                expand (reserveSpace() function will likely relocate it in memory.
    393 //                Be careful with pointers.
    394 //
    395 void ConfusabledataBuilder::outputData(UErrorCode &status) {
    396 
    397     U_ASSERT(fSpoofImpl->fSpoofData->fDataOwned == TRUE);
    398 
    399     //  The Key Table
    400     //     While copying the keys to the runtime array,
    401     //       also sanity check that they are sorted.
    402 
    403     int32_t numKeys = fKeyVec->size();
    404     int32_t *keys =
    405         static_cast<int32_t *>(fSpoofImpl->fSpoofData->reserveSpace(numKeys*sizeof(int32_t), status));
    406     if (U_FAILURE(status)) {
    407         return;
    408     }
    409     int i;
    410     int32_t previousKey = 0;
    411     for (i=0; i<numKeys; i++) {
    412         int32_t key =  fKeyVec->elementAti(i);
    413         U_ASSERT((key & 0x00ffffff) >= (previousKey & 0x00ffffff));
    414         U_ASSERT((key & 0xff000000) != 0);
    415         keys[i] = key;
    416         previousKey = key;
    417     }
    418     SpoofDataHeader *rawData = fSpoofImpl->fSpoofData->fRawData;
    419     rawData->fCFUKeys = (char *)keys - (char *)rawData;
    420     rawData->fCFUKeysSize = numKeys;
    421     fSpoofImpl->fSpoofData->fCFUKeys = keys;
    422 
    423 
    424     // The Value Table, parallels the key table
    425     int32_t numValues = fValueVec->size();
    426     U_ASSERT(numKeys == numValues);
    427     uint16_t *values =
    428         static_cast<uint16_t *>(fSpoofImpl->fSpoofData->reserveSpace(numKeys*sizeof(uint16_t), status));
    429     if (U_FAILURE(status)) {
    430         return;
    431     }
    432     for (i=0; i<numValues; i++) {
    433         uint32_t value = static_cast<uint32_t>(fValueVec->elementAti(i));
    434         U_ASSERT(value < 0xffff);
    435         values[i] = static_cast<uint16_t>(value);
    436     }
    437     rawData = fSpoofImpl->fSpoofData->fRawData;
    438     rawData->fCFUStringIndex = (char *)values - (char *)rawData;
    439     rawData->fCFUStringIndexSize = numValues;
    440     fSpoofImpl->fSpoofData->fCFUValues = values;
    441 
    442     // The Strings Table.
    443 
    444     uint32_t stringsLength = fStringTable->length();
    445     // Reserve an extra space so the string will be nul-terminated.  This is
    446     // only a convenience, for when debugging; it is not needed otherwise.
    447     UChar *strings =
    448         static_cast<UChar *>(fSpoofImpl->fSpoofData->reserveSpace(stringsLength*sizeof(UChar)+2, status));
    449     if (U_FAILURE(status)) {
    450         return;
    451     }
    452     fStringTable->extract(strings, stringsLength+1, status);
    453     rawData = fSpoofImpl->fSpoofData->fRawData;
    454     U_ASSERT(rawData->fCFUStringTable == 0);
    455     rawData->fCFUStringTable = (char *)strings - (char *)rawData;
    456     rawData->fCFUStringTableLen = stringsLength;
    457     fSpoofImpl->fSpoofData->fCFUStrings = strings;
    458 
    459     // The String Lengths Table
    460     //    While copying into the runtime array do some sanity checks on the values
    461     //    Each complete entry contains two fields, an index and an offset.
    462     //    Lengths should increase with each entry.
    463     //    Offsets should be less than the size of the string table.
    464     int32_t lengthTableLength = fStringLengthsTable->size();
    465     uint16_t *stringLengths =
    466         static_cast<uint16_t *>(fSpoofImpl->fSpoofData->reserveSpace(lengthTableLength*sizeof(uint16_t), status));
    467     if (U_FAILURE(status)) {
    468         return;
    469     }
    470     int32_t destIndex = 0;
    471     uint32_t previousLength = 0;
    472     for (i=0; i<lengthTableLength; i+=2) {
    473         uint32_t offset = static_cast<uint32_t>(fStringLengthsTable->elementAti(i));
    474         uint32_t length = static_cast<uint32_t>(fStringLengthsTable->elementAti(i+1));
    475         U_ASSERT(offset < stringsLength);
    476         U_ASSERT(length < 40);
    477         U_ASSERT(length > previousLength);
    478         stringLengths[destIndex++] = static_cast<uint16_t>(offset);
    479         stringLengths[destIndex++] = static_cast<uint16_t>(length);
    480         previousLength = length;
    481     }
    482     rawData = fSpoofImpl->fSpoofData->fRawData;
    483     rawData->fCFUStringLengths = (char *)stringLengths - (char *)rawData;
    484     // Note: StringLengthsSize in the raw data is the number of complete entries,
    485     //       each consisting of a pair of 16 bit values, hence the divide by 2.
    486     rawData->fCFUStringLengthsSize = lengthTableLength / 2;
    487     fSpoofImpl->fSpoofData->fCFUStringLengths =
    488         reinterpret_cast<SpoofStringLengthsElement *>(stringLengths);
    489 }
    490 
    491 
    492 
    493 //  addKeyEntry   Construction of the confusable Key and Mapping Values tables.
    494 //                This is an intermediate point in the building process.
    495 //                We already have the mappings in the hash tables fSLTable, etc.
    496 //                This function builds corresponding run-time style table entries into
    497 //                  fKeyVec and fValueVec
    498 
    499 void ConfusabledataBuilder::addKeyEntry(
    500     UChar32     keyChar,     // The key character
    501     UHashtable *table,       // The table, one of SATable, MATable, etc.
    502     int32_t     tableFlag,   // One of USPOOF_SA_TABLE_FLAG, etc.
    503     UErrorCode &status) {
    504 
    505     SPUString *targetMapping = static_cast<SPUString *>(uhash_iget(table, keyChar));
    506     if (targetMapping == NULL) {
    507         // No mapping for this key character.
    508         //   (This function is called for all four tables for each key char that
    509         //    is seen anywhere, so this no entry cases are very much expected.)
    510         return;
    511     }
    512 
    513     // Check whether there is already an entry with the correct mapping.
    514     // If so, simply set the flag in the keyTable saying that the existing entry
    515     // applies to the table that we're doing now.
    516 
    517     UBool keyHasMultipleValues = FALSE;
    518     int32_t i;
    519     for (i=fKeyVec->size()-1; i>=0 ; i--) {
    520         int32_t key = fKeyVec->elementAti(i);
    521         if ((key & 0x0ffffff) != keyChar) {
    522             // We have now checked all existing key entries for this key char (if any)
    523             //  without finding one with the same mapping.
    524             break;
    525         }
    526         UnicodeString mapping = getMapping(i);
    527         if (mapping == *(targetMapping->fStr)) {
    528             // The run time entry we are currently testing has the correct mapping.
    529             // Set the flag in it indicating that it applies to the new table also.
    530             key |= tableFlag;
    531             fKeyVec->setElementAt(key, i);
    532             return;
    533         }
    534         keyHasMultipleValues = TRUE;
    535     }
    536 
    537     // Need to add a new entry to the binary data being built for this mapping.
    538     // Includes adding entries to both the key table and the parallel values table.
    539 
    540     int32_t newKey = keyChar | tableFlag;
    541     if (keyHasMultipleValues) {
    542         newKey |= USPOOF_KEY_MULTIPLE_VALUES;
    543     }
    544     int32_t adjustedMappingLength = targetMapping->fStr->length() - 1;
    545     if (adjustedMappingLength>3) {
    546         adjustedMappingLength = 3;
    547     }
    548     newKey |= adjustedMappingLength << USPOOF_KEY_LENGTH_SHIFT;
    549 
    550     int32_t newData = targetMapping->fStrTableIndex;
    551 
    552     fKeyVec->addElement(newKey, status);
    553     fValueVec->addElement(newData, status);
    554 
    555     // If the preceding key entry is for the same key character (but with a different mapping)
    556     //   set the multiple-values flag on it.
    557     if (keyHasMultipleValues) {
    558         int32_t previousKeyIndex = fKeyVec->size() - 2;
    559         int32_t previousKey = fKeyVec->elementAti(previousKeyIndex);
    560         previousKey |= USPOOF_KEY_MULTIPLE_VALUES;
    561         fKeyVec->setElementAt(previousKey, previousKeyIndex);
    562     }
    563 }
    564 
    565 
    566 
    567 UnicodeString ConfusabledataBuilder::getMapping(int32_t index) {
    568     int32_t key = fKeyVec->elementAti(index);
    569     int32_t value = fValueVec->elementAti(index);
    570     int32_t length = USPOOF_KEY_LENGTH_FIELD(key);
    571     int32_t lastIndexWithLen;
    572     switch (length) {
    573       case 0:
    574         return UnicodeString(static_cast<UChar>(value));
    575       case 1:
    576       case 2:
    577         return UnicodeString(*fStringTable, value, length+1);
    578       case 3:
    579         length = 0;
    580         int32_t i;
    581         for (i=0; i<fStringLengthsTable->size(); i+=2) {
    582             lastIndexWithLen = fStringLengthsTable->elementAti(i);
    583             if (value <= lastIndexWithLen) {
    584                 length = fStringLengthsTable->elementAti(i+1);
    585                 break;
    586             }
    587         }
    588         U_ASSERT(length>=3);
    589         return UnicodeString(*fStringTable, value, length);
    590       default:
    591         U_ASSERT(FALSE);
    592     }
    593     return UnicodeString();
    594 }
    595 
    596 #endif
    597 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
    598 
    599