Home | History | Annotate | Download | only in intltest
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 *   Copyright (C) 1999-2016, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 **********************************************************************
      8 *   Date        Name        Description
      9 *   11/10/99    aliu        Creation.
     10 **********************************************************************
     11 */
     12 
     13 #include "unicode/utypes.h"
     14 
     15 #if !UCONFIG_NO_TRANSLITERATION
     16 
     17 #include "transtst.h"
     18 #include "unicode/locid.h"
     19 #include "unicode/dtfmtsym.h"
     20 #include "unicode/normlzr.h"
     21 #include "unicode/translit.h"
     22 #include "unicode/uchar.h"
     23 #include "unicode/unifilt.h"
     24 #include "unicode/uniset.h"
     25 #include "unicode/ustring.h"
     26 #include "unicode/usetiter.h"
     27 #include "unicode/uscript.h"
     28 #include "unicode/utf16.h"
     29 #include "cpdtrans.h"
     30 #include "nultrans.h"
     31 #include "rbt.h"
     32 #include "rbt_pars.h"
     33 #include "anytrans.h"
     34 #include "esctrn.h"
     35 #include "name2uni.h"
     36 #include "nortrans.h"
     37 #include "remtrans.h"
     38 #include "titletrn.h"
     39 #include "tolowtrn.h"
     40 #include "toupptrn.h"
     41 #include "unesctrn.h"
     42 #include "uni2name.h"
     43 #include "cstring.h"
     44 #include "cmemory.h"
     45 #include <stdio.h>
     46 
     47 /***********************************************************************
     48 
     49                      HOW TO USE THIS TEST FILE
     50                                -or-
     51                   How I developed on two platforms
     52                 without losing (too much of) my mind
     53 
     54 
     55 1. Add new tests by copying/pasting/changing existing tests.  On Java,
     56    any public void method named Test...() taking no parameters becomes
     57    a test.  On C++, you need to modify the header and add a line to
     58    the runIndexedTest() dispatch method.
     59 
     60 2. Make liberal use of the expect() method; it is your friend.
     61 
     62 3. The tests in this file exactly match those in a sister file on the
     63    other side.  The two files are:
     64 
     65    icu4j:  src/com/ibm/test/translit/TransliteratorTest.java
     66    icu4c:  source/test/intltest/transtst.cpp
     67 
     68                   ==> THIS IS THE IMPORTANT PART <==
     69 
     70    When you add a test in this file, add it in TransliteratorTest.java
     71    too.  Give it the same name and put it in the same relative place.
     72    This makes maintenance a lot simpler for any poor soul who ends up
     73    trying to synchronize the tests between icu4j and icu4c.
     74 
     75 4. If you MUST enter a test that is NOT paralleled in the sister file,
     76    then add it in the special non-mirrored section.  These are
     77    labeled
     78 
     79      "icu4j ONLY"
     80 
     81    or
     82 
     83      "icu4c ONLY"
     84 
     85    Make sure you document the reason the test is here and not there.
     86 
     87 
     88 Thank you.
     89 The Management
     90 ***********************************************************************/
     91 
// Define character constants thusly to be EBCDIC-friendly: each value is
// written as a numeric code unit rather than as a character literal
// (presumably so the value does not depend on the compiler's charset).
enum {
    // U+007B; keyboardAux() appends it to its log line before the text
    // lying between contextStart and start.
    LEFT_BRACE=((UChar)0x007B), /*{*/
    // U+007C; keyboardAux() appends it to its log line at index.start.
    PIPE      =((UChar)0x007C), /*|*/
    // U+0030 and U+0041; not referenced in the portion of the file shown here.
    ZERO      =((UChar)0x0030), /*0*/
    UPPER_A   =((UChar)0x0041)  /*A*/
};
     99 
    100 TransliteratorTest::TransliteratorTest()
    101 :   DESERET_DEE((UChar32)0x10414),
    102     DESERET_dee((UChar32)0x1043C)
    103 {
    104 }
    105 
    106 TransliteratorTest::~TransliteratorTest() {}
    107 
    108 void
    109 TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
    110                                    const char* &name, char* /*par*/) {
    111     switch (index) {
    112         TESTCASE(0,TestInstantiation);
    113         TESTCASE(1,TestSimpleRules);
    114         TESTCASE(2,TestRuleBasedInverse);
    115         TESTCASE(3,TestKeyboard);
    116         TESTCASE(4,TestKeyboard2);
    117         TESTCASE(5,TestKeyboard3);
    118         TESTCASE(6,TestArabic);
    119         TESTCASE(7,TestCompoundKana);
    120         TESTCASE(8,TestCompoundHex);
    121         TESTCASE(9,TestFiltering);
    122         TESTCASE(10,TestInlineSet);
    123         TESTCASE(11,TestPatternQuoting);
    124         TESTCASE(12,TestJ277);
    125         TESTCASE(13,TestJ243);
    126         TESTCASE(14,TestJ329);
    127         TESTCASE(15,TestSegments);
    128         TESTCASE(16,TestCursorOffset);
    129         TESTCASE(17,TestArbitraryVariableValues);
    130         TESTCASE(18,TestPositionHandling);
    131         TESTCASE(19,TestHiraganaKatakana);
    132         TESTCASE(20,TestCopyJ476);
    133         TESTCASE(21,TestAnchors);
    134         TESTCASE(22,TestInterIndic);
    135         TESTCASE(23,TestFilterIDs);
    136         TESTCASE(24,TestCaseMap);
    137         TESTCASE(25,TestNameMap);
    138         TESTCASE(26,TestLiberalizedID);
    139         TESTCASE(27,TestCreateInstance);
    140         TESTCASE(28,TestNormalizationTransliterator);
    141         TESTCASE(29,TestCompoundRBT);
    142         TESTCASE(30,TestCompoundFilter);
    143         TESTCASE(31,TestRemove);
    144         TESTCASE(32,TestToRules);
    145         TESTCASE(33,TestContext);
    146         TESTCASE(34,TestSupplemental);
    147         TESTCASE(35,TestQuantifier);
    148         TESTCASE(36,TestSTV);
    149         TESTCASE(37,TestCompoundInverse);
    150         TESTCASE(38,TestNFDChainRBT);
    151         TESTCASE(39,TestNullInverse);
    152         TESTCASE(40,TestAliasInverseID);
    153         TESTCASE(41,TestCompoundInverseID);
    154         TESTCASE(42,TestUndefinedVariable);
    155         TESTCASE(43,TestEmptyContext);
    156         TESTCASE(44,TestCompoundFilterID);
    157         TESTCASE(45,TestPropertySet);
    158         TESTCASE(46,TestNewEngine);
    159         TESTCASE(47,TestQuantifiedSegment);
    160         TESTCASE(48,TestDevanagariLatinRT);
    161         TESTCASE(49,TestTeluguLatinRT);
    162         TESTCASE(50,TestCompoundLatinRT);
    163         TESTCASE(51,TestSanskritLatinRT);
    164         TESTCASE(52,TestLocaleInstantiation);
    165         TESTCASE(53,TestTitleAccents);
    166         TESTCASE(54,TestLocaleResource);
    167         TESTCASE(55,TestParseError);
    168         TESTCASE(56,TestOutputSet);
    169         TESTCASE(57,TestVariableRange);
    170         TESTCASE(58,TestInvalidPostContext);
    171         TESTCASE(59,TestIDForms);
    172         TESTCASE(60,TestToRulesMark);
    173         TESTCASE(61,TestEscape);
    174         TESTCASE(62,TestAnchorMasking);
    175         TESTCASE(63,TestDisplayName);
    176         TESTCASE(64,TestSpecialCases);
    177 #if !UCONFIG_NO_FILE_IO
    178         TESTCASE(65,TestIncrementalProgress);
    179 #endif
    180         TESTCASE(66,TestSurrogateCasing);
    181         TESTCASE(67,TestFunction);
    182         TESTCASE(68,TestInvalidBackRef);
    183         TESTCASE(69,TestMulticharStringSet);
    184         TESTCASE(70,TestUserFunction);
    185         TESTCASE(71,TestAnyX);
    186         TESTCASE(72,TestSourceTargetSet);
    187         TESTCASE(73,TestGurmukhiDevanagari);
    188         TESTCASE(74,TestPatternWhiteSpace);
    189         TESTCASE(75,TestAllCodepoints);
    190         TESTCASE(76,TestBoilerplate);
    191         TESTCASE(77,TestAlternateSyntax);
    192         TESTCASE(78,TestBeginEnd);
    193         TESTCASE(79,TestBeginEndToRules);
    194         TESTCASE(80,TestRegisterAlias);
    195         TESTCASE(81,TestRuleStripping);
    196         TESTCASE(82,TestHalfwidthFullwidth);
    197         TESTCASE(83,TestThai);
    198         TESTCASE(84,TestAny);
    199         default: name = ""; break;
    200     }
    201 }
    202 
    203 /**
    204  * Make sure every system transliterator can be instantiated.
    205  *
    206  * ALSO test that the result of toRules() for each rule is a valid
    207  * rule.  Do this here so we don't have to have another test that
    208  * instantiates everything as well.
    209  */
    210 void TransliteratorTest::TestInstantiation() {
    211     UErrorCode ec = U_ZERO_ERROR;
    212     StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
    213     assertSuccess("getAvailableIDs()", ec);
    214     assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
    215     int32_t n = Transliterator::countAvailableIDs();
    216     assertTrue("getAvailableIDs().count()==countAvailableIDs()",
    217                avail->count(ec) == n);
    218     assertSuccess("count()", ec);
    219     UnicodeString name;
    220     for (int32_t i=0; i<n; ++i) {
    221         const UnicodeString& id = *avail->snext(ec);
    222         if (!assertSuccess("snext()", ec) ||
    223             !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
    224             break;
    225         }
    226         UnicodeString id2 = Transliterator::getAvailableID(i);
    227         if (id.length() < 1) {
    228             errln(UnicodeString("FAIL: getAvailableID(") +
    229                   i + ") returned empty string");
    230             continue;
    231         }
    232         if (id != id2) {
    233             errln(UnicodeString("FAIL: getAvailableID(") +
    234                   i + ") != getAvailableIDs().snext()");
    235             continue;
    236         }
    237         UParseError parseError;
    238         UErrorCode status = U_ZERO_ERROR;
    239         Transliterator* t = Transliterator::createInstance(id,
    240                               UTRANS_FORWARD, parseError,status);
    241         name.truncate(0);
    242         Transliterator::getDisplayName(id, name);
    243         if (t == 0) {
    244 #if UCONFIG_NO_BREAK_ITERATION
    245             // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
    246             if (id.compare((UnicodeString)"Thai-Latn") != 0 &&
    247                 id.compare((UnicodeString)"Thai-Latin") != 0)
    248 #endif
    249                 dataerrln(UnicodeString("FAIL: Couldn't create ") + id +
    250                       /*", parse error " + parseError.code +*/
    251                       ", line " + parseError.line +
    252                       ", offset " + parseError.offset +
    253                       ", pre-context " + prettify(parseError.preContext, TRUE) +
    254                       ", post-context " +prettify(parseError.postContext,TRUE) +
    255                       ", Error: " + u_errorName(status));
    256                 // When createInstance fails, it deletes the failing
    257                 // entry from the available ID list.  We detect this
    258                 // here by looking for a change in countAvailableIDs.
    259             int32_t nn = Transliterator::countAvailableIDs();
    260             if (nn == (n - 1)) {
    261                 n = nn;
    262                 --i; // Compensate for deleted entry
    263             }
    264         } else {
    265             logln(UnicodeString("OK: ") + name + " (" + id + ")");
    266 
    267             // Now test toRules
    268             UnicodeString rules;
    269             t->toRules(rules, TRUE);
    270             Transliterator *u = Transliterator::createFromRules("x",
    271                                     rules, UTRANS_FORWARD, parseError,status);
    272             if (u == 0) {
    273                 errln(UnicodeString("FAIL: ") + id +
    274                       ".createFromRules() => bad rules" +
    275                       /*", parse error " + parseError.code +*/
    276                       ", line " + parseError.line +
    277                       ", offset " + parseError.offset +
    278                       ", context " + prettify(parseError.preContext, TRUE) +
    279                       ", rules: " + prettify(rules, TRUE));
    280             } else {
    281                 delete u;
    282             }
    283             delete t;
    284         }
    285     }
    286     assertTrue("snext()==NULL", avail->snext(ec)==NULL);
    287     assertSuccess("snext()", ec);
    288     delete avail;
    289 
    290     // Now test the failure path
    291     UParseError parseError;
    292     UErrorCode status = U_ZERO_ERROR;
    293     UnicodeString id("<Not a valid Transliterator ID>");
    294     Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
    295     if (t != 0) {
    296         errln("FAIL: " + id + " returned a transliterator");
    297         delete t;
    298     } else {
    299         logln("OK: Bogus ID handled properly");
    300     }
    301 }
    302 
    303 void TransliteratorTest::TestSimpleRules(void) {
    304     /* Example: rules 1. ab>x|y
    305      *                2. yc>z
    306      *
    307      * []|eabcd  start - no match, copy e to tranlated buffer
    308      * [e]|abcd  match rule 1 - copy output & adjust cursor
    309      * [ex|y]cd  match rule 2 - copy output & adjust cursor
    310      * [exz]|d   no match, copy d to transliterated buffer
    311      * [exzd]|   done
    312      */
    313     expect(UnicodeString("ab>x|y;", "") +
    314            "yc>z",
    315            "eabcd", "exzd");
    316 
    317     /* Another set of rules:
    318      *    1. ab>x|yzacw
    319      *    2. za>q
    320      *    3. qc>r
    321      *    4. cw>n
    322      *
    323      * []|ab       Rule 1
    324      * [x|yzacw]   No match
    325      * [xy|zacw]   Rule 2
    326      * [xyq|cw]    Rule 4
    327      * [xyqn]|     Done
    328      */
    329     expect(UnicodeString("ab>x|yzacw;") +
    330            "za>q;" +
    331            "qc>r;" +
    332            "cw>n",
    333            "ab", "xyqn");
    334 
    335     /* Test categories
    336      */
    337     UErrorCode status = U_ZERO_ERROR;
    338     UParseError parseError;
    339     Transliterator *t = Transliterator::createFromRules(
    340         "<ID>",
    341         UnicodeString("$dummy=").append((UChar)0xE100) +
    342         UnicodeString(";"
    343                       "$vowel=[aeiouAEIOU];"
    344                       "$lu=[:Lu:];"
    345                       "$vowel } $lu > '!';"
    346                       "$vowel > '&';"
    347                       "'!' { $lu > '^';"
    348                       "$lu > '*';"
    349                       "a > ERROR", ""),
    350         UTRANS_FORWARD, parseError,
    351         status);
    352     if (U_FAILURE(status)) {
    353         dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status));
    354         return;
    355     }
    356     expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
    357     delete t;
    358 }
    359 
    360 /**
    361  * Test inline set syntax and set variable syntax.
    362  */
    363 void TransliteratorTest::TestInlineSet(void) {
    364     expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
    365     expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
    366 
    367     expect(UnicodeString(
    368            "$digit = [0-9];"
    369            "$alpha = [a-zA-Z];"
    370            "$alphanumeric = [$digit $alpha];" // ***
    371            "$special = [^$alphanumeric];"     // ***
    372            "$alphanumeric > '-';"
    373            "$special > '*';", ""),
    374 
    375            "thx-1138", "---*----");
    376 }
    377 
    378 /**
    379  * Create some inverses and confirm that they work.  We have to be
    380  * careful how we do this, since the inverses will not be true
    381  * inverses -- we can't throw any random string at the composition
    382  * of the transliterators and expect the identity function.  F x
    383  * F' != I.  However, if we are careful about the input, we will
    384  * get the expected results.
    385  */
    386 void TransliteratorTest::TestRuleBasedInverse(void) {
    387     UnicodeString RULES =
    388         UnicodeString("abc>zyx;") +
    389         "ab>yz;" +
    390         "bc>zx;" +
    391         "ca>xy;" +
    392         "a>x;" +
    393         "b>y;" +
    394         "c>z;" +
    395 
    396         "abc<zyx;" +
    397         "ab<yz;" +
    398         "bc<zx;" +
    399         "ca<xy;" +
    400         "a<x;" +
    401         "b<y;" +
    402         "c<z;" +
    403 
    404         "";
    405 
    406     const char* DATA[] = {
    407         // Careful here -- random strings will not work.  If we keep
    408         // the left side to the domain and the right side to the range
    409         // we will be okay though (left, abc; right xyz).
    410         "a", "x",
    411         "abcacab", "zyxxxyy",
    412         "caccb", "xyzzy",
    413     };
    414 
    415     int32_t DATA_length = UPRV_LENGTHOF(DATA);
    416 
    417     UErrorCode status = U_ZERO_ERROR;
    418     UParseError parseError;
    419     Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
    420                                 UTRANS_FORWARD, parseError, status);
    421     Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
    422                                 UTRANS_REVERSE, parseError, status);
    423     if (U_FAILURE(status)) {
    424         errln("FAIL: RBT constructor failed");
    425         return;
    426     }
    427     for (int32_t i=0; i<DATA_length; i+=2) {
    428         expect(*fwd, DATA[i], DATA[i+1]);
    429         expect(*rev, DATA[i+1], DATA[i]);
    430     }
    431     delete fwd;
    432     delete rev;
    433 }
    434 
    435 /**
    436  * Basic test of keyboard.
    437  */
    438 void TransliteratorTest::TestKeyboard(void) {
    439     UParseError parseError;
    440     UErrorCode status = U_ZERO_ERROR;
    441     Transliterator *t = Transliterator::createFromRules("<ID>",
    442                               UnicodeString("psch>Y;")
    443                               +"ps>y;"
    444                               +"ch>x;"
    445                               +"a>A;",
    446                               UTRANS_FORWARD, parseError,
    447                               status);
    448     if (U_FAILURE(status)) {
    449         errln("FAIL: RBT constructor failed");
    450         return;
    451     }
    452     const char* DATA[] = {
    453         // insertion, buffer
    454         "a", "A",
    455         "p", "Ap",
    456         "s", "Aps",
    457         "c", "Apsc",
    458         "a", "AycA",
    459         "psch", "AycAY",
    460         0, "AycAY", // null means finishKeyboardTransliteration
    461     };
    462 
    463     keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
    464     delete t;
    465 }
    466 
    467 /**
    468  * Basic test of keyboard with cursor.
    469  */
    470 void TransliteratorTest::TestKeyboard2(void) {
    471     UParseError parseError;
    472     UErrorCode status = U_ZERO_ERROR;
    473     Transliterator *t = Transliterator::createFromRules("<ID>",
    474                               UnicodeString("ych>Y;")
    475                               +"ps>|y;"
    476                               +"ch>x;"
    477                               +"a>A;",
    478                               UTRANS_FORWARD, parseError,
    479                               status);
    480     if (U_FAILURE(status)) {
    481         errln("FAIL: RBT constructor failed");
    482         return;
    483     }
    484     const char* DATA[] = {
    485         // insertion, buffer
    486         "a", "A",
    487         "p", "Ap",
    488         "s", "Aps", // modified for rollback - "Ay",
    489         "c", "Apsc", // modified for rollback - "Ayc",
    490         "a", "AycA",
    491         "p", "AycAp",
    492         "s", "AycAps", // modified for rollback - "AycAy",
    493         "c", "AycApsc", // modified for rollback - "AycAyc",
    494         "h", "AycAY",
    495         0, "AycAY", // null means finishKeyboardTransliteration
    496     };
    497 
    498     keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
    499     delete t;
    500 }
    501 
    502 /**
    503  * Test keyboard transliteration with back-replacement.
    504  */
    505 void TransliteratorTest::TestKeyboard3(void) {
    506     // We want th>z but t>y.  Furthermore, during keyboard
    507     // transliteration we want t>y then yh>z if t, then h are
    508     // typed.
    509     UnicodeString RULES("t>|y;"
    510                         "yh>z;");
    511 
    512     const char* DATA[] = {
    513         // Column 1: characters to add to buffer (as if typed)
    514         // Column 2: expected appearance of buffer after
    515         //           keyboard xliteration.
    516         "a", "a",
    517         "b", "ab",
    518         "t", "abt", // modified for rollback - "aby",
    519         "c", "abyc",
    520         "t", "abyct", // modified for rollback - "abycy",
    521         "h", "abycz",
    522         0, "abycz", // null means finishKeyboardTransliteration
    523     };
    524 
    525     UParseError parseError;
    526     UErrorCode status = U_ZERO_ERROR;
    527     Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
    528     if (U_FAILURE(status)) {
    529         errln("FAIL: RBT constructor failed");
    530         return;
    531     }
    532     keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
    533     delete t;
    534 }
    535 
    536 void TransliteratorTest::keyboardAux(const Transliterator& t,
    537                                      const char* DATA[], int32_t DATA_length) {
    538     UErrorCode status = U_ZERO_ERROR;
    539     UTransPosition index={0, 0, 0, 0};
    540     UnicodeString s;
    541     for (int32_t i=0; i<DATA_length; i+=2) {
    542         UnicodeString log;
    543         if (DATA[i] != 0) {
    544             log = s + " + "
    545                 + DATA[i]
    546                 + " -> ";
    547             t.transliterate(s, index, DATA[i], status);
    548         } else {
    549             log = s + " => ";
    550             t.finishTransliteration(s, index);
    551         }
    552         // Show the start index '{' and the cursor '|'
    553         UnicodeString a, b, c;
    554         s.extractBetween(0, index.contextStart, a);
    555         s.extractBetween(index.contextStart, index.start, b);
    556         s.extractBetween(index.start, s.length(), c);
    557         log.append(a).
    558             append((UChar)LEFT_BRACE).
    559             append(b).
    560             append((UChar)PIPE).
    561             append(c);
    562         if (s == DATA[i+1] && U_SUCCESS(status)) {
    563             logln(log);
    564         } else {
    565             errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
    566         }
    567     }
    568 }
    569 
/**
 * Placeholder for the Arabic test.  The function currently executes
 * nothing: its former body is kept below, commented out.
 */
void TransliteratorTest::TestArabic(void) {
// Test disabled for 2.0 until new Arabic transliterator can be written.
//    /*
//    const char* DATA[] = {
//        "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
//                  "\u0627\u0644\u0644\u063a\u0629\u0020"+
//                  "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
//                  "\u0628\u0628\u0646\u0638\u0645\u0020"+
//                  "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
//                  "\u062c\u0645\u064a\u0644\u0629",
//    };
//    */
//
//    UChar ar_raw[] = {
//        0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
//        0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
//        0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
//        0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
//        0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
//        0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
//    };
//    UnicodeString ar(ar_raw);
//    UErrorCode status=U_ZERO_ERROR;
//    UParseError parseError;
//    Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
//    if (t == 0) {
//        errln("FAIL: createInstance failed");
//        return;
//    }
//    expect(*t, "Arabic", ar);
//    delete t;
}
    602 
    603 /**
    604  * Compose the Kana transliterator forward and reverse and try
    605  * some strings that should come out unchanged.
    606  */
    607 void TransliteratorTest::TestCompoundKana(void) {
    608     UParseError parseError;
    609     UErrorCode status = U_ZERO_ERROR;
    610     Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
    611     if (t == 0) {
    612         dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status));
    613     } else {
    614         expect(*t, "aaaaa", "aaaaa");
    615         delete t;
    616     }
    617 }
    618 
    619 /**
    620  * Compose the hex transliterators forward and reverse.
    621  */
    622 void TransliteratorTest::TestCompoundHex(void) {
    623     UParseError parseError;
    624     UErrorCode status = U_ZERO_ERROR;
    625     Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
    626     Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
    627     Transliterator* transab[] = { a, b };
    628     Transliterator* transba[] = { b, a };
    629     if (a == 0 || b == 0) {
    630         errln("FAIL: construction failed");
    631         delete a;
    632         delete b;
    633         return;
    634     }
    635     // Do some basic tests of a
    636     expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
    637     // Do some basic tests of b
    638     expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
    639 
    640     Transliterator* ab = new CompoundTransliterator(transab, 2);
    641     UnicodeString s("abcde", "");
    642     expect(*ab, s, s);
    643 
    644     UnicodeString str(s);
    645     a->transliterate(str);
    646     Transliterator* ba = new CompoundTransliterator(transba, 2);
    647     expect(*ba, str, str);
    648 
    649     delete ab;
    650     delete ba;
    651     delete a;
    652     delete b;
    653 }
    654 
    655 int gTestFilterClassID = 0;
    656 /**
    657  * Used by TestFiltering().
    658  */
    659 class TestFilter : public UnicodeFilter {
    660     virtual UnicodeFunctor* clone() const {
    661         return new TestFilter(*this);
    662     }
    663     virtual UBool contains(UChar32 c) const {
    664         return c != (UChar)0x0063 /*c*/;
    665     }
    666     // Stubs
    667     virtual UnicodeString& toPattern(UnicodeString& result,
    668                                      UBool /*escapeUnprintable*/) const {
    669         return result;
    670     }
    671     virtual UBool matchesIndexValue(uint8_t /*v*/) const {
    672         return FALSE;
    673     }
    674     virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {}
    675 public:
    676     UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
    677 };
    678 
    679 /**
    680  * Do some basic tests of filtering.
    681  */
    682 void TransliteratorTest::TestFiltering(void) {
    683     UParseError parseError;
    684     UErrorCode status = U_ZERO_ERROR;
    685     Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
    686     if (hex == 0) {
    687         errln("FAIL: createInstance(Any-Hex) failed");
    688         return;
    689     }
    690     hex->adoptFilter(new TestFilter());
    691     UnicodeString s("abcde");
    692     hex->transliterate(s);
    693     UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
    694     if (s == exp) {
    695         logln(UnicodeString("Ok:   \"") + exp + "\"");
    696     } else {
    697         logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
    698     }
    699 
    700     // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
    701     UnicodeFilter *f = hex->orphanFilter();
    702     if (f == NULL){
    703         errln("FAIL: orphanFilter() should get a UnicodeFilter");
    704     } else {
    705         delete f;
    706     }
    707     delete hex;
    708 }
    709 
    710 /**
    711  * Test anchors
    712  */
    713 void TransliteratorTest::TestAnchors(void) {
    714     expect(UnicodeString("^a  > 0; a$ > 2 ; a > 1;", ""),
    715            "aaa",
    716            "012");
    717     expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
    718            "aaa",
    719            "012");
    720     expect(UnicodeString("^ab  > 01 ;"
    721            " ab  > |8 ;"
    722            "  b  > k ;"
    723            " 8x$ > 45 ;"
    724            " 8x  > 77 ;", ""),
    725 
    726            "ababbabxabx",
    727            "018k7745");
    728     expect(UnicodeString("$s = [z$] ;"
    729            "$s{ab    > 01 ;"
    730            "   ab    > |8 ;"
    731            "    b    > k ;"
    732            "   8x}$s > 45 ;"
    733            "   8x    > 77 ;", ""),
    734 
    735            "abzababbabxzabxabx",
    736            "01z018k45z01x45");
    737 }
    738 
    739 /**
    740  * Test pattern quoting and escape mechanisms.
    741  */
    742 void TransliteratorTest::TestPatternQuoting(void) {
    743     // Array of 3n items
    744     // Each item is <rules>, <input>, <expected output>
    745     const UnicodeString DATA[] = {
    746         UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
    747         UnicodeString(UChar(0x4E01)),
    748         "[male adult]"
    749     };
    750 
    751     for (int32_t i=0; i<3; i+=3) {
    752         logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
    753         UParseError parseError;
    754         UErrorCode status = U_ZERO_ERROR;
    755         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
    756         if (U_FAILURE(status)) {
    757             errln("RBT constructor failed");
    758         } else {
    759             expect(*t, DATA[i+1], DATA[i+2]);
    760         }
    761         delete t;
    762     }
    763 }
    764 
    765 /**
    766  * Regression test for bugs found in Greek transliteration.
    767  */
    768 void TransliteratorTest::TestJ277(void) {
    769     UErrorCode status = U_ZERO_ERROR;
    770     UParseError parseError;
    771     Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
    772     if (gl == NULL) {
    773         dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status));
    774         return;
    775     }
    776 
    777     UChar sigma = 0x3C3;
    778     UChar upsilon = 0x3C5;
    779     UChar nu = 0x3BD;
    780 //    UChar PHI = 0x3A6;
    781     UChar alpha = 0x3B1;
    782 //    UChar omega = 0x3C9;
    783 //    UChar omicron = 0x3BF;
    784 //    UChar epsilon = 0x3B5;
    785 
    786     // sigma upsilon nu -> syn
    787     UnicodeString syn;
    788     syn.append(sigma).append(upsilon).append(nu);
    789     expect(*gl, syn, "syn");
    790 
    791     // sigma alpha upsilon nu -> saun
    792     UnicodeString sayn;
    793     sayn.append(sigma).append(alpha).append(upsilon).append(nu);
    794     expect(*gl, sayn, "saun");
    795 
    796     // Again, using a smaller rule set
    797     UnicodeString rules(
    798                 "$alpha   = \\u03B1;"
    799                 "$nu      = \\u03BD;"
    800                 "$sigma   = \\u03C3;"
    801                 "$ypsilon = \\u03C5;"
    802                 "$vowel   = [aeiouAEIOU$alpha$ypsilon];"
    803                 "s <>           $sigma;"
    804                 "a <>           $alpha;"
    805                 "u <>  $vowel { $ypsilon;"
    806                 "y <>           $ypsilon;"
    807                 "n <>           $nu;",
    808                 "");
    809     Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
    810     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
    811     expect(*mini, syn, "syn");
    812     expect(*mini, sayn, "saun");
    813     delete mini;
    814     mini = NULL;
    815 
    816 #if !UCONFIG_NO_FORMATTING
    817     // Transliterate the Greek locale data
    818     Locale el("el");
    819     DateFormatSymbols syms(el, status);
    820     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
    821     int32_t i, count;
    822     const UnicodeString* data = syms.getMonths(count);
    823     for (i=0; i<count; ++i) {
    824         if (data[i].length() == 0) {
    825             continue;
    826         }
    827         UnicodeString out(data[i]);
    828         gl->transliterate(out);
    829         UBool ok = TRUE;
    830         if (data[i].length() >= 2 && out.length() >= 2 &&
    831             u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
    832             if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
    833                 ok = FALSE;
    834             }
    835         }
    836         if (ok) {
    837             logln(prettify(data[i] + " -> " + out));
    838         } else {
    839             errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
    840         }
    841     }
    842 #endif
    843 
    844     delete gl;
    845 }
    846 
    847 /**
    848  * Prefix, suffix support in hex transliterators
    849  */
    850 void TransliteratorTest::TestJ243(void) {
    851     UErrorCode ec = U_ZERO_ERROR;
    852 
    853     // Test default Hex-Any, which should handle
    854     // \u, \U, u+, and U+
    855     Transliterator *hex =
    856         Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
    857     if (assertSuccess("getInstance", ec)) {
    858         expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
    859     }
    860     delete hex;
    861 
    862 //    // Try a custom Hex-Unicode
    863 //    // \uXXXX and &#xXXXX;
    864 //    ec = U_ZERO_ERROR;
    865 //    HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
    866 //    expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;", ""),
    867 //           "abcd5fx012&#x00033;");
    868 //    // Try custom Any-Hex (default is tested elsewhere)
    869 //    ec = U_ZERO_ERROR;
    870 //    UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
    871 //    expect(hex3, "012", "&#x30;&#x31;&#x32;");
    872 }
    873 
    874 /**
    875  * Parsers need better syntax error messages.
    876  */
    877 void TransliteratorTest::TestJ329(void) {
    878 
    879     struct { UBool containsErrors; const char* rule; } DATA[] = {
    880         { FALSE, "a > b; c > d" },
    881         { TRUE,  "a > b; no operator; c > d" },
    882     };
    883     int32_t DATA_length = UPRV_LENGTHOF(DATA);
    884 
    885     for (int32_t i=0; i<DATA_length; ++i) {
    886         UErrorCode status = U_ZERO_ERROR;
    887         UParseError parseError;
    888         Transliterator *rbt = Transliterator::createFromRules("<ID>",
    889                                     DATA[i].rule,
    890                                     UTRANS_FORWARD,
    891                                     parseError,
    892                                     status);
    893         UBool gotError = U_FAILURE(status);
    894         UnicodeString desc(DATA[i].rule);
    895         desc.append(gotError ? " -> error" : " -> no error");
    896         if (gotError) {
    897             desc = desc + ", ParseError code=" + u_errorName(status) +
    898                 " line=" + parseError.line +
    899                 " offset=" + parseError.offset +
    900                 " context=" + parseError.preContext;
    901         }
    902         if (gotError == DATA[i].containsErrors) {
    903             logln(UnicodeString("Ok:   ") + desc);
    904         } else {
    905             errln(UnicodeString("FAIL: ") + desc);
    906         }
    907         delete rbt;
    908     }
    909 }
    910 
    911 /**
    912  * Test segments and segment references.
    913  */
    914 void TransliteratorTest::TestSegments(void) {
    915     // Array of 3n items
    916     // Each item is <rules>, <input>, <expected output>
    917     UnicodeString DATA[] = {
    918         "([a-z]) '.' ([0-9]) > $2 '-' $1",
    919         "abc.123.xyz.456",
    920         "ab1-c23.xy4-z56",
    921 
    922         // nested
    923         "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
    924         "a1 b2",
    925         "a1.a.1 b2.b.2",
    926     };
    927     int32_t DATA_length = UPRV_LENGTHOF(DATA);
    928 
    929     for (int32_t i=0; i<DATA_length; i+=3) {
    930         logln("Pattern: " + prettify(DATA[i]));
    931         UParseError parseError;
    932         UErrorCode status = U_ZERO_ERROR;
    933         Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status);
    934         if (U_FAILURE(status)) {
    935             errln("FAIL: RBT constructor");
    936         } else {
    937             expect(*t, DATA[i+1], DATA[i+2]);
    938         }
    939         delete t;
    940     }
    941 }
    942 
    943 /**
    944  * Test cursor positioning outside of the key
    945  */
    946 void TransliteratorTest::TestCursorOffset(void) {
    947     // Array of 3n items
    948     // Each item is <rules>, <input>, <expected output>
    949     UnicodeString DATA[] = {
    950         "pre {alpha} post > | @ ALPHA ;"
    951         "eALPHA > beta ;"
    952         "pre {beta} post > BETA @@ | ;"
    953         "post > xyz",
    954 
    955         "prealphapost prebetapost",
    956 
    957         "prbetaxyz preBETApost",
    958     };
    959     int32_t DATA_length = UPRV_LENGTHOF(DATA);
    960 
    961     for (int32_t i=0; i<DATA_length; i+=3) {
    962         logln("Pattern: " + prettify(DATA[i]));
    963         UParseError parseError;
    964         UErrorCode status = U_ZERO_ERROR;
    965         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
    966         if (U_FAILURE(status)) {
    967             errln("FAIL: RBT constructor");
    968         } else {
    969             expect(*t, DATA[i+1], DATA[i+2]);
    970         }
    971         delete t;
    972     }
    973 }
    974 
    975 /**
    976  * Test zero length and > 1 char length variable values.  Test
    977  * use of variable refs in UnicodeSets.
    978  */
    979 void TransliteratorTest::TestArbitraryVariableValues(void) {
    980     // Array of 3n items
    981     // Each item is <rules>, <input>, <expected output>
    982     UnicodeString DATA[] = {
    983         "$abe = ab;"
    984         "$pat = x[yY]z;"
    985         "$ll  = 'a-z';"
    986         "$llZ = [$ll];"
    987         "$llY = [$ll$pat];"
    988         "$emp = ;"
    989 
    990         "$abe > ABE;"
    991         "$pat > END;"
    992         "$llZ > 1;"
    993         "$llY > 2;"
    994         "7$emp 8 > 9;"
    995         "",
    996 
    997         "ab xYzxyz stY78",
    998         "ABE ENDEND 1129",
    999     };
   1000     int32_t DATA_length = UPRV_LENGTHOF(DATA);
   1001 
   1002     for (int32_t i=0; i<DATA_length; i+=3) {
   1003         logln("Pattern: " + prettify(DATA[i]));
   1004         UParseError parseError;
   1005         UErrorCode status = U_ZERO_ERROR;
   1006         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
   1007         if (U_FAILURE(status)) {
   1008             errln("FAIL: RBT constructor");
   1009         } else {
   1010             expect(*t, DATA[i+1], DATA[i+2]);
   1011         }
   1012         delete t;
   1013     }
   1014 }
   1015 
   1016 /**
   1017  * Confirm that the contextStart, contextLimit, start, and limit
   1018  * behave correctly. J474.
   1019  */
   1020 void TransliteratorTest::TestPositionHandling(void) {
   1021     // Array of 3n items
   1022     // Each item is <rules>, <input>, <expected output>
   1023     const char* DATA[] = {
   1024         "a{t} > SS ; {t}b > UU ; {t} > TT ;",
   1025         "xtat txtb", // pos 0,9,0,9
   1026         "xTTaSS TTxUUb",
   1027 
   1028         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
   1029         "xtat txtb", // pos 2,9,3,8
   1030         "xtaSS TTxUUb",
   1031 
   1032         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
   1033         "xtat txtb", // pos 3,8,3,8
   1034         "xtaTT TTxTTb",
   1035     };
   1036 
   1037     // Array of 4n positions -- these go with the DATA array
   1038     // They are: contextStart, contextLimit, start, limit
   1039     int32_t POS[] = {
   1040         0, 9, 0, 9,
   1041         2, 9, 3, 8,
   1042         3, 8, 3, 8,
   1043     };
   1044 
   1045     int32_t n = UPRV_LENGTHOF(DATA) / 3;
   1046     for (int32_t i=0; i<n; i++) {
   1047         UErrorCode status = U_ZERO_ERROR;
   1048         UParseError parseError;
   1049         Transliterator *t = Transliterator::createFromRules("<ID>",
   1050                                 DATA[3*i], UTRANS_FORWARD, parseError, status);
   1051         if (U_FAILURE(status)) {
   1052             delete t;
   1053             errln("FAIL: RBT constructor");
   1054             return;
   1055         }
   1056         UTransPosition pos;
   1057         pos.contextStart= POS[4*i];
   1058         pos.contextLimit = POS[4*i+1];
   1059         pos.start = POS[4*i+2];
   1060         pos.limit = POS[4*i+3];
   1061         UnicodeString rsource(DATA[3*i+1]);
   1062         t->transliterate(rsource, pos, status);
   1063         if (U_FAILURE(status)) {
   1064             delete t;
   1065             errln("FAIL: transliterate");
   1066             return;
   1067         }
   1068         t->finishTransliteration(rsource, pos);
   1069         expectAux(DATA[3*i],
   1070                   DATA[3*i+1],
   1071                   rsource,
   1072                   DATA[3*i+2]);
   1073         delete t;
   1074     }
   1075 }
   1076 
   1077 /**
   1078  * Test the Hiragana-Katakana transliterator.
   1079  */
   1080 void TransliteratorTest::TestHiraganaKatakana(void) {
   1081     UParseError parseError;
   1082     UErrorCode status = U_ZERO_ERROR;
   1083     Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
   1084     Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
   1085     if (hk == 0 || kh == 0) {
   1086         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
   1087         delete hk;
   1088         delete kh;
   1089         return;
   1090     }
   1091 
   1092     // Array of 3n items
   1093     // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
   1094     const char* DATA[] = {
   1095         "both",
   1096         "\\u3042\\u3090\\u3099\\u3092\\u3050",
   1097         "\\u30A2\\u30F8\\u30F2\\u30B0",
   1098 
   1099         "kh",
   1100         "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
   1101         "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
   1102     };
   1103     int32_t DATA_length = UPRV_LENGTHOF(DATA);
   1104 
   1105     for (int32_t i=0; i<DATA_length; i+=3) {
   1106         UnicodeString h = CharsToUnicodeString(DATA[i+1]);
   1107         UnicodeString k = CharsToUnicodeString(DATA[i+2]);
   1108         switch (*DATA[i]) {
   1109         case 0x68: //'h': // Hiragana-Katakana
   1110             expect(*hk, h, k);
   1111             break;
   1112         case 0x6B: //'k': // Katakana-Hiragana
   1113             expect(*kh, k, h);
   1114             break;
   1115         case 0x62: //'b': // both
   1116             expect(*hk, h, k);
   1117             expect(*kh, k, h);
   1118             break;
   1119         }
   1120     }
   1121     delete hk;
   1122     delete kh;
   1123 }
   1124 
   1125 /**
   1126  * Test cloning / copy constructor of RBT.
   1127  */
   1128 void TransliteratorTest::TestCopyJ476(void) {
   1129     // The real test here is what happens when the destructors are
   1130     // called.  So we let one object get destructed, and check to
   1131     // see that its copy still works.
   1132     Transliterator *t2 = 0;
   1133     {
   1134         UParseError parseError;
   1135         UErrorCode status = U_ZERO_ERROR;
   1136         Transliterator *t1 = Transliterator::createFromRules("t1",
   1137             "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
   1138         if (U_FAILURE(status)) {
   1139             errln("FAIL: RBT constructor");
   1140             return;
   1141         }
   1142         t2 = t1->clone(); // Call copy constructor under the covers.
   1143         expect(*t1, "abcfoofoo", "ABcbar");
   1144         delete t1;
   1145     }
   1146     expect(*t2, "abcfoofoo", "ABcbar");
   1147     delete t2;
   1148 }
   1149 
   1150 /**
   1151  * Test inter-Indic transliterators.  These are composed.
   1152  * ICU4C Jitterbug 483.
   1153  */
   1154 void TransliteratorTest::TestInterIndic(void) {
   1155     UnicodeString ID("Devanagari-Gujarati", "");
   1156     UErrorCode status = U_ZERO_ERROR;
   1157     UParseError parseError;
   1158     Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
   1159     if (dg == 0) {
   1160         dataerrln("FAIL: createInstance(" + ID + ") returned NULL - " + u_errorName(status));
   1161         return;
   1162     }
   1163     UnicodeString id = dg->getID();
   1164     if (id != ID) {
   1165         errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
   1166     }
   1167     UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
   1168     UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
   1169     expect(*dg, dev, guj);
   1170     delete dg;
   1171 }
   1172 
   1173 /**
   1174  * Test filter syntax in IDs. (J918)
   1175  */
   1176 void TransliteratorTest::TestFilterIDs(void) {
   1177     // Array of 3n strings:
   1178     // <id>, <inverse id>, <input>, <expected output>
   1179     const char* DATA[] = {
   1180         "[aeiou]Any-Hex", // ID
   1181         "[aeiou]Hex-Any", // expected inverse ID
   1182         "quizzical",      // src
   1183         "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
   1184 
   1185         "[aeiou]Any-Hex;[^5]Hex-Any",
   1186         "[^5]Any-Hex;[aeiou]Hex-Any",
   1187         "quizzical",
   1188         "q\\u0075izzical",
   1189 
   1190         "[abc]Null",
   1191         "[abc]Null",
   1192         "xyz",
   1193         "xyz",
   1194     };
   1195     enum { DATA_length = UPRV_LENGTHOF(DATA) };
   1196 
   1197     for (int i=0; i<DATA_length; i+=4) {
   1198         UnicodeString ID(DATA[i], "");
   1199         UnicodeString uID(DATA[i+1], "");
   1200         UnicodeString data2(DATA[i+2], "");
   1201         UnicodeString data3(DATA[i+3], "");
   1202         UParseError parseError;
   1203         UErrorCode status = U_ZERO_ERROR;
   1204         Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
   1205         if (t == 0) {
   1206             errln("FAIL: createInstance(" + ID + ") returned NULL");
   1207             return;
   1208         }
   1209         expect(*t, data2, data3);
   1210 
   1211         // Check the ID
   1212         if (ID != t->getID()) {
   1213             errln("FAIL: createInstance(" + ID + ").getID() => " +
   1214                   t->getID());
   1215         }
   1216 
   1217         // Check the inverse
   1218         Transliterator *u = t->createInverse(status);
   1219         if (u == 0) {
   1220             errln("FAIL: " + ID + ".createInverse() returned NULL");
   1221         } else if (u->getID() != uID) {
   1222             errln("FAIL: " + ID + ".createInverse().getID() => " +
   1223                   u->getID() + ", expected " + uID);
   1224         }
   1225 
   1226         delete t;
   1227         delete u;
   1228     }
   1229 }
   1230 
   1231 /**
   1232  * Test the case mapping transliterators.
   1233  */
   1234 void TransliteratorTest::TestCaseMap(void) {
   1235     UParseError parseError;
   1236     UErrorCode status = U_ZERO_ERROR;
   1237     Transliterator* toUpper =
   1238         Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
   1239     Transliterator* toLower =
   1240         Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
   1241     Transliterator* toTitle =
   1242         Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
   1243     if (toUpper==0 || toLower==0 || toTitle==0) {
   1244         errln("FAIL: createInstance returned NULL");
   1245         delete toUpper;
   1246         delete toLower;
   1247         delete toTitle;
   1248         return;
   1249     }
   1250 
   1251     expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
   1252            "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
   1253     expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
   1254            "the quick brown foX jumped over the lazY dogs.");
   1255     expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
   1256            "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
   1257 
   1258     delete toUpper;
   1259     delete toLower;
   1260     delete toTitle;
   1261 }
   1262 
   1263 /**
   1264  * Test the name mapping transliterators.
   1265  */
   1266 void TransliteratorTest::TestNameMap(void) {
   1267     UParseError parseError;
   1268     UErrorCode status = U_ZERO_ERROR;
   1269     Transliterator* uni2name =
   1270         Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
   1271     Transliterator* name2uni =
   1272         Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
   1273     if (uni2name==0 || name2uni==0) {
   1274         errln("FAIL: createInstance returned NULL");
   1275         delete uni2name;
   1276         delete name2uni;
   1277         return;
   1278     }
   1279 
   1280     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
   1281     expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
   1282            CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
   1283     expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{  CJK UNIFIED  IDEOGRAPH-4E01  }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
   1284            CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
   1285 
   1286     delete uni2name;
   1287     delete name2uni;
   1288 
   1289     // round trip
   1290     Transliterator* t =
   1291         Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
   1292     if (t==0) {
   1293         errln("FAIL: createInstance returned NULL");
   1294         delete t;
   1295         return;
   1296     }
   1297 
   1298     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
   1299     UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
   1300     expect(*t, s, s);
   1301     delete t;
   1302 }
   1303 
   1304 /**
   1305  * Test liberalized ID syntax.  1006c
   1306  */
   1307 void TransliteratorTest::TestLiberalizedID(void) {
   1308     // Some test cases have an expected getID() value of NULL.  This
   1309     // means I have disabled the test case for now.  This stuff is
   1310     // still under development, and I haven't decided whether to make
   1311     // getID() return canonical case yet.  It will all get rewritten
   1312     // with the move to Source-Target/Variant IDs anyway. [aliu]
   1313     const char* DATA[] = {
   1314         "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",
   1315         "  Null  ", "Null", "whitespace",
   1316         " Latin[a-z]-Greek  ", "[a-z]Latin-Greek", "inline filter",
   1317         "  null  ; latin-greek  ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",
   1318     };
   1319     const int32_t DATA_length = UPRV_LENGTHOF(DATA);
   1320     UParseError parseError;
   1321     UErrorCode status= U_ZERO_ERROR;
   1322     for (int32_t i=0; i<DATA_length; i+=3) {
   1323         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
   1324         if (t == 0) {
   1325             dataerrln(UnicodeString("FAIL: ") + DATA[i+2] +
   1326                   " cannot create ID \"" + DATA[i] + "\" - " + u_errorName(status));
   1327         } else {
   1328             UnicodeString exp;
   1329             if (DATA[i+1]) {
   1330                 exp = UnicodeString(DATA[i+1], "");
   1331             }
   1332             // Don't worry about getID() if the expected char*
   1333             // is NULL -- see above.
   1334             if (exp.length() == 0 || exp == t->getID()) {
   1335                 logln(UnicodeString("Ok: ") + DATA[i+2] +
   1336                       " create ID \"" + DATA[i] + "\" => \"" +
   1337                       exp + "\"");
   1338             } else {
   1339                 errln(UnicodeString("FAIL: ") + DATA[i+2] +
   1340                       " create ID \"" + DATA[i] + "\" => \"" +
   1341                       t->getID() + "\", exp \"" + exp + "\"");
   1342             }
   1343             delete t;
   1344         }
   1345     }
   1346 }
   1347 
   1348 /* test for Jitterbug 912 */
   1349 void TransliteratorTest::TestCreateInstance(){
   1350     const char* FORWARD = "F";
   1351     const char* REVERSE = "R";
   1352     const char* DATA[] = {
   1353         // Column 1: id
   1354         // Column 2: direction
   1355         // Column 3: expected ID, or "" if expect failure
   1356         "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
   1357 
   1358         // JB#2689: bad compound causes crash
   1359         "InvalidSource-InvalidTarget", FORWARD, "",
   1360         "InvalidSource-InvalidTarget", REVERSE, "",
   1361         "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
   1362         "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
   1363         "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
   1364         "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
   1365 
   1366         NULL
   1367     };
   1368 
   1369     for (int32_t i=0; DATA[i]; i+=3) {
   1370         UParseError err;
   1371         UErrorCode ec = U_ZERO_ERROR;
   1372         UnicodeString id(DATA[i]);
   1373         UTransDirection dir = (DATA[i+1]==FORWARD)?
   1374             UTRANS_FORWARD:UTRANS_REVERSE;
   1375         UnicodeString expID(DATA[i+2]);
   1376         Transliterator* t =
   1377             Transliterator::createInstance(id,dir,err,ec);
   1378         UnicodeString newID;
   1379         if (t) {
   1380             newID = t->getID();
   1381         }
   1382         UBool ok = (newID == expID);
   1383         if (!t) {
   1384             newID = u_errorName(ec);
   1385         }
   1386         if (ok) {
   1387             logln((UnicodeString)"Ok: createInstance(" +
   1388                   id + "," + DATA[i+1] + ") => " + newID);
   1389         } else {
   1390             dataerrln((UnicodeString)"FAIL: createInstance(" +
   1391                   id + "," + DATA[i+1] + ") => " + newID +
   1392                   ", expected " + expID);
   1393         }
   1394         delete t;
   1395     }
   1396 }
   1397 
   1398 /**
   1399  * Test the normalization transliterator.
   1400  */
   1401 void TransliteratorTest::TestNormalizationTransliterator() {
   1402     // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
   1403     // PLEASE KEEP THEM IN SYNC WITH BasicTest.
   1404     const char* CANON[] = {
   1405         // Input               Decomposed            Composed
   1406         "cat",                "cat",                "cat"               ,
   1407         "\\u00e0ardvark",      "a\\u0300ardvark",     "\\u00e0ardvark"    ,
   1408 
   1409         "\\u1e0a",             "D\\u0307",            "\\u1e0a"            , // D-dot_above
   1410         "D\\u0307",            "D\\u0307",            "\\u1e0a"            , // D dot_above
   1411 
   1412         "\\u1e0c\\u0307",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_below dot_above
   1413         "\\u1e0a\\u0323",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_above dot_below
   1414         "D\\u0307\\u0323",      "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D dot_below dot_above
   1415 
   1416         "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
   1417         "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
   1418 
   1419         "\\u1E14",             "E\\u0304\\u0300",      "\\u1E14"            , // E-macron-grave
   1420         "\\u0112\\u0300",       "E\\u0304\\u0300",      "\\u1E14"            , // E-macron + grave
   1421         "\\u00c8\\u0304",       "E\\u0300\\u0304",      "\\u00c8\\u0304"      , // E-grave + macron
   1422 
   1423         "\\u212b",             "A\\u030a",            "\\u00c5"            , // angstrom_sign
   1424         "\\u00c5",             "A\\u030a",            "\\u00c5"            , // A-ring
   1425 
   1426         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated with 3.0
   1427         "\\u00fd\\uFB03n",      "y\\u0301\\uFB03n",     "\\u00fd\\uFB03n"     , //updated with 3.0
   1428 
   1429         "Henry IV",           "Henry IV",           "Henry IV"          ,
   1430         "Henry \\u2163",       "Henry \\u2163",       "Henry \\u2163"      ,
   1431 
   1432         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
   1433         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
   1434         "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E"      , // hw_ka + hw_ten
   1435         "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E"      , // ka + hw_ten
   1436         "\\uFF76\\u3099",       "\\uFF76\\u3099",       "\\uFF76\\u3099"      , // hw_ka + ten
   1437 
   1438         "A\\u0300\\u0316",      "A\\u0316\\u0300",      "\\u00C0\\u0316"      ,
   1439         0 // end
   1440     };
   1441 
   1442     const char* COMPAT[] = {
   1443         // Input               Decomposed            Composed
   1444         "\\uFB4f",             "\\u05D0\\u05DC",       "\\u05D0\\u05DC"     , // Alef-Lamed vs. Alef, Lamed
   1445 
   1446         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated for 3.0
   1447         "\\u00fd\\uFB03n",      "y\\u0301ffin",        "\\u00fdffin"        , // ffi ligature -> f + f + i
   1448 
   1449         "Henry IV",           "Henry IV",           "Henry IV"          ,
   1450         "Henry \\u2163",       "Henry IV",           "Henry IV"          ,
   1451 
   1452         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
   1453         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
   1454 
   1455         "\\uFF76\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // hw_ka + ten
   1456         0 // end
   1457     };
   1458 
   1459     int32_t i;
   1460     UParseError parseError;
   1461     UErrorCode status = U_ZERO_ERROR;
   1462     Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
   1463     Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
   1464     if (!NFD || !NFC) {
   1465         dataerrln("FAIL: createInstance failed: %s", u_errorName(status));
   1466         delete NFD;
   1467         delete NFC;
   1468         return;
   1469     }
   1470     for (i=0; CANON[i]; i+=3) {
   1471         UnicodeString in = CharsToUnicodeString(CANON[i]);
   1472         UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
   1473         UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
   1474         expect(*NFD, in, expd);
   1475         expect(*NFC, in, expc);
   1476     }
   1477     delete NFD;
   1478     delete NFC;
   1479 
   1480     Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
   1481     Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
   1482     if (!NFKD || !NFKC) {
   1483         dataerrln("FAIL: createInstance failed");
   1484         delete NFKD;
   1485         delete NFKC;
   1486         return;
   1487     }
   1488     for (i=0; COMPAT[i]; i+=3) {
   1489         UnicodeString in = CharsToUnicodeString(COMPAT[i]);
   1490         UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
   1491         UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
   1492         expect(*NFKD, in, expkd);
   1493         expect(*NFKC, in, expkc);
   1494     }
   1495     delete NFKD;
   1496     delete NFKC;
   1497 
   1498     UParseError pe;
   1499     status = U_ZERO_ERROR;
   1500     Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
   1501                                                        UTRANS_FORWARD,
   1502                                                        pe, status);
   1503     if (t == 0) {
   1504         errln("FAIL: createInstance failed");
   1505     }
   1506     expect(*t, CharsToUnicodeString("\\u010dx"),
   1507            CharsToUnicodeString("c\\u030C"));
   1508     delete t;
   1509 }
   1510 
   1511 /**
   1512  * Test compound RBT rules.
   1513  */
   1514 void TransliteratorTest::TestCompoundRBT(void) {
   1515     // Careful with spacing and ';' here:  Phrase this exactly
   1516     // as toRules() is going to return it.  If toRules() changes
   1517     // with regard to spacing or ';', then adjust this string.
   1518     UnicodeString rule("::Hex-Any;\n"
   1519                        "::Any-Lower;\n"
   1520                        "a > '.A.';\n"
   1521                        "b > '.B.';\n"
   1522                        "::[^t]Any-Upper;", "");
   1523     UParseError parseError;
   1524     UErrorCode status = U_ZERO_ERROR;
   1525     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
   1526     if (t == 0) {
   1527         errln("FAIL: createFromRules failed");
   1528         return;
   1529     }
   1530     expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
   1531            "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
   1532     UnicodeString r;
   1533     t->toRules(r, TRUE);
   1534     if (r == rule) {
   1535         logln((UnicodeString)"OK: toRules() => " + r);
   1536     } else {
   1537         errln((UnicodeString)"FAIL: toRules() => " + r +
   1538               ", expected " + rule);
   1539     }
   1540     delete t;
   1541 
   1542     // Now test toRules
   1543     t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
   1544     if (t == 0) {
   1545         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
   1546         return;
   1547     }
   1548     UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
   1549     t->toRules(r, TRUE);
   1550     if (r != exp) {
   1551         errln((UnicodeString)"FAIL: toRules() => " + r +
   1552               ", expected " + exp);
   1553     } else {
   1554         logln((UnicodeString)"OK: toRules() => " + r);
   1555     }
   1556     delete t;
   1557 
   1558     // Round trip the result of toRules
   1559     t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
   1560     if (t == 0) {
   1561         errln("FAIL: createFromRules #2 failed");
   1562         return;
   1563     } else {
   1564         logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
   1565     }
   1566 
   1567     // Test toRules again
   1568     t->toRules(r, TRUE);
   1569     if (r != exp) {
   1570         errln((UnicodeString)"FAIL: toRules() => " + r +
   1571               ", expected " + exp);
   1572     } else {
   1573         logln((UnicodeString)"OK: toRules() => " + r);
   1574     }
   1575 
   1576     delete t;
   1577 
   1578     // Test Foo(Bar) IDs.  Careful with spacing in id; make it conform
   1579     // to what the regenerated ID will look like.
   1580     UnicodeString id("Upper(Lower);(NFKC)", "");
   1581     t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
   1582     if (t == 0) {
   1583         errln("FAIL: createInstance #2 failed");
   1584         return;
   1585     }
   1586     if (t->getID() == id) {
   1587         logln((UnicodeString)"OK: created " + id);
   1588     } else {
   1589         errln((UnicodeString)"FAIL: createInstance(" + id +
   1590               ").getID() => " + t->getID());
   1591     }
   1592 
   1593     Transliterator *u = t->createInverse(status);
   1594     if (u == 0) {
   1595         errln("FAIL: createInverse failed");
   1596         delete t;
   1597         return;
   1598     }
   1599     exp = "NFKC();Lower(Upper)";
   1600     if (u->getID() == exp) {
   1601         logln((UnicodeString)"OK: createInverse(" + id + ") => " +
   1602               u->getID());
   1603     } else {
   1604         errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
   1605               u->getID());
   1606     }
   1607     delete t;
   1608     delete u;
   1609 }
   1610 
   1611 /**
   1612  * Compound filter semantics were orginially not implemented
   1613  * correctly.  Originally, each component filter f(i) is replaced by
   1614  * f'(i) = f(i) && g, where g is the filter for the compound
   1615  * transliterator.
   1616  *
   1617  * From Mark:
   1618  *
   1619  * Suppose and I have a transliterator X. Internally X is
   1620  * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
   1621  *
   1622  * The compound should convert all greek characters (through latin) to
   1623  * cyrillic, then lowercase the result. The filter should say "don't
   1624  * touch 'A' in the original". But because an intermediate result
   1625  * happens to go through "A", the Greek Alpha gets hung up.
   1626  */
   1627 void TransliteratorTest::TestCompoundFilter(void) {
   1628     UParseError parseError;
   1629     UErrorCode status = U_ZERO_ERROR;
   1630     Transliterator *t = Transliterator::createInstance
   1631         ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
   1632     if (t == 0) {
   1633         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
   1634         return;
   1635     }
   1636     t->adoptFilter(new UnicodeSet("[^A]", status));
   1637     if (U_FAILURE(status)) {
   1638         errln("FAIL: UnicodeSet ct failed");
   1639         delete t;
   1640         return;
   1641     }
   1642 
   1643     // Only the 'A' at index 1 should remain unchanged
   1644     expect(*t,
   1645            CharsToUnicodeString("BA\\u039A\\u0391"),
   1646            CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
   1647     delete t;
   1648 }
   1649 
   1650 void TransliteratorTest::TestRemove(void) {
   1651     UParseError parseError;
   1652     UErrorCode status = U_ZERO_ERROR;
   1653     Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
   1654     if (t == 0) {
   1655         errln("FAIL: createInstance failed");
   1656         return;
   1657     }
   1658 
   1659     expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
   1660 
   1661     // extra test for RemoveTransliterator::clone(), which at one point wasn't
   1662     // duplicating the filter
   1663     Transliterator* t2 = t->clone();
   1664     expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
   1665 
   1666     delete t;
   1667     delete t2;
   1668 }
   1669 
   1670 void TransliteratorTest::TestToRules(void) {
   1671     const char* RBT = "rbt";
   1672     const char* SET = "set";
   1673     static const char* DATA[] = {
   1674         RBT,
   1675         "$a=\\u4E61; [$a] > A;",
   1676         "[\\u4E61] > A;",
   1677 
   1678         RBT,
   1679         "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
   1680         "[[:Zs:][:Zl:]]{a} > A;",
   1681 
   1682         SET,
   1683         "[[:Zs:][:Zl:]]",
   1684         "[[:Zs:][:Zl:]]",
   1685 
   1686         SET,
   1687         "[:Ps:]",
   1688         "[:Ps:]",
   1689 
   1690         SET,
   1691         "[:L:]",
   1692         "[:L:]",
   1693 
   1694         SET,
   1695         "[[:L:]-[A]]",
   1696         "[[:L:]-[A]]",
   1697 
   1698         SET,
   1699         "[~[:Lu:][:Ll:]]",
   1700         "[~[:Lu:][:Ll:]]",
   1701 
   1702         SET,
   1703         "[~[a-z]]",
   1704         "[~[a-z]]",
   1705 
   1706         RBT,
   1707         "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
   1708         "[^[:Zs:]]{a} > A;",
   1709 
   1710         RBT,
   1711         "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
   1712         "[[a-z]-[:Zs:]]{a} > A;",
   1713 
   1714         RBT,
   1715         "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
   1716         "[[:Zs:]&[a-z]]{a} > A;",
   1717 
   1718         RBT,
   1719         "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
   1720         "[x[:Zs:]]{a} > A;",
   1721 
   1722         RBT,
   1723         "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
   1724         "$macron = \\u0304 ;"
   1725         "$evowel = [aeiouyAEIOUY] ;"
   1726         "$iotasub = \\u0345 ;"
   1727         "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
   1728         "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
   1729 
   1730         RBT,
   1731         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
   1732         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
   1733     };
   1734     static const int32_t DATA_length = UPRV_LENGTHOF(DATA);
   1735 
   1736     for (int32_t d=0; d < DATA_length; d+=3) {
   1737         if (DATA[d] == RBT) {
   1738             // Transliterator test
   1739             UParseError parseError;
   1740             UErrorCode status = U_ZERO_ERROR;
   1741             Transliterator *t = Transliterator::createFromRules("ID",
   1742                                                                 UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status);
   1743             if (t == 0) {
   1744                 dataerrln("FAIL: createFromRules failed - %s", u_errorName(status));
   1745                 return;
   1746             }
   1747             UnicodeString rules, escapedRules;
   1748             t->toRules(rules, FALSE);
   1749             t->toRules(escapedRules, TRUE);
   1750             UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
   1751             UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
   1752             if (rules == expRules) {
   1753                 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
   1754                       " => " + rules);
   1755             } else {
   1756                 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
   1757                       " => " + rules + ", exp " + expRules);
   1758             }
   1759             if (escapedRules == expEscapedRules) {
   1760                 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
   1761                       " => " + escapedRules);
   1762             } else {
   1763                 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
   1764                       " => " + escapedRules + ", exp " + expEscapedRules);
   1765             }
   1766             delete t;
   1767 
   1768         } else {
   1769             // UnicodeSet test
   1770             UErrorCode status = U_ZERO_ERROR;
   1771             UnicodeString pat(DATA[d+1], -1, US_INV);
   1772             UnicodeString expToPat(DATA[d+2], -1, US_INV);
   1773             UnicodeSet set(pat, status);
   1774             if (U_FAILURE(status)) {
   1775                 errln("FAIL: UnicodeSet ct failed");
   1776                 return;
   1777             }
   1778             // Adjust spacing etc. as necessary.
   1779             UnicodeString toPat;
   1780             set.toPattern(toPat);
   1781             if (expToPat == toPat) {
   1782                 logln((UnicodeString)"Ok: " + pat +
   1783                       " => " + toPat);
   1784             } else {
   1785                 errln((UnicodeString)"FAIL: " + pat +
   1786                       " => " + prettify(toPat, TRUE) +
   1787                       ", exp " + prettify(pat, TRUE));
   1788             }
   1789         }
   1790     }
   1791 }
   1792 
   1793 void TransliteratorTest::TestContext() {
   1794     UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
   1795     expect("de > x; {d}e > y;",
   1796            "de",
   1797            "ye",
   1798            &pos);
   1799 
   1800     expect("ab{c} > z;",
   1801            "xadabdabcy",
   1802            "xadabdabzy");
   1803 }
   1804 
   1805 void TransliteratorTest::TestSupplemental() {
   1806 
   1807     expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
   1808                                 "a > $a; $s > i;"),
   1809            CharsToUnicodeString("ab\\U0001030Fx"),
   1810            CharsToUnicodeString("\\U00010300bix"));
   1811 
   1812     expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
   1813                                 "$b=[A-Z\\U00010400-\\U0001044D];"
   1814                                 "($a)($b) > $2 $1;"),
   1815            CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
   1816            CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
   1817 
   1818     // k|ax\\U00010300xm
   1819 
   1820     // k|a\\U00010400\\U00010300xm
   1821     // ky|\\U00010400\\U00010300xm
   1822     // ky\\U00010400|\\U00010300xm
   1823 
   1824     // ky\\U00010400|\\U00010300\\U00010400m
   1825     // ky\\U00010400y|\\U00010400m
   1826     expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
   1827                                 "$a {x} > | @ \\U00010400;"
   1828                                 "{$a} [^\\u0000-\\uFFFF] > y;"),
   1829            CharsToUnicodeString("kax\\U00010300xm"),
   1830            CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
   1831 
   1832     expectT("Any-Name",
   1833            CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
   1834            UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
   1835 
   1836     expectT("Any-Hex/Unicode",
   1837            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
   1838            UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
   1839 
   1840     expectT("Any-Hex/C",
   1841            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
   1842            UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
   1843 
   1844     expectT("Any-Hex/Perl",
   1845            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
   1846            UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
   1847 
   1848     expectT("Any-Hex/Java",
   1849            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
   1850            UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
   1851 
   1852     expectT("Any-Hex/XML",
   1853            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
   1854            "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
   1855 
   1856     expectT("Any-Hex/XML10",
   1857            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
   1858            "&#66352;&#1113856;&#917601;&#160;");
   1859 
   1860     expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
   1861            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
   1862            CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
   1863 }
   1864 
   1865 void TransliteratorTest::TestQuantifier() {
   1866 
   1867     // Make sure @ in a quantified anteContext works
   1868     expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
   1869            "AAAAAb",
   1870            "aaa(aac)");
   1871 
   1872     // Make sure @ in a quantified postContext works
   1873     expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
   1874            "baaaaa",
   1875            "caa(aaa)");
   1876 
   1877     // Make sure @ in a quantified postContext with seg ref works
   1878     expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
   1879            "baaaaa",
   1880            "baa(aaa)");
   1881 
   1882     // Make sure @ past ante context doesn't enter ante context
   1883     UTransPosition pos = {0, 5, 3, 5};
   1884     expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
   1885            "xxxab",
   1886            "xxx(ac)",
   1887            &pos);
   1888 
   1889     // Make sure @ past post context doesn't pass limit
   1890     UTransPosition pos2 = {0, 4, 0, 2};
   1891     expect("{b} a+ > c @@ |; x > y; a > A;",
   1892            "baxx",
   1893            "caxx",
   1894            &pos2);
   1895 
   1896     // Make sure @ past post context doesn't enter post context
   1897     expect("{b} a+ > c @@ |; x > y; a > A;",
   1898            "baxx",
   1899            "cayy");
   1900 
   1901     expect("(ab)? c > d;",
   1902            "c abc ababc",
   1903            "d d abd");
   1904 
   1905     // NOTE: The (ab)+ when referenced just yields a single "ab",
   1906     // not the full sequence of them.  This accords with perl behavior.
   1907     expect("(ab)+ {x} > '(' $1 ')';",
   1908            "x abx ababxy",
   1909            "x ab(ab) abab(ab)y");
   1910 
   1911     expect("b+ > x;",
   1912            "ac abc abbc abbbc",
   1913            "ac axc axc axc");
   1914 
   1915     expect("[abc]+ > x;",
   1916            "qac abrc abbcs abtbbc",
   1917            "qx xrx xs xtx");
   1918 
   1919     expect("q{(ab)+} > x;",
   1920            "qa qab qaba qababc qaba",
   1921            "qa qx qxa qxc qxa");
   1922 
   1923     expect("q(ab)* > x;",
   1924            "qa qab qaba qababc",
   1925            "xa x xa xc");
   1926 
   1927     // NOTE: The (ab)+ when referenced just yields a single "ab",
   1928     // not the full sequence of them.  This accords with perl behavior.
   1929     expect("q(ab)* > '(' $1 ')';",
   1930            "qa qab qaba qababc",
   1931            "()a (ab) (ab)a (ab)c");
   1932 
   1933     // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
   1934     // quoted string
   1935     expect("'ab'+ > x;",
   1936            "bb ab ababb",
   1937            "bb x xb");
   1938 
   1939     // $foo+ and $foo* -- the quantifier should apply to the entire
   1940     // variable reference
   1941     expect("$var = ab; $var+ > x;",
   1942            "bb ab ababb",
   1943            "bb x xb");
   1944 }
   1945 
   1946 class TestTrans : public Transliterator {
   1947 public:
   1948     TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
   1949     }
   1950     virtual Transliterator* clone(void) const {
   1951         return new TestTrans(getID());
   1952     }
   1953     virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets,
   1954         UBool /*isIncremental*/) const
   1955     {
   1956         offsets.start = offsets.limit;
   1957     }
   1958     virtual UClassID getDynamicClassID() const;
   1959     static UClassID U_EXPORT2 getStaticClassID();
   1960 };
   1961 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
   1962 
   1963 /**
   1964  * Test Source-Target/Variant.
   1965  */
   1966 void TransliteratorTest::TestSTV(void) {
   1967     int32_t ns = Transliterator::countAvailableSources();
   1968     if (ns < 0 || ns > 255) {
   1969         errln((UnicodeString)"FAIL: Bad source count: " + ns);
   1970         return;
   1971     }
   1972     int32_t i, j;
   1973     for (i=0; i<ns; ++i) {
   1974         UnicodeString source;
   1975         Transliterator::getAvailableSource(i, source);
   1976         logln((UnicodeString)"" + i + ": " + source);
   1977         if (source.length() == 0) {
   1978             errln("FAIL: empty source");
   1979             continue;
   1980         }
   1981         int32_t nt = Transliterator::countAvailableTargets(source);
   1982         if (nt < 0 || nt > 255) {
   1983             errln((UnicodeString)"FAIL: Bad target count: " + nt);
   1984             continue;
   1985         }
   1986         for (int32_t j=0; j<nt; ++j) {
   1987             UnicodeString target;
   1988             Transliterator::getAvailableTarget(j, source, target);
   1989             logln((UnicodeString)" " + j + ": " + target);
   1990             if (target.length() == 0) {
   1991                 errln("FAIL: empty target");
   1992                 continue;
   1993             }
   1994             int32_t nv = Transliterator::countAvailableVariants(source, target);
   1995             if (nv < 0 || nv > 255) {
   1996                 errln((UnicodeString)"FAIL: Bad variant count: " + nv);
   1997                 continue;
   1998             }
   1999             for (int32_t k=0; k<nv; ++k) {
   2000                 UnicodeString variant;
   2001                 Transliterator::getAvailableVariant(k, source, target, variant);
   2002                 if (variant.length() == 0) {
   2003                     logln((UnicodeString)"  " + k + ": <empty>");
   2004                 } else {
   2005                     logln((UnicodeString)"  " + k + ": " + variant);
   2006                 }
   2007             }
   2008         }
   2009     }
   2010 
   2011     // Test registration
   2012     const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
   2013     const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
   2014     const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
   2015     for (i=0; i<3; ++i) {
   2016         Transliterator *t = new TestTrans(IDS[i]);
   2017         if (t == 0) {
   2018             errln("FAIL: out of memory");
   2019             return;
   2020         }
   2021         if (t->getID() != IDS[i]) {
   2022             errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
   2023             delete t;
   2024             return;
   2025         }
   2026         Transliterator::registerInstance(t);
   2027         UErrorCode status = U_ZERO_ERROR;
   2028         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
   2029         if (t == NULL) {
   2030             errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
   2031                   IDS[i]);
   2032         } else {
   2033             logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
   2034                   IDS[i]);
   2035             delete t;
   2036         }
   2037         Transliterator::unregister(IDS[i]);
   2038         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
   2039         if (t != NULL) {
   2040             errln((UnicodeString)"FAIL: Unregistration failed for ID " +
   2041                   IDS[i]);
   2042             delete t;
   2043         }
   2044     }
   2045 
   2046     // Make sure getAvailable API reflects removal
   2047     int32_t n = Transliterator::countAvailableIDs();
   2048     for (i=0; i<n; ++i) {
   2049         UnicodeString id = Transliterator::getAvailableID(i);
   2050         for (j=0; j<3; ++j) {
   2051             if (id.caseCompare(FULL_IDS[j],0)==0) {
   2052                 errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
   2053             }
   2054         }
   2055     }
   2056     n = Transliterator::countAvailableTargets("Any");
   2057     for (i=0; i<n; ++i) {
   2058         UnicodeString t;
   2059         Transliterator::getAvailableTarget(i, "Any", t);
   2060         if (t.caseCompare(IDS[0],0)==0) {
   2061             errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
   2062         }
   2063     }
   2064     n = Transliterator::countAvailableSources();
   2065     for (i=0; i<n; ++i) {
   2066         UnicodeString s;
   2067         Transliterator::getAvailableSource(i, s);
   2068         for (j=0; j<3; ++j) {
   2069             if (SOURCES[j] == NULL) continue;
   2070             if (s.caseCompare(SOURCES[j],0)==0) {
   2071                 errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
   2072             }
   2073         }
   2074     }
   2075 }
   2076 
   2077 /**
   2078  * Test inverse of Greek-Latin; Title()
   2079  */
   2080 void TransliteratorTest::TestCompoundInverse(void) {
   2081     UParseError parseError;
   2082     UErrorCode status = U_ZERO_ERROR;
   2083     Transliterator *t = Transliterator::createInstance
   2084         ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
   2085     if (t == 0) {
   2086         dataerrln("FAIL: createInstance - %s", u_errorName(status));
   2087         return;
   2088     }
   2089     UnicodeString exp("(Title);Latin-Greek");
   2090     if (t->getID() == exp) {
   2091         logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
   2092               t->getID());
   2093     } else {
   2094         errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
   2095               t->getID() + "\", expected \"" + exp + "\"");
   2096     }
   2097     delete t;
   2098 }
   2099 
   2100 /**
   2101  * Test NFD chaining with RBT
   2102  */
   2103 void TransliteratorTest::TestNFDChainRBT() {
   2104     UParseError pe;
   2105     UErrorCode ec = U_ZERO_ERROR;
   2106     Transliterator* t = Transliterator::createFromRules(
   2107                                "TEST", "::NFD; aa > Q; a > q;",
   2108                                UTRANS_FORWARD, pe, ec);
   2109     if (t == NULL || U_FAILURE(ec)) {
   2110         dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
   2111         return;
   2112     }
   2113     expect(*t, "aa", "Q");
   2114     delete t;
   2115 
   2116     // TEMPORARY TESTS -- BEING DEBUGGED
   2117 //=-    UnicodeString s, s2;
   2118 //=-    t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
   2119 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
   2120 //=-    s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
   2121 //=-    expect(*t, s, s2);
   2122 //=-    delete t;
   2123 //=-
   2124 //=-    t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
   2125 //=-    expect(*t, s2, s);
   2126 //=-    delete t;
   2127 //=-
   2128 //=-    t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
   2129 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
   2130 //=-    expect(*t, s, s);
   2131 //=-    delete t;
   2132 
   2133 //    const char* source[] = {
   2134 //        /*
   2135 //        "\\u015Br\\u012Bmad",
   2136 //        "bhagavadg\\u012Bt\\u0101",
   2137 //        "adhy\\u0101ya",
   2138 //        "arjuna",
   2139 //        "vi\\u1E63\\u0101da",
   2140 //        "y\\u014Dga",
   2141 //        "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
   2142 //        "uv\\u0101cr\\u0325",
   2143 //        */
   2144 //        "rmk\\u1E63\\u0113t",
   2145 //      //"dharmak\\u1E63\\u0113tr\\u0113",
   2146 //        /*
   2147 //        "kuruk\\u1E63\\u0113tr\\u0113",
   2148 //        "samav\\u0113t\\u0101",
   2149 //        "yuyutsava-\\u1E25",
   2150 //        "m\\u0101mak\\u0101-\\u1E25",
   2151 //     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
   2152 //        "kimakurvata",
   2153 //        "san\\u0304java",
   2154 //        */
   2155 //
   2156 //        0
   2157 //    };
   2158 //    const char* expected[] = {
   2159 //        /*
   2160 //        "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
   2161 //        "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
   2162 //        "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
   2163 //        "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
   2164 //        "\\u0935\\u093f\\u0937\\u093e\\u0926",
   2165 //        "\\u092f\\u094b\\u0917",
   2166 //        "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
   2167 //        "\\u0909\\u0935\\u093E\\u091A\\u0943",
   2168 //        */
   2169 //        "\\u0927",
   2170 //        //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
   2171 //        /*
   2172 //        "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
   2173 //        "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
   2174 //        "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
   2175 //        "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
   2176 //    //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
   2177 //        "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
   2178 //        "\\u0938\\u0902\\u091c\\u0935",
   2179 //        */
   2180 //        0
   2181 //    };
   2182 //    UErrorCode status = U_ZERO_ERROR;
   2183 //    UParseError parseError;
   2184 //    UnicodeString message;
   2185 //    Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
   2186 //    Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
   2187 //    if(U_FAILURE(status)){
   2188 //        errln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
   2189 //        errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
   2190 //        delete latinToDevToLatin;
   2191 //        delete devToLatinToDev;
   2192 //        return;
   2193 //    }
   2194 //    UnicodeString gotResult;
   2195 //    for(int i= 0; source[i] != 0; i++){
   2196 //        gotResult = source[i];
   2197 //        expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
   2198 //        expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
   2199 //    }
   2200 //    delete latinToDevToLatin;
   2201 //    delete devToLatinToDev;
   2202 }
   2203 
   2204 /**
   2205  * Inverse of "Null" should be "Null". (J21)
   2206  */
   2207 void TransliteratorTest::TestNullInverse() {
   2208     UParseError pe;
   2209     UErrorCode ec = U_ZERO_ERROR;
   2210     Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
   2211     if (t == 0 || U_FAILURE(ec)) {
   2212         errln("FAIL: createInstance");
   2213         return;
   2214     }
   2215     Transliterator *u = t->createInverse(ec);
   2216     if (u == 0 || U_FAILURE(ec)) {
   2217         errln("FAIL: createInverse");
   2218         delete t;
   2219         return;
   2220     }
   2221     if (u->getID() != "Null") {
   2222         errln("FAIL: Inverse of Null should be Null");
   2223     }
   2224     delete t;
   2225     delete u;
   2226 }
   2227 
   2228 /**
   2229  * Check ID of inverse of alias. (J22)
   2230  */
   2231 void TransliteratorTest::TestAliasInverseID() {
   2232     UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
   2233     UParseError pe;
   2234     UErrorCode ec = U_ZERO_ERROR;
   2235     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
   2236     if (t == 0 || U_FAILURE(ec)) {
   2237         dataerrln("FAIL: createInstance - %s", u_errorName(ec));
   2238         return;
   2239     }
   2240     Transliterator *u = t->createInverse(ec);
   2241     if (u == 0 || U_FAILURE(ec)) {
   2242         errln("FAIL: createInverse");
   2243         delete t;
   2244         return;
   2245     }
   2246     UnicodeString exp = "Hangul-Latin";
   2247     UnicodeString got = u->getID();
   2248     if (got != exp) {
   2249         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
   2250               ", expected " + exp);
   2251     }
   2252     delete t;
   2253     delete u;
   2254 }
   2255 
   2256 /**
   2257  * Test IDs of inverses of compound transliterators. (J20)
   2258  */
   2259 void TransliteratorTest::TestCompoundInverseID() {
   2260     UnicodeString ID = "Latin-Jamo;NFC(NFD)";
   2261     UParseError pe;
   2262     UErrorCode ec = U_ZERO_ERROR;
   2263     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
   2264     if (t == 0 || U_FAILURE(ec)) {
   2265         dataerrln("FAIL: createInstance - %s", u_errorName(ec));
   2266         return;
   2267     }
   2268     Transliterator *u = t->createInverse(ec);
   2269     if (u == 0 || U_FAILURE(ec)) {
   2270         errln("FAIL: createInverse");
   2271         delete t;
   2272         return;
   2273     }
   2274     UnicodeString exp = "NFD(NFC);Jamo-Latin";
   2275     UnicodeString got = u->getID();
   2276     if (got != exp) {
   2277         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
   2278               ", expected " + exp);
   2279     }
   2280     delete t;
   2281     delete u;
   2282 }
   2283 
   2284 /**
   2285  * Test undefined variable.
   2286 
   2287  */
   2288 void TransliteratorTest::TestUndefinedVariable() {
   2289     UnicodeString rule = "$initial } a <> \\u1161;";
   2290     UParseError pe;
   2291     UErrorCode ec = U_ZERO_ERROR;
   2292     Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
   2293     delete t;
   2294     if (U_FAILURE(ec)) {
   2295         logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
   2296               u_errorName(ec));
   2297         return;
   2298     }
   2299     errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
   2300           u_errorName(ec));
   2301 }
   2302 
   2303 /**
   2304  * Test empty context.
   2305  */
   2306 void TransliteratorTest::TestEmptyContext() {
   2307     expect(" { a } > b;", "xay a ", "xby b ");
   2308 }
   2309 
   2310 /**
   2311 * Test compound filter ID syntax
   2312 */
   2313 void TransliteratorTest::TestCompoundFilterID(void) {
   2314     static const char* DATA[] = {
   2315         // Col. 1 = ID or rule set (latter must start with #)
   2316 
   2317         // = columns > 1 are null if expect col. 1 to be illegal =
   2318 
   2319         // Col. 2 = direction, "F..." or "R..."
   2320         // Col. 3 = source string
   2321         // Col. 4 = exp result
   2322 
   2323         "[abc]; [abc]", NULL, NULL, NULL, // multiple filters
   2324         "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
   2325         "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
   2326         "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
   2327         "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
   2328         "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
   2329         NULL,
   2330     };
   2331 
   2332     for (int32_t i=0; DATA[i]; i+=4) {
   2333         UnicodeString id = CharsToUnicodeString(DATA[i]);
   2334         UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
   2335             UTRANS_REVERSE : UTRANS_FORWARD;
   2336         UnicodeString source;
   2337         UnicodeString exp;
   2338         if (DATA[i+2] != NULL) {
   2339             source = CharsToUnicodeString(DATA[i+2]);
   2340             exp = CharsToUnicodeString(DATA[i+3]);
   2341         }
   2342         UBool expOk = (DATA[i+1] != NULL);
   2343         Transliterator* t = NULL;
   2344         UParseError pe;
   2345         UErrorCode ec = U_ZERO_ERROR;
   2346         if (id.charAt(0) == 0x23/*#*/) {
   2347             t = Transliterator::createFromRules("ID", id, direction, pe, ec);
   2348         } else {
   2349             t = Transliterator::createInstance(id, direction, pe, ec);
   2350         }
   2351         UBool ok = (t != NULL && U_SUCCESS(ec));
   2352         UnicodeString transID;
   2353         if (t!=0) {
   2354             transID = t->getID();
   2355         }
   2356         else {
   2357             transID = UnicodeString("NULL", "");
   2358         }
   2359         if (ok == expOk) {
   2360             logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
   2361                   u_errorName(ec));
   2362             if (source.length() != 0) {
   2363                 expect(*t, source, exp);
   2364             }
   2365             delete t;
   2366         } else {
   2367             dataerrln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
   2368                   u_errorName(ec));
   2369         }
   2370     }
   2371 }
   2372 
   2373 /**
   2374  * Test new property set syntax
   2375  */
   2376 void TransliteratorTest::TestPropertySet() {
   2377     expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
   2378     expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
   2379            "[ a stitch ]\n[ in time ]\r[ saves 9]");
   2380 }
   2381 
   2382 /**
   2383  * Test various failure points of the new 2.0 engine.
   2384  */
   2385 void TransliteratorTest::TestNewEngine() {
   2386     UParseError pe;
   2387     UErrorCode ec = U_ZERO_ERROR;
   2388     Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
   2389     if (t == 0 || U_FAILURE(ec)) {
   2390         dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec));
   2391         return;
   2392     }
   2393     // Katakana should be untouched
   2394     expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
   2395            CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
   2396 
   2397     delete t;
   2398 
   2399 #if 1
   2400     // This test will only work if Transliterator.ROLLBACK is
   2401     // true.  Otherwise, this test will fail, revealing a
   2402     // limitation of global filters in incremental mode.
   2403     Transliterator *a =
   2404         Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
   2405     Transliterator *A =
   2406         Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
   2407     if (U_FAILURE(ec)) {
   2408         delete a;
   2409         delete A;
   2410         return;
   2411     }
   2412 
   2413     Transliterator* array[3];
   2414     array[0] = a;
   2415     array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
   2416     array[2] = A;
   2417     if (U_FAILURE(ec)) {
   2418         errln("FAIL: createInstance NFD");
   2419         delete a;
   2420         delete A;
   2421         delete array[1];
   2422         return;
   2423     }
   2424 
   2425     t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
   2426     if (U_FAILURE(ec)) {
   2427         errln("FAIL: UnicodeSet constructor");
   2428         delete a;
   2429         delete A;
   2430         delete array[1];
   2431         delete t;
   2432         return;
   2433     }
   2434 
   2435     expect(*t, "aAaA", "bAbA");
   2436 
   2437     assertTrue("countElements", t->countElements() == 3);
   2438     assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
   2439     assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
   2440     assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
   2441     assertSuccess("getElement", ec);
   2442 
   2443     delete a;
   2444     delete A;
   2445     delete array[1];
   2446     delete t;
   2447 #endif
   2448 
   2449     expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
   2450            "a",
   2451            "ax");
   2452 
   2453     UnicodeString gr = CharsToUnicodeString(
   2454         "$ddot = \\u0308 ;"
   2455         "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
   2456         "$rough = \\u0314 ;"
   2457         "($lcgvowel+ $ddot?) $rough > h | $1 ;"
   2458         "\\u03b1 <> a ;"
   2459         "$rough <> h ;");
   2460 
   2461     expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
   2462 }
   2463 
   2464 /**
   2465  * Test quantified segment behavior.  We want:
   2466  * ([abc])+ > x $1 x; applied to "cba" produces "xax"
   2467  */
   2468 void TransliteratorTest::TestQuantifiedSegment(void) {
   2469     // The normal case
   2470     expect("([abc]+) > x $1 x;", "cba", "xcbax");
   2471 
   2472     // The tricky case; the quantifier is around the segment
   2473     expect("([abc])+ > x $1 x;", "cba", "xax");
   2474 
   2475     // Tricky case in reverse direction
   2476     expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
   2477 
   2478     // Check post-context segment
   2479     expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
   2480 
   2481     // Test toRule/toPattern for non-quantified segment.
   2482     // Careful with spacing here.
   2483     UnicodeString r("([a-c]){q} > x $1 x;");
   2484     UParseError pe;
   2485     UErrorCode ec = U_ZERO_ERROR;
   2486     Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
   2487     if (U_FAILURE(ec)) {
   2488         errln("FAIL: createFromRules");
   2489         delete t;
   2490         return;
   2491     }
   2492     UnicodeString rr;
   2493     t->toRules(rr, TRUE);
   2494     if (r != rr) {
   2495         errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
   2496     } else {
   2497         logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
   2498     }
   2499     delete t;
   2500 
   2501     // Test toRule/toPattern for quantified segment.
   2502     // Careful with spacing here.
   2503     r = "([a-c])+{q} > x $1 x;";
   2504     t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
   2505     if (U_FAILURE(ec)) {
   2506         errln("FAIL: createFromRules");
   2507         delete t;
   2508         return;
   2509     }
   2510     t->toRules(rr, TRUE);
   2511     if (r != rr) {
   2512         errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
   2513     } else {
   2514         logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
   2515     }
   2516     delete t;
   2517 }
   2518 
   2519 //======================================================================
   2520 // Ram's tests
   2521 //======================================================================
   2522 void TransliteratorTest::TestDevanagariLatinRT(){
   2523     const int MAX_LEN= 52;
   2524     const char* const source[MAX_LEN] = {
   2525         "bh\\u0101rata",
   2526         "kra",
   2527         "k\\u1E63a",
   2528         "khra",
   2529         "gra",
   2530         "\\u1E45ra",
   2531         "cra",
   2532         "chra",
   2533         "j\\u00F1a",
   2534         "jhra",
   2535         "\\u00F1ra",
   2536         "\\u1E6Dya",
   2537         "\\u1E6Dhra",
   2538         "\\u1E0Dya",
   2539       //"r\\u0323ya", // \u095c is not valid in Devanagari
   2540         "\\u1E0Dhya",
   2541         "\\u1E5Bhra",
   2542         "\\u1E47ra",
   2543         "tta",
   2544         "thra",
   2545         "dda",
   2546         "dhra",
   2547         "nna",
   2548         "pra",
   2549         "phra",
   2550         "bra",
   2551         "bhra",
   2552         "mra",
   2553         "\\u1E49ra",
   2554       //"l\\u0331ra",
   2555         "yra",
   2556         "\\u1E8Fra",
   2557       //"l-",
   2558         "vra",
   2559         "\\u015Bra",
   2560         "\\u1E63ra",
   2561         "sra",
   2562         "hma",
   2563         "\\u1E6D\\u1E6Da",
   2564         "\\u1E6D\\u1E6Dha",
   2565         "\\u1E6Dh\\u1E6Dha",
   2566         "\\u1E0D\\u1E0Da",
   2567         "\\u1E0D\\u1E0Dha",
   2568         "\\u1E6Dya",
   2569         "\\u1E6Dhya",
   2570         "\\u1E0Dya",
   2571         "\\u1E0Dhya",
   2572         // Not roundtrippable --
   2573         // \\u0939\\u094d\\u094d\\u092E  - hma
   2574         // \\u0939\\u094d\\u092E         - hma
   2575         // CharsToUnicodeString("hma"),
   2576         "hya",
   2577         "\\u015Br\\u0325",
   2578         "\\u015Bca",
   2579         "\\u0115",
   2580         "san\\u0304j\\u012Bb s\\u0113nagupta",
   2581         "\\u0101nand vaddir\\u0101ju",
   2582         "\\u0101",
   2583         "a"
   2584     };
   2585     const char* const expected[MAX_LEN] = {
   2586         "\\u092D\\u093E\\u0930\\u0924",   /* bha\\u0304rata */
   2587         "\\u0915\\u094D\\u0930",          /* kra         */
   2588         "\\u0915\\u094D\\u0937",          /* ks\\u0323a  */
   2589         "\\u0916\\u094D\\u0930",          /* khra        */
   2590         "\\u0917\\u094D\\u0930",          /* gra         */
   2591         "\\u0919\\u094D\\u0930",          /* n\\u0307ra  */
   2592         "\\u091A\\u094D\\u0930",          /* cra         */
   2593         "\\u091B\\u094D\\u0930",          /* chra        */
   2594         "\\u091C\\u094D\\u091E",          /* jn\\u0303a  */
   2595         "\\u091D\\u094D\\u0930",          /* jhra        */
   2596         "\\u091E\\u094D\\u0930",          /* n\\u0303ra  */
   2597         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
   2598         "\\u0920\\u094D\\u0930",          /* t\\u0323hra */
   2599         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
   2600       //"\\u095C\\u094D\\u092F",        /* r\\u0323ya  */ // \u095c is not valid in Devanagari
   2601         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
   2602         "\\u0922\\u093C\\u094D\\u0930",   /* r\\u0323hra */
   2603         "\\u0923\\u094D\\u0930",          /* n\\u0323ra  */
   2604         "\\u0924\\u094D\\u0924",          /* tta         */
   2605         "\\u0925\\u094D\\u0930",          /* thra        */
   2606         "\\u0926\\u094D\\u0926",          /* dda         */
   2607         "\\u0927\\u094D\\u0930",          /* dhra        */
   2608         "\\u0928\\u094D\\u0928",          /* nna         */
   2609         "\\u092A\\u094D\\u0930",          /* pra         */
   2610         "\\u092B\\u094D\\u0930",          /* phra        */
   2611         "\\u092C\\u094D\\u0930",          /* bra         */
   2612         "\\u092D\\u094D\\u0930",          /* bhra        */
   2613         "\\u092E\\u094D\\u0930",          /* mra         */
   2614         "\\u0929\\u094D\\u0930",          /* n\\u0331ra  */
   2615       //"\\u0934\\u094D\\u0930",        /* l\\u0331ra  */
   2616         "\\u092F\\u094D\\u0930",          /* yra         */
   2617         "\\u092F\\u093C\\u094D\\u0930",   /* y\\u0307ra  */
   2618       //"l-",
   2619         "\\u0935\\u094D\\u0930",          /* vra         */
   2620         "\\u0936\\u094D\\u0930",          /* s\\u0301ra  */
   2621         "\\u0937\\u094D\\u0930",          /* s\\u0323ra  */
   2622         "\\u0938\\u094D\\u0930",          /* sra         */
   2623         "\\u0939\\u094d\\u092E",          /* hma         */
   2624         "\\u091F\\u094D\\u091F",          /* t\\u0323t\\u0323a  */
   2625         "\\u091F\\u094D\\u0920",          /* t\\u0323t\\u0323ha */
   2626         "\\u0920\\u094D\\u0920",          /* t\\u0323ht\\u0323ha*/
   2627         "\\u0921\\u094D\\u0921",          /* d\\u0323d\\u0323a  */
   2628         "\\u0921\\u094D\\u0922",          /* d\\u0323d\\u0323ha */
   2629         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
   2630         "\\u0920\\u094D\\u092F",          /* t\\u0323hya */
   2631         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
   2632         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
   2633      // "hma",                         /* hma         */
   2634         "\\u0939\\u094D\\u092F",          /* hya         */
   2635         "\\u0936\\u0943",                 /* s\\u0301r\\u0325a  */
   2636         "\\u0936\\u094D\\u091A",          /* s\\u0301ca  */
   2637         "\\u090d",                        /* e\\u0306    */
   2638         "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
   2639         "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
   2640         "\\u0906",
   2641         "\\u0905",
   2642     };
   2643     UErrorCode status = U_ZERO_ERROR;
   2644     UParseError parseError;
   2645     UnicodeString message;
   2646     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
   2647     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
   2648     if(U_FAILURE(status)){
   2649         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
   2650         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
   2651         return;
   2652     }
   2653     UnicodeString gotResult;
   2654     for(int i= 0; i<MAX_LEN; i++){
   2655         gotResult = source[i];
   2656         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
   2657         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
   2658     }
   2659     delete latinToDev;
   2660     delete devToLatin;
   2661 }
   2662 
   2663 void TransliteratorTest::TestTeluguLatinRT(){
   2664     const int MAX_LEN=10;
   2665     const char* const source[MAX_LEN] = {
   2666         "raghur\\u0101m vi\\u015Bvan\\u0101dha",                         /* Raghuram Viswanadha    */
   2667         "\\u0101nand vaddir\\u0101ju",                                   /* Anand Vaddiraju        */
   2668         "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da",                      /* Rajeev Kasarabada      */
   2669         "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da",                    /* sanjeev kasarabada     */
   2670         "san\\u0304j\\u012Bb sen'gupta",                                 /* sanjib sengupata       */
   2671         "amar\\u0113ndra hanum\\u0101nula",                              /* Amarendra hanumanula   */
   2672         "ravi kum\\u0101r vi\\u015Bvan\\u0101dha",                       /* Ravi Kumar Viswanadha  */
   2673         "\\u0101ditya kandr\\u0113gula",                                 /* Aditya Kandregula      */
   2674         "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty   */
   2675         "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di"                         /* Madhav Desetty         */
   2676     };
   2677 
   2678     const char* const expected[MAX_LEN] = {
   2679         "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
   2680         "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
   2681         "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
   2682         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
   2683         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
   2684         "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
   2685         "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
   2686         "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
   2687         "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
   2688         "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
   2689     };
   2690 
   2691     UErrorCode status = U_ZERO_ERROR;
   2692     UParseError parseError;
   2693     UnicodeString message;
   2694     Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
   2695     Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
   2696     if(U_FAILURE(status)){
   2697         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
   2698         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
   2699         return;
   2700     }
   2701     UnicodeString gotResult;
   2702     for(int i= 0; i<MAX_LEN; i++){
   2703         gotResult = source[i];
   2704         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
   2705         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
   2706     }
   2707     delete latinToDev;
   2708     delete devToLatin;
   2709 }
   2710 
   2711 void TransliteratorTest::TestSanskritLatinRT(){
   2712     const int MAX_LEN =16;
   2713     const char* const source[MAX_LEN] = {
   2714         "rmk\\u1E63\\u0113t",
   2715         "\\u015Br\\u012Bmad",
   2716         "bhagavadg\\u012Bt\\u0101",
   2717         "adhy\\u0101ya",
   2718         "arjuna",
   2719         "vi\\u1E63\\u0101da",
   2720         "y\\u014Dga",
   2721         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
   2722         "uv\\u0101cr\\u0325",
   2723         "dharmak\\u1E63\\u0113tr\\u0113",
   2724         "kuruk\\u1E63\\u0113tr\\u0113",
   2725         "samav\\u0113t\\u0101",
   2726         "yuyutsava\\u1E25",
   2727         "m\\u0101mak\\u0101\\u1E25",
   2728     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
   2729         "kimakurvata",
   2730         "san\\u0304java",
   2731     };
   2732     const char* const expected[MAX_LEN] = {
   2733         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
   2734         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
   2735         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
   2736         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
   2737         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
   2738         "\\u0935\\u093f\\u0937\\u093e\\u0926",
   2739         "\\u092f\\u094b\\u0917",
   2740         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
   2741         "\\u0909\\u0935\\u093E\\u091A\\u0943",
   2742         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
   2743         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
   2744         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
   2745         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
   2746         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
   2747     //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
   2748         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
   2749         "\\u0938\\u0902\\u091c\\u0935",
   2750     };
   2751     UErrorCode status = U_ZERO_ERROR;
   2752     UParseError parseError;
   2753     UnicodeString message;
   2754     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
   2755     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
   2756     if(U_FAILURE(status)){
   2757         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
   2758         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
   2759         return;
   2760     }
   2761     UnicodeString gotResult;
   2762     for(int i= 0; i<MAX_LEN; i++){
   2763         gotResult = source[i];
   2764         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
   2765         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
   2766     }
   2767     delete latinToDev;
   2768     delete devToLatin;
   2769 }
   2770 
   2771 
   2772 void TransliteratorTest::TestCompoundLatinRT(){
   2773     const char* const source[] = {
   2774         "rmk\\u1E63\\u0113t",
   2775         "\\u015Br\\u012Bmad",
   2776         "bhagavadg\\u012Bt\\u0101",
   2777         "adhy\\u0101ya",
   2778         "arjuna",
   2779         "vi\\u1E63\\u0101da",
   2780         "y\\u014Dga",
   2781         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
   2782         "uv\\u0101cr\\u0325",
   2783         "dharmak\\u1E63\\u0113tr\\u0113",
   2784         "kuruk\\u1E63\\u0113tr\\u0113",
   2785         "samav\\u0113t\\u0101",
   2786         "yuyutsava\\u1E25",
   2787         "m\\u0101mak\\u0101\\u1E25",
   2788      // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
   2789         "kimakurvata",
   2790         "san\\u0304java"
   2791     };
   2792     const int MAX_LEN = UPRV_LENGTHOF(source);
   2793     const char* const expected[MAX_LEN] = {
   2794         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
   2795         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
   2796         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
   2797         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
   2798         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
   2799         "\\u0935\\u093f\\u0937\\u093e\\u0926",
   2800         "\\u092f\\u094b\\u0917",
   2801         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
   2802         "\\u0909\\u0935\\u093E\\u091A\\u0943",
   2803         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
   2804         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
   2805         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
   2806         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
   2807         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
   2808     //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
   2809         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
   2810         "\\u0938\\u0902\\u091c\\u0935"
   2811     };
   2812     if(MAX_LEN != UPRV_LENGTHOF(expected)) {
   2813         errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
   2814         return;
   2815     }
   2816 
   2817     UErrorCode status = U_ZERO_ERROR;
   2818     UParseError parseError;
   2819     UnicodeString message;
   2820     Transliterator* devToLatinToDev  =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
   2821     Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
   2822     Transliterator* devToTelToDev    =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
   2823     Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
   2824 
   2825     if(U_FAILURE(status)){
   2826         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
   2827         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
   2828         return;
   2829     }
   2830     UnicodeString gotResult;
   2831     for(int i= 0; i<MAX_LEN; i++){
   2832         gotResult = source[i];
   2833         expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
   2834         expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
   2835         expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
   2836 
   2837     }
   2838     delete(latinToDevToLatin);
   2839     delete(devToLatinToDev);
   2840     delete(devToTelToDev);
   2841     delete(latinToTelToLatin);
   2842 }
   2843 
   2844 /**
   2845  * Test Gurmukhi-Devanagari Tippi and Bindi
   2846  */
   2847 void TransliteratorTest::TestGurmukhiDevanagari(){
   2848     // the rule says:
   2849     // (\u0902) (when preceded by vowel)      --->  (\u0A02)
   2850     // (\u0902) (when preceded by consonant)  --->  (\u0A70)
   2851     UErrorCode status = U_ZERO_ERROR;
   2852     UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status);
   2853     UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status);
   2854     UParseError parseError;
   2855 
   2856     UnicodeSetIterator vIter(vowel);
   2857     UnicodeSetIterator nvIter(non_vowel);
   2858     Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
   2859     if(U_FAILURE(status)) {
   2860       dataerrln("Error creating transliterator %s", u_errorName(status));
   2861       delete trans;
   2862       return;
   2863     }
   2864     UnicodeString src (" \\u0902", -1, US_INV);
   2865     UnicodeString expected(" \\u0A02", -1, US_INV);
   2866     src = src.unescape();
   2867     expected= expected.unescape();
   2868 
   2869     while(vIter.next()){
   2870         src.setCharAt(0,(UChar) vIter.getCodepoint());
   2871         expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
   2872         expect(*trans,src,expected);
   2873     }
   2874 
   2875     expected.setCharAt(1,0x0A70);
   2876     while(nvIter.next()){
   2877         //src.setCharAt(0,(char) nvIter.codepoint);
   2878         src.setCharAt(0,(UChar)nvIter.getCodepoint());
   2879         expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
   2880         expect(*trans,src,expected);
   2881     }
   2882     delete trans;
   2883 }
   2884 /**
   2885  * Test instantiation from a locale.
   2886  */
   2887 void TransliteratorTest::TestLocaleInstantiation(void) {
   2888     UParseError pe;
   2889     UErrorCode ec = U_ZERO_ERROR;
   2890     Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
   2891     if (U_FAILURE(ec)) {
   2892         dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec));
   2893         delete t;
   2894         return;
   2895     }
   2896     expect(*t, CharsToUnicodeString("\\u0430"), "a");
   2897     delete t;
   2898 
   2899     t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
   2900     if (U_FAILURE(ec)) {
   2901         errln("FAIL: createInstance(en-el)");
   2902         delete t;
   2903         return;
   2904     }
   2905     expect(*t, "a", CharsToUnicodeString("\\u03B1"));
   2906     delete t;
   2907 }
   2908 
   2909 /**
   2910  * Test title case handling of accent (should ignore accents)
   2911  */
   2912 void TransliteratorTest::TestTitleAccents(void) {
   2913     UParseError pe;
   2914     UErrorCode ec = U_ZERO_ERROR;
   2915     Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
   2916     if (U_FAILURE(ec)) {
   2917         errln("FAIL: createInstance(Title)");
   2918         delete t;
   2919         return;
   2920     }
   2921     expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
   2922     delete t;
   2923 }
   2924 
   2925 /**
   2926  * Basic test of a locale resource based rule.
   2927  */
   2928 void TransliteratorTest::TestLocaleResource() {
   2929     const char* DATA[] = {
   2930         // id                    from               to
   2931         //"Latin-Greek/UNGEGN",    "b",               "\\u03bc\\u03c0",
   2932         "Latin-el",              "b",               "\\u03bc\\u03c0",
   2933         "Latin-Greek",           "b",               "\\u03B2",
   2934         "Greek-Latin/UNGEGN",    "\\u03B2",         "v",
   2935         "el-Latin",              "\\u03B2",         "v",
   2936         "Greek-Latin",           "\\u03B2",         "b",
   2937     };
   2938     const int32_t DATA_length = UPRV_LENGTHOF(DATA);
   2939     for (int32_t i=0; i<DATA_length; i+=3) {
   2940         UParseError pe;
   2941         UErrorCode ec = U_ZERO_ERROR;
   2942         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
   2943         if (U_FAILURE(ec)) {
   2944             dataerrln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ") - " + u_errorName(ec));
   2945             delete t;
   2946             continue;
   2947         }
   2948         expect(*t, CharsToUnicodeString(DATA[i+1]),
   2949                CharsToUnicodeString(DATA[i+2]));
   2950         delete t;
   2951     }
   2952 }
   2953 
   2954 /**
   2955  * Make sure parse errors reference the right line.
   2956  */
   2957 void TransliteratorTest::TestParseError() {
   2958     static const char* rule =
   2959         "a > b;\n"
   2960         "# more stuff\n"
   2961         "d << b;";
   2962     UErrorCode ec = U_ZERO_ERROR;
   2963     UParseError pe;
   2964     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
   2965     delete t;
   2966     if (U_FAILURE(ec)) {
   2967         UnicodeString err(pe.preContext);
   2968         err.append((UChar)124/*|*/).append(pe.postContext);
   2969         if (err.indexOf("d << b") >= 0) {
   2970             logln("Ok: " + err);
   2971         } else {
   2972             errln("FAIL: " + err);
   2973         }
   2974     }
   2975     else {
   2976         errln("FAIL: no syntax error");
   2977     }
   2978     static const char* maskingRule =
   2979         "a>x;\n"
   2980         "# more stuff\n"
   2981         "ab>y;";
   2982     ec = U_ZERO_ERROR;
   2983     delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec);
   2984     if (ec != U_RULE_MASK_ERROR) {
   2985         errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec));
   2986     }
   2987     else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) {
   2988         errln("FAIL: did not get expected precontext");
   2989     }
   2990     else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) {
   2991         errln("FAIL: did not get expected postcontext");
   2992     }
   2993 }
   2994 
   2995 /**
   2996  * Make sure sets on output are disallowed.
   2997  */
   2998 void TransliteratorTest::TestOutputSet() {
   2999     UnicodeString rule = "$set = [a-cm-n]; b > $set;";
   3000     UErrorCode ec = U_ZERO_ERROR;
   3001     UParseError pe;
   3002     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
   3003     delete t;
   3004     if (U_FAILURE(ec)) {
   3005         UnicodeString err(pe.preContext);
   3006         err.append((UChar)124/*|*/).append(pe.postContext);
   3007         logln("Ok: " + err);
   3008         return;
   3009     }
   3010     errln("FAIL: No syntax error");
   3011 }
   3012 
   3013 /**
   3014  * Test the use variable range pragma, making sure that use of
   3015  * variable range characters is detected and flagged as an error.
   3016  */
   3017 void TransliteratorTest::TestVariableRange() {
   3018     UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
   3019     UErrorCode ec = U_ZERO_ERROR;
   3020     UParseError pe;
   3021     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
   3022     delete t;
   3023     if (U_FAILURE(ec)) {
   3024         UnicodeString err(pe.preContext);
   3025         err.append((UChar)124/*|*/).append(pe.postContext);
   3026         logln("Ok: " + err);
   3027         return;
   3028     }
   3029     errln("FAIL: No syntax error");
   3030 }
   3031 
   3032 /**
   3033  * Test invalid post context error handling
   3034  */
   3035 void TransliteratorTest::TestInvalidPostContext() {
   3036     UnicodeString rule = "a}b{c>d;";
   3037     UErrorCode ec = U_ZERO_ERROR;
   3038     UParseError pe;
   3039     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
   3040     delete t;
   3041     if (U_FAILURE(ec)) {
   3042         UnicodeString err(pe.preContext);
   3043         err.append((UChar)124/*|*/).append(pe.postContext);
   3044         if (err.indexOf("a}b{c") >= 0) {
   3045             logln("Ok: " + err);
   3046         } else {
   3047             errln("FAIL: " + err);
   3048         }
   3049         return;
   3050     }
   3051     errln("FAIL: No syntax error");
   3052 }
   3053 
   3054 /**
   3055  * Test ID form variants
   3056  */
   3057 void TransliteratorTest::TestIDForms() {
   3058     const char* DATA[] = {
   3059         "NFC", NULL, "NFD",
   3060         "nfd", NULL, "NFC", // make sure case is ignored
   3061         "Any-NFKD", NULL, "Any-NFKC",
   3062         "Null", NULL, "Null",
   3063         "-nfkc", "nfkc", "NFKD",
   3064         "-nfkc/", "nfkc", "NFKD",
   3065         "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
   3066         "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
   3067         "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
   3068         "Source-", NULL, NULL,
   3069         "Source/Variant-", NULL, NULL,
   3070         "Source-/Variant", NULL, NULL,
   3071         "/Variant", NULL, NULL,
   3072         "/Variant-", NULL, NULL,
   3073         "-/Variant", NULL, NULL,
   3074         "-/", NULL, NULL,
   3075         "-", NULL, NULL,
   3076         "/", NULL, NULL,
   3077     };
   3078     const int32_t DATA_length = UPRV_LENGTHOF(DATA);
   3079 
   3080     for (int32_t i=0; i<DATA_length; i+=3) {
   3081         const char* ID = DATA[i];
   3082         const char* expID = DATA[i+1];
   3083         const char* expInvID = DATA[i+2];
   3084         UBool expValid = (expInvID != NULL);
   3085         if (expID == NULL) {
   3086             expID = ID;
   3087         }
   3088         UParseError pe;
   3089         UErrorCode ec = U_ZERO_ERROR;
   3090         Transliterator *t =
   3091             Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
   3092         if (U_FAILURE(ec)) {
   3093             if (!expValid) {
   3094                 logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
   3095             } else {
   3096                 dataerrln((UnicodeString)"FAIL: Couldn't create " + ID + " - " + u_errorName(ec));
   3097             }
   3098             delete t;
   3099             continue;
   3100         }
   3101         Transliterator *u = t->createInverse(ec);
   3102         if (U_FAILURE(ec)) {
   3103             errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
   3104             delete t;
   3105             delete u;
   3106             continue;
   3107         }
   3108         if (t->getID() == expID &&
   3109             u->getID() == expInvID) {
   3110             logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
   3111         } else {
   3112             errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
   3113                   t->getID() + " x getInverse() => " + u->getID() +
   3114                   ", expected " + expInvID);
   3115         }
   3116         delete t;
   3117         delete u;
   3118     }
   3119 }
   3120 
   3121 static const UChar SPACE[]   = {32,0};
   3122 static const UChar NEWLINE[] = {10,0};
   3123 static const UChar RETURN[]  = {13,0};
   3124 static const UChar EMPTY[]   = {0};
   3125 
   3126 void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
   3127                                     const UnicodeString& testRulesForward) {
   3128     UnicodeString rules2; t2.toRules(rules2, TRUE);
   3129     //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
   3130     rules2.findAndReplace(SPACE, EMPTY);
   3131     rules2.findAndReplace(NEWLINE, EMPTY);
   3132     rules2.findAndReplace(RETURN, EMPTY);
   3133 
   3134     UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
   3135 
   3136     if (rules2 != testRules) {
   3137         errln(label);
   3138         logln((UnicodeString)"GENERATED RULES: " + rules2);
   3139         logln((UnicodeString)"SHOULD BE:       " + testRulesForward);
   3140     }
   3141 }
   3142 
   3143 /**
   3144  * Mark's toRules test.
   3145  */
   3146 void TransliteratorTest::TestToRulesMark() {
   3147     const char* testRules =
   3148         "::[[:Latin:][:Mark:]];"
   3149         "::NFKD (NFC);"
   3150         "::Lower (Lower);"
   3151         "a <> \\u03B1;" // alpha
   3152         "::NFKC (NFD);"
   3153         "::Upper (Lower);"
   3154         "::Lower ();"
   3155         "::([[:Greek:][:Mark:]]);"
   3156         ;
   3157     const char* testRulesForward =
   3158         "::[[:Latin:][:Mark:]];"
   3159         "::NFKD(NFC);"
   3160         "::Lower(Lower);"
   3161         "a > \\u03B1;"
   3162         "::NFKC(NFD);"
   3163         "::Upper (Lower);"
   3164         "::Lower ();"
   3165         ;
   3166     const char* testRulesBackward =
   3167         "::[[:Greek:][:Mark:]];"
   3168         "::Lower (Upper);"
   3169         "::NFD(NFKC);"
   3170         "\\u03B1 > a;"
   3171         "::Lower(Lower);"
   3172         "::NFC(NFKD);"
   3173         ;
   3174     UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
   3175     UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
   3176 
   3177     UParseError pe;
   3178     UErrorCode ec = U_ZERO_ERROR;
   3179     Transliterator *t2 = Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec);
   3180     Transliterator *t3 = Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec);
   3181 
   3182     if (U_FAILURE(ec)) {
   3183         delete t2;
   3184         delete t3;
   3185         dataerrln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
   3186         return;
   3187     }
   3188 
   3189     expect(*t2, source, target);
   3190     expect(*t3, target, source);
   3191 
   3192     checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV));
   3193     checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV));
   3194 
   3195     delete t2;
   3196     delete t3;
   3197 }
   3198 
   3199 /**
   3200  * Test Escape and Unescape transliterators.
   3201  */
   3202 void TransliteratorTest::TestEscape() {
   3203     UParseError pe;
   3204     UErrorCode ec;
   3205     Transliterator *t;
   3206 
   3207     ec = U_ZERO_ERROR;
   3208     t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
   3209     if (U_FAILURE(ec)) {
   3210         errln((UnicodeString)"FAIL: createInstance");
   3211     } else {
   3212         expect(*t,
   3213                UNICODE_STRING_SIMPLE("\\x{40}\\U00000031&#x32;&#81;"),
   3214                "@12Q");
   3215     }
   3216     delete t;
   3217 
   3218     ec = U_ZERO_ERROR;
   3219     t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
   3220     if (U_FAILURE(ec)) {
   3221         errln((UnicodeString)"FAIL: createInstance");
   3222     } else {
   3223         expect(*t,
   3224                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
   3225                UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
   3226     }
   3227     delete t;
   3228 
   3229     ec = U_ZERO_ERROR;
   3230     t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
   3231     if (U_FAILURE(ec)) {
   3232         errln((UnicodeString)"FAIL: createInstance");
   3233     } else {
   3234         expect(*t,
   3235                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
   3236                UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
   3237     }
   3238     delete t;
   3239 
   3240     ec = U_ZERO_ERROR;
   3241     t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
   3242     if (U_FAILURE(ec)) {
   3243         errln((UnicodeString)"FAIL: createInstance");
   3244     } else {
   3245         expect(*t,
   3246                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
   3247                UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
   3248     }
   3249     delete t;
   3250 }
   3251 
   3252 
   3253 void TransliteratorTest::TestAnchorMasking(){
   3254     UnicodeString rule ("^a > Q; a > q;");
   3255     UErrorCode status= U_ZERO_ERROR;
   3256     UParseError parseError;
   3257 
   3258     Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
   3259     if(U_FAILURE(status)){
   3260         errln(UnicodeString("FAIL: ") + "ID" +
   3261               ".createFromRules() => bad rules" +
   3262               /*", parse error " + parseError.code +*/
   3263               ", line " + parseError.line +
   3264               ", offset " + parseError.offset +
   3265               ", context " + prettify(parseError.preContext, TRUE) +
   3266               ", rules: " + prettify(rule, TRUE));
   3267     }
   3268     delete t;
   3269 }
   3270 
   3271 /**
   3272  * Make sure display names of variants look reasonable.
   3273  */
   3274 void TransliteratorTest::TestDisplayName() {
   3275 #if UCONFIG_NO_FORMATTING
   3276     logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
   3277     return;
   3278 #else
   3279     static const char* DATA[] = {
   3280         // ID, forward name, reverse name
   3281         // Update the text as necessary -- the important thing is
   3282         // not the text itself, but how various cases are handled.
   3283 
   3284         // Basic test
   3285         "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
   3286 
   3287         // Variants
   3288         "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
   3289 
   3290         // Target-only IDs
   3291         "NFC", "Any to NFC", "Any to NFD",
   3292     };
   3293 
   3294     int32_t DATA_length = UPRV_LENGTHOF(DATA);
   3295 
   3296     Locale US("en", "US");
   3297 
   3298     for (int32_t i=0; i<DATA_length; i+=3) {
   3299         UnicodeString name;
   3300         Transliterator::getDisplayName(DATA[i], US, name);
   3301         if (name != DATA[i+1]) {
   3302             dataerrln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
   3303                   name + ", expected " + DATA[i+1]);
   3304         } else {
   3305             logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
   3306         }
   3307         UErrorCode ec = U_ZERO_ERROR;
   3308         UParseError pe;
   3309         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
   3310         if (U_FAILURE(ec)) {
   3311             delete t;
   3312             dataerrln("FAIL: createInstance failed - %s", u_errorName(ec));
   3313             continue;
   3314         }
   3315         name = Transliterator::getDisplayName(t->getID(), US, name);
   3316         if (name != DATA[i+2]) {
   3317             dataerrln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
   3318                   name + ", expected " + DATA[i+2]);
   3319         } else {
   3320             logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
   3321         }
   3322         delete t;
   3323     }
   3324 #endif
   3325 }
   3326 
   3327 void TransliteratorTest::TestSpecialCases(void) {
   3328     const UnicodeString registerRules[] = {
   3329         "Any-Dev1", "x > X; y > Y;",
   3330         "Any-Dev2", "XY > Z",
   3331         "Greek-Latin/FAKE",
   3332             CharsToUnicodeString
   3333             ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
   3334         "" // END MARKER
   3335     };
   3336 
   3337     const UnicodeString testCases[] = {
   3338         // NORMALIZATION
   3339         // should add more test cases
   3340         "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
   3341         "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
   3342         "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
   3343         "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
   3344 
   3345         // mp -> b BUG
   3346         "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
   3347         "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
   3348 
   3349         // check for devanagari bug
   3350         "nfd;Dev1;Dev2;nfc", "xy", "Z",
   3351 
   3352         // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
   3353         "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
   3354                  CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
   3355 
   3356         //TODO: enable this test once Titlecase works right
   3357         /*
   3358         "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
   3359                  CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
   3360                  */
   3361         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
   3362                  CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
   3363         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
   3364                  CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
   3365 
   3366         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
   3367         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
   3368 
   3369          // FORMS OF S
   3370         "Greek-Latin/UNGEGN",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
   3371                                CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
   3372         "Latin-Greek/UNGEGN",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
   3373                                CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
   3374         "Greek-Latin",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
   3375                         CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
   3376         "Latin-Greek",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
   3377                         CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
   3378         // Tatiana bug
   3379         // Upper: TAT\\u02B9\\u00C2NA
   3380         // Lower: tat\\u02B9\\u00E2na
   3381         // Title: Tat\\u02B9\\u00E2na
   3382         "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
   3383                  CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
   3384         "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
   3385                  CharsToUnicodeString("tat\\u02B9\\u00E2na"),
   3386         "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
   3387                  CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
   3388 
   3389         "" // END MARKER
   3390     };
   3391 
   3392     UParseError pos;
   3393     int32_t i;
   3394     for (i = 0; registerRules[i].length()!=0; i+=2) {
   3395         UErrorCode status = U_ZERO_ERROR;
   3396 
   3397         Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
   3398             registerRules[i+1], UTRANS_FORWARD, pos, status);
   3399         if (U_FAILURE(status)) {
   3400             dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status));
   3401         } else {
   3402             Transliterator::registerInstance(t);
   3403         }
   3404     }
   3405     for (i = 0; testCases[i].length()!=0; i+=3) {
   3406         UErrorCode ec = U_ZERO_ERROR;
   3407         UParseError pe;
   3408         const UnicodeString& name = testCases[i];
   3409         Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
   3410         if (U_FAILURE(ec)) {
   3411             dataerrln((UnicodeString)"FAIL: Couldn't create " + name + " - " + u_errorName(ec));
   3412             delete t;
   3413             continue;
   3414         }
   3415         const UnicodeString& id = t->getID();
   3416         const UnicodeString& source = testCases[i+1];
   3417         UnicodeString target;
   3418 
   3419         // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
   3420 
   3421         if (testCases[i+2].length() > 0) {
   3422             target = testCases[i+2];
   3423         } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
   3424             Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
   3425         } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
   3426             Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
   3427         } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
   3428             Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
   3429         } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
   3430             Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
   3431         } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
   3432             target = source;
   3433             target.toLower(Locale::getUS());
   3434         } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
   3435             target = source;
   3436             target.toUpper(Locale::getUS());
   3437         }
   3438         if (U_FAILURE(ec)) {
   3439             errln((UnicodeString)"FAIL: Internal error normalizing " + source);
   3440             continue;
   3441         }
   3442 
   3443         expect(*t, source, target);
   3444         delete t;
   3445     }
   3446     for (i = 0; registerRules[i].length()!=0; i+=2) {
   3447         Transliterator::unregister(registerRules[i]);
   3448     }
   3449 }
   3450 
   3451 char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
   3452     if (ch <= 0xFFFF) {
   3453         sprintf(buffer, "\\u%04x", (int)ch);
   3454     } else {
   3455         sprintf(buffer, "\\U%08x", (int)ch);
   3456     }
   3457     return buffer;
   3458 }
   3459 
   3460 void TransliteratorTest::TestSurrogateCasing (void) {
   3461     // check that casing handles surrogates
   3462     // titlecase is currently defective
   3463     char buffer[20];
   3464     UChar buffer2[20];
   3465     UChar32 dee;
   3466     U16_GET(DESERET_dee,0, 0, DESERET_dee.length(), dee);
   3467     UnicodeString DEE(u_totitle(dee));
   3468     if (DEE != DESERET_DEE) {
   3469         err("Fails titlecase of surrogates");
   3470         err(Char32ToEscapedChars(dee, buffer));
   3471         err(", ");
   3472         errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
   3473     }
   3474 
   3475     UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
   3476     UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
   3477     UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
   3478     UErrorCode status= U_ZERO_ERROR;
   3479 
   3480     u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
   3481     if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
   3482         errln("Fails: Can't uppercase surrogates.");
   3483     }
   3484 
   3485     status= U_ZERO_ERROR;
   3486     u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
   3487     if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
   3488         errln("Fails: Can't lowercase surrogates.");
   3489     }
   3490 }
   3491 
   3492 static void _trans(Transliterator& t, const UnicodeString& src,
   3493                    UnicodeString& result) {
   3494     result = src;
   3495     t.transliterate(result);
   3496 }
   3497 
   3498 static void _trans(const UnicodeString& id, const UnicodeString& src,
   3499                    UnicodeString& result, UErrorCode ec) {
   3500     UParseError pe;
   3501     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
   3502     if (U_SUCCESS(ec)) {
   3503         _trans(*t, src, result);
   3504     }
   3505     delete t;
   3506 }
   3507 
   3508 static UnicodeString _findMatch(const UnicodeString& source,
   3509                                        const UnicodeString* pairs) {
   3510     UnicodeString empty;
   3511     for (int32_t i=0; pairs[i].length() > 0; i+=2) {
   3512         if (0==source.caseCompare(pairs[i], U_FOLD_CASE_DEFAULT)) {
   3513             return pairs[i+1];
   3514         }
   3515     }
   3516     return empty;
   3517 }
   3518 
   3519 // Check to see that incremental gets at least part way through a reasonable string.
   3520 
   3521 void TransliteratorTest::TestIncrementalProgress(void) {
   3522     UErrorCode ec = U_ZERO_ERROR;
   3523     UnicodeString latinTest = "The Quick Brown Fox.";
   3524     UnicodeString devaTest;
   3525     _trans("Latin-Devanagari", latinTest, devaTest, ec);
   3526     UnicodeString kataTest;
   3527     _trans("Latin-Katakana", latinTest, kataTest, ec);
   3528     if (U_FAILURE(ec)) {
   3529         errln("FAIL: Internal error");
   3530         return;
   3531     }
   3532     const UnicodeString tests[] = {
   3533         "Any", latinTest,
   3534         "Latin", latinTest,
   3535         "Halfwidth", latinTest,
   3536         "Devanagari", devaTest,
   3537         "Katakana", kataTest,
   3538         "" // END MARKER
   3539     };
   3540 
   3541     UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
   3542     int32_t i = 0, j=0, k=0;
   3543     int32_t sources = Transliterator::countAvailableSources();
   3544     for (i = 0; i < sources; i++) {
   3545         UnicodeString source;
   3546         Transliterator::getAvailableSource(i, source);
   3547         UnicodeString test = _findMatch(source, tests);
   3548         if (test.length() == 0) {
   3549             logln((UnicodeString)"Skipping " + source + "-X");
   3550             continue;
   3551         }
   3552         int32_t targets = Transliterator::countAvailableTargets(source);
   3553         for (j = 0; j < targets; j++) {
   3554             UnicodeString target;
   3555             Transliterator::getAvailableTarget(j, source, target);
   3556             int32_t variants = Transliterator::countAvailableVariants(source, target);
   3557             for (k =0; k< variants; k++) {
   3558                 UnicodeString variant;
   3559                 UParseError err;
   3560                 UErrorCode status = U_ZERO_ERROR;
   3561 
   3562                 Transliterator::getAvailableVariant(k, source, target, variant);
   3563                 UnicodeString id = source + "-" + target + "/" + variant;
   3564 
   3565                 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
   3566                 if (U_FAILURE(status)) {
   3567                     dataerrln((UnicodeString)"FAIL: Could not create " + id);
   3568                     delete t;
   3569                     continue;
   3570                 }
   3571                 status = U_ZERO_ERROR;
   3572                 CheckIncrementalAux(t, test);
   3573 
   3574                 UnicodeString rev;
   3575                 _trans(*t, test, rev);
   3576                 Transliterator *inv = t->createInverse(status);
   3577                 if (U_FAILURE(status)) {
   3578                     // The following are forward-only, it is OK that creating an inverse will not work:
   3579                     // 1. Devanagari-Arabic
   3580                     // 2. Any-*/BGN
   3581                     // 2a. Any-*/BGN_1981
   3582                     // 3. Any-*/UNGEGN
   3583                     // 4. Any-*/MNS
   3584                     // If UCONFIG_NO_BREAK_ITERATION is on, Latin-Thai is also not expected to work.
   3585                     if (    id.compare((UnicodeString)"Devanagari-Arabic/") != 0
   3586                          && !(id.startsWith((UnicodeString)"Any-") &&
   3587                                 (id.endsWith((UnicodeString)"/BGN") || id.endsWith((UnicodeString)"/BGN_1981") || id.endsWith((UnicodeString)"/UNGEGN") || id.endsWith((UnicodeString)"/MNS"))
   3588                              )
   3589 #if UCONFIG_NO_BREAK_ITERATION
   3590                          && id.compare((UnicodeString)"Latin-Thai/") != 0
   3591 #endif
   3592                        )
   3593                     {
   3594                         errln((UnicodeString)"FAIL: Could not create inverse of " + id);
   3595                     }
   3596                     delete t;
   3597                     delete inv;
   3598                     continue;
   3599                 }
   3600                 CheckIncrementalAux(inv, rev);
   3601                 delete t;
   3602                 delete inv;
   3603             }
   3604         }
   3605     }
   3606 }
   3607 
   3608 void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
   3609                                                       const UnicodeString& input) {
   3610     UErrorCode ec = U_ZERO_ERROR;
   3611     UTransPosition pos;
   3612     UnicodeString test = input;
   3613 
   3614     pos.contextStart = 0;
   3615     pos.contextLimit = input.length();
   3616     pos.start = 0;
   3617     pos.limit = input.length();
   3618 
   3619     t->transliterate(test, pos, ec);
   3620     if (U_FAILURE(ec)) {
   3621         errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
   3622         return;
   3623     }
   3624     UBool gotError = FALSE;
   3625     (void)gotError;    // Suppress set but not used warning.
   3626 
   3627     // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
   3628 
   3629     if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
   3630         errln((UnicodeString)"No Progress, " +
   3631               t->getID() + ": " + formatInput(test, input, pos));
   3632         gotError = TRUE;
   3633     } else {
   3634         logln((UnicodeString)"PASS Progress, " +
   3635               t->getID() + ": " + formatInput(test, input, pos));
   3636     }
   3637     t->finishTransliteration(test, pos);
   3638     if (pos.start != pos.limit) {
   3639         errln((UnicodeString)"Incomplete, " +
   3640               t->getID() + ": " + formatInput(test, input, pos));
   3641         gotError = TRUE;
   3642     }
   3643 }
   3644 
   3645 void TransliteratorTest::TestFunction() {
   3646     // Careful with spacing and ';' here:  Phrase this exactly
   3647     // as toRules() is going to return it.  If toRules() changes
   3648     // with regard to spacing or ';', then adjust this string.
   3649     UnicodeString rule =
   3650         "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
   3651 
   3652     UParseError pe;
   3653     UErrorCode ec = U_ZERO_ERROR;
   3654     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
   3655     if (t == NULL) {
   3656         dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec));
   3657         return;
   3658     }
   3659 
   3660     UnicodeString r;
   3661     t->toRules(r, TRUE);
   3662     if (r == rule) {
   3663         logln((UnicodeString)"OK: toRules() => " + r);
   3664     } else {
   3665         errln((UnicodeString)"FAIL: toRules() => " + r +
   3666               ", expected " + rule);
   3667     }
   3668 
   3669     expect(*t, "The Quick Brown Fox",
   3670            UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
   3671 
   3672     delete t;
   3673 }
   3674 
   3675 void TransliteratorTest::TestInvalidBackRef(void) {
   3676     UnicodeString rule =  ". > $1;";
   3677     UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
   3678     UParseError pe;
   3679     UErrorCode ec = U_ZERO_ERROR;
   3680     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
   3681     Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
   3682 
   3683     if (t != NULL) {
   3684         errln("FAIL: createFromRules should have returned NULL");
   3685         delete t;
   3686     }
   3687 
   3688     if (t2 != NULL) {
   3689         errln("FAIL: createFromRules should have returned NULL");
   3690         delete t2;
   3691     }
   3692 
   3693     if (U_SUCCESS(ec)) {
   3694         errln("FAIL: Ok: . > $1; => no error");
   3695     } else {
   3696         logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
   3697     }
   3698 }
   3699 
   3700 void TransliteratorTest::TestMulticharStringSet() {
   3701     // Basic testing
   3702     const char* rule =
   3703         "       [{aa}]       > x;"
   3704         "         a          > y;"
   3705         "       [b{bc}]      > z;"
   3706         "[{gd}] { e          > q;"
   3707         "         e } [{fg}] > r;" ;
   3708 
   3709     UParseError pe;
   3710     UErrorCode ec = U_ZERO_ERROR;
   3711     Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
   3712     if (t == NULL || U_FAILURE(ec)) {
   3713         delete t;
   3714         errln("FAIL: createFromRules failed");
   3715         return;
   3716     }
   3717 
   3718     expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
   3719            "y x yz z d gd de gdq gdqfg ddrfg");
   3720     delete t;
   3721 
   3722     // Overlapped string test.  Make sure that when multiple
   3723     // strings can match that the longest one is matched.
   3724     rule =
   3725         "    [a {ab} {abc}]    > x;"
   3726         "           b          > y;"
   3727         "           c          > z;"
   3728         " q [t {st} {rst}] { e > p;" ;
   3729 
   3730     t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
   3731     if (t == NULL || U_FAILURE(ec)) {
   3732         delete t;
   3733         errln("FAIL: createFromRules failed");
   3734         return;
   3735     }
   3736 
   3737     expect(*t, "a ab abc qte qste qrste",
   3738            "x x x qtp qstp qrstp");
   3739     delete t;
   3740 }
   3741 
   3742 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
   3743 // BEGIN TestUserFunction support factory
   3744 
   // Slot tables shared by the _TUF* helpers below.  _TUFF[n] holds the
   // prototype that _TUFFactory clones for integer token n; _TUFID[n] holds
   // the ID under which that prototype was registered by _TUFReg.
   3745 Transliterator* _TUFF[4];
   3746 UnicodeString* _TUFID[4];
   3747 
   3748 static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/,
   3749                                    Transliterator::Token context) {
   3750     return _TUFF[context.integer]->clone();
   3751 }
   3752 
   3753 static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
   3754     _TUFF[n] = t;
   3755     _TUFID[n] = new UnicodeString(ID);
   3756     Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
   3757 }
   3758 
   3759 static void _TUFUnreg(int32_t n) {
   3760     if (_TUFF[n] != NULL) {
   3761         Transliterator::unregister(*_TUFID[n]);
   3762         delete _TUFF[n];
   3763         delete _TUFID[n];
   3764     }
   3765 }
   3766 
   3767 // END TestUserFunction support factory
   3768 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
   3769 
   3770 /**
   3771  * Test that user-registered transliterators can be used under function
   3772  * syntax.
   3773  */
   3774 void TransliteratorTest::TestUserFunction() {
   3775 
   3776     Transliterator* t;
   3777     UParseError pe;
   3778     UErrorCode ec = U_ZERO_ERROR;
   3779 
   3780     // Setup our factory
   3781     int32_t i;
   3782     for (i=0; i<4; ++i) {
   3783         _TUFF[i] = NULL;
   3784     }
   3785 
   3786     // There's no need to register inverses if we don't use them
   3787     t = Transliterator::createFromRules("gif",
   3788                                         UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
   3789                                         UTRANS_FORWARD, pe, ec);
   3790     if (t == NULL || U_FAILURE(ec)) {
   3791         dataerrln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
   3792         return;
   3793     }
   3794     _TUFReg("Any-gif", t, 0);
   3795 
   3796     t = Transliterator::createFromRules("RemoveCurly",
   3797                                         UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
   3798                                         UTRANS_FORWARD, pe, ec);
   3799     if (t == NULL || U_FAILURE(ec)) {
   3800         errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
   3801         goto FAIL;
   3802     }
   3803     expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
   3804     _TUFReg("Any-RemoveCurly", t, 1);
   3805 
   3806     logln("Trying &hex");
   3807     t = Transliterator::createFromRules("hex2",
   3808                                         "(.) > &hex($1);",
   3809                                         UTRANS_FORWARD, pe, ec);
   3810     if (t == NULL || U_FAILURE(ec)) {
   3811         errln("FAIL: createFromRules");
   3812         goto FAIL;
   3813     }
   3814     logln("Registering");
   3815     _TUFReg("Any-hex2", t, 2);
   3816     t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
   3817     if (t == NULL || U_FAILURE(ec)) {
   3818         errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
   3819         goto FAIL;
   3820     }
   3821     expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
   3822     delete t;
   3823 
   3824     logln("Trying &gif");
   3825     t = Transliterator::createFromRules("gif2",
   3826                                         "(.) > &Gif(&Hex2($1));",
   3827                                         UTRANS_FORWARD, pe, ec);
   3828     if (t == NULL || U_FAILURE(ec)) {
   3829         errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
   3830         goto FAIL;
   3831     }
   3832     logln("Registering");
   3833     _TUFReg("Any-gif2", t, 3);
   3834     t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
   3835     if (t == NULL || U_FAILURE(ec)) {
   3836         errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
   3837         goto FAIL;
   3838     }
   3839     expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
   3840            "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
   3841     delete t;
   3842 
   3843     // Test that filters are allowed after &
   3844     t = Transliterator::createFromRules("test",
   3845                                         "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
   3846                                         UTRANS_FORWARD, pe, ec);
   3847     if (t == NULL || U_FAILURE(ec)) {
   3848         errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
   3849         goto FAIL;
   3850     }
   3851     expect(*t, "abc",
   3852            UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
   3853     delete t;
   3854 
   3855  FAIL:
   3856     for (i=0; i<4; ++i) {
   3857         _TUFUnreg(i);
   3858     }
   3859 }
   3860 
   3861 /**
   3862  * Test the Any-X transliterators.
   3863  */
   3864 void TransliteratorTest::TestAnyX(void) {
   3865     UParseError parseError;
   3866     UErrorCode status = U_ZERO_ERROR;
   3867     Transliterator* anyLatin =
   3868         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
   3869     if (anyLatin==0) {
   3870         dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
   3871         delete anyLatin;
   3872         return;
   3873     }
   3874 
   3875     expect(*anyLatin,
   3876            CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
   3877            CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
   3878 
   3879     delete anyLatin;
   3880 }
   3881 
   3882 /**
   3883  * Test Any-X transliterators with sample letters from all scripts.
   3884  */
   3885 void TransliteratorTest::TestAny(void) {
   3886     UErrorCode status = U_ZERO_ERROR;
   3887     // Note: there is a lot of implict construction of UnicodeStrings from (char *) in
   3888     //       function call parameters going on in this test.
   3889     UnicodeSet alphabetic("[:alphabetic:]", status);
   3890     if (U_FAILURE(status)) {
   3891         dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
   3892         return;
   3893     }
   3894     alphabetic.freeze();
   3895 
   3896     UnicodeString testString;
   3897     for (int32_t i = 0; i < USCRIPT_CODE_LIMIT; i++) {
   3898         const char *scriptName = uscript_getShortName((UScriptCode)i);
   3899         if (scriptName == NULL) {
   3900             errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__, __LINE__, i);
   3901             return;
   3902         }
   3903 
   3904         UnicodeSet sample;
   3905         sample.applyPropertyAlias("script", scriptName, status);
   3906         if (U_FAILURE(status)) {
   3907             errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
   3908             return;
   3909         }
   3910         sample.retainAll(alphabetic);
   3911         for (int32_t count=0; count<5; count++) {
   3912             UChar32 c = sample.charAt(count);
   3913             if (c == -1) {
   3914                 break;
   3915             }
   3916             testString.append(c);
   3917         }
   3918     }
   3919 
   3920     UParseError parseError;
   3921     Transliterator* anyLatin =
   3922         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
   3923     if (U_FAILURE(status)) {
   3924         dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
   3925         return;
   3926     }
   3927 
   3928     logln(UnicodeString("Sample set for Any-Latin: ") + testString);
   3929     anyLatin->transliterate(testString);
   3930     logln(UnicodeString("Sample result for Any-Latin: ") + testString);
   3931     delete anyLatin;
   3932 }
   3933 
   3934 
   3935 /**
   3936  * Test the source and target set API.  These are only implemented
   3937  * for RBT and CompoundTransliterator at this time.
   3938  */
   3939 void TransliteratorTest::TestSourceTargetSet() {
   3940     UErrorCode ec = U_ZERO_ERROR;
   3941 
   3942     // Rules
   3943     const char* r =
   3944         "a > b; "
   3945         "r [x{lu}] > q;";
   3946 
   3947     // Expected source
   3948     UnicodeSet expSrc("[arx{lu}]", ec);
   3949 
   3950     // Expected target
   3951     UnicodeSet expTrg("[bq]", ec);
   3952 
   3953     UParseError pe;
   3954     Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
   3955 
   3956     if (U_FAILURE(ec)) {
   3957         delete t;
   3958         errln("FAIL: Couldn't set up test");
   3959         return;
   3960     }
   3961 
   3962     UnicodeSet src; t->getSourceSet(src);
   3963     UnicodeSet trg; t->getTargetSet(trg);
   3964 
   3965     if (src == expSrc && trg == expTrg) {
   3966         UnicodeString a, b;
   3967         logln((UnicodeString)"Ok: " +
   3968               r + " => source = " + src.toPattern(a, TRUE) +
   3969               ", target = " + trg.toPattern(b, TRUE));
   3970     } else {
   3971         UnicodeString a, b, c, d;
   3972         errln((UnicodeString)"FAIL: " +
   3973               r + " => source = " + src.toPattern(a, TRUE) +
   3974               ", expected " + expSrc.toPattern(b, TRUE) +
   3975               "; target = " + trg.toPattern(c, TRUE) +
   3976               ", expected " + expTrg.toPattern(d, TRUE));
   3977     }
   3978 
   3979     delete t;
   3980 }
   3981 
   3982 /**
   3983  * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
   3984  */
   3985 void TransliteratorTest::TestPatternWhiteSpace() {
   3986     // Rules
   3987     const char* r = "a > \\u200E b;";
   3988 
   3989     UErrorCode ec = U_ZERO_ERROR;
   3990     UParseError pe;
   3991     Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
   3992 
   3993     if (U_FAILURE(ec)) {
   3994         errln("FAIL: Couldn't set up test");
   3995     } else {
   3996         expect(*t, "a", "b");
   3997     }
   3998     delete t;
   3999 
   4000     // UnicodeSet
   4001     ec = U_ZERO_ERROR;
   4002     UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
   4003 
   4004     if (U_FAILURE(ec)) {
   4005         errln("FAIL: Couldn't set up test");
   4006     } else {
   4007         if (set.contains(0x200E)) {
   4008             errln("FAIL: U+200E not being ignored by UnicodeSet");
   4009         }
   4010     }
   4011 }
   4012 //======================================================================
   4013 // this method is in TestUScript.java
   4014 //======================================================================
   4015 void TransliteratorTest::TestAllCodepoints(){
   4016     UScriptCode code= USCRIPT_INVALID_CODE;
   4017     char id[256]={'\0'};
   4018     char abbr[256]={'\0'};
   4019     char newId[256]={'\0'};
   4020     char newAbbrId[256]={'\0'};
   4021     char oldId[256]={'\0'};
   4022     char oldAbbrId[256]={'\0'};
   4023 
   4024     UErrorCode status =U_ZERO_ERROR;
   4025     UParseError pe;
   4026 
   4027     for(uint32_t i = 0; i<=0x10ffff; i++){
   4028         code =  uscript_getScript(i,&status);
   4029         if(code == USCRIPT_INVALID_CODE){
   4030             dataerrln("uscript_getScript for codepoint \\U%08X failed.", i);
   4031         }
   4032         const char* myId = uscript_getName(code);
   4033         if(!myId) {
   4034           dataerrln("Valid script code returned NULL name. Check your data!");
   4035           return;
   4036         }
   4037         uprv_strcpy(id,myId);
   4038         uprv_strcpy(abbr,uscript_getShortName(code));
   4039 
   4040         uprv_strcpy(newId,"[:");
   4041         uprv_strcat(newId,id);
   4042         uprv_strcat(newId,":];NFD");
   4043 
   4044         uprv_strcpy(newAbbrId,"[:");
   4045         uprv_strcat(newAbbrId,abbr);
   4046         uprv_strcat(newAbbrId,":];NFD");
   4047 
   4048         if(uprv_strcmp(newId,oldId)!=0){
   4049             Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
   4050             if(t==NULL || U_FAILURE(status)){
   4051                 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
   4052             }
   4053             delete t;
   4054         }
   4055         if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
   4056             Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
   4057             if(t==NULL || U_FAILURE(status)){
   4058                 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
   4059             }
   4060             delete t;
   4061         }
   4062         uprv_strcpy(oldId,newId);
   4063         uprv_strcpy(oldAbbrId, newAbbrId);
   4064 
   4065     }
   4066 
   4067 }
   4068 
   // TEST_TRANSLIT_ID(id, cls): creates the transliterator for `id` in the
   // forward direction and reports an error unless its dynamic class ID equals
   // cls::getStaticClassID().  A creation failure is reported via dataerrln().
   // `id` is also passed to a "%s" format, so it must be a C string.  The
   // instance is deleted at the end of the block.
   4069 #define TEST_TRANSLIT_ID(id, cls) { \
   4070   UErrorCode ec = U_ZERO_ERROR; \
   4071   Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); \
   4072   if (U_FAILURE(ec)) { \
   4073     dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(ec)); \
   4074   } else { \
   4075     if (t->getDynamicClassID() != cls::getStaticClassID()) { \
   4076       errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
   4077     } \
   4078     /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
   4079   } \
   4080   delete t; \
   4081 }
   4082 
   // TEST_TRANSLIT_RULE(rule, cls): builds a forward transliterator from `rule`
   // with createFromRules() and reports an error unless its dynamic class ID
   // equals cls::getStaticClassID().  `rule` is pasted next to a string literal
   // in the failure message, so it must itself be a string literal.  The
   // instance is deleted at the end of the block.
   4083 #define TEST_TRANSLIT_RULE(rule, cls) { \
   4084   UErrorCode ec = U_ZERO_ERROR; \
   4085   UParseError pe; \
   4086   Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \
   4087   if (U_FAILURE(ec)) { \
   4088     errln("FAIL: Couldn't create " rule); \
   4089   } else { \
   4090     if (t->getDynamicClassID() != cls ::getStaticClassID()) { \
   4091       errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
   4092     } \
   4093     /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
   4094   } \
   4095   delete t; \
   4096 }
   4097 
   // Checks, for one ID (or rule) per transliterator class, that the object
   // created for it reports the class ID of the named class.  All the work is
   // done by the TEST_TRANSLIT_ID / TEST_TRANSLIT_RULE macros defined above.
   4098 void TransliteratorTest::TestBoilerplate() {
   4099     TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
   4100     TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
   4101     TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
   4102     TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
   4103     TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
   4104     TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
   4105     TEST_TRANSLIT_ID("Null", NullTransliterator);
   4106     TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
   4107     TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
   4108     TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
   4109     TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
   4110     TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
   4111     TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
   4112 }
   4113 
   4114 void TransliteratorTest::TestAlternateSyntax() {
   4115     // U+2206 == &
   4116     // U+2190 == <
   4117     // U+2192 == >
   4118     // U+2194 == <>
   4119     expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
   4120            "abc",
   4121            "xbz");
   4122     expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
   4123            CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
   4124            UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
   4125 }
   4126 
   // Rule sets used by TestBeginEnd() and TestBeginEndToRules().  Entries are
   // referenced by index from BEGIN_END_TEST_CASES, and entry [17] is also
   // instantiated in the reverse direction.  Entries whose ::BEGIN/::END form
   // is commented out are kept as empty strings so that the indexes of the
   // remaining entries do not change.
   4127 static const char* BEGIN_END_RULES[] = {
   4128     // [0]
   4129     "abc > xy;"
   4130     "aba > z;",
   4131 
   4132     // [1]
   4133 /*
   4134     "::BEGIN;"
   4135     "abc > xy;"
   4136     "::END;"
   4137     "::BEGIN;"
   4138     "aba > z;"
   4139     "::END;",
   4140 */
   4141     "", // test case commented out below, this is here to keep from messing up the indexes
   4142 
   4143     // [2]
   4144 /*
   4145     "abc > xy;"
   4146     "::BEGIN;"
   4147     "aba > z;"
   4148     "::END;",
   4149 */
   4150     "", // test case commented out below, this is here to keep from messing up the indexes
   4151 
   4152     // [3]
   4153 /*
   4154     "::BEGIN;"
   4155     "abc > xy;"
   4156     "::END;"
   4157     "aba > z;",
   4158 */
   4159     "", // test case commented out below, this is here to keep from messing up the indexes
   4160 
   4161     // [4]
   4162     "abc > xy;"
   4163     "::Null;"
   4164     "aba > z;",
   4165 
   4166     // [5]
   4167     "::Upper;"
   4168     "ABC > xy;"
   4169     "AB > x;"
   4170     "C > z;"
   4171     "::Upper;"
   4172     "XYZ > p;"
   4173     "XY > q;"
   4174     "Z > r;"
   4175     "::Upper;",
   4176 
   4177     // [6]
   4178     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
   4179     "$delim = [\\-$ws];"
   4180     "$ws $delim* > ' ';"
   4181     "'-' $delim* > '-';",
   4182 
   4183     // [7]
   4184     "::Null;"
   4185     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
   4186     "$delim = [\\-$ws];"
   4187     "$ws $delim* > ' ';"
   4188     "'-' $delim* > '-';",
   4189 
   4190     // [8]
   4191     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
   4192     "$delim = [\\-$ws];"
   4193     "$ws $delim* > ' ';"
   4194     "'-' $delim* > '-';"
   4195     "::Null;",
   4196 
   4197     // [9]
   4198     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
   4199     "$delim = [\\-$ws];"
   4200     "::Null;"
   4201     "$ws $delim* > ' ';"
   4202     "'-' $delim* > '-';",
   4203 
   4204     // [10]
   4205 /*
   4206     "::BEGIN;"
   4207     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
   4208     "$delim = [\\-$ws];"
   4209     "::END;"
   4210     "$ws $delim* > ' ';"
   4211     "'-' $delim* > '-';",
   4212 */
   4213     "", // test case commented out below, this is here to keep from messing up the indexes
   4214 
   4215     // [11]
   4216 /*
   4217     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
   4218     "$delim = [\\-$ws];"
   4219     "::BEGIN;"
   4220     "$ws $delim* > ' ';"
   4221     "'-' $delim* > '-';"
   4222     "::END;",
   4223 */
   4224     "", // test case commented out below, this is here to keep from messing up the indexes
   4225 
   4226     // [12]
   4227 /*
   4228     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
   4229     "$delim = [\\-$ws];"
   4230     "$ab = [ab];"
   4231     "::BEGIN;"
   4232     "$ws $delim* > ' ';"
   4233     "'-' $delim* > '-';"
   4234     "::END;"
   4235     "::BEGIN;"
   4236     "$ab { ' ' } $ab > '-';"
   4237     "c { ' ' > ;"
   4238     "::END;"
   4239     "::BEGIN;"
   4240     "'a-a' > a\\%|a;"
   4241     "::END;",
   4242 */
   4243     "", // test case commented out below, this is here to keep from messing up the indexes
   4244 
   4245     // [13]
   4246     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
   4247     "$delim = [\\-$ws];"
   4248     "$ab = [ab];"
   4249     "::Null;"
   4250     "$ws $delim* > ' ';"
   4251     "'-' $delim* > '-';"
   4252     "::Null;"
   4253     "$ab { ' ' } $ab > '-';"
   4254     "c { ' ' > ;"
   4255     "::Null;"
   4256     "'a-a' > a\\%|a;",
   4257 
   4258     // [14]
   4259 /*
   4260     "::[abc];"
   4261     "::BEGIN;"
   4262     "abc > xy;"
   4263     "::END;"
   4264     "::BEGIN;"
   4265     "aba > yz;"
   4266     "::END;"
   4267     "::Upper;",
   4268 */
   4269     "", // test case commented out below, this is here to keep from messing up the indexes
   4270 
   4271     // [15]
   4272     "::[abc];"
   4273     "abc > xy;"
   4274     "::Null;"
   4275     "aba > yz;"
   4276     "::Upper;",
   4277 
   4278     // [16]
   4279 /*
   4280     "::[abc];"
   4281     "::BEGIN;"
   4282     "abc <> xy;"
   4283     "::END;"
   4284     "::BEGIN;"
   4285     "aba <> yz;"
   4286     "::END;"
   4287     "::Upper(Lower);"
   4288     "::([XYZ]);"
   4289 */
   4290     "", // test case commented out below, this is here to keep from messing up the indexes
   4291 
   4292     // [17]
   4293     "::[abc];"
   4294     "abc <> xy;"
   4295     "::Null;"
   4296     "aba <> yz;"
   4297     "::Upper(Lower);"
   4298     "::([XYZ]);"
   4299 };
   4300 
   4301 /*
   4302 (This entire test is commented out below and will need some heavy revision when we re-add
   4303 the ::BEGIN/::END stuff)
   4304 static const char* BOGUS_BEGIN_END_RULES[] = {
   4305     // [7]
   4306     "::BEGIN;"
   4307     "abc > xy;"
   4308     "::BEGIN;"
   4309     "aba > z;"
   4310     "::END;"
   4311     "::END;",
   4312 
   4313     // [8]
   4314     "abc > xy;"
   4315     " aba > z;"
   4316     "::END;",
   4317 
   4318     // [9]
   4319     "::BEGIN;"
   4320     "::Upper;"
   4321     "::END;"
   4322 };
   4323 static const int32_t BOGUS_BEGIN_END_RULES_length = UPRV_LENGTHOF(BOGUS_BEGIN_END_RULES);
   4324 */
   4325 
   // Flat table of test cases consumed three entries at a time by
   // TestBeginEnd() and TestBeginEndToRules(): rule set, input text, expected
   // output.  Rows for the disabled ::BEGIN/::END rule sets are commented out.
   // BEGIN_END_TEST_CASES_length is the number of array entries, not the
   // number of test cases.
   4326 static const char* BEGIN_END_TEST_CASES[] = {
   4327     // rules             input                   expected output
   4328     BEGIN_END_RULES[0],  "abc ababc aba",        "xy zbc z",
   4329 //    BEGIN_END_RULES[1],  "abc ababc aba",        "xy abxy z",
   4330 //    BEGIN_END_RULES[2],  "abc ababc aba",        "xy abxy z",
   4331 //    BEGIN_END_RULES[3],  "abc ababc aba",        "xy abxy z",
   4332     BEGIN_END_RULES[4],  "abc ababc aba",        "xy abxy z",
   4333     BEGIN_END_RULES[5],  "abccabaacababcbc",     "PXAARXQBR",
   4334 
   4335     BEGIN_END_RULES[6],  "e   e - e---e-  e",    "e e e-e-e",
   4336     BEGIN_END_RULES[7],  "e   e - e---e-  e",    "e e e-e-e",
   4337     BEGIN_END_RULES[8],  "e   e - e---e-  e",    "e e e-e-e",
   4338     BEGIN_END_RULES[9],  "e   e - e---e-  e",    "e e e-e-e",
   4339 //    BEGIN_END_RULES[10],  "e   e - e---e-  e",    "e e e-e-e",
   4340 //    BEGIN_END_RULES[11], "e   e - e---e-  e",    "e e e-e-e",
   4341 //    BEGIN_END_RULES[12], "e   e - e---e-  e",    "e e e-e-e",
   4342 //    BEGIN_END_RULES[12], "a    a    a    a",     "a%a%a%a",
   4343 //    BEGIN_END_RULES[12], "a a-b c b a",          "a%a-b cb-a",
   4344     BEGIN_END_RULES[13], "e   e - e---e-  e",    "e e e-e-e",
   4345     BEGIN_END_RULES[13], "a    a    a    a",     "a%a%a%a",
   4346     BEGIN_END_RULES[13], "a a-b c b a",          "a%a-b cb-a",
   4347 
   4348 //    BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
   4349     BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
   4350 //    BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
   4351     BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
   4352 };
   4353 static const int32_t BEGIN_END_TEST_CASES_length = UPRV_LENGTHOF(BEGIN_END_TEST_CASES);
   4354 
   4355 void TransliteratorTest::TestBeginEnd() {
   4356     // run through the list of test cases above
   4357     int32_t i = 0;
   4358     for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
   4359         expect((UnicodeString)"Test case #" + (i / 3),
   4360                UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
   4361                UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
   4362                UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
   4363     }
   4364 
   4365     // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
   4366     UParseError parseError;
   4367     UErrorCode status = U_ZERO_ERROR;
   4368     Transliterator* reversed  = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
   4369             UTRANS_REVERSE, parseError, status);
   4370     if (reversed == 0 || U_FAILURE(status)) {
   4371         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
   4372     } else {
   4373         expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
   4374     }
   4375     delete reversed;
   4376 
   4377     // finally, run through the list of syntactically-ill-formed rule sets above and make sure
   4378     // that all of them cause errors
   4379 /*
   4380 (commented out until we have the real ::BEGIN/::END stuff in place
   4381     for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
   4382         UParseError parseError;
   4383         UErrorCode status = U_ZERO_ERROR;
   4384         Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
   4385                 UTRANS_FORWARD, parseError, status);
   4386         if (!U_FAILURE(status)) {
   4387             delete t;
   4388             errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
   4389         }
   4390     }
   4391 */
   4392 }
   4393 
   4394 void TransliteratorTest::TestBeginEndToRules() {
   4395     // run through the same list of test cases we used above, but this time, instead of just
   4396     // instantiating a Transliterator from the rules and running the test against it, we instantiate
   4397     // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
   4398     // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
   4399     // to (i.e., does the same thing as) the original rule set
   4400     for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
   4401         UParseError parseError;
   4402         UErrorCode status = U_ZERO_ERROR;
   4403         Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
   4404                 UTRANS_FORWARD, parseError, status);
   4405         if (U_FAILURE(status)) {
   4406             reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
   4407         } else {
   4408             UnicodeString rules;
   4409             t->toRules(rules, TRUE);
   4410             Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
   4411                     UTRANS_FORWARD, parseError, status);
   4412             if (U_FAILURE(status)) {
   4413                 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
   4414                         parseError, status);
   4415                 delete t;
   4416             } else {
   4417                 expect(*t2,
   4418                        UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
   4419                        UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
   4420                 delete t;
   4421                 delete t2;
   4422             }
   4423         }
   4424     }
   4425 
   4426     // do the same thing for the reversible test case
   4427     UParseError parseError;
   4428     UErrorCode status = U_ZERO_ERROR;
   4429     Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
   4430             UTRANS_REVERSE, parseError, status);
   4431     if (U_FAILURE(status)) {
   4432         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
   4433     } else {
   4434         UnicodeString rules;
   4435         reversed->toRules(rules, FALSE);
   4436         Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
   4437                 parseError, status);
   4438         if (U_FAILURE(status)) {
   4439             reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
   4440                     parseError, status);
   4441             delete reversed;
   4442         } else {
   4443             expect(*reversed2,
   4444                    UnicodeString("xy XY XYZ yz YZ"),
   4445                    UnicodeString("xy abc xaba yz aba"));
   4446             delete reversed;
   4447             delete reversed2;
   4448         }
   4449     }
   4450 }
   4451 
   4452 void TransliteratorTest::TestRegisterAlias() {
   4453     UnicodeString longID("Lower;[aeiou]Upper");
   4454     UnicodeString shortID("Any-CapVowels");
   4455     UnicodeString reallyShortID("CapVowels");
   4456 
   4457     Transliterator::registerAlias(shortID, longID);
   4458 
   4459     UErrorCode err = U_ZERO_ERROR;
   4460     Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
   4461     if (U_FAILURE(err)) {
   4462         errln("Failed to instantiate transliterator with long ID");
   4463         Transliterator::unregister(shortID);
   4464         return;
   4465     }
   4466     Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
   4467     if (U_FAILURE(err)) {
   4468         errln("Failed to instantiate transliterator with short ID");
   4469         delete t1;
   4470         Transliterator::unregister(shortID);
   4471         return;
   4472     }
   4473 
   4474     if (t1->getID() != longID)
   4475         errln("Transliterator instantiated with long ID doesn't have long ID");
   4476     if (t2->getID() != reallyShortID)
   4477         errln("Transliterator instantiated with short ID doesn't have short ID");
   4478 
   4479     UnicodeString rules1;
   4480     UnicodeString rules2;
   4481 
   4482     t1->toRules(rules1, TRUE);
   4483     t2->toRules(rules2, TRUE);
   4484     if (rules1 != rules2)
   4485         errln("Alias transliterators aren't the same");
   4486 
   4487     delete t1;
   4488     delete t2;
   4489     Transliterator::unregister(shortID);
   4490 
   4491     t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
   4492     if (U_SUCCESS(err)) {
   4493         errln("Instantiation with short ID succeeded after short ID was unregistered");
   4494         delete t1;
   4495     }
   4496 
   4497     // try the same thing again, but this time with something other than
   4498     // an instance of CompoundTransliterator
   4499     UnicodeString realID("Latin-Greek");
   4500     UnicodeString fakeID("Latin-dlgkjdflkjdl");
   4501     Transliterator::registerAlias(fakeID, realID);
   4502 
   4503     err = U_ZERO_ERROR;
   4504     t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
   4505     if (U_FAILURE(err)) {
   4506         dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err));
   4507         Transliterator::unregister(realID);
   4508         return;
   4509     }
   4510     t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
   4511     if (U_FAILURE(err)) {
   4512         errln("Failed to instantiate transliterator with fake ID");
   4513         delete t1;
   4514         Transliterator::unregister(realID);
   4515         return;
   4516     }
   4517 
   4518     t1->toRules(rules1, TRUE);
   4519     t2->toRules(rules2, TRUE);
   4520     if (rules1 != rules2)
   4521         errln("Alias transliterators aren't the same");
   4522 
   4523     delete t1;
   4524     delete t2;
   4525     Transliterator::unregister(fakeID);
   4526 }
   4527 
   4528 void TransliteratorTest::TestRuleStripping() {
   4529     /*
   4530 #
   4531 \uE001>\u0C01; # SIGN
   4532     */
   4533     static const UChar rule[] = {
   4534         0x0023,0x0020,0x000D,0x000A,
   4535         0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
   4536     };
   4537     static const UChar expectedRule[] = {
   4538         0xE001,0x003E,0x0C01,0x003B,0
   4539     };
   4540     UChar result[UPRV_LENGTHOF(rule)];
   4541     UErrorCode status = U_ZERO_ERROR;
   4542     int32_t len = utrans_stripRules(rule, UPRV_LENGTHOF(rule), result, &status);
   4543     if (len != u_strlen(expectedRule)) {
   4544         errln("utrans_stripRules return len = %d", len);
   4545     }
   4546     if (u_strncmp(expectedRule, result, len) != 0) {
   4547         errln("utrans_stripRules did not return expected string");
   4548     }
   4549 }
   4550 
   4551 /**
   4552  * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
   4553  */
   4554 void TransliteratorTest::TestHalfwidthFullwidth(void) {
   4555     UParseError parseError;
   4556     UErrorCode status = U_ZERO_ERROR;
   4557     Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status);
   4558     Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status);
   4559     if (hf == 0 || fh == 0) {
   4560         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
   4561         delete hf;
   4562         delete fh;
   4563         return;
   4564     }
   4565 
   4566     // Array of 2n items
   4567     // Each item is
   4568     //   "hf"|"fh"|"both",
   4569     //   <Halfwidth>,
   4570     //   <Fullwidth>
   4571     const char* DATA[] = {
   4572         "both",
   4573         "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
   4574         "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
   4575     };
   4576     int32_t DATA_length = UPRV_LENGTHOF(DATA);
   4577 
   4578     for (int32_t i=0; i<DATA_length; i+=3) {
   4579         UnicodeString h = CharsToUnicodeString(DATA[i+1]);
   4580         UnicodeString f = CharsToUnicodeString(DATA[i+2]);
   4581         switch (*DATA[i]) {
   4582         case 0x68: //'h': // Halfwidth-Fullwidth only
   4583             expect(*hf, h, f);
   4584             break;
   4585         case 0x66: //'f': // Fullwidth-Halfwidth only
   4586             expect(*fh, f, h);
   4587             break;
   4588         case 0x62: //'b': // both directions
   4589             expect(*hf, h, f);
   4590             expect(*fh, f, h);
   4591             break;
   4592         }
   4593     }
   4594     delete hf;
   4595     delete fh;
   4596 }
   4597 
   4598 
   4599     /**
   4600      *  Test Thai.  The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
   4601      *              TODO: confirm that the expected results are correct.
   4602      *              For now, test just confirms that C++ and Java give identical results.
   4603      */
   4604 void TransliteratorTest::TestThai(void) {
   4605 #if !UCONFIG_NO_BREAK_ITERATION
   4606     UParseError parseError;
   4607     UErrorCode status = U_ZERO_ERROR;
   4608     Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
   4609     if (tr == 0) {
   4610         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
   4611         return;
   4612     }
   4613     if (U_FAILURE(status)) {
   4614         errln("FAIL: createInstance failed with %s", u_errorName(status));
   4615         return;
   4616     }
   4617     const char *thaiText =
   4618         "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
   4619         "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
   4620         "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
   4621         "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
   4622         "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
   4623         "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
   4624         "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
   4625         "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
   4626         "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
   4627         "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
   4628         "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
   4629         "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
   4630         "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
   4631         "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
   4632         "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
   4633         "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
   4634         "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
   4635         "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
   4636         "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
   4637         "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
   4638         "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
   4639         "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
   4640         "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
   4641         "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
   4642         " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
   4643         "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
   4644         "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
   4645         " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
   4646         "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
   4647         "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
   4648 
   4649     const char *latinText =
   4650         "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
   4651         "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
   4652         "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
   4653         "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
   4654         "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
   4655         " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
   4656         "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
   4657         "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
   4658         "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
   4659         "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
   4660         "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
   4661         "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
   4662         " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
   4663         "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
   4664         " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
   4665         "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
   4666         "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
   4667         "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
   4668 
   4669 
   4670     UnicodeString  xlitText(thaiText);
   4671     xlitText = xlitText.unescape();
   4672     tr->transliterate(xlitText);
   4673 
   4674     UnicodeString expectedText(latinText);
   4675     expectedText = expectedText.unescape();
   4676     expect(*tr, xlitText, expectedText);
   4677 
   4678     delete tr;
   4679 #endif
   4680 }
   4681 
   4682 
   4683 //======================================================================
   4684 // Support methods
   4685 //======================================================================
   4686 void TransliteratorTest::expectT(const UnicodeString& id,
   4687                                  const UnicodeString& source,
   4688                                  const UnicodeString& expectedResult) {
   4689     UErrorCode ec = U_ZERO_ERROR;
   4690     UParseError pe;
   4691     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
   4692     if (U_FAILURE(ec)) {
   4693         errln((UnicodeString)"FAIL: Could not create " + id + " -  " + u_errorName(ec));
   4694         delete t;
   4695         return;
   4696     }
   4697     expect(*t, source, expectedResult);
   4698     delete t;
   4699 }
   4700 
   4701 void TransliteratorTest::reportParseError(const UnicodeString& message,
   4702                                           const UParseError& parseError,
   4703                                           const UErrorCode& status) {
   4704     dataerrln(message +
   4705           /*", parse error " + parseError.code +*/
   4706           ", line " + parseError.line +
   4707           ", offset " + parseError.offset +
   4708           ", pre-context " + prettify(parseError.preContext, TRUE) +
   4709           ", post-context " + prettify(parseError.postContext,TRUE) +
   4710           ", Error: " + u_errorName(status));
   4711 }
   4712 
   4713 void TransliteratorTest::expect(const UnicodeString& rules,
   4714                                 const UnicodeString& source,
   4715                                 const UnicodeString& expectedResult,
   4716                                 UTransPosition *pos) {
   4717     expect("<ID>", rules, source, expectedResult, pos);
   4718 }
   4719 
   4720 void TransliteratorTest::expect(const UnicodeString& id,
   4721                                 const UnicodeString& rules,
   4722                                 const UnicodeString& source,
   4723                                 const UnicodeString& expectedResult,
   4724                                 UTransPosition *pos) {
   4725     UErrorCode status = U_ZERO_ERROR;
   4726     UParseError parseError;
   4727     Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
   4728     if (U_FAILURE(status)) {
   4729         reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
   4730     } else {
   4731         expect(*t, source, expectedResult, pos);
   4732     }
   4733     delete t;
   4734 }
   4735 
   4736 void TransliteratorTest::expect(const Transliterator& t,
   4737                                 const UnicodeString& source,
   4738                                 const UnicodeString& expectedResult,
   4739                                 const Transliterator& reverseTransliterator) {
   4740     expect(t, source, expectedResult);
   4741     expect(reverseTransliterator, expectedResult, source);
   4742 }
   4743 
   4744 void TransliteratorTest::expect(const Transliterator& t,
   4745                                 const UnicodeString& source,
   4746                                 const UnicodeString& expectedResult,
   4747                                 UTransPosition *pos) {
   4748     if (pos == 0) {
   4749         UnicodeString result(source);
   4750         t.transliterate(result);
   4751         expectAux(t.getID() + ":String", source, result, expectedResult);
   4752     }
   4753     UTransPosition index={0, 0, 0, 0};
   4754     if (pos != 0) {
   4755         index = *pos;
   4756     }
   4757 
   4758     UnicodeString rsource(source);
   4759     if (pos == 0) {
   4760         t.transliterate(rsource);
   4761     } else {
   4762         // Do it all at once -- below we do it incrementally
   4763         t.finishTransliteration(rsource, *pos);
   4764     }
   4765     expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
   4766 
   4767     // Test keyboard (incremental) transliteration -- this result
   4768     // must be the same after we finalize (see below).
   4769     UnicodeString log;
   4770     rsource.remove();
   4771     if (pos != 0) {
   4772         rsource = source;
   4773         formatInput(log, rsource, index);
   4774         log.append(" -> ");
   4775         UErrorCode status = U_ZERO_ERROR;
   4776         t.transliterate(rsource, index, status);
   4777         formatInput(log, rsource, index);
   4778     } else {
   4779         for (int32_t i=0; i<source.length(); ++i) {
   4780             if (i != 0) {
   4781                 log.append(" + ");
   4782             }
   4783             log.append(source.charAt(i)).append(" -> ");
   4784             UErrorCode status = U_ZERO_ERROR;
   4785             t.transliterate(rsource, index, source.charAt(i), status);
   4786             formatInput(log, rsource, index);
   4787         }
   4788     }
   4789 
   4790     // As a final step in keyboard transliteration, we must call
   4791     // transliterate to finish off any pending partial matches that
   4792     // were waiting for more input.
   4793     t.finishTransliteration(rsource, index);
   4794     log.append(" => ").append(rsource);
   4795 
   4796     expectAux(t.getID() + ":Keyboard", log,
   4797               rsource == expectedResult,
   4798               expectedResult);
   4799 }
   4800 
   4801 
   4802 /**
   4803  * @param appendTo result is appended to this param.
   4804  * @param input the string being transliterated
   4805  * @param pos the index struct
   4806  */
   4807 UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
   4808                                                const UnicodeString& input,
   4809                                                const UTransPosition& pos) {
   4810     // Output a string of the form aaa{bbb|ccc|ddd}eee, where
   4811     // the {} indicate the context start and limit, and the ||
   4812     // indicate the start and limit.
   4813     if (0 <= pos.contextStart &&
   4814         pos.contextStart <= pos.start &&
   4815         pos.start <= pos.limit &&
   4816         pos.limit <= pos.contextLimit &&
   4817         pos.contextLimit <= input.length()) {
   4818 
   4819         UnicodeString a, b, c, d, e;
   4820         input.extractBetween(0, pos.contextStart, a);
   4821         input.extractBetween(pos.contextStart, pos.start, b);
   4822         input.extractBetween(pos.start, pos.limit, c);
   4823         input.extractBetween(pos.limit, pos.contextLimit, d);
   4824         input.extractBetween(pos.contextLimit, input.length(), e);
   4825         appendTo.append(a).append((UChar)123/*{*/).append(b).
   4826             append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
   4827             append((UChar)125/*}*/).append(e);
   4828     } else {
   4829         appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
   4830                         pos.contextStart + ", s=" + pos.start + ", l=" +
   4831                         pos.limit + ", cl=" + pos.contextLimit + "} on " +
   4832                         input);
   4833     }
   4834     return appendTo;
   4835 }
   4836 
   4837 void TransliteratorTest::expectAux(const UnicodeString& tag,
   4838                                    const UnicodeString& source,
   4839                                    const UnicodeString& result,
   4840                                    const UnicodeString& expectedResult) {
   4841     expectAux(tag, source + " -> " + result,
   4842               result == expectedResult,
   4843               expectedResult);
   4844 }
   4845 
   4846 void TransliteratorTest::expectAux(const UnicodeString& tag,
   4847                                    const UnicodeString& summary, UBool pass,
   4848                                    const UnicodeString& expectedResult) {
   4849     if (pass) {
   4850         logln(UnicodeString("(")+tag+") " + prettify(summary));
   4851     } else {
   4852         dataerrln(UnicodeString("FAIL: (")+tag+") "
   4853               + prettify(summary)
   4854               + ", expected " + prettify(expectedResult));
   4855     }
   4856 }
   4857 
   4858 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
   4859