Home | History | Annotate | Download | only in intltest
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 1999-2011, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 *   Date        Name        Description
      7 *   11/10/99    aliu        Creation.
      8 **********************************************************************
      9 */
     10 
     11 #include "unicode/utypes.h"
     12 
     13 #if !UCONFIG_NO_TRANSLITERATION
     14 
     15 #include "transtst.h"
     16 #include "unicode/locid.h"
     17 #include "unicode/dtfmtsym.h"
     18 #include "unicode/normlzr.h"
     19 #include "unicode/translit.h"
     20 #include "unicode/uchar.h"
     21 #include "unicode/unifilt.h"
     22 #include "unicode/uniset.h"
     23 #include "unicode/ustring.h"
     24 #include "unicode/usetiter.h"
     25 #include "unicode/uscript.h"
     26 #include "unicode/utf16.h"
     27 #include "cpdtrans.h"
     28 #include "nultrans.h"
     29 #include "rbt.h"
     30 #include "rbt_pars.h"
     31 #include "anytrans.h"
     32 #include "esctrn.h"
     33 #include "name2uni.h"
     34 #include "nortrans.h"
     35 #include "remtrans.h"
     36 #include "titletrn.h"
     37 #include "tolowtrn.h"
     38 #include "toupptrn.h"
     39 #include "unesctrn.h"
     40 #include "uni2name.h"
     41 #include "cstring.h"
     42 #include "cmemory.h"
     43 #include <stdio.h>
     44 
     45 /***********************************************************************
     46 
     47                      HOW TO USE THIS TEST FILE
     48                                -or-
     49                   How I developed on two platforms
     50                 without losing (too much of) my mind
     51 
     52 
     53 1. Add new tests by copying/pasting/changing existing tests.  On Java,
     54    any public void method named Test...() taking no parameters becomes
     55    a test.  On C++, you need to modify the header and add a line to
     56    the runIndexedTest() dispatch method.
     57 
     58 2. Make liberal use of the expect() method; it is your friend.
     59 
     60 3. The tests in this file exactly match those in a sister file on the
     61    other side.  The two files are:
     62 
     63    icu4j:  src/com/ibm/test/translit/TransliteratorTest.java
     64    icu4c:  source/test/intltest/transtst.cpp
     65 
     66                   ==> THIS IS THE IMPORTANT PART <==
     67 
     68    When you add a test in this file, add it in TransliteratorTest.java
     69    too.  Give it the same name and put it in the same relative place.
     70    This makes maintenance a lot simpler for any poor soul who ends up
     71    trying to synchronize the tests between icu4j and icu4c.
     72 
     73 4. If you MUST enter a test that is NOT paralleled in the sister file,
     74    then add it in the special non-mirrored section.  These are
     75    labeled
     76 
     77      "icu4j ONLY"
     78 
     79    or
     80 
     81      "icu4c ONLY"
     82 
     83    Make sure you document the reason the test is here and not there.
     84 
     85 
     86 Thank you.
     87 The Management
     88 ***********************************************************************/
     89 
     90 // Define character constants thusly to be EBCDIC-friendly
     91 enum {
     92     LEFT_BRACE=((UChar)0x007B), /*{*/
     93     PIPE      =((UChar)0x007C), /*|*/
     94     ZERO      =((UChar)0x0030), /*0*/
     95     UPPER_A   =((UChar)0x0041)  /*A*/
     96 };
     97 
     98 TransliteratorTest::TransliteratorTest()
     99 :   DESERET_DEE((UChar32)0x10414),
    100     DESERET_dee((UChar32)0x1043C)
    101 {
    102 }
    103 
    104 TransliteratorTest::~TransliteratorTest() {}
    105 
    106 void
    107 TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
    108                                    const char* &name, char* /*par*/) {
    109     switch (index) {
    110         TESTCASE(0,TestInstantiation);
    111         TESTCASE(1,TestSimpleRules);
    112         TESTCASE(2,TestRuleBasedInverse);
    113         TESTCASE(3,TestKeyboard);
    114         TESTCASE(4,TestKeyboard2);
    115         TESTCASE(5,TestKeyboard3);
    116         TESTCASE(6,TestArabic);
    117         TESTCASE(7,TestCompoundKana);
    118         TESTCASE(8,TestCompoundHex);
    119         TESTCASE(9,TestFiltering);
    120         TESTCASE(10,TestInlineSet);
    121         TESTCASE(11,TestPatternQuoting);
    122         TESTCASE(12,TestJ277);
    123         TESTCASE(13,TestJ243);
    124         TESTCASE(14,TestJ329);
    125         TESTCASE(15,TestSegments);
    126         TESTCASE(16,TestCursorOffset);
    127         TESTCASE(17,TestArbitraryVariableValues);
    128         TESTCASE(18,TestPositionHandling);
    129         TESTCASE(19,TestHiraganaKatakana);
    130         TESTCASE(20,TestCopyJ476);
    131         TESTCASE(21,TestAnchors);
    132         TESTCASE(22,TestInterIndic);
    133         TESTCASE(23,TestFilterIDs);
    134         TESTCASE(24,TestCaseMap);
    135         TESTCASE(25,TestNameMap);
    136         TESTCASE(26,TestLiberalizedID);
    137         TESTCASE(27,TestCreateInstance);
    138         TESTCASE(28,TestNormalizationTransliterator);
    139         TESTCASE(29,TestCompoundRBT);
    140         TESTCASE(30,TestCompoundFilter);
    141         TESTCASE(31,TestRemove);
    142         TESTCASE(32,TestToRules);
    143         TESTCASE(33,TestContext);
    144         TESTCASE(34,TestSupplemental);
    145         TESTCASE(35,TestQuantifier);
    146         TESTCASE(36,TestSTV);
    147         TESTCASE(37,TestCompoundInverse);
    148         TESTCASE(38,TestNFDChainRBT);
    149         TESTCASE(39,TestNullInverse);
    150         TESTCASE(40,TestAliasInverseID);
    151         TESTCASE(41,TestCompoundInverseID);
    152         TESTCASE(42,TestUndefinedVariable);
    153         TESTCASE(43,TestEmptyContext);
    154         TESTCASE(44,TestCompoundFilterID);
    155         TESTCASE(45,TestPropertySet);
    156         TESTCASE(46,TestNewEngine);
    157         TESTCASE(47,TestQuantifiedSegment);
    158         TESTCASE(48,TestDevanagariLatinRT);
    159         TESTCASE(49,TestTeluguLatinRT);
    160         TESTCASE(50,TestCompoundLatinRT);
    161         TESTCASE(51,TestSanskritLatinRT);
    162         TESTCASE(52,TestLocaleInstantiation);
    163         TESTCASE(53,TestTitleAccents);
    164         TESTCASE(54,TestLocaleResource);
    165         TESTCASE(55,TestParseError);
    166         TESTCASE(56,TestOutputSet);
    167         TESTCASE(57,TestVariableRange);
    168         TESTCASE(58,TestInvalidPostContext);
    169         TESTCASE(59,TestIDForms);
    170         TESTCASE(60,TestToRulesMark);
    171         TESTCASE(61,TestEscape);
    172         TESTCASE(62,TestAnchorMasking);
    173         TESTCASE(63,TestDisplayName);
    174         TESTCASE(64,TestSpecialCases);
    175 #if !UCONFIG_NO_FILE_IO
    176         TESTCASE(65,TestIncrementalProgress);
    177 #endif
    178         TESTCASE(66,TestSurrogateCasing);
    179         TESTCASE(67,TestFunction);
    180         TESTCASE(68,TestInvalidBackRef);
    181         TESTCASE(69,TestMulticharStringSet);
    182         TESTCASE(70,TestUserFunction);
    183         TESTCASE(71,TestAnyX);
    184         TESTCASE(72,TestSourceTargetSet);
    185         TESTCASE(73,TestGurmukhiDevanagari);
    186         TESTCASE(74,TestPatternWhiteSpace);
    187         TESTCASE(75,TestAllCodepoints);
    188         TESTCASE(76,TestBoilerplate);
    189         TESTCASE(77,TestAlternateSyntax);
    190         TESTCASE(78,TestBeginEnd);
    191         TESTCASE(79,TestBeginEndToRules);
    192         TESTCASE(80,TestRegisterAlias);
    193         TESTCASE(81,TestRuleStripping);
    194         TESTCASE(82,TestHalfwidthFullwidth);
    195         TESTCASE(83,TestThai);
    196         TESTCASE(84,TestAny);
    197         default: name = ""; break;
    198     }
    199 }
    200 
// Version quadruple {3,9,4,0}.
// NOTE(review): no use of ICU_39 is visible in this part of the file;
// presumably it gates version-dependent expectations -- confirm.
static const UVersionInfo ICU_39 = {3,9,4,0};
    202 /**
    203  * Make sure every system transliterator can be instantiated.
    204  *
    205  * ALSO test that the result of toRules() for each rule is a valid
    206  * rule.  Do this here so we don't have to have another test that
    207  * instantiates everything as well.
    208  */
    209 void TransliteratorTest::TestInstantiation() {
    210     UErrorCode ec = U_ZERO_ERROR;
    211     StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
    212     assertSuccess("getAvailableIDs()", ec);
    213     assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
    214     int32_t n = Transliterator::countAvailableIDs();
    215     assertTrue("getAvailableIDs().count()==countAvailableIDs()",
    216                avail->count(ec) == n);
    217     assertSuccess("count()", ec);
    218     UnicodeString name;
    219     for (int32_t i=0; i<n; ++i) {
    220         const UnicodeString& id = *avail->snext(ec);
    221         if (!assertSuccess("snext()", ec) ||
    222             !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
    223             break;
    224         }
    225         UnicodeString id2 = Transliterator::getAvailableID(i);
    226         if (id.length() < 1) {
    227             errln(UnicodeString("FAIL: getAvailableID(") +
    228                   i + ") returned empty string");
    229             continue;
    230         }
    231         if (id != id2) {
    232             errln(UnicodeString("FAIL: getAvailableID(") +
    233                   i + ") != getAvailableIDs().snext()");
    234             continue;
    235         }
    236         UParseError parseError;
    237         UErrorCode status = U_ZERO_ERROR;
    238         Transliterator* t = Transliterator::createInstance(id,
    239                               UTRANS_FORWARD, parseError,status);
    240         name.truncate(0);
    241         Transliterator::getDisplayName(id, name);
    242         if (t == 0) {
    243 #if UCONFIG_NO_BREAK_ITERATION
    244             // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
    245             if (id.compare((UnicodeString)"Thai-Latin") != 0)
    246 #endif
    247                 dataerrln(UnicodeString("FAIL: Couldn't create ") + id +
    248                       /*", parse error " + parseError.code +*/
    249                       ", line " + parseError.line +
    250                       ", offset " + parseError.offset +
    251                       ", pre-context " + prettify(parseError.preContext, TRUE) +
    252                       ", post-context " +prettify(parseError.postContext,TRUE) +
    253                       ", Error: " + u_errorName(status));
    254                 // When createInstance fails, it deletes the failing
    255                 // entry from the available ID list.  We detect this
    256                 // here by looking for a change in countAvailableIDs.
    257             int32_t nn = Transliterator::countAvailableIDs();
    258             if (nn == (n - 1)) {
    259                 n = nn;
    260                 --i; // Compensate for deleted entry
    261             }
    262         } else {
    263             logln(UnicodeString("OK: ") + name + " (" + id + ")");
    264 
    265             // Now test toRules
    266             UnicodeString rules;
    267             t->toRules(rules, TRUE);
    268             Transliterator *u = Transliterator::createFromRules("x",
    269                                     rules, UTRANS_FORWARD, parseError,status);
    270             if (u == 0) {
    271                 errln(UnicodeString("FAIL: ") + id +
    272                       ".createFromRules() => bad rules" +
    273                       /*", parse error " + parseError.code +*/
    274                       ", line " + parseError.line +
    275                       ", offset " + parseError.offset +
    276                       ", context " + prettify(parseError.preContext, TRUE) +
    277                       ", rules: " + prettify(rules, TRUE));
    278             } else {
    279                 delete u;
    280             }
    281             delete t;
    282         }
    283     }
    284     assertTrue("snext()==NULL", avail->snext(ec)==NULL);
    285     assertSuccess("snext()", ec);
    286     delete avail;
    287 
    288     // Now test the failure path
    289     UParseError parseError;
    290     UErrorCode status = U_ZERO_ERROR;
    291     UnicodeString id("<Not a valid Transliterator ID>");
    292     Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
    293     if (t != 0) {
    294         errln("FAIL: " + id + " returned a transliterator");
    295         delete t;
    296     } else {
    297         logln("OK: Bogus ID handled properly");
    298     }
    299 }
    300 
    301 void TransliteratorTest::TestSimpleRules(void) {
    302     /* Example: rules 1. ab>x|y
    303      *                2. yc>z
    304      *
    305      * []|eabcd  start - no match, copy e to tranlated buffer
    306      * [e]|abcd  match rule 1 - copy output & adjust cursor
    307      * [ex|y]cd  match rule 2 - copy output & adjust cursor
    308      * [exz]|d   no match, copy d to transliterated buffer
    309      * [exzd]|   done
    310      */
    311     expect(UnicodeString("ab>x|y;", "") +
    312            "yc>z",
    313            "eabcd", "exzd");
    314 
    315     /* Another set of rules:
    316      *    1. ab>x|yzacw
    317      *    2. za>q
    318      *    3. qc>r
    319      *    4. cw>n
    320      *
    321      * []|ab       Rule 1
    322      * [x|yzacw]   No match
    323      * [xy|zacw]   Rule 2
    324      * [xyq|cw]    Rule 4
    325      * [xyqn]|     Done
    326      */
    327     expect(UnicodeString("ab>x|yzacw;") +
    328            "za>q;" +
    329            "qc>r;" +
    330            "cw>n",
    331            "ab", "xyqn");
    332 
    333     /* Test categories
    334      */
    335     UErrorCode status = U_ZERO_ERROR;
    336     UParseError parseError;
    337     Transliterator *t = Transliterator::createFromRules(
    338         "<ID>",
    339         UnicodeString("$dummy=").append((UChar)0xE100) +
    340         UnicodeString(";"
    341                       "$vowel=[aeiouAEIOU];"
    342                       "$lu=[:Lu:];"
    343                       "$vowel } $lu > '!';"
    344                       "$vowel > '&';"
    345                       "'!' { $lu > '^';"
    346                       "$lu > '*';"
    347                       "a > ERROR", ""),
    348         UTRANS_FORWARD, parseError,
    349         status);
    350     if (U_FAILURE(status)) {
    351         dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status));
    352         return;
    353     }
    354     expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
    355     delete t;
    356 }
    357 
    358 /**
    359  * Test inline set syntax and set variable syntax.
    360  */
    361 void TransliteratorTest::TestInlineSet(void) {
    362     expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
    363     expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
    364 
    365     expect(UnicodeString(
    366            "$digit = [0-9];"
    367            "$alpha = [a-zA-Z];"
    368            "$alphanumeric = [$digit $alpha];" // ***
    369            "$special = [^$alphanumeric];"     // ***
    370            "$alphanumeric > '-';"
    371            "$special > '*';", ""),
    372 
    373            "thx-1138", "---*----");
    374 }
    375 
    376 /**
    377  * Create some inverses and confirm that they work.  We have to be
    378  * careful how we do this, since the inverses will not be true
    379  * inverses -- we can't throw any random string at the composition
    380  * of the transliterators and expect the identity function.  F x
    381  * F' != I.  However, if we are careful about the input, we will
    382  * get the expected results.
    383  */
    384 void TransliteratorTest::TestRuleBasedInverse(void) {
    385     UnicodeString RULES =
    386         UnicodeString("abc>zyx;") +
    387         "ab>yz;" +
    388         "bc>zx;" +
    389         "ca>xy;" +
    390         "a>x;" +
    391         "b>y;" +
    392         "c>z;" +
    393 
    394         "abc<zyx;" +
    395         "ab<yz;" +
    396         "bc<zx;" +
    397         "ca<xy;" +
    398         "a<x;" +
    399         "b<y;" +
    400         "c<z;" +
    401 
    402         "";
    403 
    404     const char* DATA[] = {
    405         // Careful here -- random strings will not work.  If we keep
    406         // the left side to the domain and the right side to the range
    407         // we will be okay though (left, abc; right xyz).
    408         "a", "x",
    409         "abcacab", "zyxxxyy",
    410         "caccb", "xyzzy",
    411     };
    412 
    413     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
    414 
    415     UErrorCode status = U_ZERO_ERROR;
    416     UParseError parseError;
    417     Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
    418                                 UTRANS_FORWARD, parseError, status);
    419     Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
    420                                 UTRANS_REVERSE, parseError, status);
    421     if (U_FAILURE(status)) {
    422         errln("FAIL: RBT constructor failed");
    423         return;
    424     }
    425     for (int32_t i=0; i<DATA_length; i+=2) {
    426         expect(*fwd, DATA[i], DATA[i+1]);
    427         expect(*rev, DATA[i+1], DATA[i]);
    428     }
    429     delete fwd;
    430     delete rev;
    431 }
    432 
    433 /**
    434  * Basic test of keyboard.
    435  */
    436 void TransliteratorTest::TestKeyboard(void) {
    437     UParseError parseError;
    438     UErrorCode status = U_ZERO_ERROR;
    439     Transliterator *t = Transliterator::createFromRules("<ID>",
    440                               UnicodeString("psch>Y;")
    441                               +"ps>y;"
    442                               +"ch>x;"
    443                               +"a>A;",
    444                               UTRANS_FORWARD, parseError,
    445                               status);
    446     if (U_FAILURE(status)) {
    447         errln("FAIL: RBT constructor failed");
    448         return;
    449     }
    450     const char* DATA[] = {
    451         // insertion, buffer
    452         "a", "A",
    453         "p", "Ap",
    454         "s", "Aps",
    455         "c", "Apsc",
    456         "a", "AycA",
    457         "psch", "AycAY",
    458         0, "AycAY", // null means finishKeyboardTransliteration
    459     };
    460 
    461     keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
    462     delete t;
    463 }
    464 
    465 /**
    466  * Basic test of keyboard with cursor.
    467  */
    468 void TransliteratorTest::TestKeyboard2(void) {
    469     UParseError parseError;
    470     UErrorCode status = U_ZERO_ERROR;
    471     Transliterator *t = Transliterator::createFromRules("<ID>",
    472                               UnicodeString("ych>Y;")
    473                               +"ps>|y;"
    474                               +"ch>x;"
    475                               +"a>A;",
    476                               UTRANS_FORWARD, parseError,
    477                               status);
    478     if (U_FAILURE(status)) {
    479         errln("FAIL: RBT constructor failed");
    480         return;
    481     }
    482     const char* DATA[] = {
    483         // insertion, buffer
    484         "a", "A",
    485         "p", "Ap",
    486         "s", "Aps", // modified for rollback - "Ay",
    487         "c", "Apsc", // modified for rollback - "Ayc",
    488         "a", "AycA",
    489         "p", "AycAp",
    490         "s", "AycAps", // modified for rollback - "AycAy",
    491         "c", "AycApsc", // modified for rollback - "AycAyc",
    492         "h", "AycAY",
    493         0, "AycAY", // null means finishKeyboardTransliteration
    494     };
    495 
    496     keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
    497     delete t;
    498 }
    499 
    500 /**
    501  * Test keyboard transliteration with back-replacement.
    502  */
    503 void TransliteratorTest::TestKeyboard3(void) {
    504     // We want th>z but t>y.  Furthermore, during keyboard
    505     // transliteration we want t>y then yh>z if t, then h are
    506     // typed.
    507     UnicodeString RULES("t>|y;"
    508                         "yh>z;");
    509 
    510     const char* DATA[] = {
    511         // Column 1: characters to add to buffer (as if typed)
    512         // Column 2: expected appearance of buffer after
    513         //           keyboard xliteration.
    514         "a", "a",
    515         "b", "ab",
    516         "t", "abt", // modified for rollback - "aby",
    517         "c", "abyc",
    518         "t", "abyct", // modified for rollback - "abycy",
    519         "h", "abycz",
    520         0, "abycz", // null means finishKeyboardTransliteration
    521     };
    522 
    523     UParseError parseError;
    524     UErrorCode status = U_ZERO_ERROR;
    525     Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
    526     if (U_FAILURE(status)) {
    527         errln("FAIL: RBT constructor failed");
    528         return;
    529     }
    530     keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
    531     delete t;
    532 }
    533 
    534 void TransliteratorTest::keyboardAux(const Transliterator& t,
    535                                      const char* DATA[], int32_t DATA_length) {
    536     UErrorCode status = U_ZERO_ERROR;
    537     UTransPosition index={0, 0, 0, 0};
    538     UnicodeString s;
    539     for (int32_t i=0; i<DATA_length; i+=2) {
    540         UnicodeString log;
    541         if (DATA[i] != 0) {
    542             log = s + " + "
    543                 + DATA[i]
    544                 + " -> ";
    545             t.transliterate(s, index, DATA[i], status);
    546         } else {
    547             log = s + " => ";
    548             t.finishTransliteration(s, index);
    549         }
    550         // Show the start index '{' and the cursor '|'
    551         UnicodeString a, b, c;
    552         s.extractBetween(0, index.contextStart, a);
    553         s.extractBetween(index.contextStart, index.start, b);
    554         s.extractBetween(index.start, s.length(), c);
    555         log.append(a).
    556             append((UChar)LEFT_BRACE).
    557             append(b).
    558             append((UChar)PIPE).
    559             append(c);
    560         if (s == DATA[i+1] && U_SUCCESS(status)) {
    561             logln(log);
    562         } else {
    563             errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
    564         }
    565     }
    566 }
    567 
/**
 * Placeholder for the Latin-Arabic test.  The whole body is commented
 * out, so this test currently does nothing when run.
 */
void TransliteratorTest::TestArabic(void) {
// Test disabled for 2.0 until new Arabic transliterator can be written.
//    /*
//    const char* DATA[] = {
//        "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
//                  "\u0627\u0644\u0644\u063a\u0629\u0020"+
//                  "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
//                  "\u0628\u0628\u0646\u0638\u0645\u0020"+
//                  "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
//                  "\u062c\u0645\u064a\u0644\u0629",
//    };
//    */
//
//    UChar ar_raw[] = {
//        0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
//        0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
//        0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
//        0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
//        0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
//        0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
//    };
//    UnicodeString ar(ar_raw);
//    UErrorCode status=U_ZERO_ERROR;
//    UParseError parseError;
//    Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
//    if (t == 0) {
//        errln("FAIL: createInstance failed");
//        return;
//    }
//    expect(*t, "Arabic", ar);
//    delete t;
}
    600 
    601 /**
    602  * Compose the Kana transliterator forward and reverse and try
    603  * some strings that should come out unchanged.
    604  */
    605 void TransliteratorTest::TestCompoundKana(void) {
    606     UParseError parseError;
    607     UErrorCode status = U_ZERO_ERROR;
    608     Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
    609     if (t == 0) {
    610         dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status));
    611     } else {
    612         expect(*t, "aaaaa", "aaaaa");
    613         delete t;
    614     }
    615 }
    616 
    617 /**
    618  * Compose the hex transliterators forward and reverse.
    619  */
    620 void TransliteratorTest::TestCompoundHex(void) {
    621     UParseError parseError;
    622     UErrorCode status = U_ZERO_ERROR;
    623     Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
    624     Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
    625     Transliterator* transab[] = { a, b };
    626     Transliterator* transba[] = { b, a };
    627     if (a == 0 || b == 0) {
    628         errln("FAIL: construction failed");
    629         delete a;
    630         delete b;
    631         return;
    632     }
    633     // Do some basic tests of a
    634     expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
    635     // Do some basic tests of b
    636     expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
    637 
    638     Transliterator* ab = new CompoundTransliterator(transab, 2);
    639     UnicodeString s("abcde", "");
    640     expect(*ab, s, s);
    641 
    642     UnicodeString str(s);
    643     a->transliterate(str);
    644     Transliterator* ba = new CompoundTransliterator(transba, 2);
    645     expect(*ba, str, str);
    646 
    647     delete ab;
    648     delete ba;
    649     delete a;
    650     delete b;
    651 }
    652 
    653 int gTestFilterClassID = 0;
    654 /**
    655  * Used by TestFiltering().
    656  */
    657 class TestFilter : public UnicodeFilter {
    658     virtual UnicodeFunctor* clone() const {
    659         return new TestFilter(*this);
    660     }
    661     virtual UBool contains(UChar32 c) const {
    662         return c != (UChar)0x0063 /*c*/;
    663     }
    664     // Stubs
    665     virtual UnicodeString& toPattern(UnicodeString& result,
    666                                      UBool /*escapeUnprintable*/) const {
    667         return result;
    668     }
    669     virtual UBool matchesIndexValue(uint8_t /*v*/) const {
    670         return FALSE;
    671     }
    672     virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {}
    673 public:
    674     UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
    675 };
    676 
    677 /**
    678  * Do some basic tests of filtering.
    679  */
    680 void TransliteratorTest::TestFiltering(void) {
    681     UParseError parseError;
    682     UErrorCode status = U_ZERO_ERROR;
    683     Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
    684     if (hex == 0) {
    685         errln("FAIL: createInstance(Any-Hex) failed");
    686         return;
    687     }
    688     hex->adoptFilter(new TestFilter());
    689     UnicodeString s("abcde");
    690     hex->transliterate(s);
    691     UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
    692     if (s == exp) {
    693         logln(UnicodeString("Ok:   \"") + exp + "\"");
    694     } else {
    695         logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
    696     }
    697 
    698     // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
    699     UnicodeFilter *f = hex->orphanFilter();
    700     if (f == NULL){
    701         errln("FAIL: orphanFilter() should get a UnicodeFilter");
    702     } else {
    703         delete f;
    704     }
    705     delete hex;
    706 }
    707 
    708 /**
    709  * Test anchors
    710  */
    711 void TransliteratorTest::TestAnchors(void) {
    712     expect(UnicodeString("^a  > 0; a$ > 2 ; a > 1;", ""),
    713            "aaa",
    714            "012");
    715     expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
    716            "aaa",
    717            "012");
    718     expect(UnicodeString("^ab  > 01 ;"
    719            " ab  > |8 ;"
    720            "  b  > k ;"
    721            " 8x$ > 45 ;"
    722            " 8x  > 77 ;", ""),
    723 
    724            "ababbabxabx",
    725            "018k7745");
    726     expect(UnicodeString("$s = [z$] ;"
    727            "$s{ab    > 01 ;"
    728            "   ab    > |8 ;"
    729            "    b    > k ;"
    730            "   8x}$s > 45 ;"
    731            "   8x    > 77 ;", ""),
    732 
    733            "abzababbabxzabxabx",
    734            "01z018k45z01x45");
    735 }
    736 
    737 /**
    738  * Test pattern quoting and escape mechanisms.
    739  */
    740 void TransliteratorTest::TestPatternQuoting(void) {
    741     // Array of 3n items
    742     // Each item is <rules>, <input>, <expected output>
    743     const UnicodeString DATA[] = {
    744         UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
    745         UnicodeString(UChar(0x4E01)),
    746         "[male adult]"
    747     };
    748 
    749     for (int32_t i=0; i<3; i+=3) {
    750         logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
    751         UParseError parseError;
    752         UErrorCode status = U_ZERO_ERROR;
    753         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
    754         if (U_FAILURE(status)) {
    755             errln("RBT constructor failed");
    756         } else {
    757             expect(*t, DATA[i+1], DATA[i+2]);
    758         }
    759         delete t;
    760     }
    761 }
    762 
    763 /**
    764  * Regression test for bugs found in Greek transliteration.
    765  */
    766 void TransliteratorTest::TestJ277(void) {
    767     UErrorCode status = U_ZERO_ERROR;
    768     UParseError parseError;
    769     Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
    770     if (gl == NULL) {
    771         dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status));
    772         return;
    773     }
    774 
    775     UChar sigma = 0x3C3;
    776     UChar upsilon = 0x3C5;
    777     UChar nu = 0x3BD;
    778 //    UChar PHI = 0x3A6;
    779     UChar alpha = 0x3B1;
    780 //    UChar omega = 0x3C9;
    781 //    UChar omicron = 0x3BF;
    782 //    UChar epsilon = 0x3B5;
    783 
    784     // sigma upsilon nu -> syn
    785     UnicodeString syn;
    786     syn.append(sigma).append(upsilon).append(nu);
    787     expect(*gl, syn, "syn");
    788 
    789     // sigma alpha upsilon nu -> saun
    790     UnicodeString sayn;
    791     sayn.append(sigma).append(alpha).append(upsilon).append(nu);
    792     expect(*gl, sayn, "saun");
    793 
    794     // Again, using a smaller rule set
    795     UnicodeString rules(
    796                 "$alpha   = \\u03B1;"
    797                 "$nu      = \\u03BD;"
    798                 "$sigma   = \\u03C3;"
    799                 "$ypsilon = \\u03C5;"
    800                 "$vowel   = [aeiouAEIOU$alpha$ypsilon];"
    801                 "s <>           $sigma;"
    802                 "a <>           $alpha;"
    803                 "u <>  $vowel { $ypsilon;"
    804                 "y <>           $ypsilon;"
    805                 "n <>           $nu;",
    806                 "");
    807     Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
    808     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
    809     expect(*mini, syn, "syn");
    810     expect(*mini, sayn, "saun");
    811     delete mini;
    812     mini = NULL;
    813 
    814 #if !UCONFIG_NO_FORMATTING
    815     // Transliterate the Greek locale data
    816     Locale el("el");
    817     DateFormatSymbols syms(el, status);
    818     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
    819     int32_t i, count;
    820     const UnicodeString* data = syms.getMonths(count);
    821     for (i=0; i<count; ++i) {
    822         if (data[i].length() == 0) {
    823             continue;
    824         }
    825         UnicodeString out(data[i]);
    826         gl->transliterate(out);
    827         UBool ok = TRUE;
    828         if (data[i].length() >= 2 && out.length() >= 2 &&
    829             u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
    830             if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
    831                 ok = FALSE;
    832             }
    833         }
    834         if (ok) {
    835             logln(prettify(data[i] + " -> " + out));
    836         } else {
    837             errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
    838         }
    839     }
    840 #endif
    841 
    842     delete gl;
    843 }
    844 
    845 /**
    846  * Prefix, suffix support in hex transliterators
    847  */
    848 void TransliteratorTest::TestJ243(void) {
    849     UErrorCode ec = U_ZERO_ERROR;
    850 
    851     // Test default Hex-Any, which should handle
    852     // \u, \U, u+, and U+
    853     Transliterator *hex =
    854         Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
    855     if (assertSuccess("getInstance", ec)) {
    856         expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
    857     }
    858     delete hex;
    859 
    860 //    // Try a custom Hex-Unicode
    861 //    // \uXXXX and &#xXXXX;
    862 //    ec = U_ZERO_ERROR;
    863 //    HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
    864 //    expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;", ""),
    865 //           "abcd5fx012&#x00033;");
    866 //    // Try custom Any-Hex (default is tested elsewhere)
    867 //    ec = U_ZERO_ERROR;
    868 //    UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
    869 //    expect(hex3, "012", "&#x30;&#x31;&#x32;");
    870 }
    871 
    872 /**
    873  * Parsers need better syntax error messages.
    874  */
    875 void TransliteratorTest::TestJ329(void) {
    876 
    877     struct { UBool containsErrors; const char* rule; } DATA[] = {
    878         { FALSE, "a > b; c > d" },
    879         { TRUE,  "a > b; no operator; c > d" },
    880     };
    881     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
    882 
    883     for (int32_t i=0; i<DATA_length; ++i) {
    884         UErrorCode status = U_ZERO_ERROR;
    885         UParseError parseError;
    886         Transliterator *rbt = Transliterator::createFromRules("<ID>",
    887                                     DATA[i].rule,
    888                                     UTRANS_FORWARD,
    889                                     parseError,
    890                                     status);
    891         UBool gotError = U_FAILURE(status);
    892         UnicodeString desc(DATA[i].rule);
    893         desc.append(gotError ? " -> error" : " -> no error");
    894         if (gotError) {
    895             desc = desc + ", ParseError code=" + u_errorName(status) +
    896                 " line=" + parseError.line +
    897                 " offset=" + parseError.offset +
    898                 " context=" + parseError.preContext;
    899         }
    900         if (gotError == DATA[i].containsErrors) {
    901             logln(UnicodeString("Ok:   ") + desc);
    902         } else {
    903             errln(UnicodeString("FAIL: ") + desc);
    904         }
    905         delete rbt;
    906     }
    907 }
    908 
    909 /**
    910  * Test segments and segment references.
    911  */
    912 void TransliteratorTest::TestSegments(void) {
    913     // Array of 3n items
    914     // Each item is <rules>, <input>, <expected output>
    915     UnicodeString DATA[] = {
    916         "([a-z]) '.' ([0-9]) > $2 '-' $1",
    917         "abc.123.xyz.456",
    918         "ab1-c23.xy4-z56",
    919 
    920         // nested
    921         "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
    922         "a1 b2",
    923         "a1.a.1 b2.b.2",
    924     };
    925     int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
    926 
    927     for (int32_t i=0; i<DATA_length; i+=3) {
    928         logln("Pattern: " + prettify(DATA[i]));
    929         UParseError parseError;
    930         UErrorCode status = U_ZERO_ERROR;
    931         Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status);
    932         if (U_FAILURE(status)) {
    933             errln("FAIL: RBT constructor");
    934         } else {
    935             expect(*t, DATA[i+1], DATA[i+2]);
    936         }
    937         delete t;
    938     }
    939 }
    940 
    941 /**
    942  * Test cursor positioning outside of the key
    943  */
    944 void TransliteratorTest::TestCursorOffset(void) {
    945     // Array of 3n items
    946     // Each item is <rules>, <input>, <expected output>
    947     UnicodeString DATA[] = {
    948         "pre {alpha} post > | @ ALPHA ;"
    949         "eALPHA > beta ;"
    950         "pre {beta} post > BETA @@ | ;"
    951         "post > xyz",
    952 
    953         "prealphapost prebetapost",
    954 
    955         "prbetaxyz preBETApost",
    956     };
    957     int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
    958 
    959     for (int32_t i=0; i<DATA_length; i+=3) {
    960         logln("Pattern: " + prettify(DATA[i]));
    961         UParseError parseError;
    962         UErrorCode status = U_ZERO_ERROR;
    963         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
    964         if (U_FAILURE(status)) {
    965             errln("FAIL: RBT constructor");
    966         } else {
    967             expect(*t, DATA[i+1], DATA[i+2]);
    968         }
    969         delete t;
    970     }
    971 }
    972 
    973 /**
    974  * Test zero length and > 1 char length variable values.  Test
    975  * use of variable refs in UnicodeSets.
    976  */
    977 void TransliteratorTest::TestArbitraryVariableValues(void) {
    978     // Array of 3n items
    979     // Each item is <rules>, <input>, <expected output>
    980     UnicodeString DATA[] = {
    981         "$abe = ab;"
    982         "$pat = x[yY]z;"
    983         "$ll  = 'a-z';"
    984         "$llZ = [$ll];"
    985         "$llY = [$ll$pat];"
    986         "$emp = ;"
    987 
    988         "$abe > ABE;"
    989         "$pat > END;"
    990         "$llZ > 1;"
    991         "$llY > 2;"
    992         "7$emp 8 > 9;"
    993         "",
    994 
    995         "ab xYzxyz stY78",
    996         "ABE ENDEND 1129",
    997     };
    998     int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
    999 
   1000     for (int32_t i=0; i<DATA_length; i+=3) {
   1001         logln("Pattern: " + prettify(DATA[i]));
   1002         UParseError parseError;
   1003         UErrorCode status = U_ZERO_ERROR;
   1004         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
   1005         if (U_FAILURE(status)) {
   1006             errln("FAIL: RBT constructor");
   1007         } else {
   1008             expect(*t, DATA[i+1], DATA[i+2]);
   1009         }
   1010         delete t;
   1011     }
   1012 }
   1013 
   1014 /**
   1015  * Confirm that the contextStart, contextLimit, start, and limit
   1016  * behave correctly. J474.
   1017  */
   1018 void TransliteratorTest::TestPositionHandling(void) {
   1019     // Array of 3n items
   1020     // Each item is <rules>, <input>, <expected output>
   1021     const char* DATA[] = {
   1022         "a{t} > SS ; {t}b > UU ; {t} > TT ;",
   1023         "xtat txtb", // pos 0,9,0,9
   1024         "xTTaSS TTxUUb",
   1025 
   1026         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
   1027         "xtat txtb", // pos 2,9,3,8
   1028         "xtaSS TTxUUb",
   1029 
   1030         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
   1031         "xtat txtb", // pos 3,8,3,8
   1032         "xtaTT TTxTTb",
   1033     };
   1034 
   1035     // Array of 4n positions -- these go with the DATA array
   1036     // They are: contextStart, contextLimit, start, limit
   1037     int32_t POS[] = {
   1038         0, 9, 0, 9,
   1039         2, 9, 3, 8,
   1040         3, 8, 3, 8,
   1041     };
   1042 
   1043     int32_t n = (int32_t)(sizeof(DATA) / sizeof(DATA[0])) / 3;
   1044     for (int32_t i=0; i<n; i++) {
   1045         UErrorCode status = U_ZERO_ERROR;
   1046         UParseError parseError;
   1047         Transliterator *t = Transliterator::createFromRules("<ID>",
   1048                                 DATA[3*i], UTRANS_FORWARD, parseError, status);
   1049         if (U_FAILURE(status)) {
   1050             delete t;
   1051             errln("FAIL: RBT constructor");
   1052             return;
   1053         }
   1054         UTransPosition pos;
   1055         pos.contextStart= POS[4*i];
   1056         pos.contextLimit = POS[4*i+1];
   1057         pos.start = POS[4*i+2];
   1058         pos.limit = POS[4*i+3];
   1059         UnicodeString rsource(DATA[3*i+1]);
   1060         t->transliterate(rsource, pos, status);
   1061         if (U_FAILURE(status)) {
   1062             delete t;
   1063             errln("FAIL: transliterate");
   1064             return;
   1065         }
   1066         t->finishTransliteration(rsource, pos);
   1067         expectAux(DATA[3*i],
   1068                   DATA[3*i+1],
   1069                   rsource,
   1070                   DATA[3*i+2]);
   1071         delete t;
   1072     }
   1073 }
   1074 
   1075 /**
   1076  * Test the Hiragana-Katakana transliterator.
   1077  */
   1078 void TransliteratorTest::TestHiraganaKatakana(void) {
   1079     UParseError parseError;
   1080     UErrorCode status = U_ZERO_ERROR;
   1081     Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
   1082     Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
   1083     if (hk == 0 || kh == 0) {
   1084         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
   1085         delete hk;
   1086         delete kh;
   1087         return;
   1088     }
   1089 
   1090     // Array of 3n items
   1091     // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
   1092     const char* DATA[] = {
   1093         "both",
   1094         "\\u3042\\u3090\\u3099\\u3092\\u3050",
   1095         "\\u30A2\\u30F8\\u30F2\\u30B0",
   1096 
   1097         "kh",
   1098         "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
   1099         "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
   1100     };
   1101     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
   1102 
   1103     for (int32_t i=0; i<DATA_length; i+=3) {
   1104         UnicodeString h = CharsToUnicodeString(DATA[i+1]);
   1105         UnicodeString k = CharsToUnicodeString(DATA[i+2]);
   1106         switch (*DATA[i]) {
   1107         case 0x68: //'h': // Hiragana-Katakana
   1108             expect(*hk, h, k);
   1109             break;
   1110         case 0x6B: //'k': // Katakana-Hiragana
   1111             expect(*kh, k, h);
   1112             break;
   1113         case 0x62: //'b': // both
   1114             expect(*hk, h, k);
   1115             expect(*kh, k, h);
   1116             break;
   1117         }
   1118     }
   1119     delete hk;
   1120     delete kh;
   1121 }
   1122 
   1123 /**
   1124  * Test cloning / copy constructor of RBT.
   1125  */
   1126 void TransliteratorTest::TestCopyJ476(void) {
   1127     // The real test here is what happens when the destructors are
   1128     // called.  So we let one object get destructed, and check to
   1129     // see that its copy still works.
   1130     Transliterator *t2 = 0;
   1131     {
   1132         UParseError parseError;
   1133         UErrorCode status = U_ZERO_ERROR;
   1134         Transliterator *t1 = Transliterator::createFromRules("t1",
   1135             "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
   1136         if (U_FAILURE(status)) {
   1137             errln("FAIL: RBT constructor");
   1138             return;
   1139         }
   1140         t2 = t1->clone(); // Call copy constructor under the covers.
   1141         expect(*t1, "abcfoofoo", "ABcbar");
   1142         delete t1;
   1143     }
   1144     expect(*t2, "abcfoofoo", "ABcbar");
   1145     delete t2;
   1146 }
   1147 
   1148 /**
   1149  * Test inter-Indic transliterators.  These are composed.
   1150  * ICU4C Jitterbug 483.
   1151  */
   1152 void TransliteratorTest::TestInterIndic(void) {
   1153     UnicodeString ID("Devanagari-Gujarati", "");
   1154     UErrorCode status = U_ZERO_ERROR;
   1155     UParseError parseError;
   1156     Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
   1157     if (dg == 0) {
   1158         dataerrln("FAIL: createInstance(" + ID + ") returned NULL - " + u_errorName(status));
   1159         return;
   1160     }
   1161     UnicodeString id = dg->getID();
   1162     if (id != ID) {
   1163         errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
   1164     }
   1165     UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
   1166     UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
   1167     expect(*dg, dev, guj);
   1168     delete dg;
   1169 }
   1170 
   1171 /**
   1172  * Test filter syntax in IDs. (J918)
   1173  */
   1174 void TransliteratorTest::TestFilterIDs(void) {
   1175     // Array of 3n strings:
   1176     // <id>, <inverse id>, <input>, <expected output>
   1177     const char* DATA[] = {
   1178         "[aeiou]Any-Hex", // ID
   1179         "[aeiou]Hex-Any", // expected inverse ID
   1180         "quizzical",      // src
   1181         "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
   1182 
   1183         "[aeiou]Any-Hex;[^5]Hex-Any",
   1184         "[^5]Any-Hex;[aeiou]Hex-Any",
   1185         "quizzical",
   1186         "q\\u0075izzical",
   1187 
   1188         "[abc]Null",
   1189         "[abc]Null",
   1190         "xyz",
   1191         "xyz",
   1192     };
   1193     enum { DATA_length = sizeof(DATA) / sizeof(DATA[0]) };
   1194 
   1195     for (int i=0; i<DATA_length; i+=4) {
   1196         UnicodeString ID(DATA[i], "");
   1197         UnicodeString uID(DATA[i+1], "");
   1198         UnicodeString data2(DATA[i+2], "");
   1199         UnicodeString data3(DATA[i+3], "");
   1200         UParseError parseError;
   1201         UErrorCode status = U_ZERO_ERROR;
   1202         Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
   1203         if (t == 0) {
   1204             errln("FAIL: createInstance(" + ID + ") returned NULL");
   1205             return;
   1206         }
   1207         expect(*t, data2, data3);
   1208 
   1209         // Check the ID
   1210         if (ID != t->getID()) {
   1211             errln("FAIL: createInstance(" + ID + ").getID() => " +
   1212                   t->getID());
   1213         }
   1214 
   1215         // Check the inverse
   1216         Transliterator *u = t->createInverse(status);
   1217         if (u == 0) {
   1218             errln("FAIL: " + ID + ".createInverse() returned NULL");
   1219         } else if (u->getID() != uID) {
   1220             errln("FAIL: " + ID + ".createInverse().getID() => " +
   1221                   u->getID() + ", expected " + uID);
   1222         }
   1223 
   1224         delete t;
   1225         delete u;
   1226     }
   1227 }
   1228 
   1229 /**
   1230  * Test the case mapping transliterators.
   1231  */
   1232 void TransliteratorTest::TestCaseMap(void) {
   1233     UParseError parseError;
   1234     UErrorCode status = U_ZERO_ERROR;
   1235     Transliterator* toUpper =
   1236         Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
   1237     Transliterator* toLower =
   1238         Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
   1239     Transliterator* toTitle =
   1240         Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
   1241     if (toUpper==0 || toLower==0 || toTitle==0) {
   1242         errln("FAIL: createInstance returned NULL");
   1243         delete toUpper;
   1244         delete toLower;
   1245         delete toTitle;
   1246         return;
   1247     }
   1248 
   1249     expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
   1250            "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
   1251     expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
   1252            "the quick brown foX jumped over the lazY dogs.");
   1253     expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
   1254            "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
   1255 
   1256     delete toUpper;
   1257     delete toLower;
   1258     delete toTitle;
   1259 }
   1260 
   1261 /**
   1262  * Test the name mapping transliterators.
   1263  */
   1264 void TransliteratorTest::TestNameMap(void) {
   1265     UParseError parseError;
   1266     UErrorCode status = U_ZERO_ERROR;
   1267     Transliterator* uni2name =
   1268         Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
   1269     Transliterator* name2uni =
   1270         Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
   1271     if (uni2name==0 || name2uni==0) {
   1272         errln("FAIL: createInstance returned NULL");
   1273         delete uni2name;
   1274         delete name2uni;
   1275         return;
   1276     }
   1277 
   1278     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
   1279     expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
   1280            CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
   1281     expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{  CJK UNIFIED  IDEOGRAPH-4E01  }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
   1282            CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
   1283 
   1284     delete uni2name;
   1285     delete name2uni;
   1286 
   1287     // round trip
   1288     Transliterator* t =
   1289         Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
   1290     if (t==0) {
   1291         errln("FAIL: createInstance returned NULL");
   1292         delete t;
   1293         return;
   1294     }
   1295 
   1296     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
   1297     UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
   1298     expect(*t, s, s);
   1299     delete t;
   1300 }
   1301 
   1302 /**
   1303  * Test liberalized ID syntax.  1006c
   1304  */
   1305 void TransliteratorTest::TestLiberalizedID(void) {
   1306     // Some test cases have an expected getID() value of NULL.  This
   1307     // means I have disabled the test case for now.  This stuff is
   1308     // still under development, and I haven't decided whether to make
   1309     // getID() return canonical case yet.  It will all get rewritten
   1310     // with the move to Source-Target/Variant IDs anyway. [aliu]
   1311     const char* DATA[] = {
   1312         "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",
   1313         "  Null  ", "Null", "whitespace",
   1314         " Latin[a-z]-Greek  ", "[a-z]Latin-Greek", "inline filter",
   1315         "  null  ; latin-greek  ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",
   1316     };
   1317     const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
   1318     UParseError parseError;
   1319     UErrorCode status= U_ZERO_ERROR;
   1320     for (int32_t i=0; i<DATA_length; i+=3) {
   1321         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
   1322         if (t == 0) {
   1323             dataerrln(UnicodeString("FAIL: ") + DATA[i+2] +
   1324                   " cannot create ID \"" + DATA[i] + "\" - " + u_errorName(status));
   1325         } else {
   1326             UnicodeString exp;
   1327             if (DATA[i+1]) {
   1328                 exp = UnicodeString(DATA[i+1], "");
   1329             }
   1330             // Don't worry about getID() if the expected char*
   1331             // is NULL -- see above.
   1332             if (exp.length() == 0 || exp == t->getID()) {
   1333                 logln(UnicodeString("Ok: ") + DATA[i+2] +
   1334                       " create ID \"" + DATA[i] + "\" => \"" +
   1335                       exp + "\"");
   1336             } else {
   1337                 errln(UnicodeString("FAIL: ") + DATA[i+2] +
   1338                       " create ID \"" + DATA[i] + "\" => \"" +
   1339                       t->getID() + "\", exp \"" + exp + "\"");
   1340             }
   1341             delete t;
   1342         }
   1343     }
   1344 }
   1345 
   1346 /* test for Jitterbug 912 */
   1347 void TransliteratorTest::TestCreateInstance(){
   1348     const char* FORWARD = "F";
   1349     const char* REVERSE = "R";
   1350     const char* DATA[] = {
   1351         // Column 1: id
   1352         // Column 2: direction
   1353         // Column 3: expected ID, or "" if expect failure
   1354         "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
   1355 
   1356         // JB#2689: bad compound causes crash
   1357         "InvalidSource-InvalidTarget", FORWARD, "",
   1358         "InvalidSource-InvalidTarget", REVERSE, "",
   1359         "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
   1360         "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
   1361         "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
   1362         "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
   1363 
   1364         NULL
   1365     };
   1366 
   1367     for (int32_t i=0; DATA[i]; i+=3) {
   1368         UParseError err;
   1369         UErrorCode ec = U_ZERO_ERROR;
   1370         UnicodeString id(DATA[i]);
   1371         UTransDirection dir = (DATA[i+1]==FORWARD)?
   1372             UTRANS_FORWARD:UTRANS_REVERSE;
   1373         UnicodeString expID(DATA[i+2]);
   1374         Transliterator* t =
   1375             Transliterator::createInstance(id,dir,err,ec);
   1376         UnicodeString newID;
   1377         if (t) {
   1378             newID = t->getID();
   1379         }
   1380         UBool ok = (newID == expID);
   1381         if (!t) {
   1382             newID = u_errorName(ec);
   1383         }
   1384         if (ok) {
   1385             logln((UnicodeString)"Ok: createInstance(" +
   1386                   id + "," + DATA[i+1] + ") => " + newID);
   1387         } else {
   1388             dataerrln((UnicodeString)"FAIL: createInstance(" +
   1389                   id + "," + DATA[i+1] + ") => " + newID +
   1390                   ", expected " + expID);
   1391         }
   1392         delete t;
   1393     }
   1394 }
   1395 
   1396 /**
   1397  * Test the normalization transliterator.
   1398  */
   1399 void TransliteratorTest::TestNormalizationTransliterator() {
   1400     // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
   1401     // PLEASE KEEP THEM IN SYNC WITH BasicTest.
   1402     const char* CANON[] = {
   1403         // Input               Decomposed            Composed
   1404         "cat",                "cat",                "cat"               ,
   1405         "\\u00e0ardvark",      "a\\u0300ardvark",     "\\u00e0ardvark"    ,
   1406 
   1407         "\\u1e0a",             "D\\u0307",            "\\u1e0a"            , // D-dot_above
   1408         "D\\u0307",            "D\\u0307",            "\\u1e0a"            , // D dot_above
   1409 
   1410         "\\u1e0c\\u0307",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_below dot_above
   1411         "\\u1e0a\\u0323",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_above dot_below
   1412         "D\\u0307\\u0323",      "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D dot_below dot_above
   1413 
   1414         "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
   1415         "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
   1416 
   1417         "\\u1E14",             "E\\u0304\\u0300",      "\\u1E14"            , // E-macron-grave
   1418         "\\u0112\\u0300",       "E\\u0304\\u0300",      "\\u1E14"            , // E-macron + grave
   1419         "\\u00c8\\u0304",       "E\\u0300\\u0304",      "\\u00c8\\u0304"      , // E-grave + macron
   1420 
   1421         "\\u212b",             "A\\u030a",            "\\u00c5"            , // angstrom_sign
   1422         "\\u00c5",             "A\\u030a",            "\\u00c5"            , // A-ring
   1423 
   1424         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated with 3.0
   1425         "\\u00fd\\uFB03n",      "y\\u0301\\uFB03n",     "\\u00fd\\uFB03n"     , //updated with 3.0
   1426 
   1427         "Henry IV",           "Henry IV",           "Henry IV"          ,
   1428         "Henry \\u2163",       "Henry \\u2163",       "Henry \\u2163"      ,
   1429 
   1430         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
   1431         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
   1432         "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E"      , // hw_ka + hw_ten
   1433         "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E"      , // ka + hw_ten
   1434         "\\uFF76\\u3099",       "\\uFF76\\u3099",       "\\uFF76\\u3099"      , // hw_ka + ten
   1435 
   1436         "A\\u0300\\u0316",      "A\\u0316\\u0300",      "\\u00C0\\u0316"      ,
   1437         0 // end
   1438     };
   1439 
   1440     const char* COMPAT[] = {
   1441         // Input               Decomposed            Composed
   1442         "\\uFB4f",             "\\u05D0\\u05DC",       "\\u05D0\\u05DC"     , // Alef-Lamed vs. Alef, Lamed
   1443 
   1444         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated for 3.0
   1445         "\\u00fd\\uFB03n",      "y\\u0301ffin",        "\\u00fdffin"        , // ffi ligature -> f + f + i
   1446 
   1447         "Henry IV",           "Henry IV",           "Henry IV"          ,
   1448         "Henry \\u2163",       "Henry IV",           "Henry IV"          ,
   1449 
   1450         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
   1451         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
   1452 
   1453         "\\uFF76\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // hw_ka + ten
   1454         0 // end
   1455     };
   1456 
   1457     int32_t i;
   1458     UParseError parseError;
   1459     UErrorCode status = U_ZERO_ERROR;
   1460     Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
   1461     Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
   1462     if (!NFD || !NFC) {
   1463         dataerrln("FAIL: createInstance failed: %s", u_errorName(status));
   1464         delete NFD;
   1465         delete NFC;
   1466         return;
   1467     }
   1468     for (i=0; CANON[i]; i+=3) {
   1469         UnicodeString in = CharsToUnicodeString(CANON[i]);
   1470         UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
   1471         UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
   1472         expect(*NFD, in, expd);
   1473         expect(*NFC, in, expc);
   1474     }
   1475     delete NFD;
   1476     delete NFC;
   1477 
   1478     Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
   1479     Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
   1480     if (!NFKD || !NFKC) {
   1481         errln("FAIL: createInstance failed");
   1482         delete NFKD;
   1483         delete NFKC;
   1484         return;
   1485     }
   1486     for (i=0; COMPAT[i]; i+=3) {
   1487         UnicodeString in = CharsToUnicodeString(COMPAT[i]);
   1488         UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
   1489         UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
   1490         expect(*NFKD, in, expkd);
   1491         expect(*NFKC, in, expkc);
   1492     }
   1493     delete NFKD;
   1494     delete NFKC;
   1495 
   1496     UParseError pe;
   1497     status = U_ZERO_ERROR;
   1498     Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
   1499                                                        UTRANS_FORWARD,
   1500                                                        pe, status);
   1501     if (t == 0) {
   1502         errln("FAIL: createInstance failed");
   1503     }
   1504     expect(*t, CharsToUnicodeString("\\u010dx"),
   1505            CharsToUnicodeString("c\\u030C"));
   1506     delete t;
   1507 }
   1508 
   1509 /**
   1510  * Test compound RBT rules.
   1511  */
   1512 void TransliteratorTest::TestCompoundRBT(void) {
   1513     // Careful with spacing and ';' here:  Phrase this exactly
   1514     // as toRules() is going to return it.  If toRules() changes
   1515     // with regard to spacing or ';', then adjust this string.
   1516     UnicodeString rule("::Hex-Any;\n"
   1517                        "::Any-Lower;\n"
   1518                        "a > '.A.';\n"
   1519                        "b > '.B.';\n"
   1520                        "::[^t]Any-Upper;", "");
   1521     UParseError parseError;
   1522     UErrorCode status = U_ZERO_ERROR;
   1523     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
   1524     if (t == 0) {
   1525         errln("FAIL: createFromRules failed");
   1526         return;
   1527     }
   1528     expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
   1529            "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
   1530     UnicodeString r;
   1531     t->toRules(r, TRUE);
   1532     if (r == rule) {
   1533         logln((UnicodeString)"OK: toRules() => " + r);
   1534     } else {
   1535         errln((UnicodeString)"FAIL: toRules() => " + r +
   1536               ", expected " + rule);
   1537     }
   1538     delete t;
   1539 
   1540     // Now test toRules
   1541     t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
   1542     if (t == 0) {
   1543         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
   1544         return;
   1545     }
   1546     UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
   1547     t->toRules(r, TRUE);
   1548     if (r != exp) {
   1549         errln((UnicodeString)"FAIL: toRules() => " + r +
   1550               ", expected " + exp);
   1551     } else {
   1552         logln((UnicodeString)"OK: toRules() => " + r);
   1553     }
   1554     delete t;
   1555 
   1556     // Round trip the result of toRules
   1557     t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
   1558     if (t == 0) {
   1559         errln("FAIL: createFromRules #2 failed");
   1560         return;
   1561     } else {
   1562         logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
   1563     }
   1564 
   1565     // Test toRules again
   1566     t->toRules(r, TRUE);
   1567     if (r != exp) {
   1568         errln((UnicodeString)"FAIL: toRules() => " + r +
   1569               ", expected " + exp);
   1570     } else {
   1571         logln((UnicodeString)"OK: toRules() => " + r);
   1572     }
   1573 
   1574     delete t;
   1575 
   1576     // Test Foo(Bar) IDs.  Careful with spacing in id; make it conform
   1577     // to what the regenerated ID will look like.
   1578     UnicodeString id("Upper(Lower);(NFKC)", "");
   1579     t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
   1580     if (t == 0) {
   1581         errln("FAIL: createInstance #2 failed");
   1582         return;
   1583     }
   1584     if (t->getID() == id) {
   1585         logln((UnicodeString)"OK: created " + id);
   1586     } else {
   1587         errln((UnicodeString)"FAIL: createInstance(" + id +
   1588               ").getID() => " + t->getID());
   1589     }
   1590 
   1591     Transliterator *u = t->createInverse(status);
   1592     if (u == 0) {
   1593         errln("FAIL: createInverse failed");
   1594         delete t;
   1595         return;
   1596     }
   1597     exp = "NFKC();Lower(Upper)";
   1598     if (u->getID() == exp) {
   1599         logln((UnicodeString)"OK: createInverse(" + id + ") => " +
   1600               u->getID());
   1601     } else {
   1602         errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
   1603               u->getID());
   1604     }
   1605     delete t;
   1606     delete u;
   1607 }
   1608 
   1609 /**
   1610  * Compound filter semantics were orginially not implemented
   1611  * correctly.  Originally, each component filter f(i) is replaced by
   1612  * f'(i) = f(i) && g, where g is the filter for the compound
   1613  * transliterator.
   1614  *
   1615  * From Mark:
   1616  *
   1617  * Suppose and I have a transliterator X. Internally X is
   1618  * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
   1619  *
   1620  * The compound should convert all greek characters (through latin) to
   1621  * cyrillic, then lowercase the result. The filter should say "don't
   1622  * touch 'A' in the original". But because an intermediate result
   1623  * happens to go through "A", the Greek Alpha gets hung up.
   1624  */
   1625 void TransliteratorTest::TestCompoundFilter(void) {
   1626     UParseError parseError;
   1627     UErrorCode status = U_ZERO_ERROR;
   1628     Transliterator *t = Transliterator::createInstance
   1629         ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
   1630     if (t == 0) {
   1631         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
   1632         return;
   1633     }
   1634     t->adoptFilter(new UnicodeSet("[^A]", status));
   1635     if (U_FAILURE(status)) {
   1636         errln("FAIL: UnicodeSet ct failed");
   1637         delete t;
   1638         return;
   1639     }
   1640 
   1641     // Only the 'A' at index 1 should remain unchanged
   1642     expect(*t,
   1643            CharsToUnicodeString("BA\\u039A\\u0391"),
   1644            CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
   1645     delete t;
   1646 }
   1647 
   1648 void TransliteratorTest::TestRemove(void) {
   1649     UParseError parseError;
   1650     UErrorCode status = U_ZERO_ERROR;
   1651     Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
   1652     if (t == 0) {
   1653         errln("FAIL: createInstance failed");
   1654         return;
   1655     }
   1656 
   1657     expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
   1658 
   1659     // extra test for RemoveTransliterator::clone(), which at one point wasn't
   1660     // duplicating the filter
   1661     Transliterator* t2 = t->clone();
   1662     expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
   1663 
   1664     delete t;
   1665     delete t2;
   1666 }
   1667 
   1668 void TransliteratorTest::TestToRules(void) {
   1669     const char* RBT = "rbt";
   1670     const char* SET = "set";
   1671     static const char* DATA[] = {
   1672         RBT,
   1673         "$a=\\u4E61; [$a] > A;",
   1674         "[\\u4E61] > A;",
   1675 
   1676         RBT,
   1677         "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
   1678         "[[:Zs:][:Zl:]]{a} > A;",
   1679 
   1680         SET,
   1681         "[[:Zs:][:Zl:]]",
   1682         "[[:Zs:][:Zl:]]",
   1683 
   1684         SET,
   1685         "[:Ps:]",
   1686         "[:Ps:]",
   1687 
   1688         SET,
   1689         "[:L:]",
   1690         "[:L:]",
   1691 
   1692         SET,
   1693         "[[:L:]-[A]]",
   1694         "[[:L:]-[A]]",
   1695 
   1696         SET,
   1697         "[~[:Lu:][:Ll:]]",
   1698         "[~[:Lu:][:Ll:]]",
   1699 
   1700         SET,
   1701         "[~[a-z]]",
   1702         "[~[a-z]]",
   1703 
   1704         RBT,
   1705         "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
   1706         "[^[:Zs:]]{a} > A;",
   1707 
   1708         RBT,
   1709         "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
   1710         "[[a-z]-[:Zs:]]{a} > A;",
   1711 
   1712         RBT,
   1713         "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
   1714         "[[:Zs:]&[a-z]]{a} > A;",
   1715 
   1716         RBT,
   1717         "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
   1718         "[x[:Zs:]]{a} > A;",
   1719 
   1720         RBT,
   1721         "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
   1722         "$macron = \\u0304 ;"
   1723         "$evowel = [aeiouyAEIOUY] ;"
   1724         "$iotasub = \\u0345 ;"
   1725         "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
   1726         "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
   1727 
   1728         RBT,
   1729         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
   1730         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
   1731     };
   1732     static const int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
   1733 
   1734     for (int32_t d=0; d < DATA_length; d+=3) {
   1735         if (DATA[d] == RBT) {
   1736             // Transliterator test
   1737             UParseError parseError;
   1738             UErrorCode status = U_ZERO_ERROR;
   1739             Transliterator *t = Transliterator::createFromRules("ID",
   1740                                                                 UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status);
   1741             if (t == 0) {
   1742                 dataerrln("FAIL: createFromRules failed - %s", u_errorName(status));
   1743                 return;
   1744             }
   1745             UnicodeString rules, escapedRules;
   1746             t->toRules(rules, FALSE);
   1747             t->toRules(escapedRules, TRUE);
   1748             UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
   1749             UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
   1750             if (rules == expRules) {
   1751                 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
   1752                       " => " + rules);
   1753             } else {
   1754                 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
   1755                       " => " + rules + ", exp " + expRules);
   1756             }
   1757             if (escapedRules == expEscapedRules) {
   1758                 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
   1759                       " => " + escapedRules);
   1760             } else {
   1761                 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
   1762                       " => " + escapedRules + ", exp " + expEscapedRules);
   1763             }
   1764             delete t;
   1765 
   1766         } else {
   1767             // UnicodeSet test
   1768             UErrorCode status = U_ZERO_ERROR;
   1769             UnicodeString pat(DATA[d+1], -1, US_INV);
   1770             UnicodeString expToPat(DATA[d+2], -1, US_INV);
   1771             UnicodeSet set(pat, status);
   1772             if (U_FAILURE(status)) {
   1773                 errln("FAIL: UnicodeSet ct failed");
   1774                 return;
   1775             }
   1776             // Adjust spacing etc. as necessary.
   1777             UnicodeString toPat;
   1778             set.toPattern(toPat);
   1779             if (expToPat == toPat) {
   1780                 logln((UnicodeString)"Ok: " + pat +
   1781                       " => " + toPat);
   1782             } else {
   1783                 errln((UnicodeString)"FAIL: " + pat +
   1784                       " => " + prettify(toPat, TRUE) +
   1785                       ", exp " + prettify(pat, TRUE));
   1786             }
   1787         }
   1788     }
   1789 }
   1790 
   1791 void TransliteratorTest::TestContext() {
   1792     UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
   1793     expect("de > x; {d}e > y;",
   1794            "de",
   1795            "ye",
   1796            &pos);
   1797 
   1798     expect("ab{c} > z;",
   1799            "xadabdabcy",
   1800            "xadabdabzy");
   1801 }
   1802 
   1803 void TransliteratorTest::TestSupplemental() {
   1804 
   1805     expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
   1806                                 "a > $a; $s > i;"),
   1807            CharsToUnicodeString("ab\\U0001030Fx"),
   1808            CharsToUnicodeString("\\U00010300bix"));
   1809 
   1810     expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
   1811                                 "$b=[A-Z\\U00010400-\\U0001044D];"
   1812                                 "($a)($b) > $2 $1;"),
   1813            CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
   1814            CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
   1815 
   1816     // k|ax\\U00010300xm
   1817 
   1818     // k|a\\U00010400\\U00010300xm
   1819     // ky|\\U00010400\\U00010300xm
   1820     // ky\\U00010400|\\U00010300xm
   1821 
   1822     // ky\\U00010400|\\U00010300\\U00010400m
   1823     // ky\\U00010400y|\\U00010400m
   1824     expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
   1825                                 "$a {x} > | @ \\U00010400;"
   1826                                 "{$a} [^\\u0000-\\uFFFF] > y;"),
   1827            CharsToUnicodeString("kax\\U00010300xm"),
   1828            CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
   1829 
   1830     expectT("Any-Name",
   1831            CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
   1832            UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
   1833 
   1834     expectT("Any-Hex/Unicode",
   1835            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
   1836            UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
   1837 
   1838     expectT("Any-Hex/C",
   1839            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
   1840            UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
   1841 
   1842     expectT("Any-Hex/Perl",
   1843            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
   1844            UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
   1845 
   1846     expectT("Any-Hex/Java",
   1847            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
   1848            UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
   1849 
   1850     expectT("Any-Hex/XML",
   1851            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
   1852            "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
   1853 
   1854     expectT("Any-Hex/XML10",
   1855            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
   1856            "&#66352;&#1113856;&#917601;&#160;");
   1857 
   1858     expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
   1859            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
   1860            CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
   1861 }
   1862 
   1863 void TransliteratorTest::TestQuantifier() {
   1864 
   1865     // Make sure @ in a quantified anteContext works
   1866     expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
   1867            "AAAAAb",
   1868            "aaa(aac)");
   1869 
   1870     // Make sure @ in a quantified postContext works
   1871     expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
   1872            "baaaaa",
   1873            "caa(aaa)");
   1874 
   1875     // Make sure @ in a quantified postContext with seg ref works
   1876     expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
   1877            "baaaaa",
   1878            "baa(aaa)");
   1879 
   1880     // Make sure @ past ante context doesn't enter ante context
   1881     UTransPosition pos = {0, 5, 3, 5};
   1882     expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
   1883            "xxxab",
   1884            "xxx(ac)",
   1885            &pos);
   1886 
   1887     // Make sure @ past post context doesn't pass limit
   1888     UTransPosition pos2 = {0, 4, 0, 2};
   1889     expect("{b} a+ > c @@ |; x > y; a > A;",
   1890            "baxx",
   1891            "caxx",
   1892            &pos2);
   1893 
   1894     // Make sure @ past post context doesn't enter post context
   1895     expect("{b} a+ > c @@ |; x > y; a > A;",
   1896            "baxx",
   1897            "cayy");
   1898 
   1899     expect("(ab)? c > d;",
   1900            "c abc ababc",
   1901            "d d abd");
   1902 
   1903     // NOTE: The (ab)+ when referenced just yields a single "ab",
   1904     // not the full sequence of them.  This accords with perl behavior.
   1905     expect("(ab)+ {x} > '(' $1 ')';",
   1906            "x abx ababxy",
   1907            "x ab(ab) abab(ab)y");
   1908 
   1909     expect("b+ > x;",
   1910            "ac abc abbc abbbc",
   1911            "ac axc axc axc");
   1912 
   1913     expect("[abc]+ > x;",
   1914            "qac abrc abbcs abtbbc",
   1915            "qx xrx xs xtx");
   1916 
   1917     expect("q{(ab)+} > x;",
   1918            "qa qab qaba qababc qaba",
   1919            "qa qx qxa qxc qxa");
   1920 
   1921     expect("q(ab)* > x;",
   1922            "qa qab qaba qababc",
   1923            "xa x xa xc");
   1924 
   1925     // NOTE: The (ab)+ when referenced just yields a single "ab",
   1926     // not the full sequence of them.  This accords with perl behavior.
   1927     expect("q(ab)* > '(' $1 ')';",
   1928            "qa qab qaba qababc",
   1929            "()a (ab) (ab)a (ab)c");
   1930 
   1931     // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
   1932     // quoted string
   1933     expect("'ab'+ > x;",
   1934            "bb ab ababb",
   1935            "bb x xb");
   1936 
   1937     // $foo+ and $foo* -- the quantifier should apply to the entire
   1938     // variable reference
   1939     expect("$var = ab; $var+ > x;",
   1940            "bb ab ababb",
   1941            "bb x xb");
   1942 }
   1943 
   1944 class TestTrans : public Transliterator {
   1945 public:
   1946     TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
   1947     }
   1948     virtual Transliterator* clone(void) const {
   1949         return new TestTrans(getID());
   1950     }
   1951     virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets,
   1952         UBool /*isIncremental*/) const
   1953     {
   1954         offsets.start = offsets.limit;
   1955     }
   1956     virtual UClassID getDynamicClassID() const;
   1957     static UClassID U_EXPORT2 getStaticClassID();
   1958 };
   1959 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
   1960 
   1961 /**
   1962  * Test Source-Target/Variant.
   1963  */
   1964 void TransliteratorTest::TestSTV(void) {
   1965     int32_t ns = Transliterator::countAvailableSources();
   1966     if (ns < 0 || ns > 255) {
   1967         errln((UnicodeString)"FAIL: Bad source count: " + ns);
   1968         return;
   1969     }
   1970     int32_t i, j;
   1971     for (i=0; i<ns; ++i) {
   1972         UnicodeString source;
   1973         Transliterator::getAvailableSource(i, source);
   1974         logln((UnicodeString)"" + i + ": " + source);
   1975         if (source.length() == 0) {
   1976             errln("FAIL: empty source");
   1977             continue;
   1978         }
   1979         int32_t nt = Transliterator::countAvailableTargets(source);
   1980         if (nt < 0 || nt > 255) {
   1981             errln((UnicodeString)"FAIL: Bad target count: " + nt);
   1982             continue;
   1983         }
   1984         for (int32_t j=0; j<nt; ++j) {
   1985             UnicodeString target;
   1986             Transliterator::getAvailableTarget(j, source, target);
   1987             logln((UnicodeString)" " + j + ": " + target);
   1988             if (target.length() == 0) {
   1989                 errln("FAIL: empty target");
   1990                 continue;
   1991             }
   1992             int32_t nv = Transliterator::countAvailableVariants(source, target);
   1993             if (nv < 0 || nv > 255) {
   1994                 errln((UnicodeString)"FAIL: Bad variant count: " + nv);
   1995                 continue;
   1996             }
   1997             for (int32_t k=0; k<nv; ++k) {
   1998                 UnicodeString variant;
   1999                 Transliterator::getAvailableVariant(k, source, target, variant);
   2000                 if (variant.length() == 0) {
   2001                     logln((UnicodeString)"  " + k + ": <empty>");
   2002                 } else {
   2003                     logln((UnicodeString)"  " + k + ": " + variant);
   2004                 }
   2005             }
   2006         }
   2007     }
   2008 
   2009     // Test registration
   2010     const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
   2011     const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
   2012     const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
   2013     for (i=0; i<3; ++i) {
   2014         Transliterator *t = new TestTrans(IDS[i]);
   2015         if (t == 0) {
   2016             errln("FAIL: out of memory");
   2017             return;
   2018         }
   2019         if (t->getID() != IDS[i]) {
   2020             errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
   2021             delete t;
   2022             return;
   2023         }
   2024         Transliterator::registerInstance(t);
   2025         UErrorCode status = U_ZERO_ERROR;
   2026         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
   2027         if (t == NULL) {
   2028             errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
   2029                   IDS[i]);
   2030         } else {
   2031             logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
   2032                   IDS[i]);
   2033             delete t;
   2034         }
   2035         Transliterator::unregister(IDS[i]);
   2036         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
   2037         if (t != NULL) {
   2038             errln((UnicodeString)"FAIL: Unregistration failed for ID " +
   2039                   IDS[i]);
   2040             delete t;
   2041         }
   2042     }
   2043 
   2044     // Make sure getAvailable API reflects removal
   2045     int32_t n = Transliterator::countAvailableIDs();
   2046     for (i=0; i<n; ++i) {
   2047         UnicodeString id = Transliterator::getAvailableID(i);
   2048         for (j=0; j<3; ++j) {
   2049             if (id.caseCompare(FULL_IDS[j],0)==0) {
   2050                 errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
   2051             }
   2052         }
   2053     }
   2054     n = Transliterator::countAvailableTargets("Any");
   2055     for (i=0; i<n; ++i) {
   2056         UnicodeString t;
   2057         Transliterator::getAvailableTarget(i, "Any", t);
   2058         if (t.caseCompare(IDS[0],0)==0) {
   2059             errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
   2060         }
   2061     }
   2062     n = Transliterator::countAvailableSources();
   2063     for (i=0; i<n; ++i) {
   2064         UnicodeString s;
   2065         Transliterator::getAvailableSource(i, s);
   2066         for (j=0; j<3; ++j) {
   2067             if (SOURCES[j] == NULL) continue;
   2068             if (s.caseCompare(SOURCES[j],0)==0) {
   2069                 errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
   2070             }
   2071         }
   2072     }
   2073 }
   2074 
   2075 /**
   2076  * Test inverse of Greek-Latin; Title()
   2077  */
   2078 void TransliteratorTest::TestCompoundInverse(void) {
   2079     UParseError parseError;
   2080     UErrorCode status = U_ZERO_ERROR;
   2081     Transliterator *t = Transliterator::createInstance
   2082         ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
   2083     if (t == 0) {
   2084         dataerrln("FAIL: createInstance - %s", u_errorName(status));
   2085         return;
   2086     }
   2087     UnicodeString exp("(Title);Latin-Greek");
   2088     if (t->getID() == exp) {
   2089         logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
   2090               t->getID());
   2091     } else {
   2092         errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
   2093               t->getID() + "\", expected \"" + exp + "\"");
   2094     }
   2095     delete t;
   2096 }
   2097 
   2098 /**
   2099  * Test NFD chaining with RBT
   2100  */
   2101 void TransliteratorTest::TestNFDChainRBT() {
   2102     UParseError pe;
   2103     UErrorCode ec = U_ZERO_ERROR;
   2104     Transliterator* t = Transliterator::createFromRules(
   2105                                "TEST", "::NFD; aa > Q; a > q;",
   2106                                UTRANS_FORWARD, pe, ec);
   2107     if (t == NULL || U_FAILURE(ec)) {
   2108         dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
   2109         return;
   2110     }
   2111     expect(*t, "aa", "Q");
   2112     delete t;
   2113 
   2114     // TEMPORARY TESTS -- BEING DEBUGGED
   2115 //=-    UnicodeString s, s2;
   2116 //=-    t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
   2117 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
   2118 //=-    s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
   2119 //=-    expect(*t, s, s2);
   2120 //=-    delete t;
   2121 //=-
   2122 //=-    t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
   2123 //=-    expect(*t, s2, s);
   2124 //=-    delete t;
   2125 //=-
   2126 //=-    t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
   2127 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
   2128 //=-    expect(*t, s, s);
   2129 //=-    delete t;
   2130 
   2131 //    const char* source[] = {
   2132 //        /*
   2133 //        "\\u015Br\\u012Bmad",
   2134 //        "bhagavadg\\u012Bt\\u0101",
   2135 //        "adhy\\u0101ya",
   2136 //        "arjuna",
   2137 //        "vi\\u1E63\\u0101da",
   2138 //        "y\\u014Dga",
   2139 //        "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
   2140 //        "uv\\u0101cr\\u0325",
   2141 //        */
   2142 //        "rmk\\u1E63\\u0113t",
   2143 //      //"dharmak\\u1E63\\u0113tr\\u0113",
   2144 //        /*
   2145 //        "kuruk\\u1E63\\u0113tr\\u0113",
   2146 //        "samav\\u0113t\\u0101",
   2147 //        "yuyutsava-\\u1E25",
   2148 //        "m\\u0101mak\\u0101-\\u1E25",
   2149 //     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
   2150 //        "kimakurvata",
   2151 //        "san\\u0304java",
   2152 //        */
   2153 //
   2154 //        0
   2155 //    };
   2156 //    const char* expected[] = {
   2157 //        /*
   2158 //        "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
   2159 //        "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
   2160 //        "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
   2161 //        "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
   2162 //        "\\u0935\\u093f\\u0937\\u093e\\u0926",
   2163 //        "\\u092f\\u094b\\u0917",
   2164 //        "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
   2165 //        "\\u0909\\u0935\\u093E\\u091A\\u0943",
   2166 //        */
   2167 //        "\\u0927",
   2168 //        //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
   2169 //        /*
   2170 //        "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
   2171 //        "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
   2172 //        "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
   2173 //        "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
   2174 //    //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
   2175 //        "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
   2176 //        "\\u0938\\u0902\\u091c\\u0935",
   2177 //        */
   2178 //        0
   2179 //    };
   2180 //    UErrorCode status = U_ZERO_ERROR;
   2181 //    UParseError parseError;
   2182 //    UnicodeString message;
   2183 //    Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
   2184 //    Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
   2185 //    if(U_FAILURE(status)){
   2186 //        errln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
   2187 //        errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
   2188 //        delete latinToDevToLatin;
   2189 //        delete devToLatinToDev;
   2190 //        return;
   2191 //    }
   2192 //    UnicodeString gotResult;
   2193 //    for(int i= 0; source[i] != 0; i++){
   2194 //        gotResult = source[i];
   2195 //        expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
   2196 //        expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
   2197 //    }
   2198 //    delete latinToDevToLatin;
   2199 //    delete devToLatinToDev;
   2200 }
   2201 
   2202 /**
   2203  * Inverse of "Null" should be "Null". (J21)
   2204  */
   2205 void TransliteratorTest::TestNullInverse() {
   2206     UParseError pe;
   2207     UErrorCode ec = U_ZERO_ERROR;
   2208     Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
   2209     if (t == 0 || U_FAILURE(ec)) {
   2210         errln("FAIL: createInstance");
   2211         return;
   2212     }
   2213     Transliterator *u = t->createInverse(ec);
   2214     if (u == 0 || U_FAILURE(ec)) {
   2215         errln("FAIL: createInverse");
   2216         delete t;
   2217         return;
   2218     }
   2219     if (u->getID() != "Null") {
   2220         errln("FAIL: Inverse of Null should be Null");
   2221     }
   2222     delete t;
   2223     delete u;
   2224 }
   2225 
   2226 /**
   2227  * Check ID of inverse of alias. (J22)
   2228  */
   2229 void TransliteratorTest::TestAliasInverseID() {
   2230     UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
   2231     UParseError pe;
   2232     UErrorCode ec = U_ZERO_ERROR;
   2233     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
   2234     if (t == 0 || U_FAILURE(ec)) {
   2235         dataerrln("FAIL: createInstance - %s", u_errorName(ec));
   2236         return;
   2237     }
   2238     Transliterator *u = t->createInverse(ec);
   2239     if (u == 0 || U_FAILURE(ec)) {
   2240         errln("FAIL: createInverse");
   2241         delete t;
   2242         return;
   2243     }
   2244     UnicodeString exp = "Hangul-Latin";
   2245     UnicodeString got = u->getID();
   2246     if (got != exp) {
   2247         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
   2248               ", expected " + exp);
   2249     }
   2250     delete t;
   2251     delete u;
   2252 }
   2253 
   2254 /**
   2255  * Test IDs of inverses of compound transliterators. (J20)
   2256  */
   2257 void TransliteratorTest::TestCompoundInverseID() {
   2258     UnicodeString ID = "Latin-Jamo;NFC(NFD)";
   2259     UParseError pe;
   2260     UErrorCode ec = U_ZERO_ERROR;
   2261     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
   2262     if (t == 0 || U_FAILURE(ec)) {
   2263         dataerrln("FAIL: createInstance - %s", u_errorName(ec));
   2264         return;
   2265     }
   2266     Transliterator *u = t->createInverse(ec);
   2267     if (u == 0 || U_FAILURE(ec)) {
   2268         errln("FAIL: createInverse");
   2269         delete t;
   2270         return;
   2271     }
   2272     UnicodeString exp = "NFD(NFC);Jamo-Latin";
   2273     UnicodeString got = u->getID();
   2274     if (got != exp) {
   2275         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
   2276               ", expected " + exp);
   2277     }
   2278     delete t;
   2279     delete u;
   2280 }
   2281 
   2282 /**
   2283  * Test undefined variable.
   2284 
   2285  */
   2286 void TransliteratorTest::TestUndefinedVariable() {
   2287     UnicodeString rule = "$initial } a <> \\u1161;";
   2288     UParseError pe;
   2289     UErrorCode ec = U_ZERO_ERROR;
   2290     Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
   2291     delete t;
   2292     if (U_FAILURE(ec)) {
   2293         logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
   2294               u_errorName(ec));
   2295         return;
   2296     }
   2297     errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
   2298           u_errorName(ec));
   2299 }
   2300 
   2301 /**
   2302  * Test empty context.
   2303  */
   2304 void TransliteratorTest::TestEmptyContext() {
   2305     expect(" { a } > b;", "xay a ", "xby b ");
   2306 }
   2307 
   2308 /**
   2309 * Test compound filter ID syntax
   2310 */
   2311 void TransliteratorTest::TestCompoundFilterID(void) {
   2312     static const char* DATA[] = {
   2313         // Col. 1 = ID or rule set (latter must start with #)
   2314 
   2315         // = columns > 1 are null if expect col. 1 to be illegal =
   2316 
   2317         // Col. 2 = direction, "F..." or "R..."
   2318         // Col. 3 = source string
   2319         // Col. 4 = exp result
   2320 
   2321         "[abc]; [abc]", NULL, NULL, NULL, // multiple filters
   2322         "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
   2323         "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
   2324         "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
   2325         "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
   2326         "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
   2327         NULL,
   2328     };
   2329 
   2330     for (int32_t i=0; DATA[i]; i+=4) {
   2331         UnicodeString id = CharsToUnicodeString(DATA[i]);
   2332         UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
   2333             UTRANS_REVERSE : UTRANS_FORWARD;
   2334         UnicodeString source;
   2335         UnicodeString exp;
   2336         if (DATA[i+2] != NULL) {
   2337             source = CharsToUnicodeString(DATA[i+2]);
   2338             exp = CharsToUnicodeString(DATA[i+3]);
   2339         }
   2340         UBool expOk = (DATA[i+1] != NULL);
   2341         Transliterator* t = NULL;
   2342         UParseError pe;
   2343         UErrorCode ec = U_ZERO_ERROR;
   2344         if (id.charAt(0) == 0x23/*#*/) {
   2345             t = Transliterator::createFromRules("ID", id, direction, pe, ec);
   2346         } else {
   2347             t = Transliterator::createInstance(id, direction, pe, ec);
   2348         }
   2349         UBool ok = (t != NULL && U_SUCCESS(ec));
   2350         UnicodeString transID;
   2351         if (t!=0) {
   2352             transID = t->getID();
   2353         }
   2354         else {
   2355             transID = UnicodeString("NULL", "");
   2356         }
   2357         if (ok == expOk) {
   2358             logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
   2359                   u_errorName(ec));
   2360             if (source.length() != 0) {
   2361                 expect(*t, source, exp);
   2362             }
   2363             delete t;
   2364         } else {
   2365             dataerrln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
   2366                   u_errorName(ec));
   2367         }
   2368     }
   2369 }
   2370 
   2371 /**
   2372  * Test new property set syntax
   2373  */
   2374 void TransliteratorTest::TestPropertySet() {
   2375     expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
   2376     expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
   2377            "[ a stitch ]\n[ in time ]\r[ saves 9]");
   2378 }
   2379 
   2380 /**
   2381  * Test various failure points of the new 2.0 engine.
   2382  */
   2383 void TransliteratorTest::TestNewEngine() {
   2384     UParseError pe;
   2385     UErrorCode ec = U_ZERO_ERROR;
   2386     Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
   2387     if (t == 0 || U_FAILURE(ec)) {
   2388         dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec));
   2389         return;
   2390     }
   2391     // Katakana should be untouched
   2392     expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
   2393            CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
   2394 
   2395     delete t;
   2396 
   2397 #if 1
   2398     // This test will only work if Transliterator.ROLLBACK is
   2399     // true.  Otherwise, this test will fail, revealing a
   2400     // limitation of global filters in incremental mode.
   2401     Transliterator *a =
   2402         Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
   2403     Transliterator *A =
   2404         Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
   2405     if (U_FAILURE(ec)) {
   2406         delete a;
   2407         delete A;
   2408         return;
   2409     }
   2410 
   2411     Transliterator* array[3];
   2412     array[0] = a;
   2413     array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
   2414     array[2] = A;
   2415     if (U_FAILURE(ec)) {
   2416         errln("FAIL: createInstance NFD");
   2417         delete a;
   2418         delete A;
   2419         delete array[1];
   2420         return;
   2421     }
   2422 
   2423     t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
   2424     if (U_FAILURE(ec)) {
   2425         errln("FAIL: UnicodeSet constructor");
   2426         delete a;
   2427         delete A;
   2428         delete array[1];
   2429         delete t;
   2430         return;
   2431     }
   2432 
   2433     expect(*t, "aAaA", "bAbA");
   2434 
   2435     assertTrue("countElements", t->countElements() == 3);
   2436     assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
   2437     assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
   2438     assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
   2439     assertSuccess("getElement", ec);
   2440 
   2441     delete a;
   2442     delete A;
   2443     delete array[1];
   2444     delete t;
   2445 #endif
   2446 
   2447     expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
   2448            "a",
   2449            "ax");
   2450 
   2451     UnicodeString gr = CharsToUnicodeString(
   2452         "$ddot = \\u0308 ;"
   2453         "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
   2454         "$rough = \\u0314 ;"
   2455         "($lcgvowel+ $ddot?) $rough > h | $1 ;"
   2456         "\\u03b1 <> a ;"
   2457         "$rough <> h ;");
   2458 
   2459     expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
   2460 }
   2461 
   2462 /**
   2463  * Test quantified segment behavior.  We want:
   2464  * ([abc])+ > x $1 x; applied to "cba" produces "xax"
   2465  */
   2466 void TransliteratorTest::TestQuantifiedSegment(void) {
   2467     // The normal case
   2468     expect("([abc]+) > x $1 x;", "cba", "xcbax");
   2469 
   2470     // The tricky case; the quantifier is around the segment
   2471     expect("([abc])+ > x $1 x;", "cba", "xax");
   2472 
   2473     // Tricky case in reverse direction
   2474     expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
   2475 
   2476     // Check post-context segment
   2477     expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
   2478 
   2479     // Test toRule/toPattern for non-quantified segment.
   2480     // Careful with spacing here.
   2481     UnicodeString r("([a-c]){q} > x $1 x;");
   2482     UParseError pe;
   2483     UErrorCode ec = U_ZERO_ERROR;
   2484     Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
   2485     if (U_FAILURE(ec)) {
   2486         errln("FAIL: createFromRules");
   2487         delete t;
   2488         return;
   2489     }
   2490     UnicodeString rr;
   2491     t->toRules(rr, TRUE);
   2492     if (r != rr) {
   2493         errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
   2494     } else {
   2495         logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
   2496     }
   2497     delete t;
   2498 
   2499     // Test toRule/toPattern for quantified segment.
   2500     // Careful with spacing here.
   2501     r = "([a-c])+{q} > x $1 x;";
   2502     t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
   2503     if (U_FAILURE(ec)) {
   2504         errln("FAIL: createFromRules");
   2505         delete t;
   2506         return;
   2507     }
   2508     t->toRules(rr, TRUE);
   2509     if (r != rr) {
   2510         errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
   2511     } else {
   2512         logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
   2513     }
   2514     delete t;
   2515 }
   2516 
   2517 //======================================================================
   2518 // Ram's tests
   2519 //======================================================================
   2520 void TransliteratorTest::TestDevanagariLatinRT(){
   2521     const int MAX_LEN= 52;
   2522     const char* const source[MAX_LEN] = {
   2523         "bh\\u0101rata",
   2524         "kra",
   2525         "k\\u1E63a",
   2526         "khra",
   2527         "gra",
   2528         "\\u1E45ra",
   2529         "cra",
   2530         "chra",
   2531         "j\\u00F1a",
   2532         "jhra",
   2533         "\\u00F1ra",
   2534         "\\u1E6Dya",
   2535         "\\u1E6Dhra",
   2536         "\\u1E0Dya",
   2537       //"r\\u0323ya", // \u095c is not valid in Devanagari
   2538         "\\u1E0Dhya",
   2539         "\\u1E5Bhra",
   2540         "\\u1E47ra",
   2541         "tta",
   2542         "thra",
   2543         "dda",
   2544         "dhra",
   2545         "nna",
   2546         "pra",
   2547         "phra",
   2548         "bra",
   2549         "bhra",
   2550         "mra",
   2551         "\\u1E49ra",
   2552       //"l\\u0331ra",
   2553         "yra",
   2554         "\\u1E8Fra",
   2555       //"l-",
   2556         "vra",
   2557         "\\u015Bra",
   2558         "\\u1E63ra",
   2559         "sra",
   2560         "hma",
   2561         "\\u1E6D\\u1E6Da",
   2562         "\\u1E6D\\u1E6Dha",
   2563         "\\u1E6Dh\\u1E6Dha",
   2564         "\\u1E0D\\u1E0Da",
   2565         "\\u1E0D\\u1E0Dha",
   2566         "\\u1E6Dya",
   2567         "\\u1E6Dhya",
   2568         "\\u1E0Dya",
   2569         "\\u1E0Dhya",
   2570         // Not roundtrippable --
   2571         // \\u0939\\u094d\\u094d\\u092E  - hma
   2572         // \\u0939\\u094d\\u092E         - hma
   2573         // CharsToUnicodeString("hma"),
   2574         "hya",
   2575         "\\u015Br\\u0325",
   2576         "\\u015Bca",
   2577         "\\u0115",
   2578         "san\\u0304j\\u012Bb s\\u0113nagupta",
   2579         "\\u0101nand vaddir\\u0101ju",
   2580         "\\u0101",
   2581         "a"
   2582     };
   2583     const char* const expected[MAX_LEN] = {
   2584         "\\u092D\\u093E\\u0930\\u0924",   /* bha\\u0304rata */
   2585         "\\u0915\\u094D\\u0930",          /* kra         */
   2586         "\\u0915\\u094D\\u0937",          /* ks\\u0323a  */
   2587         "\\u0916\\u094D\\u0930",          /* khra        */
   2588         "\\u0917\\u094D\\u0930",          /* gra         */
   2589         "\\u0919\\u094D\\u0930",          /* n\\u0307ra  */
   2590         "\\u091A\\u094D\\u0930",          /* cra         */
   2591         "\\u091B\\u094D\\u0930",          /* chra        */
   2592         "\\u091C\\u094D\\u091E",          /* jn\\u0303a  */
   2593         "\\u091D\\u094D\\u0930",          /* jhra        */
   2594         "\\u091E\\u094D\\u0930",          /* n\\u0303ra  */
   2595         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
   2596         "\\u0920\\u094D\\u0930",          /* t\\u0323hra */
   2597         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
   2598       //"\\u095C\\u094D\\u092F",        /* r\\u0323ya  */ // \u095c is not valid in Devanagari
   2599         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
   2600         "\\u0922\\u093C\\u094D\\u0930",   /* r\\u0323hra */
   2601         "\\u0923\\u094D\\u0930",          /* n\\u0323ra  */
   2602         "\\u0924\\u094D\\u0924",          /* tta         */
   2603         "\\u0925\\u094D\\u0930",          /* thra        */
   2604         "\\u0926\\u094D\\u0926",          /* dda         */
   2605         "\\u0927\\u094D\\u0930",          /* dhra        */
   2606         "\\u0928\\u094D\\u0928",          /* nna         */
   2607         "\\u092A\\u094D\\u0930",          /* pra         */
   2608         "\\u092B\\u094D\\u0930",          /* phra        */
   2609         "\\u092C\\u094D\\u0930",          /* bra         */
   2610         "\\u092D\\u094D\\u0930",          /* bhra        */
   2611         "\\u092E\\u094D\\u0930",          /* mra         */
   2612         "\\u0929\\u094D\\u0930",          /* n\\u0331ra  */
   2613       //"\\u0934\\u094D\\u0930",        /* l\\u0331ra  */
   2614         "\\u092F\\u094D\\u0930",          /* yra         */
   2615         "\\u092F\\u093C\\u094D\\u0930",   /* y\\u0307ra  */
   2616       //"l-",
   2617         "\\u0935\\u094D\\u0930",          /* vra         */
   2618         "\\u0936\\u094D\\u0930",          /* s\\u0301ra  */
   2619         "\\u0937\\u094D\\u0930",          /* s\\u0323ra  */
   2620         "\\u0938\\u094D\\u0930",          /* sra         */
   2621         "\\u0939\\u094d\\u092E",          /* hma         */
   2622         "\\u091F\\u094D\\u091F",          /* t\\u0323t\\u0323a  */
   2623         "\\u091F\\u094D\\u0920",          /* t\\u0323t\\u0323ha */
   2624         "\\u0920\\u094D\\u0920",          /* t\\u0323ht\\u0323ha*/
   2625         "\\u0921\\u094D\\u0921",          /* d\\u0323d\\u0323a  */
   2626         "\\u0921\\u094D\\u0922",          /* d\\u0323d\\u0323ha */
   2627         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
   2628         "\\u0920\\u094D\\u092F",          /* t\\u0323hya */
   2629         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
   2630         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
   2631      // "hma",                         /* hma         */
   2632         "\\u0939\\u094D\\u092F",          /* hya         */
   2633         "\\u0936\\u0943",                 /* s\\u0301r\\u0325a  */
   2634         "\\u0936\\u094D\\u091A",          /* s\\u0301ca  */
   2635         "\\u090d",                        /* e\\u0306    */
   2636         "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
   2637         "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
   2638         "\\u0906",
   2639         "\\u0905",
   2640     };
   2641     UErrorCode status = U_ZERO_ERROR;
   2642     UParseError parseError;
   2643     UnicodeString message;
   2644     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
   2645     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
   2646     if(U_FAILURE(status)){
   2647         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
   2648         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
   2649         return;
   2650     }
   2651     UnicodeString gotResult;
   2652     for(int i= 0; i<MAX_LEN; i++){
   2653         gotResult = source[i];
   2654         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
   2655         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
   2656     }
   2657     delete latinToDev;
   2658     delete devToLatin;
   2659 }
   2660 
   2661 void TransliteratorTest::TestTeluguLatinRT(){
   2662     const int MAX_LEN=10;
   2663     const char* const source[MAX_LEN] = {
   2664         "raghur\\u0101m vi\\u015Bvan\\u0101dha",                         /* Raghuram Viswanadha    */
   2665         "\\u0101nand vaddir\\u0101ju",                                   /* Anand Vaddiraju        */
   2666         "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da",                      /* Rajeev Kasarabada      */
   2667         "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da",                    /* sanjeev kasarabada     */
   2668         "san\\u0304j\\u012Bb sen'gupta",                                 /* sanjib sengupata       */
   2669         "amar\\u0113ndra hanum\\u0101nula",                              /* Amarendra hanumanula   */
   2670         "ravi kum\\u0101r vi\\u015Bvan\\u0101dha",                       /* Ravi Kumar Viswanadha  */
   2671         "\\u0101ditya kandr\\u0113gula",                                 /* Aditya Kandregula      */
   2672         "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty   */
   2673         "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di"                         /* Madhav Desetty         */
   2674     };
   2675 
   2676     const char* const expected[MAX_LEN] = {
   2677         "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
   2678         "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
   2679         "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
   2680         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
   2681         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
   2682         "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
   2683         "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
   2684         "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
   2685         "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
   2686         "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
   2687     };
   2688 
   2689     UErrorCode status = U_ZERO_ERROR;
   2690     UParseError parseError;
   2691     UnicodeString message;
   2692     Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
   2693     Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
   2694     if(U_FAILURE(status)){
   2695         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
   2696         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
   2697         return;
   2698     }
   2699     UnicodeString gotResult;
   2700     for(int i= 0; i<MAX_LEN; i++){
   2701         gotResult = source[i];
   2702         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
   2703         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
   2704     }
   2705     delete latinToDev;
   2706     delete devToLatin;
   2707 }
   2708 
   2709 void TransliteratorTest::TestSanskritLatinRT(){
   2710     const int MAX_LEN =16;
   2711     const char* const source[MAX_LEN] = {
   2712         "rmk\\u1E63\\u0113t",
   2713         "\\u015Br\\u012Bmad",
   2714         "bhagavadg\\u012Bt\\u0101",
   2715         "adhy\\u0101ya",
   2716         "arjuna",
   2717         "vi\\u1E63\\u0101da",
   2718         "y\\u014Dga",
   2719         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
   2720         "uv\\u0101cr\\u0325",
   2721         "dharmak\\u1E63\\u0113tr\\u0113",
   2722         "kuruk\\u1E63\\u0113tr\\u0113",
   2723         "samav\\u0113t\\u0101",
   2724         "yuyutsava\\u1E25",
   2725         "m\\u0101mak\\u0101\\u1E25",
   2726     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
   2727         "kimakurvata",
   2728         "san\\u0304java",
   2729     };
   2730     const char* const expected[MAX_LEN] = {
   2731         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
   2732         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
   2733         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
   2734         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
   2735         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
   2736         "\\u0935\\u093f\\u0937\\u093e\\u0926",
   2737         "\\u092f\\u094b\\u0917",
   2738         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
   2739         "\\u0909\\u0935\\u093E\\u091A\\u0943",
   2740         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
   2741         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
   2742         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
   2743         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
   2744         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
   2745     //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
   2746         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
   2747         "\\u0938\\u0902\\u091c\\u0935",
   2748     };
   2749     UErrorCode status = U_ZERO_ERROR;
   2750     UParseError parseError;
   2751     UnicodeString message;
   2752     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
   2753     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
   2754     if(U_FAILURE(status)){
   2755         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
   2756         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
   2757         return;
   2758     }
   2759     UnicodeString gotResult;
   2760     for(int i= 0; i<MAX_LEN; i++){
   2761         gotResult = source[i];
   2762         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
   2763         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
   2764     }
   2765     delete latinToDev;
   2766     delete devToLatin;
   2767 }
   2768 
   2769 
   2770 void TransliteratorTest::TestCompoundLatinRT(){
   2771     const char* const source[] = {
   2772         "rmk\\u1E63\\u0113t",
   2773         "\\u015Br\\u012Bmad",
   2774         "bhagavadg\\u012Bt\\u0101",
   2775         "adhy\\u0101ya",
   2776         "arjuna",
   2777         "vi\\u1E63\\u0101da",
   2778         "y\\u014Dga",
   2779         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
   2780         "uv\\u0101cr\\u0325",
   2781         "dharmak\\u1E63\\u0113tr\\u0113",
   2782         "kuruk\\u1E63\\u0113tr\\u0113",
   2783         "samav\\u0113t\\u0101",
   2784         "yuyutsava\\u1E25",
   2785         "m\\u0101mak\\u0101\\u1E25",
   2786      // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
   2787         "kimakurvata",
   2788         "san\\u0304java"
   2789     };
   2790     const int MAX_LEN = sizeof(source)/sizeof(source[0]);
   2791     const char* const expected[MAX_LEN] = {
   2792         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
   2793         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
   2794         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
   2795         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
   2796         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
   2797         "\\u0935\\u093f\\u0937\\u093e\\u0926",
   2798         "\\u092f\\u094b\\u0917",
   2799         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
   2800         "\\u0909\\u0935\\u093E\\u091A\\u0943",
   2801         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
   2802         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
   2803         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
   2804         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
   2805         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
   2806     //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
   2807         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
   2808         "\\u0938\\u0902\\u091c\\u0935"
   2809     };
   2810     if(MAX_LEN != sizeof(expected)/sizeof(expected[0])) {
   2811         errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
   2812         return;
   2813     }
   2814 
   2815     UErrorCode status = U_ZERO_ERROR;
   2816     UParseError parseError;
   2817     UnicodeString message;
   2818     Transliterator* devToLatinToDev  =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
   2819     Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
   2820     Transliterator* devToTelToDev    =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
   2821     Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
   2822 
   2823     if(U_FAILURE(status)){
   2824         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
   2825         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
   2826         return;
   2827     }
   2828     UnicodeString gotResult;
   2829     for(int i= 0; i<MAX_LEN; i++){
   2830         gotResult = source[i];
   2831         expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
   2832         expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
   2833         expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
   2834 
   2835     }
   2836     delete(latinToDevToLatin);
   2837     delete(devToLatinToDev);
   2838     delete(devToTelToDev);
   2839     delete(latinToTelToLatin);
   2840 }
   2841 
   2842 /**
   2843  * Test Gurmukhi-Devanagari Tippi and Bindi
   2844  */
   2845 void TransliteratorTest::TestGurmukhiDevanagari(){
   2846     // the rule says:
   2847     // (\u0902) (when preceded by vowel)      --->  (\u0A02)
   2848     // (\u0902) (when preceded by consonant)  --->  (\u0A70)
   2849     UErrorCode status = U_ZERO_ERROR;
   2850     UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status);
   2851     UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status);
   2852     UParseError parseError;
   2853 
   2854     UnicodeSetIterator vIter(vowel);
   2855     UnicodeSetIterator nvIter(non_vowel);
   2856     Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
   2857     if(U_FAILURE(status)) {
   2858       dataerrln("Error creating transliterator %s", u_errorName(status));
   2859       delete trans;
   2860       return;
   2861     }
   2862     UnicodeString src (" \\u0902", -1, US_INV);
   2863     UnicodeString expected(" \\u0A02", -1, US_INV);
   2864     src = src.unescape();
   2865     expected= expected.unescape();
   2866 
   2867     while(vIter.next()){
   2868         src.setCharAt(0,(UChar) vIter.getCodepoint());
   2869         expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
   2870         expect(*trans,src,expected);
   2871     }
   2872 
   2873     expected.setCharAt(1,0x0A70);
   2874     while(nvIter.next()){
   2875         //src.setCharAt(0,(char) nvIter.codepoint);
   2876         src.setCharAt(0,(UChar)nvIter.getCodepoint());
   2877         expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
   2878         expect(*trans,src,expected);
   2879     }
   2880     delete trans;
   2881 }
   2882 /**
   2883  * Test instantiation from a locale.
   2884  */
   2885 void TransliteratorTest::TestLocaleInstantiation(void) {
   2886     UParseError pe;
   2887     UErrorCode ec = U_ZERO_ERROR;
   2888     Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
   2889     if (U_FAILURE(ec)) {
   2890         dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec));
   2891         delete t;
   2892         return;
   2893     }
   2894     expect(*t, CharsToUnicodeString("\\u0430"), "a");
   2895     delete t;
   2896 
   2897     t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
   2898     if (U_FAILURE(ec)) {
   2899         errln("FAIL: createInstance(en-el)");
   2900         delete t;
   2901         return;
   2902     }
   2903     expect(*t, "a", CharsToUnicodeString("\\u03B1"));
   2904     delete t;
   2905 }
   2906 
   2907 /**
   2908  * Test title case handling of accent (should ignore accents)
   2909  */
   2910 void TransliteratorTest::TestTitleAccents(void) {
   2911     UParseError pe;
   2912     UErrorCode ec = U_ZERO_ERROR;
   2913     Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
   2914     if (U_FAILURE(ec)) {
   2915         errln("FAIL: createInstance(Title)");
   2916         delete t;
   2917         return;
   2918     }
   2919     expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
   2920     delete t;
   2921 }
   2922 
   2923 /**
   2924  * Basic test of a locale resource based rule.
   2925  */
   2926 void TransliteratorTest::TestLocaleResource() {
   2927     const char* DATA[] = {
   2928         // id                    from               to
   2929         //"Latin-Greek/UNGEGN",    "b",               "\\u03bc\\u03c0",
   2930         "Latin-el",              "b",               "\\u03bc\\u03c0",
   2931         "Latin-Greek",           "b",               "\\u03B2",
   2932         "Greek-Latin/UNGEGN",    "\\u03B2",         "v",
   2933         "el-Latin",              "\\u03B2",         "v",
   2934         "Greek-Latin",           "\\u03B2",         "b",
   2935     };
   2936     const int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
   2937     for (int32_t i=0; i<DATA_length; i+=3) {
   2938         UParseError pe;
   2939         UErrorCode ec = U_ZERO_ERROR;
   2940         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
   2941         if (U_FAILURE(ec)) {
   2942             dataerrln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ") - " + u_errorName(ec));
   2943             delete t;
   2944             continue;
   2945         }
   2946         expect(*t, CharsToUnicodeString(DATA[i+1]),
   2947                CharsToUnicodeString(DATA[i+2]));
   2948         delete t;
   2949     }
   2950 }
   2951 
   2952 /**
   2953  * Make sure parse errors reference the right line.
   2954  */
   2955 void TransliteratorTest::TestParseError() {
   2956     static const char* rule =
   2957         "a > b;\n"
   2958         "# more stuff\n"
   2959         "d << b;";
   2960     UErrorCode ec = U_ZERO_ERROR;
   2961     UParseError pe;
   2962     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
   2963     delete t;
   2964     if (U_FAILURE(ec)) {
   2965         UnicodeString err(pe.preContext);
   2966         err.append((UChar)124/*|*/).append(pe.postContext);
   2967         if (err.indexOf("d << b") >= 0) {
   2968             logln("Ok: " + err);
   2969         } else {
   2970             errln("FAIL: " + err);
   2971         }
   2972     }
   2973     else {
   2974         errln("FAIL: no syntax error");
   2975     }
   2976     static const char* maskingRule =
   2977         "a>x;\n"
   2978         "# more stuff\n"
   2979         "ab>y;";
   2980     ec = U_ZERO_ERROR;
   2981     delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec);
   2982     if (ec != U_RULE_MASK_ERROR) {
   2983         errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec));
   2984     }
   2985     else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) {
   2986         errln("FAIL: did not get expected precontext");
   2987     }
   2988     else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) {
   2989         errln("FAIL: did not get expected postcontext");
   2990     }
   2991 }
   2992 
   2993 /**
   2994  * Make sure sets on output are disallowed.
   2995  */
   2996 void TransliteratorTest::TestOutputSet() {
   2997     UnicodeString rule = "$set = [a-cm-n]; b > $set;";
   2998     UErrorCode ec = U_ZERO_ERROR;
   2999     UParseError pe;
   3000     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
   3001     delete t;
   3002     if (U_FAILURE(ec)) {
   3003         UnicodeString err(pe.preContext);
   3004         err.append((UChar)124/*|*/).append(pe.postContext);
   3005         logln("Ok: " + err);
   3006         return;
   3007     }
   3008     errln("FAIL: No syntax error");
   3009 }
   3010 
   3011 /**
   3012  * Test the use variable range pragma, making sure that use of
   3013  * variable range characters is detected and flagged as an error.
   3014  */
   3015 void TransliteratorTest::TestVariableRange() {
   3016     UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
   3017     UErrorCode ec = U_ZERO_ERROR;
   3018     UParseError pe;
   3019     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
   3020     delete t;
   3021     if (U_FAILURE(ec)) {
   3022         UnicodeString err(pe.preContext);
   3023         err.append((UChar)124/*|*/).append(pe.postContext);
   3024         logln("Ok: " + err);
   3025         return;
   3026     }
   3027     errln("FAIL: No syntax error");
   3028 }
   3029 
   3030 /**
   3031  * Test invalid post context error handling
   3032  */
   3033 void TransliteratorTest::TestInvalidPostContext() {
   3034     UnicodeString rule = "a}b{c>d;";
   3035     UErrorCode ec = U_ZERO_ERROR;
   3036     UParseError pe;
   3037     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
   3038     delete t;
   3039     if (U_FAILURE(ec)) {
   3040         UnicodeString err(pe.preContext);
   3041         err.append((UChar)124/*|*/).append(pe.postContext);
   3042         if (err.indexOf("a}b{c") >= 0) {
   3043             logln("Ok: " + err);
   3044         } else {
   3045             errln("FAIL: " + err);
   3046         }
   3047         return;
   3048     }
   3049     errln("FAIL: No syntax error");
   3050 }
   3051 
   3052 /**
   3053  * Test ID form variants
   3054  */
   3055 void TransliteratorTest::TestIDForms() {
   3056     const char* DATA[] = {
   3057         "NFC", NULL, "NFD",
   3058         "nfd", NULL, "NFC", // make sure case is ignored
   3059         "Any-NFKD", NULL, "Any-NFKC",
   3060         "Null", NULL, "Null",
   3061         "-nfkc", "nfkc", "NFKD",
   3062         "-nfkc/", "nfkc", "NFKD",
   3063         "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
   3064         "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
   3065         "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
   3066         "Source-", NULL, NULL,
   3067         "Source/Variant-", NULL, NULL,
   3068         "Source-/Variant", NULL, NULL,
   3069         "/Variant", NULL, NULL,
   3070         "/Variant-", NULL, NULL,
   3071         "-/Variant", NULL, NULL,
   3072         "-/", NULL, NULL,
   3073         "-", NULL, NULL,
   3074         "/", NULL, NULL,
   3075     };
   3076     const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
   3077 
   3078     for (int32_t i=0; i<DATA_length; i+=3) {
   3079         const char* ID = DATA[i];
   3080         const char* expID = DATA[i+1];
   3081         const char* expInvID = DATA[i+2];
   3082         UBool expValid = (expInvID != NULL);
   3083         if (expID == NULL) {
   3084             expID = ID;
   3085         }
   3086         UParseError pe;
   3087         UErrorCode ec = U_ZERO_ERROR;
   3088         Transliterator *t =
   3089             Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
   3090         if (U_FAILURE(ec)) {
   3091             if (!expValid) {
   3092                 logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
   3093             } else {
   3094                 dataerrln((UnicodeString)"FAIL: Couldn't create " + ID + " - " + u_errorName(ec));
   3095             }
   3096             delete t;
   3097             continue;
   3098         }
   3099         Transliterator *u = t->createInverse(ec);
   3100         if (U_FAILURE(ec)) {
   3101             errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
   3102             delete t;
   3103             delete u;
   3104             continue;
   3105         }
   3106         if (t->getID() == expID &&
   3107             u->getID() == expInvID) {
   3108             logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
   3109         } else {
   3110             errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
   3111                   t->getID() + " x getInverse() => " + u->getID() +
   3112                   ", expected " + expInvID);
   3113         }
   3114         delete t;
   3115         delete u;
   3116     }
   3117 }
   3118 
   3119 static const UChar SPACE[]   = {32,0};
   3120 static const UChar NEWLINE[] = {10,0};
   3121 static const UChar RETURN[]  = {13,0};
   3122 static const UChar EMPTY[]   = {0};
   3123 
   3124 void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
   3125                                     const UnicodeString& testRulesForward) {
   3126     UnicodeString rules2; t2.toRules(rules2, TRUE);
   3127     //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
   3128     rules2.findAndReplace(SPACE, EMPTY);
   3129     rules2.findAndReplace(NEWLINE, EMPTY);
   3130     rules2.findAndReplace(RETURN, EMPTY);
   3131 
   3132     UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
   3133 
   3134     if (rules2 != testRules) {
   3135         errln(label);
   3136         logln((UnicodeString)"GENERATED RULES: " + rules2);
   3137         logln((UnicodeString)"SHOULD BE:       " + testRulesForward);
   3138     }
   3139 }
   3140 
   3141 /**
   3142  * Mark's toRules test.
   3143  */
   3144 void TransliteratorTest::TestToRulesMark() {
   3145     const char* testRules =
   3146         "::[[:Latin:][:Mark:]];"
   3147         "::NFKD (NFC);"
   3148         "::Lower (Lower);"
   3149         "a <> \\u03B1;" // alpha
   3150         "::NFKC (NFD);"
   3151         "::Upper (Lower);"
   3152         "::Lower ();"
   3153         "::([[:Greek:][:Mark:]]);"
   3154         ;
   3155     const char* testRulesForward =
   3156         "::[[:Latin:][:Mark:]];"
   3157         "::NFKD(NFC);"
   3158         "::Lower(Lower);"
   3159         "a > \\u03B1;"
   3160         "::NFKC(NFD);"
   3161         "::Upper (Lower);"
   3162         "::Lower ();"
   3163         ;
   3164     const char* testRulesBackward =
   3165         "::[[:Greek:][:Mark:]];"
   3166         "::Lower (Upper);"
   3167         "::NFD(NFKC);"
   3168         "\\u03B1 > a;"
   3169         "::Lower(Lower);"
   3170         "::NFC(NFKD);"
   3171         ;
   3172     UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
   3173     UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
   3174 
   3175     UParseError pe;
   3176     UErrorCode ec = U_ZERO_ERROR;
   3177     Transliterator *t2 = Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec);
   3178     Transliterator *t3 = Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec);
   3179 
   3180     if (U_FAILURE(ec)) {
   3181         delete t2;
   3182         delete t3;
   3183         dataerrln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
   3184         return;
   3185     }
   3186 
   3187     expect(*t2, source, target);
   3188     expect(*t3, target, source);
   3189 
   3190     checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV));
   3191     checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV));
   3192 
   3193     delete t2;
   3194     delete t3;
   3195 }
   3196 
   3197 /**
   3198  * Test Escape and Unescape transliterators.
   3199  */
   3200 void TransliteratorTest::TestEscape() {
   3201     UParseError pe;
   3202     UErrorCode ec;
   3203     Transliterator *t;
   3204 
   3205     ec = U_ZERO_ERROR;
   3206     t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
   3207     if (U_FAILURE(ec)) {
   3208         errln((UnicodeString)"FAIL: createInstance");
   3209     } else {
   3210         expect(*t,
   3211                UNICODE_STRING_SIMPLE("\\x{40}\\U00000031&#x32;&#81;"),
   3212                "@12Q");
   3213     }
   3214     delete t;
   3215 
   3216     ec = U_ZERO_ERROR;
   3217     t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
   3218     if (U_FAILURE(ec)) {
   3219         errln((UnicodeString)"FAIL: createInstance");
   3220     } else {
   3221         expect(*t,
   3222                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
   3223                UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
   3224     }
   3225     delete t;
   3226 
   3227     ec = U_ZERO_ERROR;
   3228     t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
   3229     if (U_FAILURE(ec)) {
   3230         errln((UnicodeString)"FAIL: createInstance");
   3231     } else {
   3232         expect(*t,
   3233                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
   3234                UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
   3235     }
   3236     delete t;
   3237 
   3238     ec = U_ZERO_ERROR;
   3239     t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
   3240     if (U_FAILURE(ec)) {
   3241         errln((UnicodeString)"FAIL: createInstance");
   3242     } else {
   3243         expect(*t,
   3244                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
   3245                UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
   3246     }
   3247     delete t;
   3248 }
   3249 
   3250 
   3251 void TransliteratorTest::TestAnchorMasking(){
   3252     UnicodeString rule ("^a > Q; a > q;");
   3253     UErrorCode status= U_ZERO_ERROR;
   3254     UParseError parseError;
   3255 
   3256     Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
   3257     if(U_FAILURE(status)){
   3258         errln(UnicodeString("FAIL: ") + "ID" +
   3259               ".createFromRules() => bad rules" +
   3260               /*", parse error " + parseError.code +*/
   3261               ", line " + parseError.line +
   3262               ", offset " + parseError.offset +
   3263               ", context " + prettify(parseError.preContext, TRUE) +
   3264               ", rules: " + prettify(rule, TRUE));
   3265     }
   3266     delete t;
   3267 }
   3268 
   3269 /**
   3270  * Make sure display names of variants look reasonable.
   3271  */
   3272 void TransliteratorTest::TestDisplayName() {
   3273 #if UCONFIG_NO_FORMATTING
   3274     logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
   3275     return;
   3276 #else
   3277     static const char* DATA[] = {
   3278         // ID, forward name, reverse name
   3279         // Update the text as necessary -- the important thing is
   3280         // not the text itself, but how various cases are handled.
   3281 
   3282         // Basic test
   3283         "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
   3284 
   3285         // Variants
   3286         "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
   3287 
   3288         // Target-only IDs
   3289         "NFC", "Any to NFC", "Any to NFD",
   3290     };
   3291 
   3292     int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
   3293 
   3294     Locale US("en", "US");
   3295 
   3296     for (int32_t i=0; i<DATA_length; i+=3) {
   3297         UnicodeString name;
   3298         Transliterator::getDisplayName(DATA[i], US, name);
   3299         if (name != DATA[i+1]) {
   3300             dataerrln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
   3301                   name + ", expected " + DATA[i+1]);
   3302         } else {
   3303             logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
   3304         }
   3305         UErrorCode ec = U_ZERO_ERROR;
   3306         UParseError pe;
   3307         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
   3308         if (U_FAILURE(ec)) {
   3309             delete t;
   3310             dataerrln("FAIL: createInstance failed - %s", u_errorName(ec));
   3311             continue;
   3312         }
   3313         name = Transliterator::getDisplayName(t->getID(), US, name);
   3314         if (name != DATA[i+2]) {
   3315             dataerrln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
   3316                   name + ", expected " + DATA[i+2]);
   3317         } else {
   3318             logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
   3319         }
   3320         delete t;
   3321     }
   3322 #endif
   3323 }
   3324 
   3325 void TransliteratorTest::TestSpecialCases(void) {
   3326     const UnicodeString registerRules[] = {
   3327         "Any-Dev1", "x > X; y > Y;",
   3328         "Any-Dev2", "XY > Z",
   3329         "Greek-Latin/FAKE",
   3330             CharsToUnicodeString
   3331             ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
   3332         "" // END MARKER
   3333     };
   3334 
   3335     const UnicodeString testCases[] = {
   3336         // NORMALIZATION
   3337         // should add more test cases
   3338         "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
   3339         "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
   3340         "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
   3341         "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
   3342 
   3343         // mp -> b BUG
   3344         "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
   3345         "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
   3346 
   3347         // check for devanagari bug
   3348         "nfd;Dev1;Dev2;nfc", "xy", "Z",
   3349 
   3350         // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
   3351         "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
   3352                  CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
   3353 
   3354         //TODO: enable this test once Titlecase works right
   3355         /*
   3356         "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
   3357                  CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
   3358                  */
   3359         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
   3360                  CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
   3361         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
   3362                  CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
   3363 
   3364         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
   3365         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
   3366 
   3367          // FORMS OF S
   3368         "Greek-Latin/UNGEGN",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
   3369                                CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
   3370         "Latin-Greek/UNGEGN",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
   3371                                CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
   3372         "Greek-Latin",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
   3373                         CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
   3374         "Latin-Greek",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
   3375                         CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
   3376         // Tatiana bug
   3377         // Upper: TAT\\u02B9\\u00C2NA
   3378         // Lower: tat\\u02B9\\u00E2na
   3379         // Title: Tat\\u02B9\\u00E2na
   3380         "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
   3381                  CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
   3382         "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
   3383                  CharsToUnicodeString("tat\\u02B9\\u00E2na"),
   3384         "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
   3385                  CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
   3386 
   3387         "" // END MARKER
   3388     };
   3389 
   3390     UParseError pos;
   3391     int32_t i;
   3392     for (i = 0; registerRules[i].length()!=0; i+=2) {
   3393         UErrorCode status = U_ZERO_ERROR;
   3394 
   3395         Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
   3396             registerRules[i+1], UTRANS_FORWARD, pos, status);
   3397         if (U_FAILURE(status)) {
   3398             dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status));
   3399         } else {
   3400             Transliterator::registerInstance(t);
   3401         }
   3402     }
   3403     for (i = 0; testCases[i].length()!=0; i+=3) {
   3404         UErrorCode ec = U_ZERO_ERROR;
   3405         UParseError pe;
   3406         const UnicodeString& name = testCases[i];
   3407         Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
   3408         if (U_FAILURE(ec)) {
   3409             dataerrln((UnicodeString)"FAIL: Couldn't create " + name + " - " + u_errorName(ec));
   3410             delete t;
   3411             continue;
   3412         }
   3413         const UnicodeString& id = t->getID();
   3414         const UnicodeString& source = testCases[i+1];
   3415         UnicodeString target;
   3416 
   3417         // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
   3418 
   3419         if (testCases[i+2].length() > 0) {
   3420             target = testCases[i+2];
   3421         } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
   3422             Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
   3423         } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
   3424             Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
   3425         } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
   3426             Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
   3427         } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
   3428             Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
   3429         } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
   3430             target = source;
   3431             target.toLower(Locale::getUS());
   3432         } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
   3433             target = source;
   3434             target.toUpper(Locale::getUS());
   3435         }
   3436         if (U_FAILURE(ec)) {
   3437             errln((UnicodeString)"FAIL: Internal error normalizing " + source);
   3438             continue;
   3439         }
   3440 
   3441         expect(*t, source, target);
   3442         delete t;
   3443     }
   3444     for (i = 0; registerRules[i].length()!=0; i+=2) {
   3445         Transliterator::unregister(registerRules[i]);
   3446     }
   3447 }
   3448 
   3449 char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
   3450     if (ch <= 0xFFFF) {
   3451         sprintf(buffer, "\\u%04x", (int)ch);
   3452     } else {
   3453         sprintf(buffer, "\\U%08x", (int)ch);
   3454     }
   3455     return buffer;
   3456 }
   3457 
   3458 void TransliteratorTest::TestSurrogateCasing (void) {
   3459     // check that casing handles surrogates
   3460     // titlecase is currently defective
   3461     char buffer[20];
   3462     UChar buffer2[20];
   3463     UChar32 dee;
   3464     U16_GET(DESERET_dee,0, 0, DESERET_dee.length(), dee);
   3465     UnicodeString DEE(u_totitle(dee));
   3466     if (DEE != DESERET_DEE) {
   3467         err("Fails titlecase of surrogates");
   3468         err(Char32ToEscapedChars(dee, buffer));
   3469         err(", ");
   3470         errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
   3471     }
   3472 
   3473     UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
   3474     UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
   3475     UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
   3476     UErrorCode status= U_ZERO_ERROR;
   3477 
   3478     u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
   3479     if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
   3480         errln("Fails: Can't uppercase surrogates.");
   3481     }
   3482 
   3483     status= U_ZERO_ERROR;
   3484     u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
   3485     if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
   3486         errln("Fails: Can't lowercase surrogates.");
   3487     }
   3488 }
   3489 
   3490 static void _trans(Transliterator& t, const UnicodeString& src,
   3491                    UnicodeString& result) {
   3492     result = src;
   3493     t.transliterate(result);
   3494 }
   3495 
   3496 static void _trans(const UnicodeString& id, const UnicodeString& src,
   3497                    UnicodeString& result, UErrorCode ec) {
   3498     UParseError pe;
   3499     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
   3500     if (U_SUCCESS(ec)) {
   3501         _trans(*t, src, result);
   3502     }
   3503     delete t;
   3504 }
   3505 
   3506 static UnicodeString _findMatch(const UnicodeString& source,
   3507                                        const UnicodeString* pairs) {
   3508     UnicodeString empty;
   3509     for (int32_t i=0; pairs[i].length() > 0; i+=2) {
   3510         if (0==source.caseCompare(pairs[i], U_FOLD_CASE_DEFAULT)) {
   3511             return pairs[i+1];
   3512         }
   3513     }
   3514     return empty;
   3515 }
   3516 
   3517 // Check to see that incremental gets at least part way through a reasonable string.
   3518 
   3519 void TransliteratorTest::TestIncrementalProgress(void) {
   3520     UErrorCode ec = U_ZERO_ERROR;
   3521     UnicodeString latinTest = "The Quick Brown Fox.";
   3522     UnicodeString devaTest;
   3523     _trans("Latin-Devanagari", latinTest, devaTest, ec);
   3524     UnicodeString kataTest;
   3525     _trans("Latin-Katakana", latinTest, kataTest, ec);
   3526     if (U_FAILURE(ec)) {
   3527         errln("FAIL: Internal error");
   3528         return;
   3529     }
   3530     const UnicodeString tests[] = {
   3531         "Any", latinTest,
   3532         "Latin", latinTest,
   3533         "Halfwidth", latinTest,
   3534         "Devanagari", devaTest,
   3535         "Katakana", kataTest,
   3536         "" // END MARKER
   3537     };
   3538 
   3539     UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
   3540     int32_t i = 0, j=0, k=0;
   3541     int32_t sources = Transliterator::countAvailableSources();
   3542     for (i = 0; i < sources; i++) {
   3543         UnicodeString source;
   3544         Transliterator::getAvailableSource(i, source);
   3545         UnicodeString test = _findMatch(source, tests);
   3546         if (test.length() == 0) {
   3547             logln((UnicodeString)"Skipping " + source + "-X");
   3548             continue;
   3549         }
   3550         int32_t targets = Transliterator::countAvailableTargets(source);
   3551         for (j = 0; j < targets; j++) {
   3552             UnicodeString target;
   3553             Transliterator::getAvailableTarget(j, source, target);
   3554             int32_t variants = Transliterator::countAvailableVariants(source, target);
   3555             for (k =0; k< variants; k++) {
   3556                 UnicodeString variant;
   3557                 UParseError err;
   3558                 UErrorCode status = U_ZERO_ERROR;
   3559 
   3560                 Transliterator::getAvailableVariant(k, source, target, variant);
   3561                 UnicodeString id = source + "-" + target + "/" + variant;
   3562 
   3563                 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
   3564                 if (U_FAILURE(status)) {
   3565                     dataerrln((UnicodeString)"FAIL: Could not create " + id);
   3566                     delete t;
   3567                     continue;
   3568                 }
   3569                 status = U_ZERO_ERROR;
   3570                 CheckIncrementalAux(t, test);
   3571 
   3572                 UnicodeString rev;
   3573                 _trans(*t, test, rev);
   3574                 Transliterator *inv = t->createInverse(status);
   3575                 if (U_FAILURE(status)) {
   3576 #if UCONFIG_NO_BREAK_ITERATION
   3577                     // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
   3578                     if (id.compare((UnicodeString)"Latin-Thai/") != 0)
   3579 #endif
   3580                         errln((UnicodeString)"FAIL: Could not create inverse of " + id);
   3581 
   3582                     delete t;
   3583                     delete inv;
   3584                     continue;
   3585                 }
   3586                 CheckIncrementalAux(inv, rev);
   3587                 delete t;
   3588                 delete inv;
   3589             }
   3590         }
   3591     }
   3592 }
   3593 
   3594 void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
   3595                                                       const UnicodeString& input) {
   3596     UErrorCode ec = U_ZERO_ERROR;
   3597     UTransPosition pos;
   3598     UnicodeString test = input;
   3599 
   3600     pos.contextStart = 0;
   3601     pos.contextLimit = input.length();
   3602     pos.start = 0;
   3603     pos.limit = input.length();
   3604 
   3605     t->transliterate(test, pos, ec);
   3606     if (U_FAILURE(ec)) {
   3607         errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
   3608         return;
   3609     }
   3610     UBool gotError = FALSE;
   3611 
   3612     // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
   3613 
   3614     if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
   3615         errln((UnicodeString)"No Progress, " +
   3616               t->getID() + ": " + formatInput(test, input, pos));
   3617         gotError = TRUE;
   3618     } else {
   3619         logln((UnicodeString)"PASS Progress, " +
   3620               t->getID() + ": " + formatInput(test, input, pos));
   3621     }
   3622     t->finishTransliteration(test, pos);
   3623     if (pos.start != pos.limit) {
   3624         errln((UnicodeString)"Incomplete, " +
   3625               t->getID() + ": " + formatInput(test, input, pos));
   3626         gotError = TRUE;
   3627     }
   3628 }
   3629 
   3630 void TransliteratorTest::TestFunction() {
   3631     // Careful with spacing and ';' here:  Phrase this exactly
   3632     // as toRules() is going to return it.  If toRules() changes
   3633     // with regard to spacing or ';', then adjust this string.
   3634     UnicodeString rule =
   3635         "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
   3636 
   3637     UParseError pe;
   3638     UErrorCode ec = U_ZERO_ERROR;
   3639     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
   3640     if (t == NULL) {
   3641         dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec));
   3642         return;
   3643     }
   3644 
   3645     UnicodeString r;
   3646     t->toRules(r, TRUE);
   3647     if (r == rule) {
   3648         logln((UnicodeString)"OK: toRules() => " + r);
   3649     } else {
   3650         errln((UnicodeString)"FAIL: toRules() => " + r +
   3651               ", expected " + rule);
   3652     }
   3653 
   3654     expect(*t, "The Quick Brown Fox",
   3655            UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
   3656 
   3657     delete t;
   3658 }
   3659 
   3660 void TransliteratorTest::TestInvalidBackRef(void) {
   3661     UnicodeString rule =  ". > $1;";
   3662     UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
   3663     UParseError pe;
   3664     UErrorCode ec = U_ZERO_ERROR;
   3665     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
   3666     Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
   3667 
   3668     if (t != NULL) {
   3669         errln("FAIL: createFromRules should have returned NULL");
   3670         delete t;
   3671     }
   3672 
   3673     if (t2 != NULL) {
   3674         errln("FAIL: createFromRules should have returned NULL");
   3675         delete t2;
   3676     }
   3677 
   3678     if (U_SUCCESS(ec)) {
   3679         errln("FAIL: Ok: . > $1; => no error");
   3680     } else {
   3681         logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
   3682     }
   3683 }
   3684 
   3685 void TransliteratorTest::TestMulticharStringSet() {
   3686     // Basic testing
   3687     const char* rule =
   3688         "       [{aa}]       > x;"
   3689         "         a          > y;"
   3690         "       [b{bc}]      > z;"
   3691         "[{gd}] { e          > q;"
   3692         "         e } [{fg}] > r;" ;
   3693 
   3694     UParseError pe;
   3695     UErrorCode ec = U_ZERO_ERROR;
   3696     Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
   3697     if (t == NULL || U_FAILURE(ec)) {
   3698         delete t;
   3699         errln("FAIL: createFromRules failed");
   3700         return;
   3701     }
   3702 
   3703     expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
   3704            "y x yz z d gd de gdq gdqfg ddrfg");
   3705     delete t;
   3706 
   3707     // Overlapped string test.  Make sure that when multiple
   3708     // strings can match that the longest one is matched.
   3709     rule =
   3710         "    [a {ab} {abc}]    > x;"
   3711         "           b          > y;"
   3712         "           c          > z;"
   3713         " q [t {st} {rst}] { e > p;" ;
   3714 
   3715     t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
   3716     if (t == NULL || U_FAILURE(ec)) {
   3717         delete t;
   3718         errln("FAIL: createFromRules failed");
   3719         return;
   3720     }
   3721 
   3722     expect(*t, "a ab abc qte qste qrste",
   3723            "x x x qtp qstp qrstp");
   3724     delete t;
   3725 }
   3726 
   3727 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
   3728 // BEGIN TestUserFunction support factory
   3729 
   3730 Transliterator* _TUFF[4];  // slot n: prototype stored by _TUFReg(), cloned by _TUFFactory(), deleted by _TUFUnreg()
   3731 UnicodeString* _TUFID[4];  // slot n: heap copy of the ID registered for _TUFF[n]; _TUFUnreg() uses it to unregister
   3732 
   3733 static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/,
   3734                                    Transliterator::Token context) {
   3735     return _TUFF[context.integer]->clone();
   3736 }
   3737 
   3738 static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
   3739     _TUFF[n] = t;
   3740     _TUFID[n] = new UnicodeString(ID);
   3741     Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
   3742 }
   3743 
   3744 static void _TUFUnreg(int32_t n) {
   3745     if (_TUFF[n] != NULL) {
   3746         Transliterator::unregister(*_TUFID[n]);
   3747         delete _TUFF[n];
   3748         delete _TUFID[n];
   3749     }
   3750 }
   3751 
   3752 // END TestUserFunction support factory
   3753 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
   3754 
   3755 /**
   3756  * Test that user-registered transliterators can be used under function
   3757  * syntax.
   3758  */
   3759 void TransliteratorTest::TestUserFunction() {
   3760 
   3761     Transliterator* t;
   3762     UParseError pe;
   3763     UErrorCode ec = U_ZERO_ERROR;
   3764 
   3765     // Setup our factory
   3766     int32_t i;
   3767     for (i=0; i<4; ++i) {
   3768         _TUFF[i] = NULL;
   3769     }
   3770 
   3771     // There's no need to register inverses if we don't use them
   3772     t = Transliterator::createFromRules("gif",
   3773                                         UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
   3774                                         UTRANS_FORWARD, pe, ec);
   3775     if (t == NULL || U_FAILURE(ec)) {
   3776         dataerrln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
   3777         return;
   3778     }
   3779     _TUFReg("Any-gif", t, 0);
   3780 
   3781     t = Transliterator::createFromRules("RemoveCurly",
   3782                                         UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
   3783                                         UTRANS_FORWARD, pe, ec);
   3784     if (t == NULL || U_FAILURE(ec)) {
   3785         errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
   3786         goto FAIL;
   3787     }
   3788     expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
   3789     _TUFReg("Any-RemoveCurly", t, 1);
   3790 
   3791     logln("Trying &hex");
   3792     t = Transliterator::createFromRules("hex2",
   3793                                         "(.) > &hex($1);",
   3794                                         UTRANS_FORWARD, pe, ec);
   3795     if (t == NULL || U_FAILURE(ec)) {
   3796         errln("FAIL: createFromRules");
   3797         goto FAIL;
   3798     }
   3799     logln("Registering");
   3800     _TUFReg("Any-hex2", t, 2);
   3801     t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
   3802     if (t == NULL || U_FAILURE(ec)) {
   3803         errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
   3804         goto FAIL;
   3805     }
   3806     expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
   3807     delete t;
   3808 
   3809     logln("Trying &gif");
   3810     t = Transliterator::createFromRules("gif2",
   3811                                         "(.) > &Gif(&Hex2($1));",
   3812                                         UTRANS_FORWARD, pe, ec);
   3813     if (t == NULL || U_FAILURE(ec)) {
   3814         errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
   3815         goto FAIL;
   3816     }
   3817     logln("Registering");
   3818     _TUFReg("Any-gif2", t, 3);
   3819     t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
   3820     if (t == NULL || U_FAILURE(ec)) {
   3821         errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
   3822         goto FAIL;
   3823     }
   3824     expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
   3825            "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
   3826     delete t;
   3827 
   3828     // Test that filters are allowed after &
   3829     t = Transliterator::createFromRules("test",
   3830                                         "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
   3831                                         UTRANS_FORWARD, pe, ec);
   3832     if (t == NULL || U_FAILURE(ec)) {
   3833         errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
   3834         goto FAIL;
   3835     }
   3836     expect(*t, "abc",
   3837            UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
   3838     delete t;
   3839 
   3840  FAIL:
   3841     for (i=0; i<4; ++i) {
   3842         _TUFUnreg(i);
   3843     }
   3844 }
   3845 
   3846 /**
   3847  * Test the Any-X transliterators.
   3848  */
   3849 void TransliteratorTest::TestAnyX(void) {
   3850     UParseError parseError;
   3851     UErrorCode status = U_ZERO_ERROR;
   3852     Transliterator* anyLatin =
   3853         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
   3854     if (anyLatin==0) {
   3855         dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
   3856         delete anyLatin;
   3857         return;
   3858     }
   3859 
   3860     expect(*anyLatin,
   3861            CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
   3862            CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
   3863 
   3864     delete anyLatin;
   3865 }
   3866 
   3867 /**
   3868  * Test Any-X transliterators with sample letters from all scripts.
   3869  */
   3870 void TransliteratorTest::TestAny(void) {
   3871     UErrorCode status = U_ZERO_ERROR;
   3872     // Note: there is a lot of implict construction of UnicodeStrings from (char *) in
   3873     //       function call parameters going on in this test.
   3874     UnicodeSet alphabetic("[:alphabetic:]", status);
   3875     if (U_FAILURE(status)) {
   3876         dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
   3877         return;
   3878     }
   3879     alphabetic.freeze();
   3880 
   3881     UnicodeString testString;
   3882     for (int32_t i = 0; i < USCRIPT_CODE_LIMIT; i++) {
   3883         const char *scriptName = uscript_getShortName((UScriptCode)i);
   3884         if (scriptName == NULL) {
   3885             errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__, __LINE__, i);
   3886             return;
   3887         }
   3888 
   3889         UnicodeSet sample;
   3890         sample.applyPropertyAlias("script", scriptName, status);
   3891         if (U_FAILURE(status)) {
   3892             errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
   3893             return;
   3894         }
   3895         sample.retainAll(alphabetic);
   3896         for (int32_t count=0; count<5; count++) {
   3897             UChar32 c = sample.charAt(count);
   3898             if (c == -1) {
   3899                 break;
   3900             }
   3901             testString.append(c);
   3902         }
   3903     }
   3904 
   3905     UParseError parseError;
   3906     Transliterator* anyLatin =
   3907         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
   3908     if (U_FAILURE(status)) {
   3909         dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
   3910         return;
   3911     }
   3912 
   3913     logln(UnicodeString("Sample set for Any-Latin: ") + testString);
   3914     anyLatin->transliterate(testString);
   3915     logln(UnicodeString("Sample result for Any-Latin: ") + testString);
   3916     delete anyLatin;
   3917 }
   3918 
   3919 
   3920 /**
   3921  * Test the source and target set API.  These are only implemented
   3922  * for RBT and CompoundTransliterator at this time.
   3923  */
   3924 void TransliteratorTest::TestSourceTargetSet() {
   3925     UErrorCode ec = U_ZERO_ERROR;
   3926 
   3927     // Rules
   3928     const char* r =
   3929         "a > b; "
   3930         "r [x{lu}] > q;";
   3931 
   3932     // Expected source
   3933     UnicodeSet expSrc("[arx{lu}]", ec);
   3934 
   3935     // Expected target
   3936     UnicodeSet expTrg("[bq]", ec);
   3937 
   3938     UParseError pe;
   3939     Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
   3940 
   3941     if (U_FAILURE(ec)) {
   3942         delete t;
   3943         errln("FAIL: Couldn't set up test");
   3944         return;
   3945     }
   3946 
   3947     UnicodeSet src; t->getSourceSet(src);
   3948     UnicodeSet trg; t->getTargetSet(trg);
   3949 
   3950     if (src == expSrc && trg == expTrg) {
   3951         UnicodeString a, b;
   3952         logln((UnicodeString)"Ok: " +
   3953               r + " => source = " + src.toPattern(a, TRUE) +
   3954               ", target = " + trg.toPattern(b, TRUE));
   3955     } else {
   3956         UnicodeString a, b, c, d;
   3957         errln((UnicodeString)"FAIL: " +
   3958               r + " => source = " + src.toPattern(a, TRUE) +
   3959               ", expected " + expSrc.toPattern(b, TRUE) +
   3960               "; target = " + trg.toPattern(c, TRUE) +
   3961               ", expected " + expTrg.toPattern(d, TRUE));
   3962     }
   3963 
   3964     delete t;
   3965 }
   3966 
   3967 /**
   3968  * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
   3969  */
   3970 void TransliteratorTest::TestPatternWhiteSpace() {
   3971     // Rules
   3972     const char* r = "a > \\u200E b;";
   3973 
   3974     UErrorCode ec = U_ZERO_ERROR;
   3975     UParseError pe;
   3976     Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
   3977 
   3978     if (U_FAILURE(ec)) {
   3979         errln("FAIL: Couldn't set up test");
   3980     } else {
   3981         expect(*t, "a", "b");
   3982     }
   3983     delete t;
   3984 
   3985     // UnicodeSet
   3986     ec = U_ZERO_ERROR;
   3987     UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
   3988 
   3989     if (U_FAILURE(ec)) {
   3990         errln("FAIL: Couldn't set up test");
   3991     } else {
   3992         if (set.contains(0x200E)) {
   3993             errln("FAIL: U+200E not being ignored by UnicodeSet");
   3994         }
   3995     }
   3996 }
   3997 //======================================================================
   3998 // this method is in TestUScript.java
   3999 //======================================================================
   4000 void TransliteratorTest::TestAllCodepoints(){
   4001     UScriptCode code= USCRIPT_INVALID_CODE;
   4002     char id[256]={'\0'};
   4003     char abbr[256]={'\0'};
   4004     char newId[256]={'\0'};
   4005     char newAbbrId[256]={'\0'};
   4006     char oldId[256]={'\0'};
   4007     char oldAbbrId[256]={'\0'};
   4008 
   4009     UErrorCode status =U_ZERO_ERROR;
   4010     UParseError pe;
   4011 
   4012     for(uint32_t i = 0; i<=0x10ffff; i++){
   4013         code =  uscript_getScript(i,&status);
   4014         if(code == USCRIPT_INVALID_CODE){
   4015             dataerrln("uscript_getScript for codepoint \\U%08X failed.", i);
   4016         }
   4017         const char* myId = uscript_getName(code);
   4018         if(!myId) {
   4019           dataerrln("Valid script code returned NULL name. Check your data!");
   4020           return;
   4021         }
   4022         uprv_strcpy(id,myId);
   4023         uprv_strcpy(abbr,uscript_getShortName(code));
   4024 
   4025         uprv_strcpy(newId,"[:");
   4026         uprv_strcat(newId,id);
   4027         uprv_strcat(newId,":];NFD");
   4028 
   4029         uprv_strcpy(newAbbrId,"[:");
   4030         uprv_strcat(newAbbrId,abbr);
   4031         uprv_strcat(newAbbrId,":];NFD");
   4032 
   4033         if(uprv_strcmp(newId,oldId)!=0){
   4034             Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
   4035             if(t==NULL || U_FAILURE(status)){
   4036                 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
   4037             }
   4038             delete t;
   4039         }
   4040         if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
   4041             Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
   4042             if(t==NULL || U_FAILURE(status)){
   4043                 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
   4044             }
   4045             delete t;
   4046         }
   4047         uprv_strcpy(oldId,newId);
   4048         uprv_strcpy(oldAbbrId, newAbbrId);
   4049 
   4050     }
   4051 
   4052 }
   4053 
   /**
    * Instantiate the transliterator registered under the string literal `id`
    * and check that its dynamic class ID equals cls::getStaticClassID().
    * A creation failure is reported through dataerrln().
    *
    * The body is wrapped in do { } while (0) so that "TEST_TRANSLIT_ID(...);"
    * is a single statement and is safe inside an unbraced if/else.
    * (A coverage test for the assignment operator, "*t = *t;", can't be done here.)
    */
   #define TEST_TRANSLIT_ID(id, cls) do { \
     UErrorCode ec = U_ZERO_ERROR; \
     Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); \
     if (U_FAILURE(ec)) { \
       dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(ec)); \
     } else if (t == NULL) { \
       errln("FAIL: createInstance returned NULL for %s", id); \
     } else if (t->getDynamicClassID() != cls::getStaticClassID()) { \
       errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
     } \
     delete t; \
   } while (0)
   4067 
   /**
    * Build a transliterator from the string literal `rule` and check that its
    * dynamic class ID equals cls::getStaticClassID().  `rule` must be a string
    * literal because it is pasted into the failure message.
    *
    * The body is wrapped in do { } while (0) so that "TEST_TRANSLIT_RULE(...);"
    * is a single statement and is safe inside an unbraced if/else.
    * (A coverage test for the assignment operator, "*t = *t;", can't be done here.)
    */
   #define TEST_TRANSLIT_RULE(rule, cls) do { \
     UErrorCode ec = U_ZERO_ERROR; \
     UParseError pe; \
     Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \
     if (U_FAILURE(ec) || t == NULL) { \
       errln("FAIL: Couldn't create " rule); \
     } else if (t->getDynamicClassID() != cls ::getStaticClassID()) { \
       errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
     } \
     delete t; \
   } while (0)
   4082 
   4083 void TransliteratorTest::TestBoilerplate() {
            // For each ID below, the TEST_TRANSLIT_ID macro creates the
            // transliterator and checks that getDynamicClassID() matches the
            // getStaticClassID() of the class named as the second argument.
   4084     TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
   4085     TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
   4086     TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
   4087     TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
   4088     TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
   4089     TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
   4090     TEST_TRANSLIT_ID("Null", NullTransliterator);
   4091     TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
   4092     TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
   4093     TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
   4094     TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
   4095     TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
            // Same check, but the transliterator is built from a rule string.
   4096     TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
   4097 }
   4098 
   4099 void TransliteratorTest::TestAlternateSyntax() {
   4100     // U+2206 == &
   4101     // U+2190 == <
   4102     // U+2192 == >
   4103     // U+2194 == <>
   4104     expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
   4105            "abc",
   4106            "xbz");
   4107     expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
   4108            CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
   4109            UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
   4110 }
   4111 
   /**
    * Rule sets shared by TestBeginEnd() and TestBeginEndToRules().
    *
    * The index of each entry is referenced from BEGIN_END_TEST_CASES, so the
    * rule sets that use ::BEGIN/::END (currently disabled) are kept as comments
    * and replaced by an empty string that holds their position in the array.
    * Both the pointers and the strings are read-only.
    */
   static const char* const BEGIN_END_RULES[] = {
       // [0]
       "abc > xy;"
       "aba > z;",

       // [1]
   /*
       "::BEGIN;"
       "abc > xy;"
       "::END;"
       "::BEGIN;"
       "aba > z;"
       "::END;",
   */
       "", // test case commented out below, this is here to keep from messing up the indexes

       // [2]
   /*
       "abc > xy;"
       "::BEGIN;"
       "aba > z;"
       "::END;",
   */
       "", // test case commented out below, this is here to keep from messing up the indexes

       // [3]
   /*
       "::BEGIN;"
       "abc > xy;"
       "::END;"
       "aba > z;",
   */
       "", // test case commented out below, this is here to keep from messing up the indexes

       // [4]
       "abc > xy;"
       "::Null;"
       "aba > z;",

       // [5]
       "::Upper;"
       "ABC > xy;"
       "AB > x;"
       "C > z;"
       "::Upper;"
       "XYZ > p;"
       "XY > q;"
       "Z > r;"
       "::Upper;",

       // [6]
       "$ws = [[:Separator:][\\u0009-\\u000C]$];"
       "$delim = [\\-$ws];"
       "$ws $delim* > ' ';"
       "'-' $delim* > '-';",

       // [7]
       "::Null;"
       "$ws = [[:Separator:][\\u0009-\\u000C]$];"
       "$delim = [\\-$ws];"
       "$ws $delim* > ' ';"
       "'-' $delim* > '-';",

       // [8]
       "$ws = [[:Separator:][\\u0009-\\u000C]$];"
       "$delim = [\\-$ws];"
       "$ws $delim* > ' ';"
       "'-' $delim* > '-';"
       "::Null;",

       // [9]
       "$ws = [[:Separator:][\\u0009-\\u000C]$];"
       "$delim = [\\-$ws];"
       "::Null;"
       "$ws $delim* > ' ';"
       "'-' $delim* > '-';",

       // [10]
   /*
       "::BEGIN;"
       "$ws = [[:Separator:][\\u0009-\\u000C]$];"
       "$delim = [\\-$ws];"
       "::END;"
       "$ws $delim* > ' ';"
       "'-' $delim* > '-';",
   */
       "", // test case commented out below, this is here to keep from messing up the indexes

       // [11]
   /*
       "$ws = [[:Separator:][\\u0009-\\u000C]$];"
       "$delim = [\\-$ws];"
       "::BEGIN;"
       "$ws $delim* > ' ';"
       "'-' $delim* > '-';"
       "::END;",
   */
       "", // test case commented out below, this is here to keep from messing up the indexes

       // [12]
   /*
       "$ws = [[:Separator:][\\u0009-\\u000C]$];"
       "$delim = [\\-$ws];"
       "$ab = [ab];"
       "::BEGIN;"
       "$ws $delim* > ' ';"
       "'-' $delim* > '-';"
       "::END;"
       "::BEGIN;"
       "$ab { ' ' } $ab > '-';"
       "c { ' ' > ;"
       "::END;"
       "::BEGIN;"
       "'a-a' > a\\%|a;"
       "::END;",
   */
       "", // test case commented out below, this is here to keep from messing up the indexes

       // [13]
       "$ws = [[:Separator:][\\u0009-\\u000C]$];"
       "$delim = [\\-$ws];"
       "$ab = [ab];"
       "::Null;"
       "$ws $delim* > ' ';"
       "'-' $delim* > '-';"
       "::Null;"
       "$ab { ' ' } $ab > '-';"
       "c { ' ' > ;"
       "::Null;"
       "'a-a' > a\\%|a;",

       // [14]
   /*
       "::[abc];"
       "::BEGIN;"
       "abc > xy;"
       "::END;"
       "::BEGIN;"
       "aba > yz;"
       "::END;"
       "::Upper;",
   */
       "", // test case commented out below, this is here to keep from messing up the indexes

       // [15]
       "::[abc];"
       "abc > xy;"
       "::Null;"
       "aba > yz;"
       "::Upper;",

       // [16]
   /*
       "::[abc];"
       "::BEGIN;"
       "abc <> xy;"
       "::END;"
       "::BEGIN;"
       "aba <> yz;"
       "::END;"
       "::Upper(Lower);"
       "::([XYZ]);"
   */
       "", // test case commented out below, this is here to keep from messing up the indexes

       // [17]
       "::[abc];"
       "abc <> xy;"
       "::Null;"
       "aba <> yz;"
       "::Upper(Lower);"
       "::([XYZ]);"
   };
   static const int32_t BEGIN_END_RULES_length = (int32_t)(sizeof(BEGIN_END_RULES) / sizeof(BEGIN_END_RULES[0]));
   4286 
   4287 /*
   4288 (This entire test is commented out below and will need some heavy revision when we re-add
   4289 the ::BEGIN/::END stuff)
   4290 static const char* BOGUS_BEGIN_END_RULES[] = {
   4291     // [7]
   4292     "::BEGIN;"
   4293     "abc > xy;"
   4294     "::BEGIN;"
   4295     "aba > z;"
   4296     "::END;"
   4297     "::END;",
   4298 
   4299     // [8]
   4300     "abc > xy;"
   4301     " aba > z;"
   4302     "::END;",
   4303 
   4304     // [9]
   4305     "::BEGIN;"
   4306     "::Upper;"
   4307     "::END;"
   4308 };
   4309 static const int32_t BOGUS_BEGIN_END_RULES_length = (int32_t)(sizeof(BOGUS_BEGIN_END_RULES) / sizeof(BOGUS_BEGIN_END_RULES[0]));
   4310 */
   4311 
   4312 static const char* BEGIN_END_TEST_CASES[] = {
   4313     // rules             input                   expected output
   4314     BEGIN_END_RULES[0],  "abc ababc aba",        "xy zbc z",
   4315 //    BEGIN_END_RULES[1],  "abc ababc aba",        "xy abxy z",
   4316 //    BEGIN_END_RULES[2],  "abc ababc aba",        "xy abxy z",
   4317 //    BEGIN_END_RULES[3],  "abc ababc aba",        "xy abxy z",
   4318     BEGIN_END_RULES[4],  "abc ababc aba",        "xy abxy z",
   4319     BEGIN_END_RULES[5],  "abccabaacababcbc",     "PXAARXQBR",
   4320 
   4321     BEGIN_END_RULES[6],  "e   e - e---e-  e",    "e e e-e-e",
   4322     BEGIN_END_RULES[7],  "e   e - e---e-  e",    "e e e-e-e",
   4323     BEGIN_END_RULES[8],  "e   e - e---e-  e",    "e e e-e-e",
   4324     BEGIN_END_RULES[9],  "e   e - e---e-  e",    "e e e-e-e",
   4325 //    BEGIN_END_RULES[10],  "e   e - e---e-  e",    "e e e-e-e",
   4326 //    BEGIN_END_RULES[11], "e   e - e---e-  e",    "e e e-e-e",
   4327 //    BEGIN_END_RULES[12], "e   e - e---e-  e",    "e e e-e-e",
   4328 //    BEGIN_END_RULES[12], "a    a    a    a",     "a%a%a%a",
   4329 //    BEGIN_END_RULES[12], "a a-b c b a",          "a%a-b cb-a",
   4330     BEGIN_END_RULES[13], "e   e - e---e-  e",    "e e e-e-e",
   4331     BEGIN_END_RULES[13], "a    a    a    a",     "a%a%a%a",
   4332     BEGIN_END_RULES[13], "a a-b c b a",          "a%a-b cb-a",
   4333 
   4334 //    BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
   4335     BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
   4336 //    BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
   4337     BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
   4338 };
   4339 static const int32_t BEGIN_END_TEST_CASES_length = (int32_t)(sizeof(BEGIN_END_TEST_CASES) / sizeof(BEGIN_END_TEST_CASES[0]));
   4340 
   4341 void TransliteratorTest::TestBeginEnd() {
   4342     // run through the list of test cases above
   4343     int32_t i = 0;
   4344     for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
   4345         expect((UnicodeString)"Test case #" + (i / 3),
   4346                UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
   4347                UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
   4348                UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
   4349     }
   4350 
   4351     // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
   4352     UParseError parseError;
   4353     UErrorCode status = U_ZERO_ERROR;
   4354     Transliterator* reversed  = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
   4355             UTRANS_REVERSE, parseError, status);
   4356     if (reversed == 0 || U_FAILURE(status)) {
   4357         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
   4358     } else {
   4359         expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
   4360     }
   4361     delete reversed;
   4362 
   4363     // finally, run through the list of syntactically-ill-formed rule sets above and make sure
   4364     // that all of them cause errors
   4365 /*
   4366 (commented out until we have the real ::BEGIN/::END stuff in place
   4367     for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
   4368         UParseError parseError;
   4369         UErrorCode status = U_ZERO_ERROR;
   4370         Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
   4371                 UTRANS_FORWARD, parseError, status);
   4372         if (!U_FAILURE(status)) {
   4373             delete t;
   4374             errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
   4375         }
   4376     }
   4377 */
   4378 }
   4379 
   4380 void TransliteratorTest::TestBeginEndToRules() {
   4381     // run through the same list of test cases we used above, but this time, instead of just
   4382     // instantiating a Transliterator from the rules and running the test against it, we instantiate
   4383     // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
   4384     // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
   4385     // to (i.e., does the same thing as) the original rule set
   4386     for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
   4387         UParseError parseError;
   4388         UErrorCode status = U_ZERO_ERROR;
   4389         Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
   4390                 UTRANS_FORWARD, parseError, status);
   4391         if (U_FAILURE(status)) {
   4392             reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
   4393         } else {
   4394             UnicodeString rules;
   4395             t->toRules(rules, TRUE);
   4396             Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
   4397                     UTRANS_FORWARD, parseError, status);
   4398             if (U_FAILURE(status)) {
   4399                 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
   4400                         parseError, status);
   4401                 delete t;
   4402             } else {
   4403                 expect(*t2,
   4404                        UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
   4405                        UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
   4406                 delete t;
   4407                 delete t2;
   4408             }
   4409         }
   4410     }
   4411 
   4412     // do the same thing for the reversible test case
   4413     UParseError parseError;
   4414     UErrorCode status = U_ZERO_ERROR;
   4415     Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
   4416             UTRANS_REVERSE, parseError, status);
   4417     if (U_FAILURE(status)) {
   4418         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
   4419     } else {
   4420         UnicodeString rules;
   4421         reversed->toRules(rules, FALSE);
   4422         Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
   4423                 parseError, status);
   4424         if (U_FAILURE(status)) {
   4425             reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
   4426                     parseError, status);
   4427             delete reversed;
   4428         } else {
   4429             expect(*reversed2,
   4430                    UnicodeString("xy XY XYZ yz YZ"),
   4431                    UnicodeString("xy abc xaba yz aba"));
   4432             delete reversed;
   4433             delete reversed2;
   4434         }
   4435     }
   4436 }
   4437 
   4438 void TransliteratorTest::TestRegisterAlias() {
   4439     UnicodeString longID("Lower;[aeiou]Upper");
   4440     UnicodeString shortID("Any-CapVowels");
   4441     UnicodeString reallyShortID("CapVowels");
   4442 
   4443     Transliterator::registerAlias(shortID, longID);
   4444 
   4445     UErrorCode err = U_ZERO_ERROR;
   4446     Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
   4447     if (U_FAILURE(err)) {
   4448         errln("Failed to instantiate transliterator with long ID");
   4449         Transliterator::unregister(shortID);
   4450         return;
   4451     }
   4452     Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
   4453     if (U_FAILURE(err)) {
   4454         errln("Failed to instantiate transliterator with short ID");
   4455         delete t1;
   4456         Transliterator::unregister(shortID);
   4457         return;
   4458     }
   4459 
   4460     if (t1->getID() != longID)
   4461         errln("Transliterator instantiated with long ID doesn't have long ID");
   4462     if (t2->getID() != reallyShortID)
   4463         errln("Transliterator instantiated with short ID doesn't have short ID");
   4464 
   4465     UnicodeString rules1;
   4466     UnicodeString rules2;
   4467 
   4468     t1->toRules(rules1, TRUE);
   4469     t2->toRules(rules2, TRUE);
   4470     if (rules1 != rules2)
   4471         errln("Alias transliterators aren't the same");
   4472 
   4473     delete t1;
   4474     delete t2;
   4475     Transliterator::unregister(shortID);
   4476 
   4477     t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
   4478     if (U_SUCCESS(err)) {
   4479         errln("Instantiation with short ID succeeded after short ID was unregistered");
   4480         delete t1;
   4481     }
   4482 
   4483     // try the same thing again, but this time with something other than
   4484     // an instance of CompoundTransliterator
   4485     UnicodeString realID("Latin-Greek");
   4486     UnicodeString fakeID("Latin-dlgkjdflkjdl");
   4487     Transliterator::registerAlias(fakeID, realID);
   4488 
   4489     err = U_ZERO_ERROR;
   4490     t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
   4491     if (U_FAILURE(err)) {
   4492         dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err));
   4493         Transliterator::unregister(realID);
   4494         return;
   4495     }
   4496     t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
   4497     if (U_FAILURE(err)) {
   4498         errln("Failed to instantiate transliterator with fake ID");
   4499         delete t1;
   4500         Transliterator::unregister(realID);
   4501         return;
   4502     }
   4503 
   4504     t1->toRules(rules1, TRUE);
   4505     t2->toRules(rules2, TRUE);
   4506     if (rules1 != rules2)
   4507         errln("Alias transliterators aren't the same");
   4508 
   4509     delete t1;
   4510     delete t2;
   4511     Transliterator::unregister(fakeID);
   4512 }
   4513 
   4514 void TransliteratorTest::TestRuleStripping() {
   4515     /*
   4516 #
   4517 \uE001>\u0C01; # SIGN
   4518     */
   4519     static const UChar rule[] = {
   4520         0x0023,0x0020,0x000D,0x000A,
   4521         0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
   4522     };
   4523     static const UChar expectedRule[] = {
   4524         0xE001,0x003E,0x0C01,0x003B,0
   4525     };
   4526     UChar result[sizeof(rule)/sizeof(rule[0])];
   4527     UErrorCode status = U_ZERO_ERROR;
   4528     int32_t len = utrans_stripRules(rule, (int32_t)(sizeof(rule)/sizeof(rule[0])), result, &status);
   4529     if (len != u_strlen(expectedRule)) {
   4530         errln("utrans_stripRules return len = %d", len);
   4531     }
   4532     if (u_strncmp(expectedRule, result, len) != 0) {
   4533         errln("utrans_stripRules did not return expected string");
   4534     }
   4535 }
   4536 
   4537 /**
   4538  * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
   4539  */
   4540 void TransliteratorTest::TestHalfwidthFullwidth(void) {
   4541     UParseError parseError;
   4542     UErrorCode status = U_ZERO_ERROR;
   4543     Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status);
   4544     Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status);
   4545     if (hf == 0 || fh == 0) {
   4546         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
   4547         delete hf;
   4548         delete fh;
   4549         return;
   4550     }
   4551 
   4552     // Array of 2n items
   4553     // Each item is
   4554     //   "hf"|"fh"|"both",
   4555     //   <Halfwidth>,
   4556     //   <Fullwidth>
   4557     const char* DATA[] = {
   4558         "both",
   4559         "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
   4560         "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
   4561     };
   4562     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
   4563 
   4564     for (int32_t i=0; i<DATA_length; i+=3) {
   4565         UnicodeString h = CharsToUnicodeString(DATA[i+1]);
   4566         UnicodeString f = CharsToUnicodeString(DATA[i+2]);
   4567         switch (*DATA[i]) {
   4568         case 0x68: //'h': // Halfwidth-Fullwidth only
   4569             expect(*hf, h, f);
   4570             break;
   4571         case 0x66: //'f': // Fullwidth-Halfwidth only
   4572             expect(*fh, f, h);
   4573             break;
   4574         case 0x62: //'b': // both directions
   4575             expect(*hf, h, f);
   4576             expect(*fh, f, h);
   4577             break;
   4578         }
   4579     }
   4580     delete hf;
   4581     delete fh;
   4582 }
   4583 
   4584 
   4585     /**
   4586      *  Test Thai.  The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
   4587      *              TODO: confirm that the expected results are correct.
   4588      *              For now, test just confirms that C++ and Java give identical results.
   4589      */
   4590 void TransliteratorTest::TestThai(void) {
   4591 #if !UCONFIG_NO_BREAK_ITERATION
   4592     UParseError parseError;
   4593     UErrorCode status = U_ZERO_ERROR;
   4594     Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
   4595     if (tr == 0) {
   4596         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
   4597         return;
   4598     }
   4599     if (U_FAILURE(status)) {
   4600         errln("FAIL: createInstance failed with %s", u_errorName(status));
   4601         return;
   4602     }
   4603     const char *thaiText =
   4604         "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
   4605         "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
   4606         "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
   4607         "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
   4608         "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
   4609         "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
   4610         "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
   4611         "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
   4612         "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
   4613         "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
   4614         "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
   4615         "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
   4616         "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
   4617         "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
   4618         "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
   4619         "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
   4620         "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
   4621         "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
   4622         "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
   4623         "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
   4624         "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
   4625         "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
   4626         "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
   4627         "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
   4628         " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
   4629         "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
   4630         "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
   4631         " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
   4632         "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
   4633         "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
   4634 
   4635     const char *latinText =
   4636         "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
   4637         "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
   4638         "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
   4639         "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
   4640         "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
   4641         " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
   4642         "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
   4643         "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
   4644         "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
   4645         "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
   4646         "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
   4647         "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
   4648         " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
   4649         "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
   4650         " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
   4651         "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
   4652         "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
   4653         "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
   4654 
   4655 
   4656     UnicodeString  xlitText(thaiText);
   4657     xlitText = xlitText.unescape();
   4658     tr->transliterate(xlitText);
   4659 
   4660     UnicodeString expectedText(latinText);
   4661     expectedText = expectedText.unescape();
   4662     expect(*tr, xlitText, expectedText);
   4663 
   4664     delete tr;
   4665 #endif
   4666 }
   4667 
   4668 
   4669 //======================================================================
   4670 // Support methods
   4671 //======================================================================
   4672 void TransliteratorTest::expectT(const UnicodeString& id,
   4673                                  const UnicodeString& source,
   4674                                  const UnicodeString& expectedResult) {
   4675     UErrorCode ec = U_ZERO_ERROR;
   4676     UParseError pe;
   4677     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
   4678     if (U_FAILURE(ec)) {
   4679         errln((UnicodeString)"FAIL: Could not create " + id + " -  " + u_errorName(ec));
   4680         delete t;
   4681         return;
   4682     }
   4683     expect(*t, source, expectedResult);
   4684     delete t;
   4685 }
   4686 
   4687 void TransliteratorTest::reportParseError(const UnicodeString& message,
   4688                                           const UParseError& parseError,
   4689                                           const UErrorCode& status) {
   4690     dataerrln(message +
   4691           /*", parse error " + parseError.code +*/
   4692           ", line " + parseError.line +
   4693           ", offset " + parseError.offset +
   4694           ", pre-context " + prettify(parseError.preContext, TRUE) +
   4695           ", post-context " + prettify(parseError.postContext,TRUE) +
   4696           ", Error: " + u_errorName(status));
   4697 }
   4698 
   4699 void TransliteratorTest::expect(const UnicodeString& rules,
   4700                                 const UnicodeString& source,
   4701                                 const UnicodeString& expectedResult,
   4702                                 UTransPosition *pos) {
   4703     expect("<ID>", rules, source, expectedResult, pos);
   4704 }
   4705 
   4706 void TransliteratorTest::expect(const UnicodeString& id,
   4707                                 const UnicodeString& rules,
   4708                                 const UnicodeString& source,
   4709                                 const UnicodeString& expectedResult,
   4710                                 UTransPosition *pos) {
   4711     UErrorCode status = U_ZERO_ERROR;
   4712     UParseError parseError;
   4713     Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
   4714     if (U_FAILURE(status)) {
   4715         reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
   4716     } else {
   4717         expect(*t, source, expectedResult, pos);
   4718     }
   4719     delete t;
   4720 }
   4721 
   4722 void TransliteratorTest::expect(const Transliterator& t,
   4723                                 const UnicodeString& source,
   4724                                 const UnicodeString& expectedResult,
   4725                                 const Transliterator& reverseTransliterator) {
   4726     expect(t, source, expectedResult);
   4727     expect(reverseTransliterator, expectedResult, source);
   4728 }
   4729 
   4730 void TransliteratorTest::expect(const Transliterator& t,
   4731                                 const UnicodeString& source,
   4732                                 const UnicodeString& expectedResult,
   4733                                 UTransPosition *pos) {
   4734     if (pos == 0) {
   4735         UnicodeString result(source);
   4736         t.transliterate(result);
   4737         expectAux(t.getID() + ":String", source, result, expectedResult);
   4738     }
   4739     UTransPosition index={0, 0, 0, 0};
   4740     if (pos != 0) {
   4741         index = *pos;
   4742     }
   4743 
   4744     UnicodeString rsource(source);
   4745     if (pos == 0) {
   4746         t.transliterate(rsource);
   4747     } else {
   4748         // Do it all at once -- below we do it incrementally
   4749         t.finishTransliteration(rsource, *pos);
   4750     }
   4751     expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
   4752 
   4753     // Test keyboard (incremental) transliteration -- this result
   4754     // must be the same after we finalize (see below).
   4755     UnicodeString log;
   4756     rsource.remove();
   4757     if (pos != 0) {
   4758         rsource = source;
   4759         formatInput(log, rsource, index);
   4760         log.append(" -> ");
   4761         UErrorCode status = U_ZERO_ERROR;
   4762         t.transliterate(rsource, index, status);
   4763         formatInput(log, rsource, index);
   4764     } else {
   4765         for (int32_t i=0; i<source.length(); ++i) {
   4766             if (i != 0) {
   4767                 log.append(" + ");
   4768             }
   4769             log.append(source.charAt(i)).append(" -> ");
   4770             UErrorCode status = U_ZERO_ERROR;
   4771             t.transliterate(rsource, index, source.charAt(i), status);
   4772             formatInput(log, rsource, index);
   4773         }
   4774     }
   4775 
   4776     // As a final step in keyboard transliteration, we must call
   4777     // transliterate to finish off any pending partial matches that
   4778     // were waiting for more input.
   4779     t.finishTransliteration(rsource, index);
   4780     log.append(" => ").append(rsource);
   4781 
   4782     expectAux(t.getID() + ":Keyboard", log,
   4783               rsource == expectedResult,
   4784               expectedResult);
   4785 }
   4786 
   4787 
   4788 /**
   4789  * @param appendTo result is appended to this param.
   4790  * @param input the string being transliterated
   4791  * @param pos the index struct
   4792  */
   4793 UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
   4794                                                const UnicodeString& input,
   4795                                                const UTransPosition& pos) {
   4796     // Output a string of the form aaa{bbb|ccc|ddd}eee, where
   4797     // the {} indicate the context start and limit, and the ||
   4798     // indicate the start and limit.
   4799     if (0 <= pos.contextStart &&
   4800         pos.contextStart <= pos.start &&
   4801         pos.start <= pos.limit &&
   4802         pos.limit <= pos.contextLimit &&
   4803         pos.contextLimit <= input.length()) {
   4804 
   4805         UnicodeString a, b, c, d, e;
   4806         input.extractBetween(0, pos.contextStart, a);
   4807         input.extractBetween(pos.contextStart, pos.start, b);
   4808         input.extractBetween(pos.start, pos.limit, c);
   4809         input.extractBetween(pos.limit, pos.contextLimit, d);
   4810         input.extractBetween(pos.contextLimit, input.length(), e);
   4811         appendTo.append(a).append((UChar)123/*{*/).append(b).
   4812             append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
   4813             append((UChar)125/*}*/).append(e);
   4814     } else {
   4815         appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
   4816                         pos.contextStart + ", s=" + pos.start + ", l=" +
   4817                         pos.limit + ", cl=" + pos.contextLimit + "} on " +
   4818                         input);
   4819     }
   4820     return appendTo;
   4821 }
   4822 
   4823 void TransliteratorTest::expectAux(const UnicodeString& tag,
   4824                                    const UnicodeString& source,
   4825                                    const UnicodeString& result,
   4826                                    const UnicodeString& expectedResult) {
   4827     expectAux(tag, source + " -> " + result,
   4828               result == expectedResult,
   4829               expectedResult);
   4830 }
   4831 
   4832 void TransliteratorTest::expectAux(const UnicodeString& tag,
   4833                                    const UnicodeString& summary, UBool pass,
   4834                                    const UnicodeString& expectedResult) {
   4835     if (pass) {
   4836         logln(UnicodeString("(")+tag+") " + prettify(summary));
   4837     } else {
   4838         dataerrln(UnicodeString("FAIL: (")+tag+") "
   4839               + prettify(summary)
   4840               + ", expected " + prettify(expectedResult));
   4841     }
   4842 }
   4843 
   4844 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
   4845