Home | History | Annotate | Download | only in intltest
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 1999-2009, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 *   Date        Name        Description
      7 *   11/10/99    aliu        Creation.
      8 **********************************************************************
      9 */
     10 
     11 #include "unicode/utypes.h"
     12 
     13 #if !UCONFIG_NO_TRANSLITERATION
     14 
     15 #include "transtst.h"
     16 #include "unicode/locid.h"
     17 #include "unicode/dtfmtsym.h"
     18 #include "unicode/normlzr.h"
     19 #include "unicode/translit.h"
     20 #include "unicode/uchar.h"
     21 #include "unicode/unifilt.h"
     22 #include "unicode/uniset.h"
     23 #include "unicode/ustring.h"
     24 #include "unicode/usetiter.h"
     25 #include "unicode/uscript.h"
     26 #include "cpdtrans.h"
     27 #include "nultrans.h"
     28 #include "rbt.h"
     29 #include "rbt_pars.h"
     30 #include "anytrans.h"
     31 #include "esctrn.h"
     32 #include "name2uni.h"
     33 #include "nortrans.h"
     34 #include "remtrans.h"
     35 #include "titletrn.h"
     36 #include "tolowtrn.h"
     37 #include "toupptrn.h"
     38 #include "unesctrn.h"
     39 #include "uni2name.h"
     40 #include "cstring.h"
     41 #include "cmemory.h"
     42 #include <stdio.h>
     43 
     44 /***********************************************************************
     45 
     46                      HOW TO USE THIS TEST FILE
     47                                -or-
     48                   How I developed on two platforms
     49                 without losing (too much of) my mind
     50 
     51 
     52 1. Add new tests by copying/pasting/changing existing tests.  On Java,
     53    any public void method named Test...() taking no parameters becomes
     54    a test.  On C++, you need to modify the header and add a line to
     55    the runIndexedTest() dispatch method.
     56 
     57 2. Make liberal use of the expect() method; it is your friend.
     58 
     59 3. The tests in this file exactly match those in a sister file on the
     60    other side.  The two files are:
     61 
     62    icu4j:  src/com/ibm/test/translit/TransliteratorTest.java
     63    icu4c:  source/test/intltest/transtst.cpp
     64 
     65                   ==> THIS IS THE IMPORTANT PART <==
     66 
     67    When you add a test in this file, add it in TransliteratorTest.java
     68    too.  Give it the same name and put it in the same relative place.
     69    This makes maintenance a lot simpler for any poor soul who ends up
     70    trying to synchronize the tests between icu4j and icu4c.
     71 
     72 4. If you MUST enter a test that is NOT paralleled in the sister file,
     73    then add it in the special non-mirrored section.  These are
     74    labeled
     75 
     76      "icu4j ONLY"
     77 
     78    or
     79 
     80      "icu4c ONLY"
     81 
     82    Make sure you document the reason the test is here and not there.
     83 
     84 
     85 Thank you.
     86 The Management
     87 ***********************************************************************/
     88 
// Define character constants thusly to be EBCDIC-friendly: each constant
// is spelled as an explicit Unicode code unit value rather than as a char
// literal, so its value does not depend on the compiler's execution
// character set.
enum {
    LEFT_BRACE=((UChar)0x007B), /*{*/
    PIPE      =((UChar)0x007C), /*|*/
    ZERO      =((UChar)0x0030), /*0*/
    UPPER_A   =((UChar)0x0041)  /*A*/
};
     96 
// Constructs the test object, initializing the DESERET_DEE and DESERET_dee
// members from the code points U+10414 and U+1043C respectively.
// NOTE(review): the members are declared in transtst.h (not visible here);
// presumably they hold the Deseret capital/small letter "dee" -- confirm.
TransliteratorTest::TransliteratorTest()
:   DESERET_DEE((UChar32)0x10414),
    DESERET_dee((UChar32)0x1043C)
{
}
    102 
// Destructor: no explicit cleanup is performed here.
TransliteratorTest::~TransliteratorTest() {}
    104 
/**
 * Test dispatch table: maps a zero-based test index to one of the
 * Test...() methods of this class.
 *
 * @param index  zero-based index of the test to select
 * @param exec   not referenced directly in this function; presumably
 *               consumed by the TESTCASE macro to decide whether to run
 *               the selected test -- confirm against the macro definition
 * @param name   output; set to "" when index matches no case.  Presumably
 *               set to the test name by TESTCASE otherwise -- confirm.
 *
 * The fourth (char*) parameter is unused.
 *
 * When adding a test, append a TESTCASE line with the next consecutive
 * index (see the "HOW TO USE THIS TEST FILE" comment at the top of this
 * file).
 */
void
TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
                                   const char* &name, char* /*par*/) {
    switch (index) {
        TESTCASE(0,TestInstantiation);
        TESTCASE(1,TestSimpleRules);
        TESTCASE(2,TestRuleBasedInverse);
        TESTCASE(3,TestKeyboard);
        TESTCASE(4,TestKeyboard2);
        TESTCASE(5,TestKeyboard3);
        TESTCASE(6,TestArabic);
        TESTCASE(7,TestCompoundKana);
        TESTCASE(8,TestCompoundHex);
        TESTCASE(9,TestFiltering);
        TESTCASE(10,TestInlineSet);
        TESTCASE(11,TestPatternQuoting);
        TESTCASE(12,TestJ277);
        TESTCASE(13,TestJ243);
        TESTCASE(14,TestJ329);
        TESTCASE(15,TestSegments);
        TESTCASE(16,TestCursorOffset);
        TESTCASE(17,TestArbitraryVariableValues);
        TESTCASE(18,TestPositionHandling);
        TESTCASE(19,TestHiraganaKatakana);
        TESTCASE(20,TestCopyJ476);
        TESTCASE(21,TestAnchors);
        TESTCASE(22,TestInterIndic);
        TESTCASE(23,TestFilterIDs);
        TESTCASE(24,TestCaseMap);
        TESTCASE(25,TestNameMap);
        TESTCASE(26,TestLiberalizedID);
        TESTCASE(27,TestCreateInstance);
        TESTCASE(28,TestNormalizationTransliterator);
        TESTCASE(29,TestCompoundRBT);
        TESTCASE(30,TestCompoundFilter);
        TESTCASE(31,TestRemove);
        TESTCASE(32,TestToRules);
        TESTCASE(33,TestContext);
        TESTCASE(34,TestSupplemental);
        TESTCASE(35,TestQuantifier);
        TESTCASE(36,TestSTV);
        TESTCASE(37,TestCompoundInverse);
        TESTCASE(38,TestNFDChainRBT);
        TESTCASE(39,TestNullInverse);
        TESTCASE(40,TestAliasInverseID);
        TESTCASE(41,TestCompoundInverseID);
        TESTCASE(42,TestUndefinedVariable);
        TESTCASE(43,TestEmptyContext);
        TESTCASE(44,TestCompoundFilterID);
        TESTCASE(45,TestPropertySet);
        TESTCASE(46,TestNewEngine);
        TESTCASE(47,TestQuantifiedSegment);
        TESTCASE(48,TestDevanagariLatinRT);
        TESTCASE(49,TestTeluguLatinRT);
        TESTCASE(50,TestCompoundLatinRT);
        TESTCASE(51,TestSanskritLatinRT);
        TESTCASE(52,TestLocaleInstantiation);
        TESTCASE(53,TestTitleAccents);
        TESTCASE(54,TestLocaleResource);
        TESTCASE(55,TestParseError);
        TESTCASE(56,TestOutputSet);
        TESTCASE(57,TestVariableRange);
        TESTCASE(58,TestInvalidPostContext);
        TESTCASE(59,TestIDForms);
        TESTCASE(60,TestToRulesMark);
        TESTCASE(61,TestEscape);
        TESTCASE(62,TestAnchorMasking);
        TESTCASE(63,TestDisplayName);
        TESTCASE(64,TestSpecialCases);
        TESTCASE(65,TestIncrementalProgress);
        TESTCASE(66,TestSurrogateCasing);
        TESTCASE(67,TestFunction);
        TESTCASE(68,TestInvalidBackRef);
        TESTCASE(69,TestMulticharStringSet);
        TESTCASE(70,TestUserFunction);
        TESTCASE(71,TestAnyX);
        TESTCASE(72,TestSourceTargetSet);
        TESTCASE(73,TestGurmukhiDevanagari);
        TESTCASE(74,TestRuleWhitespace);
        TESTCASE(75,TestAllCodepoints);
        TESTCASE(76,TestBoilerplate);
        TESTCASE(77,TestAlternateSyntax);
        TESTCASE(78,TestBeginEnd);
        TESTCASE(79,TestBeginEndToRules);
        TESTCASE(80,TestRegisterAlias);
        TESTCASE(81,TestRuleStripping);
        TESTCASE(82,TestHalfwidthFullwidth);
        TESTCASE(83,TestThai);
        TESTCASE(84,TestAny);
        // Unknown index: report an empty name.
        default: name = ""; break;
    }
}
    197 
// Version tuple {3,9,4,0}.  Not referenced in the visible portion of this
// file; presumably compared against the running ICU version by tests
// further down -- confirm before removing.
static const UVersionInfo ICU_39 = {3,9,4,0};
    199 /**
    200  * Make sure every system transliterator can be instantiated.
    201  *
    202  * ALSO test that the result of toRules() for each rule is a valid
    203  * rule.  Do this here so we don't have to have another test that
    204  * instantiates everything as well.
    205  */
    206 void TransliteratorTest::TestInstantiation() {
    207     UErrorCode ec = U_ZERO_ERROR;
    208     StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
    209     assertSuccess("getAvailableIDs()", ec);
    210     assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
    211     int32_t n = Transliterator::countAvailableIDs();
    212     assertTrue("getAvailableIDs().count()==countAvailableIDs()",
    213                avail->count(ec) == n);
    214     assertSuccess("count()", ec);
    215     UnicodeString name;
    216     for (int32_t i=0; i<n; ++i) {
    217         const UnicodeString& id = *avail->snext(ec);
    218         if (!assertSuccess("snext()", ec) ||
    219             !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
    220             break;
    221         }
    222         UnicodeString id2 = Transliterator::getAvailableID(i);
    223         if (id.length() < 1) {
    224             errln(UnicodeString("FAIL: getAvailableID(") +
    225                   i + ") returned empty string");
    226             continue;
    227         }
    228         if (id != id2) {
    229             errln(UnicodeString("FAIL: getAvailableID(") +
    230                   i + ") != getAvailableIDs().snext()");
    231             continue;
    232         }
    233         UParseError parseError;
    234         UErrorCode status = U_ZERO_ERROR;
    235         Transliterator* t = Transliterator::createInstance(id,
    236                               UTRANS_FORWARD, parseError,status);
    237         name.truncate(0);
    238         Transliterator::getDisplayName(id, name);
    239         if (t == 0) {
    240             errln(UnicodeString("FAIL: Couldn't create ") + id +
    241                   /*", parse error " + parseError.code +*/
    242                   ", line " + parseError.line +
    243                   ", offset " + parseError.offset +
    244                   ", pre-context " + prettify(parseError.preContext, TRUE) +
    245                   ", post-context " +prettify(parseError.postContext,TRUE) +
    246                   ", Error: " + u_errorName(status));
    247             // When createInstance fails, it deletes the failing
    248             // entry from the available ID list.  We detect this
    249             // here by looking for a change in countAvailableIDs.
    250             int32_t nn = Transliterator::countAvailableIDs();
    251             if (nn == (n - 1)) {
    252                 n = nn;
    253                 --i; // Compensate for deleted entry
    254             }
    255         } else {
    256             logln(UnicodeString("OK: ") + name + " (" + id + ")");
    257 
    258             // Now test toRules
    259             UnicodeString rules;
    260             t->toRules(rules, TRUE);
    261             Transliterator *u = Transliterator::createFromRules("x",
    262                                     rules, UTRANS_FORWARD, parseError,status);
    263             if (u == 0) {
    264                 errln(UnicodeString("FAIL: ") + id +
    265                       ".createFromRules() => bad rules" +
    266                       /*", parse error " + parseError.code +*/
    267                       ", line " + parseError.line +
    268                       ", offset " + parseError.offset +
    269                       ", context " + prettify(parseError.preContext, TRUE) +
    270                       ", rules: " + prettify(rules, TRUE));
    271             } else {
    272                 delete u;
    273             }
    274             delete t;
    275         }
    276     }
    277     assertTrue("snext()==NULL", avail->snext(ec)==NULL);
    278     assertSuccess("snext()", ec);
    279     delete avail;
    280 
    281     // Now test the failure path
    282     UParseError parseError;
    283     UErrorCode status = U_ZERO_ERROR;
    284     UnicodeString id("<Not a valid Transliterator ID>");
    285     Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
    286     if (t != 0) {
    287         errln("FAIL: " + id + " returned a transliterator");
    288         delete t;
    289     } else {
    290         logln("OK: Bogus ID handled properly");
    291     }
    292 }
    293 
    294 void TransliteratorTest::TestSimpleRules(void) {
    295     /* Example: rules 1. ab>x|y
    296      *                2. yc>z
    297      *
    298      * []|eabcd  start - no match, copy e to tranlated buffer
    299      * [e]|abcd  match rule 1 - copy output & adjust cursor
    300      * [ex|y]cd  match rule 2 - copy output & adjust cursor
    301      * [exz]|d   no match, copy d to transliterated buffer
    302      * [exzd]|   done
    303      */
    304     expect(UnicodeString("ab>x|y;", "") +
    305            "yc>z",
    306            "eabcd", "exzd");
    307 
    308     /* Another set of rules:
    309      *    1. ab>x|yzacw
    310      *    2. za>q
    311      *    3. qc>r
    312      *    4. cw>n
    313      *
    314      * []|ab       Rule 1
    315      * [x|yzacw]   No match
    316      * [xy|zacw]   Rule 2
    317      * [xyq|cw]    Rule 4
    318      * [xyqn]|     Done
    319      */
    320     expect(UnicodeString("ab>x|yzacw;") +
    321            "za>q;" +
    322            "qc>r;" +
    323            "cw>n",
    324            "ab", "xyqn");
    325 
    326     /* Test categories
    327      */
    328     UErrorCode status = U_ZERO_ERROR;
    329     UParseError parseError;
    330     Transliterator *t = Transliterator::createFromRules(
    331         "<ID>",
    332         UnicodeString("$dummy=").append((UChar)0xE100) +
    333         UnicodeString(";"
    334                       "$vowel=[aeiouAEIOU];"
    335                       "$lu=[:Lu:];"
    336                       "$vowel } $lu > '!';"
    337                       "$vowel > '&';"
    338                       "'!' { $lu > '^';"
    339                       "$lu > '*';"
    340                       "a > ERROR", ""),
    341         UTRANS_FORWARD, parseError,
    342         status);
    343     if (U_FAILURE(status)) {
    344         dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status));
    345         return;
    346     }
    347     expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
    348     delete t;
    349 }
    350 
    351 /**
    352  * Test inline set syntax and set variable syntax.
    353  */
    354 void TransliteratorTest::TestInlineSet(void) {
    355     expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
    356     expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
    357 
    358     expect(UnicodeString(
    359            "$digit = [0-9];"
    360            "$alpha = [a-zA-Z];"
    361            "$alphanumeric = [$digit $alpha];" // ***
    362            "$special = [^$alphanumeric];"     // ***
    363            "$alphanumeric > '-';"
    364            "$special > '*';", ""),
    365 
    366            "thx-1138", "---*----");
    367 }
    368 
    369 /**
    370  * Create some inverses and confirm that they work.  We have to be
    371  * careful how we do this, since the inverses will not be true
    372  * inverses -- we can't throw any random string at the composition
    373  * of the transliterators and expect the identity function.  F x
    374  * F' != I.  However, if we are careful about the input, we will
    375  * get the expected results.
    376  */
    377 void TransliteratorTest::TestRuleBasedInverse(void) {
    378     UnicodeString RULES =
    379         UnicodeString("abc>zyx;") +
    380         "ab>yz;" +
    381         "bc>zx;" +
    382         "ca>xy;" +
    383         "a>x;" +
    384         "b>y;" +
    385         "c>z;" +
    386 
    387         "abc<zyx;" +
    388         "ab<yz;" +
    389         "bc<zx;" +
    390         "ca<xy;" +
    391         "a<x;" +
    392         "b<y;" +
    393         "c<z;" +
    394 
    395         "";
    396 
    397     const char* DATA[] = {
    398         // Careful here -- random strings will not work.  If we keep
    399         // the left side to the domain and the right side to the range
    400         // we will be okay though (left, abc; right xyz).
    401         "a", "x",
    402         "abcacab", "zyxxxyy",
    403         "caccb", "xyzzy",
    404     };
    405 
    406     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
    407 
    408     UErrorCode status = U_ZERO_ERROR;
    409     UParseError parseError;
    410     Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
    411                                 UTRANS_FORWARD, parseError, status);
    412     Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
    413                                 UTRANS_REVERSE, parseError, status);
    414     if (U_FAILURE(status)) {
    415         errln("FAIL: RBT constructor failed");
    416         return;
    417     }
    418     for (int32_t i=0; i<DATA_length; i+=2) {
    419         expect(*fwd, DATA[i], DATA[i+1]);
    420         expect(*rev, DATA[i+1], DATA[i]);
    421     }
    422     delete fwd;
    423     delete rev;
    424 }
    425 
    426 /**
    427  * Basic test of keyboard.
    428  */
    429 void TransliteratorTest::TestKeyboard(void) {
    430     UParseError parseError;
    431     UErrorCode status = U_ZERO_ERROR;
    432     Transliterator *t = Transliterator::createFromRules("<ID>",
    433                               UnicodeString("psch>Y;")
    434                               +"ps>y;"
    435                               +"ch>x;"
    436                               +"a>A;",
    437                               UTRANS_FORWARD, parseError,
    438                               status);
    439     if (U_FAILURE(status)) {
    440         errln("FAIL: RBT constructor failed");
    441         return;
    442     }
    443     const char* DATA[] = {
    444         // insertion, buffer
    445         "a", "A",
    446         "p", "Ap",
    447         "s", "Aps",
    448         "c", "Apsc",
    449         "a", "AycA",
    450         "psch", "AycAY",
    451         0, "AycAY", // null means finishKeyboardTransliteration
    452     };
    453 
    454     keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
    455     delete t;
    456 }
    457 
    458 /**
    459  * Basic test of keyboard with cursor.
    460  */
    461 void TransliteratorTest::TestKeyboard2(void) {
    462     UParseError parseError;
    463     UErrorCode status = U_ZERO_ERROR;
    464     Transliterator *t = Transliterator::createFromRules("<ID>",
    465                               UnicodeString("ych>Y;")
    466                               +"ps>|y;"
    467                               +"ch>x;"
    468                               +"a>A;",
    469                               UTRANS_FORWARD, parseError,
    470                               status);
    471     if (U_FAILURE(status)) {
    472         errln("FAIL: RBT constructor failed");
    473         return;
    474     }
    475     const char* DATA[] = {
    476         // insertion, buffer
    477         "a", "A",
    478         "p", "Ap",
    479         "s", "Aps", // modified for rollback - "Ay",
    480         "c", "Apsc", // modified for rollback - "Ayc",
    481         "a", "AycA",
    482         "p", "AycAp",
    483         "s", "AycAps", // modified for rollback - "AycAy",
    484         "c", "AycApsc", // modified for rollback - "AycAyc",
    485         "h", "AycAY",
    486         0, "AycAY", // null means finishKeyboardTransliteration
    487     };
    488 
    489     keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
    490     delete t;
    491 }
    492 
    493 /**
    494  * Test keyboard transliteration with back-replacement.
    495  */
    496 void TransliteratorTest::TestKeyboard3(void) {
    497     // We want th>z but t>y.  Furthermore, during keyboard
    498     // transliteration we want t>y then yh>z if t, then h are
    499     // typed.
    500     UnicodeString RULES("t>|y;"
    501                         "yh>z;");
    502 
    503     const char* DATA[] = {
    504         // Column 1: characters to add to buffer (as if typed)
    505         // Column 2: expected appearance of buffer after
    506         //           keyboard xliteration.
    507         "a", "a",
    508         "b", "ab",
    509         "t", "abt", // modified for rollback - "aby",
    510         "c", "abyc",
    511         "t", "abyct", // modified for rollback - "abycy",
    512         "h", "abycz",
    513         0, "abycz", // null means finishKeyboardTransliteration
    514     };
    515 
    516     UParseError parseError;
    517     UErrorCode status = U_ZERO_ERROR;
    518     Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
    519     if (U_FAILURE(status)) {
    520         errln("FAIL: RBT constructor failed");
    521         return;
    522     }
    523     keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
    524     delete t;
    525 }
    526 
    527 void TransliteratorTest::keyboardAux(const Transliterator& t,
    528                                      const char* DATA[], int32_t DATA_length) {
    529     UErrorCode status = U_ZERO_ERROR;
    530     UTransPosition index={0, 0, 0, 0};
    531     UnicodeString s;
    532     for (int32_t i=0; i<DATA_length; i+=2) {
    533         UnicodeString log;
    534         if (DATA[i] != 0) {
    535             log = s + " + "
    536                 + DATA[i]
    537                 + " -> ";
    538             t.transliterate(s, index, DATA[i], status);
    539         } else {
    540             log = s + " => ";
    541             t.finishTransliteration(s, index);
    542         }
    543         // Show the start index '{' and the cursor '|'
    544         UnicodeString a, b, c;
    545         s.extractBetween(0, index.contextStart, a);
    546         s.extractBetween(index.contextStart, index.start, b);
    547         s.extractBetween(index.start, s.length(), c);
    548         log.append(a).
    549             append((UChar)LEFT_BRACE).
    550             append(b).
    551             append((UChar)PIPE).
    552             append(c);
    553         if (s == DATA[i+1] && U_SUCCESS(status)) {
    554             logln(log);
    555         } else {
    556             errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
    557         }
    558     }
    559 }
    560 
/**
 * Placeholder for the Latin-Arabic test.  The body is entirely commented
 * out, so this test currently does nothing and reports no result.  The
 * disabled code is kept for reference.
 */
void TransliteratorTest::TestArabic(void) {
// Test disabled for 2.0 until new Arabic transliterator can be written.
//    /*
//    const char* DATA[] = {
//        "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
//                  "\u0627\u0644\u0644\u063a\u0629\u0020"+
//                  "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
//                  "\u0628\u0628\u0646\u0638\u0645\u0020"+
//                  "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
//                  "\u062c\u0645\u064a\u0644\u0629",
//    };
//    */
//
//    UChar ar_raw[] = {
//        0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
//        0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
//        0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
//        0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
//        0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
//        0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
//    };
//    UnicodeString ar(ar_raw);
//    UErrorCode status=U_ZERO_ERROR;
//    UParseError parseError;
//    Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
//    if (t == 0) {
//        errln("FAIL: createInstance failed");
//        return;
//    }
//    expect(*t, "Arabic", ar);
//    delete t;
}
    593 
    594 /**
    595  * Compose the Kana transliterator forward and reverse and try
    596  * some strings that should come out unchanged.
    597  */
    598 void TransliteratorTest::TestCompoundKana(void) {
    599     UParseError parseError;
    600     UErrorCode status = U_ZERO_ERROR;
    601     Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
    602     if (t == 0) {
    603         dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status));
    604     } else {
    605         expect(*t, "aaaaa", "aaaaa");
    606         delete t;
    607     }
    608 }
    609 
    610 /**
    611  * Compose the hex transliterators forward and reverse.
    612  */
    613 void TransliteratorTest::TestCompoundHex(void) {
    614     UParseError parseError;
    615     UErrorCode status = U_ZERO_ERROR;
    616     Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
    617     Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
    618     Transliterator* transab[] = { a, b };
    619     Transliterator* transba[] = { b, a };
    620     if (a == 0 || b == 0) {
    621         errln("FAIL: construction failed");
    622         delete a;
    623         delete b;
    624         return;
    625     }
    626     // Do some basic tests of a
    627     expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
    628     // Do some basic tests of b
    629     expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
    630 
    631     Transliterator* ab = new CompoundTransliterator(transab, 2);
    632     UnicodeString s("abcde", "");
    633     expect(*ab, s, s);
    634 
    635     UnicodeString str(s);
    636     a->transliterate(str);
    637     Transliterator* ba = new CompoundTransliterator(transba, 2);
    638     expect(*ba, str, str);
    639 
    640     delete ab;
    641     delete ba;
    642     delete a;
    643     delete b;
    644 }
    645 
    646 int gTestFilterClassID = 0;
    647 /**
    648  * Used by TestFiltering().
    649  */
    650 class TestFilter : public UnicodeFilter {
    651     virtual UnicodeFunctor* clone() const {
    652         return new TestFilter(*this);
    653     }
    654     virtual UBool contains(UChar32 c) const {
    655         return c != (UChar)0x0063 /*c*/;
    656     }
    657     // Stubs
    658     virtual UnicodeString& toPattern(UnicodeString& result,
    659                                      UBool /*escapeUnprintable*/) const {
    660         return result;
    661     }
    662     virtual UBool matchesIndexValue(uint8_t /*v*/) const {
    663         return FALSE;
    664     }
    665     virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {}
    666 public:
    667     UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
    668 };
    669 
    670 /**
    671  * Do some basic tests of filtering.
    672  */
    673 void TransliteratorTest::TestFiltering(void) {
    674     UParseError parseError;
    675     UErrorCode status = U_ZERO_ERROR;
    676     Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
    677     if (hex == 0) {
    678         errln("FAIL: createInstance(Any-Hex) failed");
    679         return;
    680     }
    681     hex->adoptFilter(new TestFilter());
    682     UnicodeString s("abcde");
    683     hex->transliterate(s);
    684     UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
    685     if (s == exp) {
    686         logln(UnicodeString("Ok:   \"") + exp + "\"");
    687     } else {
    688         logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
    689     }
    690 
    691     // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
    692     UnicodeFilter *f = hex->orphanFilter();
    693     if (f == NULL){
    694         errln("FAIL: orphanFilter() should get a UnicodeFilter");
    695     } else {
    696         delete f;
    697     }
    698     delete hex;
    699 }
    700 
    701 /**
    702  * Test anchors
    703  */
    704 void TransliteratorTest::TestAnchors(void) {
    705     expect(UnicodeString("^a  > 0; a$ > 2 ; a > 1;", ""),
    706            "aaa",
    707            "012");
    708     expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
    709            "aaa",
    710            "012");
    711     expect(UnicodeString("^ab  > 01 ;"
    712            " ab  > |8 ;"
    713            "  b  > k ;"
    714            " 8x$ > 45 ;"
    715            " 8x  > 77 ;", ""),
    716 
    717            "ababbabxabx",
    718            "018k7745");
    719     expect(UnicodeString("$s = [z$] ;"
    720            "$s{ab    > 01 ;"
    721            "   ab    > |8 ;"
    722            "    b    > k ;"
    723            "   8x}$s > 45 ;"
    724            "   8x    > 77 ;", ""),
    725 
    726            "abzababbabxzabxabx",
    727            "01z018k45z01x45");
    728 }
    729 
    730 /**
    731  * Test pattern quoting and escape mechanisms.
    732  */
    733 void TransliteratorTest::TestPatternQuoting(void) {
    734     // Array of 3n items
    735     // Each item is <rules>, <input>, <expected output>
    736     const UnicodeString DATA[] = {
    737         UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
    738         UnicodeString(UChar(0x4E01)),
    739         "[male adult]"
    740     };
    741 
    742     for (int32_t i=0; i<3; i+=3) {
    743         logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
    744         UParseError parseError;
    745         UErrorCode status = U_ZERO_ERROR;
    746         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
    747         if (U_FAILURE(status)) {
    748             errln("RBT constructor failed");
    749         } else {
    750             expect(*t, DATA[i+1], DATA[i+2]);
    751         }
    752         delete t;
    753     }
    754 }
    755 
    756 /**
    757  * Regression test for bugs found in Greek transliteration.
    758  */
    759 void TransliteratorTest::TestJ277(void) {
    760     UErrorCode status = U_ZERO_ERROR;
    761     UParseError parseError;
    762     Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
    763     if (gl == NULL) {
    764         dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status));
    765         return;
    766     }
    767 
    768     UChar sigma = 0x3C3;
    769     UChar upsilon = 0x3C5;
    770     UChar nu = 0x3BD;
    771 //    UChar PHI = 0x3A6;
    772     UChar alpha = 0x3B1;
    773 //    UChar omega = 0x3C9;
    774 //    UChar omicron = 0x3BF;
    775 //    UChar epsilon = 0x3B5;
    776 
    777     // sigma upsilon nu -> syn
    778     UnicodeString syn;
    779     syn.append(sigma).append(upsilon).append(nu);
    780     expect(*gl, syn, "syn");
    781 
    782     // sigma alpha upsilon nu -> saun
    783     UnicodeString sayn;
    784     sayn.append(sigma).append(alpha).append(upsilon).append(nu);
    785     expect(*gl, sayn, "saun");
    786 
    787     // Again, using a smaller rule set
    788     UnicodeString rules(
    789                 "$alpha   = \\u03B1;"
    790                 "$nu      = \\u03BD;"
    791                 "$sigma   = \\u03C3;"
    792                 "$ypsilon = \\u03C5;"
    793                 "$vowel   = [aeiouAEIOU$alpha$ypsilon];"
    794                 "s <>           $sigma;"
    795                 "a <>           $alpha;"
    796                 "u <>  $vowel { $ypsilon;"
    797                 "y <>           $ypsilon;"
    798                 "n <>           $nu;",
    799                 "");
    800     Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
    801     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
    802     expect(*mini, syn, "syn");
    803     expect(*mini, sayn, "saun");
    804     delete mini;
    805     mini = NULL;
    806 
    807 #if !UCONFIG_NO_FORMATTING
    808     // Transliterate the Greek locale data
    809     Locale el("el");
    810     DateFormatSymbols syms(el, status);
    811     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
    812     int32_t i, count;
    813     const UnicodeString* data = syms.getMonths(count);
    814     for (i=0; i<count; ++i) {
    815         if (data[i].length() == 0) {
    816             continue;
    817         }
    818         UnicodeString out(data[i]);
    819         gl->transliterate(out);
    820         UBool ok = TRUE;
    821         if (data[i].length() >= 2 && out.length() >= 2 &&
    822             u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
    823             if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
    824                 ok = FALSE;
    825             }
    826         }
    827         if (ok) {
    828             logln(prettify(data[i] + " -> " + out));
    829         } else {
    830             errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
    831         }
    832     }
    833 #endif
    834 
    835     delete gl;
    836 }
    837 
    838 /**
    839  * Prefix, suffix support in hex transliterators
    840  */
    841 void TransliteratorTest::TestJ243(void) {
    842     UErrorCode ec = U_ZERO_ERROR;
    843 
    844     // Test default Hex-Any, which should handle
    845     // \u, \U, u+, and U+
    846     Transliterator *hex =
    847         Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
    848     if (assertSuccess("getInstance", ec)) {
    849         expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
    850     }
    851     delete hex;
    852 
    853 //    // Try a custom Hex-Unicode
    854 //    // \uXXXX and &#xXXXX;
    855 //    ec = U_ZERO_ERROR;
    856 //    HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
    857 //    expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;", ""),
    858 //           "abcd5fx012&#x00033;");
    859 //    // Try custom Any-Hex (default is tested elsewhere)
    860 //    ec = U_ZERO_ERROR;
    861 //    UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
    862 //    expect(hex3, "012", "&#x30;&#x31;&#x32;");
    863 }
    864 
    865 /**
    866  * Parsers need better syntax error messages.
    867  */
    868 void TransliteratorTest::TestJ329(void) {
    869 
    870     struct { UBool containsErrors; const char* rule; } DATA[] = {
    871         { FALSE, "a > b; c > d" },
    872         { TRUE,  "a > b; no operator; c > d" },
    873     };
    874     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
    875 
    876     for (int32_t i=0; i<DATA_length; ++i) {
    877         UErrorCode status = U_ZERO_ERROR;
    878         UParseError parseError;
    879         Transliterator *rbt = Transliterator::createFromRules("<ID>",
    880                                     DATA[i].rule,
    881                                     UTRANS_FORWARD,
    882                                     parseError,
    883                                     status);
    884         UBool gotError = U_FAILURE(status);
    885         UnicodeString desc(DATA[i].rule);
    886         desc.append(gotError ? " -> error" : " -> no error");
    887         if (gotError) {
    888             desc = desc + ", ParseError code=" + u_errorName(status) +
    889                 " line=" + parseError.line +
    890                 " offset=" + parseError.offset +
    891                 " context=" + parseError.preContext;
    892         }
    893         if (gotError == DATA[i].containsErrors) {
    894             logln(UnicodeString("Ok:   ") + desc);
    895         } else {
    896             errln(UnicodeString("FAIL: ") + desc);
    897         }
    898         delete rbt;
    899     }
    900 }
    901 
    902 /**
    903  * Test segments and segment references.
    904  */
    905 void TransliteratorTest::TestSegments(void) {
    906     // Array of 3n items
    907     // Each item is <rules>, <input>, <expected output>
    908     UnicodeString DATA[] = {
    909         "([a-z]) '.' ([0-9]) > $2 '-' $1",
    910         "abc.123.xyz.456",
    911         "ab1-c23.xy4-z56",
    912 
    913         // nested
    914         "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
    915         "a1 b2",
    916         "a1.a.1 b2.b.2",
    917     };
    918     int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
    919 
    920     for (int32_t i=0; i<DATA_length; i+=3) {
    921         logln("Pattern: " + prettify(DATA[i]));
    922         UParseError parseError;
    923         UErrorCode status = U_ZERO_ERROR;
    924         Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status);
    925         if (U_FAILURE(status)) {
    926             errln("FAIL: RBT constructor");
    927         } else {
    928             expect(*t, DATA[i+1], DATA[i+2]);
    929         }
    930         delete t;
    931     }
    932 }
    933 
    934 /**
    935  * Test cursor positioning outside of the key
    936  */
    937 void TransliteratorTest::TestCursorOffset(void) {
    938     // Array of 3n items
    939     // Each item is <rules>, <input>, <expected output>
    940     UnicodeString DATA[] = {
    941         "pre {alpha} post > | @ ALPHA ;"
    942         "eALPHA > beta ;"
    943         "pre {beta} post > BETA @@ | ;"
    944         "post > xyz",
    945 
    946         "prealphapost prebetapost",
    947 
    948         "prbetaxyz preBETApost",
    949     };
    950     int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
    951 
    952     for (int32_t i=0; i<DATA_length; i+=3) {
    953         logln("Pattern: " + prettify(DATA[i]));
    954         UParseError parseError;
    955         UErrorCode status = U_ZERO_ERROR;
    956         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
    957         if (U_FAILURE(status)) {
    958             errln("FAIL: RBT constructor");
    959         } else {
    960             expect(*t, DATA[i+1], DATA[i+2]);
    961         }
    962         delete t;
    963     }
    964 }
    965 
    966 /**
    967  * Test zero length and > 1 char length variable values.  Test
    968  * use of variable refs in UnicodeSets.
    969  */
    970 void TransliteratorTest::TestArbitraryVariableValues(void) {
    971     // Array of 3n items
    972     // Each item is <rules>, <input>, <expected output>
    973     UnicodeString DATA[] = {
    974         "$abe = ab;"
    975         "$pat = x[yY]z;"
    976         "$ll  = 'a-z';"
    977         "$llZ = [$ll];"
    978         "$llY = [$ll$pat];"
    979         "$emp = ;"
    980 
    981         "$abe > ABE;"
    982         "$pat > END;"
    983         "$llZ > 1;"
    984         "$llY > 2;"
    985         "7$emp 8 > 9;"
    986         "",
    987 
    988         "ab xYzxyz stY78",
    989         "ABE ENDEND 1129",
    990     };
    991     int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
    992 
    993     for (int32_t i=0; i<DATA_length; i+=3) {
    994         logln("Pattern: " + prettify(DATA[i]));
    995         UParseError parseError;
    996         UErrorCode status = U_ZERO_ERROR;
    997         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
    998         if (U_FAILURE(status)) {
    999             errln("FAIL: RBT constructor");
   1000         } else {
   1001             expect(*t, DATA[i+1], DATA[i+2]);
   1002         }
   1003         delete t;
   1004     }
   1005 }
   1006 
   1007 /**
   1008  * Confirm that the contextStart, contextLimit, start, and limit
   1009  * behave correctly. J474.
   1010  */
   1011 void TransliteratorTest::TestPositionHandling(void) {
   1012     // Array of 3n items
   1013     // Each item is <rules>, <input>, <expected output>
   1014     const char* DATA[] = {
   1015         "a{t} > SS ; {t}b > UU ; {t} > TT ;",
   1016         "xtat txtb", // pos 0,9,0,9
   1017         "xTTaSS TTxUUb",
   1018 
   1019         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
   1020         "xtat txtb", // pos 2,9,3,8
   1021         "xtaSS TTxUUb",
   1022 
   1023         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
   1024         "xtat txtb", // pos 3,8,3,8
   1025         "xtaTT TTxTTb",
   1026     };
   1027 
   1028     // Array of 4n positions -- these go with the DATA array
   1029     // They are: contextStart, contextLimit, start, limit
   1030     int32_t POS[] = {
   1031         0, 9, 0, 9,
   1032         2, 9, 3, 8,
   1033         3, 8, 3, 8,
   1034     };
   1035 
   1036     int32_t n = (int32_t)(sizeof(DATA) / sizeof(DATA[0])) / 3;
   1037     for (int32_t i=0; i<n; i++) {
   1038         UErrorCode status = U_ZERO_ERROR;
   1039         UParseError parseError;
   1040         Transliterator *t = Transliterator::createFromRules("<ID>",
   1041                                 DATA[3*i], UTRANS_FORWARD, parseError, status);
   1042         if (U_FAILURE(status)) {
   1043             delete t;
   1044             errln("FAIL: RBT constructor");
   1045             return;
   1046         }
   1047         UTransPosition pos;
   1048         pos.contextStart= POS[4*i];
   1049         pos.contextLimit = POS[4*i+1];
   1050         pos.start = POS[4*i+2];
   1051         pos.limit = POS[4*i+3];
   1052         UnicodeString rsource(DATA[3*i+1]);
   1053         t->transliterate(rsource, pos, status);
   1054         if (U_FAILURE(status)) {
   1055             delete t;
   1056             errln("FAIL: transliterate");
   1057             return;
   1058         }
   1059         t->finishTransliteration(rsource, pos);
   1060         expectAux(DATA[3*i],
   1061                   DATA[3*i+1],
   1062                   rsource,
   1063                   DATA[3*i+2]);
   1064         delete t;
   1065     }
   1066 }
   1067 
   1068 /**
   1069  * Test the Hiragana-Katakana transliterator.
   1070  */
   1071 void TransliteratorTest::TestHiraganaKatakana(void) {
   1072     UParseError parseError;
   1073     UErrorCode status = U_ZERO_ERROR;
   1074     Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
   1075     Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
   1076     if (hk == 0 || kh == 0) {
   1077         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
   1078         delete hk;
   1079         delete kh;
   1080         return;
   1081     }
   1082 
   1083     // Array of 3n items
   1084     // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
   1085     const char* DATA[] = {
   1086         "both",
   1087         "\\u3042\\u3090\\u3099\\u3092\\u3050",
   1088         "\\u30A2\\u30F8\\u30F2\\u30B0",
   1089 
   1090         "kh",
   1091         "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
   1092         "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
   1093     };
   1094     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
   1095 
   1096     for (int32_t i=0; i<DATA_length; i+=3) {
   1097         UnicodeString h = CharsToUnicodeString(DATA[i+1]);
   1098         UnicodeString k = CharsToUnicodeString(DATA[i+2]);
   1099         switch (*DATA[i]) {
   1100         case 0x68: //'h': // Hiragana-Katakana
   1101             expect(*hk, h, k);
   1102             break;
   1103         case 0x6B: //'k': // Katakana-Hiragana
   1104             expect(*kh, k, h);
   1105             break;
   1106         case 0x62: //'b': // both
   1107             expect(*hk, h, k);
   1108             expect(*kh, k, h);
   1109             break;
   1110         }
   1111     }
   1112     delete hk;
   1113     delete kh;
   1114 }
   1115 
   1116 /**
   1117  * Test cloning / copy constructor of RBT.
   1118  */
   1119 void TransliteratorTest::TestCopyJ476(void) {
   1120     // The real test here is what happens when the destructors are
   1121     // called.  So we let one object get destructed, and check to
   1122     // see that its copy still works.
   1123     Transliterator *t2 = 0;
   1124     {
   1125         UParseError parseError;
   1126         UErrorCode status = U_ZERO_ERROR;
   1127         Transliterator *t1 = Transliterator::createFromRules("t1",
   1128             "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
   1129         if (U_FAILURE(status)) {
   1130             errln("FAIL: RBT constructor");
   1131             return;
   1132         }
   1133         t2 = t1->clone(); // Call copy constructor under the covers.
   1134         expect(*t1, "abcfoofoo", "ABcbar");
   1135         delete t1;
   1136     }
   1137     expect(*t2, "abcfoofoo", "ABcbar");
   1138     delete t2;
   1139 }
   1140 
   1141 /**
   1142  * Test inter-Indic transliterators.  These are composed.
   1143  * ICU4C Jitterbug 483.
   1144  */
   1145 void TransliteratorTest::TestInterIndic(void) {
   1146     UnicodeString ID("Devanagari-Gujarati", "");
   1147     UErrorCode status = U_ZERO_ERROR;
   1148     UParseError parseError;
   1149     Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
   1150     if (dg == 0) {
   1151         dataerrln("FAIL: createInstance(" + ID + ") returned NULL - " + u_errorName(status));
   1152         return;
   1153     }
   1154     UnicodeString id = dg->getID();
   1155     if (id != ID) {
   1156         errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
   1157     }
   1158     UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
   1159     UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
   1160     expect(*dg, dev, guj);
   1161     delete dg;
   1162 }
   1163 
   1164 /**
   1165  * Test filter syntax in IDs. (J918)
   1166  */
   1167 void TransliteratorTest::TestFilterIDs(void) {
   1168     // Array of 3n strings:
   1169     // <id>, <inverse id>, <input>, <expected output>
   1170     const char* DATA[] = {
   1171         "[aeiou]Any-Hex", // ID
   1172         "[aeiou]Hex-Any", // expected inverse ID
   1173         "quizzical",      // src
   1174         "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
   1175 
   1176         "[aeiou]Any-Hex;[^5]Hex-Any",
   1177         "[^5]Any-Hex;[aeiou]Hex-Any",
   1178         "quizzical",
   1179         "q\\u0075izzical",
   1180 
   1181         "[abc]Null",
   1182         "[abc]Null",
   1183         "xyz",
   1184         "xyz",
   1185     };
   1186     enum { DATA_length = sizeof(DATA) / sizeof(DATA[0]) };
   1187 
   1188     for (int i=0; i<DATA_length; i+=4) {
   1189         UnicodeString ID(DATA[i], "");
   1190         UnicodeString uID(DATA[i+1], "");
   1191         UnicodeString data2(DATA[i+2], "");
   1192         UnicodeString data3(DATA[i+3], "");
   1193         UParseError parseError;
   1194         UErrorCode status = U_ZERO_ERROR;
   1195         Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
   1196         if (t == 0) {
   1197             errln("FAIL: createInstance(" + ID + ") returned NULL");
   1198             return;
   1199         }
   1200         expect(*t, data2, data3);
   1201 
   1202         // Check the ID
   1203         if (ID != t->getID()) {
   1204             errln("FAIL: createInstance(" + ID + ").getID() => " +
   1205                   t->getID());
   1206         }
   1207 
   1208         // Check the inverse
   1209         Transliterator *u = t->createInverse(status);
   1210         if (u == 0) {
   1211             errln("FAIL: " + ID + ".createInverse() returned NULL");
   1212         } else if (u->getID() != uID) {
   1213             errln("FAIL: " + ID + ".createInverse().getID() => " +
   1214                   u->getID() + ", expected " + uID);
   1215         }
   1216 
   1217         delete t;
   1218         delete u;
   1219     }
   1220 }
   1221 
   1222 /**
   1223  * Test the case mapping transliterators.
   1224  */
   1225 void TransliteratorTest::TestCaseMap(void) {
   1226     UParseError parseError;
   1227     UErrorCode status = U_ZERO_ERROR;
   1228     Transliterator* toUpper =
   1229         Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
   1230     Transliterator* toLower =
   1231         Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
   1232     Transliterator* toTitle =
   1233         Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
   1234     if (toUpper==0 || toLower==0 || toTitle==0) {
   1235         errln("FAIL: createInstance returned NULL");
   1236         delete toUpper;
   1237         delete toLower;
   1238         delete toTitle;
   1239         return;
   1240     }
   1241 
   1242     expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
   1243            "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
   1244     expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
   1245            "the quick brown foX jumped over the lazY dogs.");
   1246     expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
   1247            "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
   1248 
   1249     delete toUpper;
   1250     delete toLower;
   1251     delete toTitle;
   1252 }
   1253 
   1254 /**
   1255  * Test the name mapping transliterators.
   1256  */
   1257 void TransliteratorTest::TestNameMap(void) {
   1258     UParseError parseError;
   1259     UErrorCode status = U_ZERO_ERROR;
   1260     Transliterator* uni2name =
   1261         Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
   1262     Transliterator* name2uni =
   1263         Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
   1264     if (uni2name==0 || name2uni==0) {
   1265         errln("FAIL: createInstance returned NULL");
   1266         delete uni2name;
   1267         delete name2uni;
   1268         return;
   1269     }
   1270 
   1271     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
   1272     expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
   1273            CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{END OF TRANSMISSION}\\\\N{CHARACTER TABULATION}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
   1274     expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{  CJK UNIFIED  IDEOGRAPH-4E01  }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
   1275            CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
   1276 
   1277     delete uni2name;
   1278     delete name2uni;
   1279 
   1280     // round trip
   1281     Transliterator* t =
   1282         Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
   1283     if (t==0) {
   1284         errln("FAIL: createInstance returned NULL");
   1285         delete t;
   1286         return;
   1287     }
   1288 
   1289     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
   1290     UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
   1291     expect(*t, s, s);
   1292     delete t;
   1293 }
   1294 
   1295 /**
   1296  * Test liberalized ID syntax.  1006c
   1297  */
   1298 void TransliteratorTest::TestLiberalizedID(void) {
   1299     // Some test cases have an expected getID() value of NULL.  This
   1300     // means I have disabled the test case for now.  This stuff is
   1301     // still under development, and I haven't decided whether to make
   1302     // getID() return canonical case yet.  It will all get rewritten
   1303     // with the move to Source-Target/Variant IDs anyway. [aliu]
   1304     const char* DATA[] = {
   1305         "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",
   1306         "  Null  ", "Null", "whitespace",
   1307         " Latin[a-z]-Greek  ", "[a-z]Latin-Greek", "inline filter",
   1308         "  null  ; latin-greek  ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",
   1309     };
   1310     const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
   1311     UParseError parseError;
   1312     UErrorCode status= U_ZERO_ERROR;
   1313     for (int32_t i=0; i<DATA_length; i+=3) {
   1314         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
   1315         if (t == 0) {
   1316             dataerrln(UnicodeString("FAIL: ") + DATA[i+2] +
   1317                   " cannot create ID \"" + DATA[i] + "\" - " + u_errorName(status));
   1318         } else {
   1319             UnicodeString exp;
   1320             if (DATA[i+1]) {
   1321                 exp = UnicodeString(DATA[i+1], "");
   1322             }
   1323             // Don't worry about getID() if the expected char*
   1324             // is NULL -- see above.
   1325             if (exp.length() == 0 || exp == t->getID()) {
   1326                 logln(UnicodeString("Ok: ") + DATA[i+2] +
   1327                       " create ID \"" + DATA[i] + "\" => \"" +
   1328                       exp + "\"");
   1329             } else {
   1330                 errln(UnicodeString("FAIL: ") + DATA[i+2] +
   1331                       " create ID \"" + DATA[i] + "\" => \"" +
   1332                       t->getID() + "\", exp \"" + exp + "\"");
   1333             }
   1334             delete t;
   1335         }
   1336     }
   1337 }
   1338 
   1339 /* test for Jitterbug 912 */
   1340 void TransliteratorTest::TestCreateInstance(){
   1341     const char* FORWARD = "F";
   1342     const char* REVERSE = "R";
   1343     const char* DATA[] = {
   1344         // Column 1: id
   1345         // Column 2: direction
   1346         // Column 3: expected ID, or "" if expect failure
   1347         "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
   1348 
   1349         // JB#2689: bad compound causes crash
   1350         "InvalidSource-InvalidTarget", FORWARD, "",
   1351         "InvalidSource-InvalidTarget", REVERSE, "",
   1352         "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
   1353         "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
   1354         "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
   1355         "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
   1356 
   1357         NULL
   1358     };
   1359 
   1360     for (int32_t i=0; DATA[i]; i+=3) {
   1361         UParseError err;
   1362         UErrorCode ec = U_ZERO_ERROR;
   1363         UnicodeString id(DATA[i]);
   1364         UTransDirection dir = (DATA[i+1]==FORWARD)?
   1365             UTRANS_FORWARD:UTRANS_REVERSE;
   1366         UnicodeString expID(DATA[i+2]);
   1367         Transliterator* t =
   1368             Transliterator::createInstance(id,dir,err,ec);
   1369         UnicodeString newID;
   1370         if (t) {
   1371             newID = t->getID();
   1372         }
   1373         UBool ok = (newID == expID);
   1374         if (!t) {
   1375             newID = u_errorName(ec);
   1376         }
   1377         if (ok) {
   1378             logln((UnicodeString)"Ok: createInstance(" +
   1379                   id + "," + DATA[i+1] + ") => " + newID);
   1380         } else {
   1381             dataerrln((UnicodeString)"FAIL: createInstance(" +
   1382                   id + "," + DATA[i+1] + ") => " + newID +
   1383                   ", expected " + expID);
   1384         }
   1385         delete t;
   1386     }
   1387 }
   1388 
   1389 /**
   1390  * Test the normalization transliterator.
   1391  */
   1392 void TransliteratorTest::TestNormalizationTransliterator() {
   1393     // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
   1394     // PLEASE KEEP THEM IN SYNC WITH BasicTest.
   1395     const char* CANON[] = {
   1396         // Input               Decomposed            Composed
   1397         "cat",                "cat",                "cat"               ,
   1398         "\\u00e0ardvark",      "a\\u0300ardvark",     "\\u00e0ardvark"    ,
   1399 
   1400         "\\u1e0a",             "D\\u0307",            "\\u1e0a"            , // D-dot_above
   1401         "D\\u0307",            "D\\u0307",            "\\u1e0a"            , // D dot_above
   1402 
   1403         "\\u1e0c\\u0307",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_below dot_above
   1404         "\\u1e0a\\u0323",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_above dot_below
   1405         "D\\u0307\\u0323",      "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D dot_below dot_above
   1406 
   1407         "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
   1408         "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
   1409 
   1410         "\\u1E14",             "E\\u0304\\u0300",      "\\u1E14"            , // E-macron-grave
   1411         "\\u0112\\u0300",       "E\\u0304\\u0300",      "\\u1E14"            , // E-macron + grave
   1412         "\\u00c8\\u0304",       "E\\u0300\\u0304",      "\\u00c8\\u0304"      , // E-grave + macron
   1413 
   1414         "\\u212b",             "A\\u030a",            "\\u00c5"            , // angstrom_sign
   1415         "\\u00c5",             "A\\u030a",            "\\u00c5"            , // A-ring
   1416 
   1417         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated with 3.0
   1418         "\\u00fd\\uFB03n",      "y\\u0301\\uFB03n",     "\\u00fd\\uFB03n"     , //updated with 3.0
   1419 
   1420         "Henry IV",           "Henry IV",           "Henry IV"          ,
   1421         "Henry \\u2163",       "Henry \\u2163",       "Henry \\u2163"      ,
   1422 
   1423         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
   1424         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
   1425         "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E"      , // hw_ka + hw_ten
   1426         "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E"      , // ka + hw_ten
   1427         "\\uFF76\\u3099",       "\\uFF76\\u3099",       "\\uFF76\\u3099"      , // hw_ka + ten
   1428 
   1429         "A\\u0300\\u0316",      "A\\u0316\\u0300",      "\\u00C0\\u0316"      ,
   1430         0 // end
   1431     };
   1432 
   1433     const char* COMPAT[] = {
   1434         // Input               Decomposed            Composed
   1435         "\\uFB4f",             "\\u05D0\\u05DC",       "\\u05D0\\u05DC"     , // Alef-Lamed vs. Alef, Lamed
   1436 
   1437         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated for 3.0
   1438         "\\u00fd\\uFB03n",      "y\\u0301ffin",        "\\u00fdffin"        , // ffi ligature -> f + f + i
   1439 
   1440         "Henry IV",           "Henry IV",           "Henry IV"          ,
   1441         "Henry \\u2163",       "Henry IV",           "Henry IV"          ,
   1442 
   1443         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
   1444         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
   1445 
   1446         "\\uFF76\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // hw_ka + ten
   1447         0 // end
   1448     };
   1449 
   1450     int32_t i;
   1451     UParseError parseError;
   1452     UErrorCode status = U_ZERO_ERROR;
   1453     Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
   1454     Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
   1455     if (!NFD || !NFC) {
   1456         errln("FAIL: createInstance failed");
   1457         delete NFD;
   1458         delete NFC;
   1459         return;
   1460     }
   1461     for (i=0; CANON[i]; i+=3) {
   1462         UnicodeString in = CharsToUnicodeString(CANON[i]);
   1463         UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
   1464         UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
   1465         expect(*NFD, in, expd);
   1466         expect(*NFC, in, expc);
   1467     }
   1468     delete NFD;
   1469     delete NFC;
   1470 
   1471     Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
   1472     Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
   1473     if (!NFKD || !NFKC) {
   1474         errln("FAIL: createInstance failed");
   1475         delete NFKD;
   1476         delete NFKC;
   1477         return;
   1478     }
   1479     for (i=0; COMPAT[i]; i+=3) {
   1480         UnicodeString in = CharsToUnicodeString(COMPAT[i]);
   1481         UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
   1482         UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
   1483         expect(*NFKD, in, expkd);
   1484         expect(*NFKC, in, expkc);
   1485     }
   1486     delete NFKD;
   1487     delete NFKC;
   1488 
   1489     UParseError pe;
   1490     status = U_ZERO_ERROR;
   1491     Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
   1492                                                        UTRANS_FORWARD,
   1493                                                        pe, status);
   1494     if (t == 0) {
   1495         errln("FAIL: createInstance failed");
   1496     }
   1497     expect(*t, CharsToUnicodeString("\\u010dx"),
   1498            CharsToUnicodeString("c\\u030C"));
   1499     delete t;
   1500 }
   1501 
   1502 /**
   1503  * Test compound RBT rules.
   1504  */
   1505 void TransliteratorTest::TestCompoundRBT(void) {
   1506     // Careful with spacing and ';' here:  Phrase this exactly
   1507     // as toRules() is going to return it.  If toRules() changes
   1508     // with regard to spacing or ';', then adjust this string.
   1509     UnicodeString rule("::Hex-Any;\n"
   1510                        "::Any-Lower;\n"
   1511                        "a > '.A.';\n"
   1512                        "b > '.B.';\n"
   1513                        "::[^t]Any-Upper;", "");
   1514     UParseError parseError;
   1515     UErrorCode status = U_ZERO_ERROR;
   1516     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
   1517     if (t == 0) {
   1518         errln("FAIL: createFromRules failed");
   1519         return;
   1520     }
   1521     expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
   1522            "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
   1523     UnicodeString r;
   1524     t->toRules(r, TRUE);
   1525     if (r == rule) {
   1526         logln((UnicodeString)"OK: toRules() => " + r);
   1527     } else {
   1528         errln((UnicodeString)"FAIL: toRules() => " + r +
   1529               ", expected " + rule);
   1530     }
   1531     delete t;
   1532 
   1533     // Now test toRules
   1534     t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
   1535     if (t == 0) {
   1536         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
   1537         return;
   1538     }
   1539     UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
   1540     t->toRules(r, TRUE);
   1541     if (r != exp) {
   1542         errln((UnicodeString)"FAIL: toRules() => " + r +
   1543               ", expected " + exp);
   1544     } else {
   1545         logln((UnicodeString)"OK: toRules() => " + r);
   1546     }
   1547     delete t;
   1548 
   1549     // Round trip the result of toRules
   1550     t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
   1551     if (t == 0) {
   1552         errln("FAIL: createFromRules #2 failed");
   1553         return;
   1554     } else {
   1555         logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
   1556     }
   1557 
   1558     // Test toRules again
   1559     t->toRules(r, TRUE);
   1560     if (r != exp) {
   1561         errln((UnicodeString)"FAIL: toRules() => " + r +
   1562               ", expected " + exp);
   1563     } else {
   1564         logln((UnicodeString)"OK: toRules() => " + r);
   1565     }
   1566 
   1567     delete t;
   1568 
   1569     // Test Foo(Bar) IDs.  Careful with spacing in id; make it conform
   1570     // to what the regenerated ID will look like.
   1571     UnicodeString id("Upper(Lower);(NFKC)", "");
   1572     t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
   1573     if (t == 0) {
   1574         errln("FAIL: createInstance #2 failed");
   1575         return;
   1576     }
   1577     if (t->getID() == id) {
   1578         logln((UnicodeString)"OK: created " + id);
   1579     } else {
   1580         errln((UnicodeString)"FAIL: createInstance(" + id +
   1581               ").getID() => " + t->getID());
   1582     }
   1583 
   1584     Transliterator *u = t->createInverse(status);
   1585     if (u == 0) {
   1586         errln("FAIL: createInverse failed");
   1587         delete t;
   1588         return;
   1589     }
   1590     exp = "NFKC();Lower(Upper)";
   1591     if (u->getID() == exp) {
   1592         logln((UnicodeString)"OK: createInverse(" + id + ") => " +
   1593               u->getID());
   1594     } else {
   1595         errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
   1596               u->getID());
   1597     }
   1598     delete t;
   1599     delete u;
   1600 }
   1601 
   1602 /**
   1603  * Compound filter semantics were orginially not implemented
   1604  * correctly.  Originally, each component filter f(i) is replaced by
   1605  * f'(i) = f(i) && g, where g is the filter for the compound
   1606  * transliterator.
   1607  *
   1608  * From Mark:
   1609  *
   1610  * Suppose and I have a transliterator X. Internally X is
   1611  * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
   1612  *
   1613  * The compound should convert all greek characters (through latin) to
   1614  * cyrillic, then lowercase the result. The filter should say "don't
   1615  * touch 'A' in the original". But because an intermediate result
   1616  * happens to go through "A", the Greek Alpha gets hung up.
   1617  */
   1618 void TransliteratorTest::TestCompoundFilter(void) {
   1619     UParseError parseError;
   1620     UErrorCode status = U_ZERO_ERROR;
   1621     Transliterator *t = Transliterator::createInstance
   1622         ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
   1623     if (t == 0) {
   1624         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
   1625         return;
   1626     }
   1627     t->adoptFilter(new UnicodeSet("[^A]", status));
   1628     if (U_FAILURE(status)) {
   1629         errln("FAIL: UnicodeSet ct failed");
   1630         delete t;
   1631         return;
   1632     }
   1633 
   1634     // Only the 'A' at index 1 should remain unchanged
   1635     expect(*t,
   1636            CharsToUnicodeString("BA\\u039A\\u0391"),
   1637            CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
   1638     delete t;
   1639 }
   1640 
   1641 void TransliteratorTest::TestRemove(void) {
   1642     UParseError parseError;
   1643     UErrorCode status = U_ZERO_ERROR;
   1644     Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
   1645     if (t == 0) {
   1646         errln("FAIL: createInstance failed");
   1647         return;
   1648     }
   1649 
   1650     expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
   1651 
   1652     // extra test for RemoveTransliterator::clone(), which at one point wasn't
   1653     // duplicating the filter
   1654     Transliterator* t2 = t->clone();
   1655     expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
   1656 
   1657     delete t;
   1658     delete t2;
   1659 }
   1660 
   1661 void TransliteratorTest::TestToRules(void) {
   1662     const char* RBT = "rbt";
   1663     const char* SET = "set";
   1664     static const char* DATA[] = {
   1665         RBT,
   1666         "$a=\\u4E61; [$a] > A;",
   1667         "[\\u4E61] > A;",
   1668 
   1669         RBT,
   1670         "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
   1671         "[[:Zs:][:Zl:]]{a} > A;",
   1672 
   1673         SET,
   1674         "[[:Zs:][:Zl:]]",
   1675         "[[:Zs:][:Zl:]]",
   1676 
   1677         SET,
   1678         "[:Ps:]",
   1679         "[:Ps:]",
   1680 
   1681         SET,
   1682         "[:L:]",
   1683         "[:L:]",
   1684 
   1685         SET,
   1686         "[[:L:]-[A]]",
   1687         "[[:L:]-[A]]",
   1688 
   1689         SET,
   1690         "[~[:Lu:][:Ll:]]",
   1691         "[~[:Lu:][:Ll:]]",
   1692 
   1693         SET,
   1694         "[~[a-z]]",
   1695         "[~[a-z]]",
   1696 
   1697         RBT,
   1698         "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
   1699         "[^[:Zs:]]{a} > A;",
   1700 
   1701         RBT,
   1702         "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
   1703         "[[a-z]-[:Zs:]]{a} > A;",
   1704 
   1705         RBT,
   1706         "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
   1707         "[[:Zs:]&[a-z]]{a} > A;",
   1708 
   1709         RBT,
   1710         "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
   1711         "[x[:Zs:]]{a} > A;",
   1712 
   1713         RBT,
   1714         "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
   1715         "$macron = \\u0304 ;"
   1716         "$evowel = [aeiouyAEIOUY] ;"
   1717         "$iotasub = \\u0345 ;"
   1718         "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
   1719         "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
   1720 
   1721         RBT,
   1722         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
   1723         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
   1724     };
   1725     static const int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
   1726 
   1727     for (int32_t d=0; d < DATA_length; d+=3) {
   1728         if (DATA[d] == RBT) {
   1729             // Transliterator test
   1730             UParseError parseError;
   1731             UErrorCode status = U_ZERO_ERROR;
   1732             Transliterator *t = Transliterator::createFromRules("ID",
   1733                                                                 UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status);
   1734             if (t == 0) {
   1735                 dataerrln("FAIL: createFromRules failed - %s", u_errorName(status));
   1736                 return;
   1737             }
   1738             UnicodeString rules, escapedRules;
   1739             t->toRules(rules, FALSE);
   1740             t->toRules(escapedRules, TRUE);
   1741             UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
   1742             UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
   1743             if (rules == expRules) {
   1744                 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
   1745                       " => " + rules);
   1746             } else {
   1747                 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
   1748                       " => " + rules + ", exp " + expRules);
   1749             }
   1750             if (escapedRules == expEscapedRules) {
   1751                 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
   1752                       " => " + escapedRules);
   1753             } else {
   1754                 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
   1755                       " => " + escapedRules + ", exp " + expEscapedRules);
   1756             }
   1757             delete t;
   1758 
   1759         } else {
   1760             // UnicodeSet test
   1761             UErrorCode status = U_ZERO_ERROR;
   1762             UnicodeString pat(DATA[d+1], -1, US_INV);
   1763             UnicodeString expToPat(DATA[d+2], -1, US_INV);
   1764             UnicodeSet set(pat, status);
   1765             if (U_FAILURE(status)) {
   1766                 errln("FAIL: UnicodeSet ct failed");
   1767                 return;
   1768             }
   1769             // Adjust spacing etc. as necessary.
   1770             UnicodeString toPat;
   1771             set.toPattern(toPat);
   1772             if (expToPat == toPat) {
   1773                 logln((UnicodeString)"Ok: " + pat +
   1774                       " => " + toPat);
   1775             } else {
   1776                 errln((UnicodeString)"FAIL: " + pat +
   1777                       " => " + prettify(toPat, TRUE) +
   1778                       ", exp " + prettify(pat, TRUE));
   1779             }
   1780         }
   1781     }
   1782 }
   1783 
   1784 void TransliteratorTest::TestContext() {
   1785     UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
   1786     expect("de > x; {d}e > y;",
   1787            "de",
   1788            "ye",
   1789            &pos);
   1790 
   1791     expect("ab{c} > z;",
   1792            "xadabdabcy",
   1793            "xadabdabzy");
   1794 }
   1795 
   1796 void TransliteratorTest::TestSupplemental() {
   1797 
   1798     expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
   1799                                 "a > $a; $s > i;"),
   1800            CharsToUnicodeString("ab\\U0001030Fx"),
   1801            CharsToUnicodeString("\\U00010300bix"));
   1802 
   1803     expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
   1804                                 "$b=[A-Z\\U00010400-\\U0001044D];"
   1805                                 "($a)($b) > $2 $1;"),
   1806            CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
   1807            CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
   1808 
   1809     // k|ax\\U00010300xm
   1810 
   1811     // k|a\\U00010400\\U00010300xm
   1812     // ky|\\U00010400\\U00010300xm
   1813     // ky\\U00010400|\\U00010300xm
   1814 
   1815     // ky\\U00010400|\\U00010300\\U00010400m
   1816     // ky\\U00010400y|\\U00010400m
   1817     expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
   1818                                 "$a {x} > | @ \\U00010400;"
   1819                                 "{$a} [^\\u0000-\\uFFFF] > y;"),
   1820            CharsToUnicodeString("kax\\U00010300xm"),
   1821            CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
   1822 
   1823     expectT("Any-Name",
   1824            CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
   1825            UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
   1826 
   1827     expectT("Any-Hex/Unicode",
   1828            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
   1829            UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
   1830 
   1831     expectT("Any-Hex/C",
   1832            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
   1833            UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
   1834 
   1835     expectT("Any-Hex/Perl",
   1836            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
   1837            UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
   1838 
   1839     expectT("Any-Hex/Java",
   1840            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
   1841            UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
   1842 
   1843     expectT("Any-Hex/XML",
   1844            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
   1845            "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
   1846 
   1847     expectT("Any-Hex/XML10",
   1848            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
   1849            "&#66352;&#1113856;&#917601;&#160;");
   1850 
   1851     expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
   1852            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
   1853            CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
   1854 }
   1855 
   1856 void TransliteratorTest::TestQuantifier() {
   1857 
   1858     // Make sure @ in a quantified anteContext works
   1859     expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
   1860            "AAAAAb",
   1861            "aaa(aac)");
   1862 
   1863     // Make sure @ in a quantified postContext works
   1864     expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
   1865            "baaaaa",
   1866            "caa(aaa)");
   1867 
   1868     // Make sure @ in a quantified postContext with seg ref works
   1869     expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
   1870            "baaaaa",
   1871            "baa(aaa)");
   1872 
   1873     // Make sure @ past ante context doesn't enter ante context
   1874     UTransPosition pos = {0, 5, 3, 5};
   1875     expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
   1876            "xxxab",
   1877            "xxx(ac)",
   1878            &pos);
   1879 
   1880     // Make sure @ past post context doesn't pass limit
   1881     UTransPosition pos2 = {0, 4, 0, 2};
   1882     expect("{b} a+ > c @@ |; x > y; a > A;",
   1883            "baxx",
   1884            "caxx",
   1885            &pos2);
   1886 
   1887     // Make sure @ past post context doesn't enter post context
   1888     expect("{b} a+ > c @@ |; x > y; a > A;",
   1889            "baxx",
   1890            "cayy");
   1891 
   1892     expect("(ab)? c > d;",
   1893            "c abc ababc",
   1894            "d d abd");
   1895 
   1896     // NOTE: The (ab)+ when referenced just yields a single "ab",
   1897     // not the full sequence of them.  This accords with perl behavior.
   1898     expect("(ab)+ {x} > '(' $1 ')';",
   1899            "x abx ababxy",
   1900            "x ab(ab) abab(ab)y");
   1901 
   1902     expect("b+ > x;",
   1903            "ac abc abbc abbbc",
   1904            "ac axc axc axc");
   1905 
   1906     expect("[abc]+ > x;",
   1907            "qac abrc abbcs abtbbc",
   1908            "qx xrx xs xtx");
   1909 
   1910     expect("q{(ab)+} > x;",
   1911            "qa qab qaba qababc qaba",
   1912            "qa qx qxa qxc qxa");
   1913 
   1914     expect("q(ab)* > x;",
   1915            "qa qab qaba qababc",
   1916            "xa x xa xc");
   1917 
   1918     // NOTE: The (ab)+ when referenced just yields a single "ab",
   1919     // not the full sequence of them.  This accords with perl behavior.
   1920     expect("q(ab)* > '(' $1 ')';",
   1921            "qa qab qaba qababc",
   1922            "()a (ab) (ab)a (ab)c");
   1923 
   1924     // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
   1925     // quoted string
   1926     expect("'ab'+ > x;",
   1927            "bb ab ababb",
   1928            "bb x xb");
   1929 
   1930     // $foo+ and $foo* -- the quantifier should apply to the entire
   1931     // variable reference
   1932     expect("$var = ab; $var+ > x;",
   1933            "bb ab ababb",
   1934            "bb x xb");
   1935 }
   1936 
   1937 class TestTrans : public Transliterator {
   1938 public:
   1939     TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
   1940     }
   1941     virtual Transliterator* clone(void) const {
   1942         return new TestTrans(getID());
   1943     }
   1944     virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets,
   1945         UBool /*isIncremental*/) const
   1946     {
   1947         offsets.start = offsets.limit;
   1948     }
   1949     virtual UClassID getDynamicClassID() const;
   1950     static UClassID U_EXPORT2 getStaticClassID();
   1951 };
   1952 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
   1953 
   1954 /**
   1955  * Test Source-Target/Variant.
   1956  */
   1957 void TransliteratorTest::TestSTV(void) {
   1958     int32_t ns = Transliterator::countAvailableSources();
   1959     if (ns < 0 || ns > 255) {
   1960         errln((UnicodeString)"FAIL: Bad source count: " + ns);
   1961         return;
   1962     }
   1963     int32_t i, j;
   1964     for (i=0; i<ns; ++i) {
   1965         UnicodeString source;
   1966         Transliterator::getAvailableSource(i, source);
   1967         logln((UnicodeString)"" + i + ": " + source);
   1968         if (source.length() == 0) {
   1969             errln("FAIL: empty source");
   1970             continue;
   1971         }
   1972         int32_t nt = Transliterator::countAvailableTargets(source);
   1973         if (nt < 0 || nt > 255) {
   1974             errln((UnicodeString)"FAIL: Bad target count: " + nt);
   1975             continue;
   1976         }
   1977         for (int32_t j=0; j<nt; ++j) {
   1978             UnicodeString target;
   1979             Transliterator::getAvailableTarget(j, source, target);
   1980             logln((UnicodeString)" " + j + ": " + target);
   1981             if (target.length() == 0) {
   1982                 errln("FAIL: empty target");
   1983                 continue;
   1984             }
   1985             int32_t nv = Transliterator::countAvailableVariants(source, target);
   1986             if (nv < 0 || nv > 255) {
   1987                 errln((UnicodeString)"FAIL: Bad variant count: " + nv);
   1988                 continue;
   1989             }
   1990             for (int32_t k=0; k<nv; ++k) {
   1991                 UnicodeString variant;
   1992                 Transliterator::getAvailableVariant(k, source, target, variant);
   1993                 if (variant.length() == 0) {
   1994                     logln((UnicodeString)"  " + k + ": <empty>");
   1995                 } else {
   1996                     logln((UnicodeString)"  " + k + ": " + variant);
   1997                 }
   1998             }
   1999         }
   2000     }
   2001 
   2002     // Test registration
   2003     const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
   2004     const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
   2005     const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
   2006     for (i=0; i<3; ++i) {
   2007         Transliterator *t = new TestTrans(IDS[i]);
   2008         if (t == 0) {
   2009             errln("FAIL: out of memory");
   2010             return;
   2011         }
   2012         if (t->getID() != IDS[i]) {
   2013             errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
   2014             delete t;
   2015             return;
   2016         }
   2017         Transliterator::registerInstance(t);
   2018         UErrorCode status = U_ZERO_ERROR;
   2019         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
   2020         if (t == NULL) {
   2021             errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
   2022                   IDS[i]);
   2023         } else {
   2024             logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
   2025                   IDS[i]);
   2026             delete t;
   2027         }
   2028         Transliterator::unregister(IDS[i]);
   2029         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
   2030         if (t != NULL) {
   2031             errln((UnicodeString)"FAIL: Unregistration failed for ID " +
   2032                   IDS[i]);
   2033             delete t;
   2034         }
   2035     }
   2036 
   2037     // Make sure getAvailable API reflects removal
   2038     int32_t n = Transliterator::countAvailableIDs();
   2039     for (i=0; i<n; ++i) {
   2040         UnicodeString id = Transliterator::getAvailableID(i);
   2041         for (j=0; j<3; ++j) {
   2042             if (id.caseCompare(FULL_IDS[j],0)==0) {
   2043                 errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
   2044             }
   2045         }
   2046     }
   2047     n = Transliterator::countAvailableTargets("Any");
   2048     for (i=0; i<n; ++i) {
   2049         UnicodeString t;
   2050         Transliterator::getAvailableTarget(i, "Any", t);
   2051         if (t.caseCompare(IDS[0],0)==0) {
   2052             errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
   2053         }
   2054     }
   2055     n = Transliterator::countAvailableSources();
   2056     for (i=0; i<n; ++i) {
   2057         UnicodeString s;
   2058         Transliterator::getAvailableSource(i, s);
   2059         for (j=0; j<3; ++j) {
   2060             if (SOURCES[j] == NULL) continue;
   2061             if (s.caseCompare(SOURCES[j],0)==0) {
   2062                 errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
   2063             }
   2064         }
   2065     }
   2066 }
   2067 
   2068 /**
   2069  * Test inverse of Greek-Latin; Title()
   2070  */
   2071 void TransliteratorTest::TestCompoundInverse(void) {
   2072     UParseError parseError;
   2073     UErrorCode status = U_ZERO_ERROR;
   2074     Transliterator *t = Transliterator::createInstance
   2075         ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
   2076     if (t == 0) {
   2077         dataerrln("FAIL: createInstance - %s", u_errorName(status));
   2078         return;
   2079     }
   2080     UnicodeString exp("(Title);Latin-Greek");
   2081     if (t->getID() == exp) {
   2082         logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
   2083               t->getID());
   2084     } else {
   2085         errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
   2086               t->getID() + "\", expected \"" + exp + "\"");
   2087     }
   2088     delete t;
   2089 }
   2090 
   2091 /**
   2092  * Test NFD chaining with RBT
   2093  */
   2094 void TransliteratorTest::TestNFDChainRBT() {
   2095     UParseError pe;
   2096     UErrorCode ec = U_ZERO_ERROR;
   2097     Transliterator* t = Transliterator::createFromRules(
   2098                                "TEST", "::NFD; aa > Q; a > q;",
   2099                                UTRANS_FORWARD, pe, ec);
   2100     if (t == NULL || U_FAILURE(ec)) {
   2101         errln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
   2102         return;
   2103     }
   2104     expect(*t, "aa", "Q");
   2105     delete t;
   2106 
   2107     // TEMPORARY TESTS -- BEING DEBUGGED
   2108 //=-    UnicodeString s, s2;
   2109 //=-    t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
   2110 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
   2111 //=-    s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
   2112 //=-    expect(*t, s, s2);
   2113 //=-    delete t;
   2114 //=-
   2115 //=-    t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
   2116 //=-    expect(*t, s2, s);
   2117 //=-    delete t;
   2118 //=-
   2119 //=-    t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
   2120 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
   2121 //=-    expect(*t, s, s);
   2122 //=-    delete t;
   2123 
   2124 //    const char* source[] = {
   2125 //        /*
   2126 //        "\\u015Br\\u012Bmad",
   2127 //        "bhagavadg\\u012Bt\\u0101",
   2128 //        "adhy\\u0101ya",
   2129 //        "arjuna",
   2130 //        "vi\\u1E63\\u0101da",
   2131 //        "y\\u014Dga",
   2132 //        "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
   2133 //        "uv\\u0101cr\\u0325",
   2134 //        */
   2135 //        "rmk\\u1E63\\u0113t",
   2136 //      //"dharmak\\u1E63\\u0113tr\\u0113",
   2137 //        /*
   2138 //        "kuruk\\u1E63\\u0113tr\\u0113",
   2139 //        "samav\\u0113t\\u0101",
   2140 //        "yuyutsava-\\u1E25",
   2141 //        "m\\u0101mak\\u0101-\\u1E25",
   2142 //     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
   2143 //        "kimakurvata",
   2144 //        "san\\u0304java",
   2145 //        */
   2146 //
   2147 //        0
   2148 //    };
   2149 //    const char* expected[] = {
   2150 //        /*
   2151 //        "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
   2152 //        "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
   2153 //        "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
   2154 //        "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
   2155 //        "\\u0935\\u093f\\u0937\\u093e\\u0926",
   2156 //        "\\u092f\\u094b\\u0917",
   2157 //        "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
   2158 //        "\\u0909\\u0935\\u093E\\u091A\\u0943",
   2159 //        */
   2160 //        "\\u0927",
   2161 //        //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
   2162 //        /*
   2163 //        "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
   2164 //        "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
   2165 //        "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
   2166 //        "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
   2167 //    //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
   2168 //        "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
   2169 //        "\\u0938\\u0902\\u091c\\u0935",
   2170 //        */
   2171 //        0
   2172 //    };
   2173 //    UErrorCode status = U_ZERO_ERROR;
   2174 //    UParseError parseError;
   2175 //    UnicodeString message;
   2176 //    Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
   2177 //    Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
   2178 //    if(U_FAILURE(status)){
   2179 //        errln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
   2180 //        errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
   2181 //        delete latinToDevToLatin;
   2182 //        delete devToLatinToDev;
   2183 //        return;
   2184 //    }
   2185 //    UnicodeString gotResult;
   2186 //    for(int i= 0; source[i] != 0; i++){
   2187 //        gotResult = source[i];
   2188 //        expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
   2189 //        expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
   2190 //    }
   2191 //    delete latinToDevToLatin;
   2192 //    delete devToLatinToDev;
   2193 }
   2194 
   2195 /**
   2196  * Inverse of "Null" should be "Null". (J21)
   2197  */
   2198 void TransliteratorTest::TestNullInverse() {
   2199     UParseError pe;
   2200     UErrorCode ec = U_ZERO_ERROR;
   2201     Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
   2202     if (t == 0 || U_FAILURE(ec)) {
   2203         errln("FAIL: createInstance");
   2204         return;
   2205     }
   2206     Transliterator *u = t->createInverse(ec);
   2207     if (u == 0 || U_FAILURE(ec)) {
   2208         errln("FAIL: createInverse");
   2209         delete t;
   2210         return;
   2211     }
   2212     if (u->getID() != "Null") {
   2213         errln("FAIL: Inverse of Null should be Null");
   2214     }
   2215     delete t;
   2216     delete u;
   2217 }
   2218 
   2219 /**
   2220  * Check ID of inverse of alias. (J22)
   2221  */
   2222 void TransliteratorTest::TestAliasInverseID() {
   2223     UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
   2224     UParseError pe;
   2225     UErrorCode ec = U_ZERO_ERROR;
   2226     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
   2227     if (t == 0 || U_FAILURE(ec)) {
   2228         dataerrln("FAIL: createInstance - %s", u_errorName(ec));
   2229         return;
   2230     }
   2231     Transliterator *u = t->createInverse(ec);
   2232     if (u == 0 || U_FAILURE(ec)) {
   2233         errln("FAIL: createInverse");
   2234         delete t;
   2235         return;
   2236     }
   2237     UnicodeString exp = "Hangul-Latin";
   2238     UnicodeString got = u->getID();
   2239     if (got != exp) {
   2240         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
   2241               ", expected " + exp);
   2242     }
   2243     delete t;
   2244     delete u;
   2245 }
   2246 
   2247 /**
   2248  * Test IDs of inverses of compound transliterators. (J20)
   2249  */
   2250 void TransliteratorTest::TestCompoundInverseID() {
   2251     UnicodeString ID = "Latin-Jamo;NFC(NFD)";
   2252     UParseError pe;
   2253     UErrorCode ec = U_ZERO_ERROR;
   2254     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
   2255     if (t == 0 || U_FAILURE(ec)) {
   2256         dataerrln("FAIL: createInstance - %s", u_errorName(ec));
   2257         return;
   2258     }
   2259     Transliterator *u = t->createInverse(ec);
   2260     if (u == 0 || U_FAILURE(ec)) {
   2261         errln("FAIL: createInverse");
   2262         delete t;
   2263         return;
   2264     }
   2265     UnicodeString exp = "NFD(NFC);Jamo-Latin";
   2266     UnicodeString got = u->getID();
   2267     if (got != exp) {
   2268         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
   2269               ", expected " + exp);
   2270     }
   2271     delete t;
   2272     delete u;
   2273 }
   2274 
   2275 /**
   2276  * Test undefined variable.
   2277 
   2278  */
   2279 void TransliteratorTest::TestUndefinedVariable() {
   2280     UnicodeString rule = "$initial } a <> \\u1161;";
   2281     UParseError pe;
   2282     UErrorCode ec = U_ZERO_ERROR;
   2283     Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
   2284     delete t;
   2285     if (U_FAILURE(ec)) {
   2286         logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
   2287               u_errorName(ec));
   2288         return;
   2289     }
   2290     errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
   2291           u_errorName(ec));
   2292 }
   2293 
   2294 /**
   2295  * Test empty context.
   2296  */
   2297 void TransliteratorTest::TestEmptyContext() {
   2298     expect(" { a } > b;", "xay a ", "xby b ");
   2299 }
   2300 
   2301 /**
   2302 * Test compound filter ID syntax
   2303 */
   2304 void TransliteratorTest::TestCompoundFilterID(void) {
   2305     static const char* DATA[] = {
   2306         // Col. 1 = ID or rule set (latter must start with #)
   2307 
   2308         // = columns > 1 are null if expect col. 1 to be illegal =
   2309 
   2310         // Col. 2 = direction, "F..." or "R..."
   2311         // Col. 3 = source string
   2312         // Col. 4 = exp result
   2313 
   2314         "[abc]; [abc]", NULL, NULL, NULL, // multiple filters
   2315         "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
   2316         "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
   2317         "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
   2318         "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
   2319         "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
   2320         NULL,
   2321     };
   2322 
   2323     for (int32_t i=0; DATA[i]; i+=4) {
   2324         UnicodeString id = CharsToUnicodeString(DATA[i]);
   2325         UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
   2326             UTRANS_REVERSE : UTRANS_FORWARD;
   2327         UnicodeString source;
   2328         UnicodeString exp;
   2329         if (DATA[i+2] != NULL) {
   2330             source = CharsToUnicodeString(DATA[i+2]);
   2331             exp = CharsToUnicodeString(DATA[i+3]);
   2332         }
   2333         UBool expOk = (DATA[i+1] != NULL);
   2334         Transliterator* t = NULL;
   2335         UParseError pe;
   2336         UErrorCode ec = U_ZERO_ERROR;
   2337         if (id.charAt(0) == 0x23/*#*/) {
   2338             t = Transliterator::createFromRules("ID", id, direction, pe, ec);
   2339         } else {
   2340             t = Transliterator::createInstance(id, direction, pe, ec);
   2341         }
   2342         UBool ok = (t != NULL && U_SUCCESS(ec));
   2343         UnicodeString transID;
   2344         if (t!=0) {
   2345             transID = t->getID();
   2346         }
   2347         else {
   2348             transID = UnicodeString("NULL", "");
   2349         }
   2350         if (ok == expOk) {
   2351             logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
   2352                   u_errorName(ec));
   2353             if (source.length() != 0) {
   2354                 expect(*t, source, exp);
   2355             }
   2356             delete t;
   2357         } else {
   2358             dataerrln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
   2359                   u_errorName(ec));
   2360         }
   2361     }
   2362 }
   2363 
   2364 /**
   2365  * Test new property set syntax
   2366  */
   2367 void TransliteratorTest::TestPropertySet() {
   2368     expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
   2369     expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
   2370            "[ a stitch ]\n[ in time ]\r[ saves 9]");
   2371 }
   2372 
   2373 /**
   2374  * Test various failure points of the new 2.0 engine.
   2375  */
   2376 void TransliteratorTest::TestNewEngine() {
   2377     UParseError pe;
   2378     UErrorCode ec = U_ZERO_ERROR;
   2379     Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
   2380     if (t == 0 || U_FAILURE(ec)) {
   2381         dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec));
   2382         return;
   2383     }
   2384     // Katakana should be untouched
   2385     expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
   2386            CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
   2387 
   2388     delete t;
   2389 
   2390 #if 1
   2391     // This test will only work if Transliterator.ROLLBACK is
   2392     // true.  Otherwise, this test will fail, revealing a
   2393     // limitation of global filters in incremental mode.
   2394     Transliterator *a =
   2395         Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
   2396     Transliterator *A =
   2397         Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
   2398     if (U_FAILURE(ec)) {
   2399         delete a;
   2400         delete A;
   2401         return;
   2402     }
   2403 
   2404     Transliterator* array[3];
   2405     array[0] = a;
   2406     array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
   2407     array[2] = A;
   2408     if (U_FAILURE(ec)) {
   2409         errln("FAIL: createInstance NFD");
   2410         delete a;
   2411         delete A;
   2412         delete array[1];
   2413         return;
   2414     }
   2415 
   2416     t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
   2417     if (U_FAILURE(ec)) {
   2418         errln("FAIL: UnicodeSet constructor");
   2419         delete a;
   2420         delete A;
   2421         delete array[1];
   2422         delete t;
   2423         return;
   2424     }
   2425 
   2426     expect(*t, "aAaA", "bAbA");
   2427 
   2428     assertTrue("countElements", t->countElements() == 3);
   2429     assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
   2430     assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
   2431     assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
   2432     assertSuccess("getElement", ec);
   2433 
   2434     delete a;
   2435     delete A;
   2436     delete array[1];
   2437     delete t;
   2438 #endif
   2439 
   2440     expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
   2441            "a",
   2442            "ax");
   2443 
   2444     UnicodeString gr = CharsToUnicodeString(
   2445         "$ddot = \\u0308 ;"
   2446         "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
   2447         "$rough = \\u0314 ;"
   2448         "($lcgvowel+ $ddot?) $rough > h | $1 ;"
   2449         "\\u03b1 <> a ;"
   2450         "$rough <> h ;");
   2451 
   2452     expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
   2453 }
   2454 
   2455 /**
   2456  * Test quantified segment behavior.  We want:
   2457  * ([abc])+ > x $1 x; applied to "cba" produces "xax"
   2458  */
   2459 void TransliteratorTest::TestQuantifiedSegment(void) {
   2460     // The normal case
   2461     expect("([abc]+) > x $1 x;", "cba", "xcbax");
   2462 
   2463     // The tricky case; the quantifier is around the segment
   2464     expect("([abc])+ > x $1 x;", "cba", "xax");
   2465 
   2466     // Tricky case in reverse direction
   2467     expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
   2468 
   2469     // Check post-context segment
   2470     expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
   2471 
   2472     // Test toRule/toPattern for non-quantified segment.
   2473     // Careful with spacing here.
   2474     UnicodeString r("([a-c]){q} > x $1 x;");
   2475     UParseError pe;
   2476     UErrorCode ec = U_ZERO_ERROR;
   2477     Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
   2478     if (U_FAILURE(ec)) {
   2479         errln("FAIL: createFromRules");
   2480         delete t;
   2481         return;
   2482     }
   2483     UnicodeString rr;
   2484     t->toRules(rr, TRUE);
   2485     if (r != rr) {
   2486         errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
   2487     } else {
   2488         logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
   2489     }
   2490     delete t;
   2491 
   2492     // Test toRule/toPattern for quantified segment.
   2493     // Careful with spacing here.
   2494     r = "([a-c])+{q} > x $1 x;";
   2495     t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
   2496     if (U_FAILURE(ec)) {
   2497         errln("FAIL: createFromRules");
   2498         delete t;
   2499         return;
   2500     }
   2501     t->toRules(rr, TRUE);
   2502     if (r != rr) {
   2503         errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
   2504     } else {
   2505         logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
   2506     }
   2507     delete t;
   2508 }
   2509 
   2510 //======================================================================
   2511 // Ram's tests
   2512 //======================================================================
   2513 void TransliteratorTest::TestDevanagariLatinRT(){
   2514     const int MAX_LEN= 52;
   2515     const char* const source[MAX_LEN] = {
   2516         "bh\\u0101rata",
   2517         "kra",
   2518         "k\\u1E63a",
   2519         "khra",
   2520         "gra",
   2521         "\\u1E45ra",
   2522         "cra",
   2523         "chra",
   2524         "j\\u00F1a",
   2525         "jhra",
   2526         "\\u00F1ra",
   2527         "\\u1E6Dya",
   2528         "\\u1E6Dhra",
   2529         "\\u1E0Dya",
   2530       //"r\\u0323ya", // \u095c is not valid in Devanagari
   2531         "\\u1E0Dhya",
   2532         "\\u1E5Bhra",
   2533         "\\u1E47ra",
   2534         "tta",
   2535         "thra",
   2536         "dda",
   2537         "dhra",
   2538         "nna",
   2539         "pra",
   2540         "phra",
   2541         "bra",
   2542         "bhra",
   2543         "mra",
   2544         "\\u1E49ra",
   2545       //"l\\u0331ra",
   2546         "yra",
   2547         "\\u1E8Fra",
   2548       //"l-",
   2549         "vra",
   2550         "\\u015Bra",
   2551         "\\u1E63ra",
   2552         "sra",
   2553         "hma",
   2554         "\\u1E6D\\u1E6Da",
   2555         "\\u1E6D\\u1E6Dha",
   2556         "\\u1E6Dh\\u1E6Dha",
   2557         "\\u1E0D\\u1E0Da",
   2558         "\\u1E0D\\u1E0Dha",
   2559         "\\u1E6Dya",
   2560         "\\u1E6Dhya",
   2561         "\\u1E0Dya",
   2562         "\\u1E0Dhya",
   2563         // Not roundtrippable --
   2564         // \\u0939\\u094d\\u094d\\u092E  - hma
   2565         // \\u0939\\u094d\\u092E         - hma
   2566         // CharsToUnicodeString("hma"),
   2567         "hya",
   2568         "\\u015Br\\u0325",
   2569         "\\u015Bca",
   2570         "\\u0115",
   2571         "san\\u0304j\\u012Bb s\\u0113nagupta",
   2572         "\\u0101nand vaddir\\u0101ju",
   2573         "\\u0101",
   2574         "a"
   2575     };
   2576     const char* const expected[MAX_LEN] = {
   2577         "\\u092D\\u093E\\u0930\\u0924",   /* bha\\u0304rata */
   2578         "\\u0915\\u094D\\u0930",          /* kra         */
   2579         "\\u0915\\u094D\\u0937",          /* ks\\u0323a  */
   2580         "\\u0916\\u094D\\u0930",          /* khra        */
   2581         "\\u0917\\u094D\\u0930",          /* gra         */
   2582         "\\u0919\\u094D\\u0930",          /* n\\u0307ra  */
   2583         "\\u091A\\u094D\\u0930",          /* cra         */
   2584         "\\u091B\\u094D\\u0930",          /* chra        */
   2585         "\\u091C\\u094D\\u091E",          /* jn\\u0303a  */
   2586         "\\u091D\\u094D\\u0930",          /* jhra        */
   2587         "\\u091E\\u094D\\u0930",          /* n\\u0303ra  */
   2588         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
   2589         "\\u0920\\u094D\\u0930",          /* t\\u0323hra */
   2590         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
   2591       //"\\u095C\\u094D\\u092F",        /* r\\u0323ya  */ // \u095c is not valid in Devanagari
   2592         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
   2593         "\\u0922\\u093C\\u094D\\u0930",   /* r\\u0323hra */
   2594         "\\u0923\\u094D\\u0930",          /* n\\u0323ra  */
   2595         "\\u0924\\u094D\\u0924",          /* tta         */
   2596         "\\u0925\\u094D\\u0930",          /* thra        */
   2597         "\\u0926\\u094D\\u0926",          /* dda         */
   2598         "\\u0927\\u094D\\u0930",          /* dhra        */
   2599         "\\u0928\\u094D\\u0928",          /* nna         */
   2600         "\\u092A\\u094D\\u0930",          /* pra         */
   2601         "\\u092B\\u094D\\u0930",          /* phra        */
   2602         "\\u092C\\u094D\\u0930",          /* bra         */
   2603         "\\u092D\\u094D\\u0930",          /* bhra        */
   2604         "\\u092E\\u094D\\u0930",          /* mra         */
   2605         "\\u0929\\u094D\\u0930",          /* n\\u0331ra  */
   2606       //"\\u0934\\u094D\\u0930",        /* l\\u0331ra  */
   2607         "\\u092F\\u094D\\u0930",          /* yra         */
   2608         "\\u092F\\u093C\\u094D\\u0930",   /* y\\u0307ra  */
   2609       //"l-",
   2610         "\\u0935\\u094D\\u0930",          /* vra         */
   2611         "\\u0936\\u094D\\u0930",          /* s\\u0301ra  */
   2612         "\\u0937\\u094D\\u0930",          /* s\\u0323ra  */
   2613         "\\u0938\\u094D\\u0930",          /* sra         */
   2614         "\\u0939\\u094d\\u092E",          /* hma         */
   2615         "\\u091F\\u094D\\u091F",          /* t\\u0323t\\u0323a  */
   2616         "\\u091F\\u094D\\u0920",          /* t\\u0323t\\u0323ha */
   2617         "\\u0920\\u094D\\u0920",          /* t\\u0323ht\\u0323ha*/
   2618         "\\u0921\\u094D\\u0921",          /* d\\u0323d\\u0323a  */
   2619         "\\u0921\\u094D\\u0922",          /* d\\u0323d\\u0323ha */
   2620         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
   2621         "\\u0920\\u094D\\u092F",          /* t\\u0323hya */
   2622         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
   2623         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
   2624      // "hma",                         /* hma         */
   2625         "\\u0939\\u094D\\u092F",          /* hya         */
   2626         "\\u0936\\u0943",                 /* s\\u0301r\\u0325a  */
   2627         "\\u0936\\u094D\\u091A",          /* s\\u0301ca  */
   2628         "\\u090d",                        /* e\\u0306    */
   2629         "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
   2630         "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
   2631         "\\u0906",
   2632         "\\u0905",
   2633     };
   2634     UErrorCode status = U_ZERO_ERROR;
   2635     UParseError parseError;
   2636     UnicodeString message;
   2637     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
   2638     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
   2639     if(U_FAILURE(status)){
   2640         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
   2641         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
   2642         return;
   2643     }
   2644     UnicodeString gotResult;
   2645     for(int i= 0; i<MAX_LEN; i++){
   2646         gotResult = source[i];
   2647         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
   2648         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
   2649     }
   2650     delete latinToDev;
   2651     delete devToLatin;
   2652 }
   2653 
   2654 void TransliteratorTest::TestTeluguLatinRT(){
   2655     const int MAX_LEN=10;
   2656     const char* const source[MAX_LEN] = {
   2657         "raghur\\u0101m vi\\u015Bvan\\u0101dha",                         /* Raghuram Viswanadha    */
   2658         "\\u0101nand vaddir\\u0101ju",                                   /* Anand Vaddiraju        */
   2659         "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da",                      /* Rajeev Kasarabada      */
   2660         "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da",                    /* sanjeev kasarabada     */
   2661         "san\\u0304j\\u012Bb sen'gupta",                                 /* sanjib sengupata       */
   2662         "amar\\u0113ndra hanum\\u0101nula",                              /* Amarendra hanumanula   */
   2663         "ravi kum\\u0101r vi\\u015Bvan\\u0101dha",                       /* Ravi Kumar Viswanadha  */
   2664         "\\u0101ditya kandr\\u0113gula",                                 /* Aditya Kandregula      */
   2665         "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty   */
   2666         "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di"                         /* Madhav Desetty         */
   2667     };
   2668 
   2669     const char* const expected[MAX_LEN] = {
   2670         "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
   2671         "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
   2672         "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
   2673         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
   2674         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
   2675         "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
   2676         "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
   2677         "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
   2678         "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
   2679         "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
   2680     };
   2681 
   2682     UErrorCode status = U_ZERO_ERROR;
   2683     UParseError parseError;
   2684     UnicodeString message;
   2685     Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
   2686     Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
   2687     if(U_FAILURE(status)){
   2688         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
   2689         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
   2690         return;
   2691     }
   2692     UnicodeString gotResult;
   2693     for(int i= 0; i<MAX_LEN; i++){
   2694         gotResult = source[i];
   2695         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
   2696         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
   2697     }
   2698     delete latinToDev;
   2699     delete devToLatin;
   2700 }
   2701 
   2702 void TransliteratorTest::TestSanskritLatinRT(){
   2703     const int MAX_LEN =16;
   2704     const char* const source[MAX_LEN] = {
   2705         "rmk\\u1E63\\u0113t",
   2706         "\\u015Br\\u012Bmad",
   2707         "bhagavadg\\u012Bt\\u0101",
   2708         "adhy\\u0101ya",
   2709         "arjuna",
   2710         "vi\\u1E63\\u0101da",
   2711         "y\\u014Dga",
   2712         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
   2713         "uv\\u0101cr\\u0325",
   2714         "dharmak\\u1E63\\u0113tr\\u0113",
   2715         "kuruk\\u1E63\\u0113tr\\u0113",
   2716         "samav\\u0113t\\u0101",
   2717         "yuyutsava\\u1E25",
   2718         "m\\u0101mak\\u0101\\u1E25",
   2719     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
   2720         "kimakurvata",
   2721         "san\\u0304java",
   2722     };
   2723     const char* const expected[MAX_LEN] = {
   2724         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
   2725         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
   2726         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
   2727         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
   2728         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
   2729         "\\u0935\\u093f\\u0937\\u093e\\u0926",
   2730         "\\u092f\\u094b\\u0917",
   2731         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
   2732         "\\u0909\\u0935\\u093E\\u091A\\u0943",
   2733         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
   2734         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
   2735         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
   2736         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
   2737         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
   2738     //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
   2739         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
   2740         "\\u0938\\u0902\\u091c\\u0935",
   2741     };
   2742     UErrorCode status = U_ZERO_ERROR;
   2743     UParseError parseError;
   2744     UnicodeString message;
   2745     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
   2746     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
   2747     if(U_FAILURE(status)){
   2748         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
   2749         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
   2750         return;
   2751     }
   2752     UnicodeString gotResult;
   2753     for(int i= 0; i<MAX_LEN; i++){
   2754         gotResult = source[i];
   2755         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
   2756         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
   2757     }
   2758     delete latinToDev;
   2759     delete devToLatin;
   2760 }
   2761 
   2762 
   2763 void TransliteratorTest::TestCompoundLatinRT(){
   2764     const char* const source[] = {
   2765         "rmk\\u1E63\\u0113t",
   2766         "\\u015Br\\u012Bmad",
   2767         "bhagavadg\\u012Bt\\u0101",
   2768         "adhy\\u0101ya",
   2769         "arjuna",
   2770         "vi\\u1E63\\u0101da",
   2771         "y\\u014Dga",
   2772         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
   2773         "uv\\u0101cr\\u0325",
   2774         "dharmak\\u1E63\\u0113tr\\u0113",
   2775         "kuruk\\u1E63\\u0113tr\\u0113",
   2776         "samav\\u0113t\\u0101",
   2777         "yuyutsava\\u1E25",
   2778         "m\\u0101mak\\u0101\\u1E25",
   2779      // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
   2780         "kimakurvata",
   2781         "san\\u0304java"
   2782     };
   2783     const int MAX_LEN = sizeof(source)/sizeof(source[0]);
   2784     const char* const expected[MAX_LEN] = {
   2785         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
   2786         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
   2787         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
   2788         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
   2789         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
   2790         "\\u0935\\u093f\\u0937\\u093e\\u0926",
   2791         "\\u092f\\u094b\\u0917",
   2792         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
   2793         "\\u0909\\u0935\\u093E\\u091A\\u0943",
   2794         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
   2795         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
   2796         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
   2797         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
   2798         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
   2799     //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
   2800         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
   2801         "\\u0938\\u0902\\u091c\\u0935"
   2802     };
   2803     if(MAX_LEN != sizeof(expected)/sizeof(expected[0])) {
   2804         errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
   2805         return;
   2806     }
   2807 
   2808     UErrorCode status = U_ZERO_ERROR;
   2809     UParseError parseError;
   2810     UnicodeString message;
   2811     Transliterator* devToLatinToDev  =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
   2812     Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
   2813     Transliterator* devToTelToDev    =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
   2814     Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
   2815 
   2816     if(U_FAILURE(status)){
   2817         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
   2818         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
   2819         return;
   2820     }
   2821     UnicodeString gotResult;
   2822     for(int i= 0; i<MAX_LEN; i++){
   2823         gotResult = source[i];
   2824         expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
   2825         expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
   2826         expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
   2827 
   2828     }
   2829     delete(latinToDevToLatin);
   2830     delete(devToLatinToDev);
   2831     delete(devToTelToDev);
   2832     delete(latinToTelToLatin);
   2833 }
   2834 
   2835 /**
   2836  * Test Gurmukhi-Devanagari Tippi and Bindi
   2837  */
   2838 void TransliteratorTest::TestGurmukhiDevanagari(){
   2839     // the rule says:
   2840     // (\u0902) (when preceded by vowel)      --->  (\u0A02)
   2841     // (\u0902) (when preceded by consonant)  --->  (\u0A70)
   2842     UErrorCode status = U_ZERO_ERROR;
   2843     UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status);
   2844     UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status);
   2845     UParseError parseError;
   2846 
   2847     UnicodeSetIterator vIter(vowel);
   2848     UnicodeSetIterator nvIter(non_vowel);
   2849     Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
   2850     if(U_FAILURE(status)) {
   2851       dataerrln("Error creating transliterator %s", u_errorName(status));
   2852       delete trans;
   2853       return;
   2854     }
   2855     UnicodeString src (" \\u0902", -1, US_INV);
   2856     UnicodeString expected(" \\u0A02", -1, US_INV);
   2857     src = src.unescape();
   2858     expected= expected.unescape();
   2859 
   2860     while(vIter.next()){
   2861         src.setCharAt(0,(UChar) vIter.getCodepoint());
   2862         expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
   2863         expect(*trans,src,expected);
   2864     }
   2865 
   2866     expected.setCharAt(1,0x0A70);
   2867     while(nvIter.next()){
   2868         //src.setCharAt(0,(char) nvIter.codepoint);
   2869         src.setCharAt(0,(UChar)nvIter.getCodepoint());
   2870         expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
   2871         expect(*trans,src,expected);
   2872     }
   2873     delete trans;
   2874 }
   2875 /**
   2876  * Test instantiation from a locale.
   2877  */
   2878 void TransliteratorTest::TestLocaleInstantiation(void) {
   2879     UParseError pe;
   2880     UErrorCode ec = U_ZERO_ERROR;
   2881     Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
   2882     if (U_FAILURE(ec)) {
   2883         dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec));
   2884         delete t;
   2885         return;
   2886     }
   2887     expect(*t, CharsToUnicodeString("\\u0430"), "a");
   2888     delete t;
   2889 
   2890     t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
   2891     if (U_FAILURE(ec)) {
   2892         errln("FAIL: createInstance(en-el)");
   2893         delete t;
   2894         return;
   2895     }
   2896     expect(*t, "a", CharsToUnicodeString("\\u03B1"));
   2897     delete t;
   2898 }
   2899 
   2900 /**
   2901  * Test title case handling of accent (should ignore accents)
   2902  */
   2903 void TransliteratorTest::TestTitleAccents(void) {
   2904     UParseError pe;
   2905     UErrorCode ec = U_ZERO_ERROR;
   2906     Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
   2907     if (U_FAILURE(ec)) {
   2908         errln("FAIL: createInstance(Title)");
   2909         delete t;
   2910         return;
   2911     }
   2912     expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
   2913     delete t;
   2914 }
   2915 
   2916 /**
   2917  * Basic test of a locale resource based rule.
   2918  */
   2919 void TransliteratorTest::TestLocaleResource() {
   2920     const char* DATA[] = {
   2921         // id                    from               to
   2922         //"Latin-Greek/UNGEGN",    "b",               "\\u03bc\\u03c0",
   2923         "Latin-el",              "b",               "\\u03bc\\u03c0",
   2924         "Latin-Greek",           "b",               "\\u03B2",
   2925         "Greek-Latin/UNGEGN",    "\\u03B2",         "v",
   2926         "el-Latin",              "\\u03B2",         "v",
   2927         "Greek-Latin",           "\\u03B2",         "b",
   2928     };
   2929     const int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
   2930     for (int32_t i=0; i<DATA_length; i+=3) {
   2931         UParseError pe;
   2932         UErrorCode ec = U_ZERO_ERROR;
   2933         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
   2934         if (U_FAILURE(ec)) {
   2935             dataerrln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ") - " + u_errorName(ec));
   2936             delete t;
   2937             continue;
   2938         }
   2939         expect(*t, CharsToUnicodeString(DATA[i+1]),
   2940                CharsToUnicodeString(DATA[i+2]));
   2941         delete t;
   2942     }
   2943 }
   2944 
   2945 /**
   2946  * Make sure parse errors reference the right line.
   2947  */
   2948 void TransliteratorTest::TestParseError() {
   2949     static const char* rule =
   2950         "a > b;\n"
   2951         "# more stuff\n"
   2952         "d << b;";
   2953     UErrorCode ec = U_ZERO_ERROR;
   2954     UParseError pe;
   2955     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
   2956     delete t;
   2957     if (U_FAILURE(ec)) {
   2958         UnicodeString err(pe.preContext);
   2959         err.append((UChar)124/*|*/).append(pe.postContext);
   2960         if (err.indexOf("d << b") >= 0) {
   2961             logln("Ok: " + err);
   2962         } else {
   2963             errln("FAIL: " + err);
   2964         }
   2965     }
   2966     else {
   2967         errln("FAIL: no syntax error");
   2968     }
   2969     static const char* maskingRule =
   2970         "a>x;\n"
   2971         "# more stuff\n"
   2972         "ab>y;";
   2973     ec = U_ZERO_ERROR;
   2974     delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec);
   2975     if (ec != U_RULE_MASK_ERROR) {
   2976         errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec));
   2977     }
   2978     else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) {
   2979         errln("FAIL: did not get expected precontext");
   2980     }
   2981     else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) {
   2982         errln("FAIL: did not get expected postcontext");
   2983     }
   2984 }
   2985 
   2986 /**
   2987  * Make sure sets on output are disallowed.
   2988  */
   2989 void TransliteratorTest::TestOutputSet() {
   2990     UnicodeString rule = "$set = [a-cm-n]; b > $set;";
   2991     UErrorCode ec = U_ZERO_ERROR;
   2992     UParseError pe;
   2993     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
   2994     delete t;
   2995     if (U_FAILURE(ec)) {
   2996         UnicodeString err(pe.preContext);
   2997         err.append((UChar)124/*|*/).append(pe.postContext);
   2998         logln("Ok: " + err);
   2999         return;
   3000     }
   3001     errln("FAIL: No syntax error");
   3002 }
   3003 
   3004 /**
   3005  * Test the use variable range pragma, making sure that use of
   3006  * variable range characters is detected and flagged as an error.
   3007  */
   3008 void TransliteratorTest::TestVariableRange() {
   3009     UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
   3010     UErrorCode ec = U_ZERO_ERROR;
   3011     UParseError pe;
   3012     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
   3013     delete t;
   3014     if (U_FAILURE(ec)) {
   3015         UnicodeString err(pe.preContext);
   3016         err.append((UChar)124/*|*/).append(pe.postContext);
   3017         logln("Ok: " + err);
   3018         return;
   3019     }
   3020     errln("FAIL: No syntax error");
   3021 }
   3022 
   3023 /**
   3024  * Test invalid post context error handling
   3025  */
   3026 void TransliteratorTest::TestInvalidPostContext() {
   3027     UnicodeString rule = "a}b{c>d;";
   3028     UErrorCode ec = U_ZERO_ERROR;
   3029     UParseError pe;
   3030     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
   3031     delete t;
   3032     if (U_FAILURE(ec)) {
   3033         UnicodeString err(pe.preContext);
   3034         err.append((UChar)124/*|*/).append(pe.postContext);
   3035         if (err.indexOf("a}b{c") >= 0) {
   3036             logln("Ok: " + err);
   3037         } else {
   3038             errln("FAIL: " + err);
   3039         }
   3040         return;
   3041     }
   3042     errln("FAIL: No syntax error");
   3043 }
   3044 
   3045 /**
   3046  * Test ID form variants
   3047  */
   3048 void TransliteratorTest::TestIDForms() {
   3049     const char* DATA[] = {
   3050         "NFC", NULL, "NFD",
   3051         "nfd", NULL, "NFC", // make sure case is ignored
   3052         "Any-NFKD", NULL, "Any-NFKC",
   3053         "Null", NULL, "Null",
   3054         "-nfkc", "nfkc", "NFKD",
   3055         "-nfkc/", "nfkc", "NFKD",
   3056         "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
   3057         "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
   3058         "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
   3059         "Source-", NULL, NULL,
   3060         "Source/Variant-", NULL, NULL,
   3061         "Source-/Variant", NULL, NULL,
   3062         "/Variant", NULL, NULL,
   3063         "/Variant-", NULL, NULL,
   3064         "-/Variant", NULL, NULL,
   3065         "-/", NULL, NULL,
   3066         "-", NULL, NULL,
   3067         "/", NULL, NULL,
   3068     };
   3069     const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
   3070 
   3071     for (int32_t i=0; i<DATA_length; i+=3) {
   3072         const char* ID = DATA[i];
   3073         const char* expID = DATA[i+1];
   3074         const char* expInvID = DATA[i+2];
   3075         UBool expValid = (expInvID != NULL);
   3076         if (expID == NULL) {
   3077             expID = ID;
   3078         }
   3079         UParseError pe;
   3080         UErrorCode ec = U_ZERO_ERROR;
   3081         Transliterator *t =
   3082             Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
   3083         if (U_FAILURE(ec)) {
   3084             if (!expValid) {
   3085                 logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
   3086             } else {
   3087                 dataerrln((UnicodeString)"FAIL: Couldn't create " + ID + " - " + u_errorName(ec));
   3088             }
   3089             delete t;
   3090             continue;
   3091         }
   3092         Transliterator *u = t->createInverse(ec);
   3093         if (U_FAILURE(ec)) {
   3094             errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
   3095             delete t;
   3096             delete u;
   3097             continue;
   3098         }
   3099         if (t->getID() == expID &&
   3100             u->getID() == expInvID) {
   3101             logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
   3102         } else {
   3103             errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
   3104                   t->getID() + " x getInverse() => " + u->getID() +
   3105                   ", expected " + expInvID);
   3106         }
   3107         delete t;
   3108         delete u;
   3109     }
   3110 }
   3111 
   3112 static const UChar SPACE[]   = {32,0};
   3113 static const UChar NEWLINE[] = {10,0};
   3114 static const UChar RETURN[]  = {13,0};
   3115 static const UChar EMPTY[]   = {0};
   3116 
   3117 void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
   3118                                     const UnicodeString& testRulesForward) {
   3119     UnicodeString rules2; t2.toRules(rules2, TRUE);
   3120     //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
   3121     rules2.findAndReplace(SPACE, EMPTY);
   3122     rules2.findAndReplace(NEWLINE, EMPTY);
   3123     rules2.findAndReplace(RETURN, EMPTY);
   3124 
   3125     UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
   3126 
   3127     if (rules2 != testRules) {
   3128         errln(label);
   3129         logln((UnicodeString)"GENERATED RULES: " + rules2);
   3130         logln((UnicodeString)"SHOULD BE:       " + testRulesForward);
   3131     }
   3132 }
   3133 
   3134 /**
   3135  * Mark's toRules test.
   3136  */
   3137 void TransliteratorTest::TestToRulesMark() {
   3138     const char* testRules =
   3139         "::[[:Latin:][:Mark:]];"
   3140         "::NFKD (NFC);"
   3141         "::Lower (Lower);"
   3142         "a <> \\u03B1;" // alpha
   3143         "::NFKC (NFD);"
   3144         "::Upper (Lower);"
   3145         "::Lower ();"
   3146         "::([[:Greek:][:Mark:]]);"
   3147         ;
   3148     const char* testRulesForward =
   3149         "::[[:Latin:][:Mark:]];"
   3150         "::NFKD(NFC);"
   3151         "::Lower(Lower);"
   3152         "a > \\u03B1;"
   3153         "::NFKC(NFD);"
   3154         "::Upper (Lower);"
   3155         "::Lower ();"
   3156         ;
   3157     const char* testRulesBackward =
   3158         "::[[:Greek:][:Mark:]];"
   3159         "::Lower (Upper);"
   3160         "::NFD(NFKC);"
   3161         "\\u03B1 > a;"
   3162         "::Lower(Lower);"
   3163         "::NFC(NFKD);"
   3164         ;
   3165     UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
   3166     UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
   3167 
   3168     UParseError pe;
   3169     UErrorCode ec = U_ZERO_ERROR;
   3170     Transliterator *t2 = Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec);
   3171     Transliterator *t3 = Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec);
   3172 
   3173     if (U_FAILURE(ec)) {
   3174         delete t2;
   3175         delete t3;
   3176         dataerrln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
   3177         return;
   3178     }
   3179 
   3180     expect(*t2, source, target);
   3181     expect(*t3, target, source);
   3182 
   3183     checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV));
   3184     checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV));
   3185 
   3186     delete t2;
   3187     delete t3;
   3188 }
   3189 
   3190 /**
   3191  * Test Escape and Unescape transliterators.
   3192  */
   3193 void TransliteratorTest::TestEscape() {
   3194     UParseError pe;
   3195     UErrorCode ec;
   3196     Transliterator *t;
   3197 
   3198     ec = U_ZERO_ERROR;
   3199     t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
   3200     if (U_FAILURE(ec)) {
   3201         errln((UnicodeString)"FAIL: createInstance");
   3202     } else {
   3203         expect(*t,
   3204                UNICODE_STRING_SIMPLE("\\x{40}\\U00000031&#x32;&#81;"),
   3205                "@12Q");
   3206     }
   3207     delete t;
   3208 
   3209     ec = U_ZERO_ERROR;
   3210     t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
   3211     if (U_FAILURE(ec)) {
   3212         errln((UnicodeString)"FAIL: createInstance");
   3213     } else {
   3214         expect(*t,
   3215                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
   3216                UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
   3217     }
   3218     delete t;
   3219 
   3220     ec = U_ZERO_ERROR;
   3221     t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
   3222     if (U_FAILURE(ec)) {
   3223         errln((UnicodeString)"FAIL: createInstance");
   3224     } else {
   3225         expect(*t,
   3226                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
   3227                UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
   3228     }
   3229     delete t;
   3230 
   3231     ec = U_ZERO_ERROR;
   3232     t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
   3233     if (U_FAILURE(ec)) {
   3234         errln((UnicodeString)"FAIL: createInstance");
   3235     } else {
   3236         expect(*t,
   3237                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
   3238                UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
   3239     }
   3240     delete t;
   3241 }
   3242 
   3243 
   3244 void TransliteratorTest::TestAnchorMasking(){
   3245     UnicodeString rule ("^a > Q; a > q;");
   3246     UErrorCode status= U_ZERO_ERROR;
   3247     UParseError parseError;
   3248 
   3249     Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
   3250     if(U_FAILURE(status)){
   3251         errln(UnicodeString("FAIL: ") + "ID" +
   3252               ".createFromRules() => bad rules" +
   3253               /*", parse error " + parseError.code +*/
   3254               ", line " + parseError.line +
   3255               ", offset " + parseError.offset +
   3256               ", context " + prettify(parseError.preContext, TRUE) +
   3257               ", rules: " + prettify(rule, TRUE));
   3258     }
   3259     delete t;
   3260 }
   3261 
   3262 /**
   3263  * Make sure display names of variants look reasonable.
   3264  */
   3265 void TransliteratorTest::TestDisplayName() {
   3266 #if UCONFIG_NO_FORMATTING
   3267     logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
   3268     return;
   3269 #else
   3270     static const char* DATA[] = {
   3271         // ID, forward name, reverse name
   3272         // Update the text as necessary -- the important thing is
   3273         // not the text itself, but how various cases are handled.
   3274 
   3275         // Basic test
   3276         "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
   3277 
   3278         // Variants
   3279         "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
   3280 
   3281         // Target-only IDs
   3282         "NFC", "Any to NFC", "Any to NFD",
   3283     };
   3284 
   3285     int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
   3286 
   3287     Locale US("en", "US");
   3288 
   3289     for (int32_t i=0; i<DATA_length; i+=3) {
   3290         UnicodeString name;
   3291         Transliterator::getDisplayName(DATA[i], US, name);
   3292         if (name != DATA[i+1]) {
   3293             dataerrln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
   3294                   name + ", expected " + DATA[i+1]);
   3295         } else {
   3296             logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
   3297         }
   3298         UErrorCode ec = U_ZERO_ERROR;
   3299         UParseError pe;
   3300         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
   3301         if (U_FAILURE(ec)) {
   3302             delete t;
   3303             dataerrln("FAIL: createInstance failed - %s", u_errorName(ec));
   3304             continue;
   3305         }
   3306         name = Transliterator::getDisplayName(t->getID(), US, name);
   3307         if (name != DATA[i+2]) {
   3308             dataerrln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
   3309                   name + ", expected " + DATA[i+2]);
   3310         } else {
   3311             logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
   3312         }
   3313         delete t;
   3314     }
   3315 #endif
   3316 }
   3317 
   3318 void TransliteratorTest::TestSpecialCases(void) {
   3319     const UnicodeString registerRules[] = {
   3320         "Any-Dev1", "x > X; y > Y;",
   3321         "Any-Dev2", "XY > Z",
   3322         "Greek-Latin/FAKE",
   3323             CharsToUnicodeString
   3324             ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
   3325         "" // END MARKER
   3326     };
   3327 
   3328     const UnicodeString testCases[] = {
   3329         // NORMALIZATION
   3330         // should add more test cases
   3331         "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
   3332         "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
   3333         "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
   3334         "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
   3335 
   3336         // mp -> b BUG
   3337         "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
   3338         "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
   3339 
   3340         // check for devanagari bug
   3341         "nfd;Dev1;Dev2;nfc", "xy", "Z",
   3342 
   3343         // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
   3344         "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
   3345                  CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
   3346 
   3347         //TODO: enable this test once Titlecase works right
   3348         /*
   3349         "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
   3350                  CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
   3351                  */
   3352         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
   3353                  CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
   3354         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
   3355                  CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
   3356 
   3357         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
   3358         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
   3359 
   3360          // FORMS OF S
   3361         "Greek-Latin/UNGEGN",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
   3362                                CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
   3363         "Latin-Greek/UNGEGN",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
   3364                                CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
   3365         "Greek-Latin",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
   3366                         CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
   3367         "Latin-Greek",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
   3368                         CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
   3369         // Tatiana bug
   3370         // Upper: TAT\\u02B9\\u00C2NA
   3371         // Lower: tat\\u02B9\\u00E2na
   3372         // Title: Tat\\u02B9\\u00E2na
   3373         "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
   3374                  CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
   3375         "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
   3376                  CharsToUnicodeString("tat\\u02B9\\u00E2na"),
   3377         "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
   3378                  CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
   3379 
   3380         "" // END MARKER
   3381     };
   3382 
   3383     UParseError pos;
   3384     int32_t i;
   3385     for (i = 0; registerRules[i].length()!=0; i+=2) {
   3386         UErrorCode status = U_ZERO_ERROR;
   3387 
   3388         Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
   3389             registerRules[i+1], UTRANS_FORWARD, pos, status);
   3390         if (U_FAILURE(status)) {
   3391             dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status));
   3392         } else {
   3393             Transliterator::registerInstance(t);
   3394         }
   3395     }
   3396     for (i = 0; testCases[i].length()!=0; i+=3) {
   3397         UErrorCode ec = U_ZERO_ERROR;
   3398         UParseError pe;
   3399         const UnicodeString& name = testCases[i];
   3400         Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
   3401         if (U_FAILURE(ec)) {
   3402             dataerrln((UnicodeString)"FAIL: Couldn't create " + name + " - " + u_errorName(ec));
   3403             delete t;
   3404             continue;
   3405         }
   3406         const UnicodeString& id = t->getID();
   3407         const UnicodeString& source = testCases[i+1];
   3408         UnicodeString target;
   3409 
   3410         // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
   3411 
   3412         if (testCases[i+2].length() > 0) {
   3413             target = testCases[i+2];
   3414         } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
   3415             Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
   3416         } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
   3417             Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
   3418         } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
   3419             Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
   3420         } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
   3421             Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
   3422         } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
   3423             target = source;
   3424             target.toLower(Locale::getUS());
   3425         } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
   3426             target = source;
   3427             target.toUpper(Locale::getUS());
   3428         }
   3429         if (U_FAILURE(ec)) {
   3430             errln((UnicodeString)"FAIL: Internal error normalizing " + source);
   3431             continue;
   3432         }
   3433 
   3434         expect(*t, source, target);
   3435         delete t;
   3436     }
   3437     for (i = 0; registerRules[i].length()!=0; i+=2) {
   3438         Transliterator::unregister(registerRules[i]);
   3439     }
   3440 }
   3441 
   3442 char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
   3443     if (ch <= 0xFFFF) {
   3444         sprintf(buffer, "\\u%04x", (int)ch);
   3445     } else {
   3446         sprintf(buffer, "\\U%08x", (int)ch);
   3447     }
   3448     return buffer;
   3449 }
   3450 
   3451 void TransliteratorTest::TestSurrogateCasing (void) {
   3452     // check that casing handles surrogates
   3453     // titlecase is currently defective
   3454     char buffer[20];
   3455     UChar buffer2[20];
   3456     UChar32 dee;
   3457     UTF_GET_CHAR(DESERET_dee,0, 0, DESERET_dee.length(), dee);
   3458     UnicodeString DEE(u_totitle(dee));
   3459     if (DEE != DESERET_DEE) {
   3460         err("Fails titlecase of surrogates");
   3461         err(Char32ToEscapedChars(dee, buffer));
   3462         err(", ");
   3463         errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
   3464     }
   3465 
   3466     UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
   3467     UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
   3468     UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
   3469     UErrorCode status= U_ZERO_ERROR;
   3470 
   3471     u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
   3472     if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
   3473         errln("Fails: Can't uppercase surrogates.");
   3474     }
   3475 
   3476     status= U_ZERO_ERROR;
   3477     u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
   3478     if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
   3479         errln("Fails: Can't lowercase surrogates.");
   3480     }
   3481 }
   3482 
   3483 static void _trans(Transliterator& t, const UnicodeString& src,
   3484                    UnicodeString& result) {
   3485     result = src;
   3486     t.transliterate(result);
   3487 }
   3488 
   3489 static void _trans(const UnicodeString& id, const UnicodeString& src,
   3490                    UnicodeString& result, UErrorCode ec) {
   3491     UParseError pe;
   3492     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
   3493     if (U_SUCCESS(ec)) {
   3494         _trans(*t, src, result);
   3495     }
   3496     delete t;
   3497 }
   3498 
   3499 static UnicodeString _findMatch(const UnicodeString& source,
   3500                                        const UnicodeString* pairs) {
   3501     UnicodeString empty;
   3502     for (int32_t i=0; pairs[i].length() > 0; i+=2) {
   3503         if (0==source.caseCompare(pairs[i], U_FOLD_CASE_DEFAULT)) {
   3504             return pairs[i+1];
   3505         }
   3506     }
   3507     return empty;
   3508 }
   3509 
   3510 // Check to see that incremental gets at least part way through a reasonable string.
   3511 
   3512 void TransliteratorTest::TestIncrementalProgress(void) {
   3513     UErrorCode ec = U_ZERO_ERROR;
   3514     UnicodeString latinTest = "The Quick Brown Fox.";
   3515     UnicodeString devaTest;
   3516     _trans("Latin-Devanagari", latinTest, devaTest, ec);
   3517     UnicodeString kataTest;
   3518     _trans("Latin-Katakana", latinTest, kataTest, ec);
   3519     if (U_FAILURE(ec)) {
   3520         errln("FAIL: Internal error");
   3521         return;
   3522     }
   3523     const UnicodeString tests[] = {
   3524         "Any", latinTest,
   3525         "Latin", latinTest,
   3526         "Halfwidth", latinTest,
   3527         "Devanagari", devaTest,
   3528         "Katakana", kataTest,
   3529         "" // END MARKER
   3530     };
   3531 
   3532     UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
   3533     int32_t i = 0, j=0, k=0;
   3534     int32_t sources = Transliterator::countAvailableSources();
   3535     for (i = 0; i < sources; i++) {
   3536         UnicodeString source;
   3537         Transliterator::getAvailableSource(i, source);
   3538         UnicodeString test = _findMatch(source, tests);
   3539         if (test.length() == 0) {
   3540             logln((UnicodeString)"Skipping " + source + "-X");
   3541             continue;
   3542         }
   3543         int32_t targets = Transliterator::countAvailableTargets(source);
   3544         for (j = 0; j < targets; j++) {
   3545             UnicodeString target;
   3546             Transliterator::getAvailableTarget(j, source, target);
   3547             int32_t variants = Transliterator::countAvailableVariants(source, target);
   3548             for (k =0; k< variants; k++) {
   3549                 UnicodeString variant;
   3550                 UParseError err;
   3551                 UErrorCode status = U_ZERO_ERROR;
   3552 
   3553                 Transliterator::getAvailableVariant(k, source, target, variant);
   3554                 UnicodeString id = source + "-" + target + "/" + variant;
   3555 
   3556                 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
   3557                 if (U_FAILURE(status)) {
   3558                     errln((UnicodeString)"FAIL: Could not create " + id);
   3559                     delete t;
   3560                     continue;
   3561                 }
   3562                 status = U_ZERO_ERROR;
   3563                 CheckIncrementalAux(t, test);
   3564 
   3565                 UnicodeString rev;
   3566                 _trans(*t, test, rev);
   3567                 Transliterator *inv = t->createInverse(status);
   3568                 if (U_FAILURE(status)) {
   3569                     errln((UnicodeString)"FAIL: Could not create inverse of " + id);
   3570                     delete t;
   3571                     delete inv;
   3572                     continue;
   3573                 }
   3574                 CheckIncrementalAux(inv, rev);
   3575                 delete t;
   3576                 delete inv;
   3577             }
   3578         }
   3579     }
   3580 }
   3581 
   3582 void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
   3583                                                       const UnicodeString& input) {
   3584     UErrorCode ec = U_ZERO_ERROR;
   3585     UTransPosition pos;
   3586     UnicodeString test = input;
   3587 
   3588     pos.contextStart = 0;
   3589     pos.contextLimit = input.length();
   3590     pos.start = 0;
   3591     pos.limit = input.length();
   3592 
   3593     t->transliterate(test, pos, ec);
   3594     if (U_FAILURE(ec)) {
   3595         errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
   3596         return;
   3597     }
   3598     UBool gotError = FALSE;
   3599 
   3600     // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
   3601 
   3602     if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
   3603         errln((UnicodeString)"No Progress, " +
   3604               t->getID() + ": " + formatInput(test, input, pos));
   3605         gotError = TRUE;
   3606     } else {
   3607         logln((UnicodeString)"PASS Progress, " +
   3608               t->getID() + ": " + formatInput(test, input, pos));
   3609     }
   3610     t->finishTransliteration(test, pos);
   3611     if (pos.start != pos.limit) {
   3612         errln((UnicodeString)"Incomplete, " +
   3613               t->getID() + ": " + formatInput(test, input, pos));
   3614         gotError = TRUE;
   3615     }
   3616 }
   3617 
   3618 void TransliteratorTest::TestFunction() {
   3619     // Careful with spacing and ';' here:  Phrase this exactly
   3620     // as toRules() is going to return it.  If toRules() changes
   3621     // with regard to spacing or ';', then adjust this string.
   3622     UnicodeString rule =
   3623         "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
   3624 
   3625     UParseError pe;
   3626     UErrorCode ec = U_ZERO_ERROR;
   3627     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
   3628     if (t == NULL) {
   3629         dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec));
   3630         return;
   3631     }
   3632 
   3633     UnicodeString r;
   3634     t->toRules(r, TRUE);
   3635     if (r == rule) {
   3636         logln((UnicodeString)"OK: toRules() => " + r);
   3637     } else {
   3638         errln((UnicodeString)"FAIL: toRules() => " + r +
   3639               ", expected " + rule);
   3640     }
   3641 
   3642     expect(*t, "The Quick Brown Fox",
   3643            UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
   3644 
   3645     delete t;
   3646 }
   3647 
   3648 void TransliteratorTest::TestInvalidBackRef(void) {
   3649     UnicodeString rule =  ". > $1;";
   3650     UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
   3651     UParseError pe;
   3652     UErrorCode ec = U_ZERO_ERROR;
   3653     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
   3654     Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
   3655 
   3656     if (t != NULL) {
   3657         errln("FAIL: createFromRules should have returned NULL");
   3658         delete t;
   3659     }
   3660 
   3661     if (t2 != NULL) {
   3662         errln("FAIL: createFromRules should have returned NULL");
   3663         delete t2;
   3664     }
   3665 
   3666     if (U_SUCCESS(ec)) {
   3667         errln("FAIL: Ok: . > $1; => no error");
   3668     } else {
   3669         logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
   3670     }
   3671 }
   3672 
   3673 void TransliteratorTest::TestMulticharStringSet() {
   3674     // Basic testing
   3675     const char* rule =
   3676         "       [{aa}]       > x;"
   3677         "         a          > y;"
   3678         "       [b{bc}]      > z;"
   3679         "[{gd}] { e          > q;"
   3680         "         e } [{fg}] > r;" ;
   3681 
   3682     UParseError pe;
   3683     UErrorCode ec = U_ZERO_ERROR;
   3684     Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
   3685     if (t == NULL || U_FAILURE(ec)) {
   3686         delete t;
   3687         errln("FAIL: createFromRules failed");
   3688         return;
   3689     }
   3690 
   3691     expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
   3692            "y x yz z d gd de gdq gdqfg ddrfg");
   3693     delete t;
   3694 
   3695     // Overlapped string test.  Make sure that when multiple
   3696     // strings can match that the longest one is matched.
   3697     rule =
   3698         "    [a {ab} {abc}]    > x;"
   3699         "           b          > y;"
   3700         "           c          > z;"
   3701         " q [t {st} {rst}] { e > p;" ;
   3702 
   3703     t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
   3704     if (t == NULL || U_FAILURE(ec)) {
   3705         delete t;
   3706         errln("FAIL: createFromRules failed");
   3707         return;
   3708     }
   3709 
   3710     expect(*t, "a ab abc qte qste qrste",
   3711            "x x x qtp qstp qrstp");
   3712     delete t;
   3713 }
   3714 
   3715 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
   3716 // BEGIN TestUserFunction support factory
   3717 
   3718 Transliterator* _TUFF[4];
   3719 UnicodeString* _TUFID[4];
   3720 
   3721 static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/,
   3722                                    Transliterator::Token context) {
   3723     return _TUFF[context.integer]->clone();
   3724 }
   3725 
   3726 static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
   3727     _TUFF[n] = t;
   3728     _TUFID[n] = new UnicodeString(ID);
   3729     Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
   3730 }
   3731 
   3732 static void _TUFUnreg(int32_t n) {
   3733     if (_TUFF[n] != NULL) {
   3734         Transliterator::unregister(*_TUFID[n]);
   3735         delete _TUFF[n];
   3736         delete _TUFID[n];
   3737     }
   3738 }
   3739 
   3740 // END TestUserFunction support factory
   3741 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
   3742 
   3743 /**
   3744  * Test that user-registered transliterators can be used under function
   3745  * syntax.
   3746  */
   3747 void TransliteratorTest::TestUserFunction() {
   3748 
   3749     Transliterator* t;
   3750     UParseError pe;
   3751     UErrorCode ec = U_ZERO_ERROR;
   3752 
   3753     // Setup our factory
   3754     int32_t i;
   3755     for (i=0; i<4; ++i) {
   3756         _TUFF[i] = NULL;
   3757     }
   3758 
   3759     // There's no need to register inverses if we don't use them
   3760     t = Transliterator::createFromRules("gif",
   3761                                         UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
   3762                                         UTRANS_FORWARD, pe, ec);
   3763     if (t == NULL || U_FAILURE(ec)) {
   3764         dataerrln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
   3765         return;
   3766     }
   3767     _TUFReg("Any-gif", t, 0);
   3768 
   3769     t = Transliterator::createFromRules("RemoveCurly",
   3770                                         UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
   3771                                         UTRANS_FORWARD, pe, ec);
   3772     if (t == NULL || U_FAILURE(ec)) {
   3773         errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
   3774         goto FAIL;
   3775     }
   3776     expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
   3777     _TUFReg("Any-RemoveCurly", t, 1);
   3778 
   3779     logln("Trying &hex");
   3780     t = Transliterator::createFromRules("hex2",
   3781                                         "(.) > &hex($1);",
   3782                                         UTRANS_FORWARD, pe, ec);
   3783     if (t == NULL || U_FAILURE(ec)) {
   3784         errln("FAIL: createFromRules");
   3785         goto FAIL;
   3786     }
   3787     logln("Registering");
   3788     _TUFReg("Any-hex2", t, 2);
   3789     t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
   3790     if (t == NULL || U_FAILURE(ec)) {
   3791         errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
   3792         goto FAIL;
   3793     }
   3794     expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
   3795     delete t;
   3796 
   3797     logln("Trying &gif");
   3798     t = Transliterator::createFromRules("gif2",
   3799                                         "(.) > &Gif(&Hex2($1));",
   3800                                         UTRANS_FORWARD, pe, ec);
   3801     if (t == NULL || U_FAILURE(ec)) {
   3802         errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
   3803         goto FAIL;
   3804     }
   3805     logln("Registering");
   3806     _TUFReg("Any-gif2", t, 3);
   3807     t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
   3808     if (t == NULL || U_FAILURE(ec)) {
   3809         errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
   3810         goto FAIL;
   3811     }
   3812     expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
   3813            "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
   3814     delete t;
   3815 
   3816     // Test that filters are allowed after &
   3817     t = Transliterator::createFromRules("test",
   3818                                         "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
   3819                                         UTRANS_FORWARD, pe, ec);
   3820     if (t == NULL || U_FAILURE(ec)) {
   3821         errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
   3822         goto FAIL;
   3823     }
   3824     expect(*t, "abc",
   3825            UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
   3826     delete t;
   3827 
   3828  FAIL:
   3829     for (i=0; i<4; ++i) {
   3830         _TUFUnreg(i);
   3831     }
   3832 }
   3833 
   3834 /**
   3835  * Test the Any-X transliterators.
   3836  */
   3837 void TransliteratorTest::TestAnyX(void) {
   3838     UParseError parseError;
   3839     UErrorCode status = U_ZERO_ERROR;
   3840     Transliterator* anyLatin =
   3841         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
   3842     if (anyLatin==0) {
   3843         dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
   3844         delete anyLatin;
   3845         return;
   3846     }
   3847 
   3848     expect(*anyLatin,
   3849            CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
   3850            CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
   3851 
   3852     delete anyLatin;
   3853 }
   3854 
   3855 /**
   3856  * Test Any-X transliterators with sample letters from all scripts.
   3857  */
   3858 void TransliteratorTest::TestAny(void) {
   3859     UErrorCode status = U_ZERO_ERROR;
   3860     // Note: there is a lot of implict construction of UnicodeStrings from (char *) in
   3861     //       function call parameters going on in this test.
   3862     UnicodeSet alphabetic("[:alphabetic:]", status);
   3863     if (U_FAILURE(status)) {
   3864         dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
   3865         return;
   3866     }
   3867     alphabetic.freeze();
   3868 
   3869     UnicodeString testString;
   3870     for (int32_t i = 0; i < USCRIPT_CODE_LIMIT; i++) {
   3871         const char *scriptName = uscript_getShortName((UScriptCode)i);
   3872         if (scriptName == NULL) {
   3873             errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__, __LINE__, i);
   3874             return;
   3875         }
   3876 
   3877         UnicodeSet sample;
   3878         sample.applyPropertyAlias("script", scriptName, status);
   3879         if (U_FAILURE(status)) {
   3880             errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
   3881             return;
   3882         }
   3883         sample.retainAll(alphabetic);
   3884         for (int32_t count=0; count<5; count++) {
   3885             UChar32 c = sample.charAt(count);
   3886             if (c == -1) {
   3887                 break;
   3888             }
   3889             testString.append(c);
   3890         }
   3891     }
   3892 
   3893     UParseError parseError;
   3894     Transliterator* anyLatin =
   3895         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
   3896     if (U_FAILURE(status)) {
   3897         errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
   3898         return;
   3899     }
   3900 
   3901     logln(UnicodeString("Sample set for Any-Latin: ") + testString);
   3902     anyLatin->transliterate(testString);
   3903     logln(UnicodeString("Sample result for Any-Latin: ") + testString);
   3904     delete anyLatin;
   3905 }
   3906 
   3907 
   3908 /**
   3909  * Test the source and target set API.  These are only implemented
   3910  * for RBT and CompoundTransliterator at this time.
   3911  */
   3912 void TransliteratorTest::TestSourceTargetSet() {
   3913     UErrorCode ec = U_ZERO_ERROR;
   3914 
   3915     // Rules
   3916     const char* r =
   3917         "a > b; "
   3918         "r [x{lu}] > q;";
   3919 
   3920     // Expected source
   3921     UnicodeSet expSrc("[arx{lu}]", ec);
   3922 
   3923     // Expected target
   3924     UnicodeSet expTrg("[bq]", ec);
   3925 
   3926     UParseError pe;
   3927     Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
   3928 
   3929     if (U_FAILURE(ec)) {
   3930         delete t;
   3931         errln("FAIL: Couldn't set up test");
   3932         return;
   3933     }
   3934 
   3935     UnicodeSet src; t->getSourceSet(src);
   3936     UnicodeSet trg; t->getTargetSet(trg);
   3937 
   3938     if (src == expSrc && trg == expTrg) {
   3939         UnicodeString a, b;
   3940         logln((UnicodeString)"Ok: " +
   3941               r + " => source = " + src.toPattern(a, TRUE) +
   3942               ", target = " + trg.toPattern(b, TRUE));
   3943     } else {
   3944         UnicodeString a, b, c, d;
   3945         errln((UnicodeString)"FAIL: " +
   3946               r + " => source = " + src.toPattern(a, TRUE) +
   3947               ", expected " + expSrc.toPattern(b, TRUE) +
   3948               "; target = " + trg.toPattern(c, TRUE) +
   3949               ", expected " + expTrg.toPattern(d, TRUE));
   3950     }
   3951 
   3952     delete t;
   3953 }
   3954 
   3955 /**
   3956  * Test handling of rule whitespace, for both RBT and UnicodeSet.
   3957  */
   3958 void TransliteratorTest::TestRuleWhitespace() {
   3959     // Rules
   3960     const char* r = "a > \\u200E b;";
   3961 
   3962     UErrorCode ec = U_ZERO_ERROR;
   3963     UParseError pe;
   3964     Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
   3965 
   3966     if (U_FAILURE(ec)) {
   3967         errln("FAIL: Couldn't set up test");
   3968     } else {
   3969         expect(*t, "a", "b");
   3970     }
   3971     delete t;
   3972 
   3973     // UnicodeSet
   3974     ec = U_ZERO_ERROR;
   3975     UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
   3976 
   3977     if (U_FAILURE(ec)) {
   3978         errln("FAIL: Couldn't set up test");
   3979     } else {
   3980         if (set.contains(0x200E)) {
   3981             errln("FAIL: U+200E not being ignored by UnicodeSet");
   3982         }
   3983     }
   3984 }
   3985 //======================================================================
   3986 // this method is in TestUScript.java
   3987 //======================================================================
   3988 void TransliteratorTest::TestAllCodepoints(){
   3989     UScriptCode code= USCRIPT_INVALID_CODE;
   3990     char id[256]={'\0'};
   3991     char abbr[256]={'\0'};
   3992     char newId[256]={'\0'};
   3993     char newAbbrId[256]={'\0'};
   3994     char oldId[256]={'\0'};
   3995     char oldAbbrId[256]={'\0'};
   3996 
   3997     UErrorCode status =U_ZERO_ERROR;
   3998     UParseError pe;
   3999 
   4000     for(uint32_t i = 0; i<=0x10ffff; i++){
   4001         code =  uscript_getScript(i,&status);
   4002         if(code == USCRIPT_INVALID_CODE){
   4003             errln("uscript_getScript for codepoint \\U%08X failed.\n", i);
   4004         }
   4005         const char* myId = uscript_getName(code);
   4006         if(!myId) {
   4007           dataerrln("Valid script code returned NULL name. Check your data!");
   4008           return;
   4009         }
   4010         uprv_strcpy(id,myId);
   4011         uprv_strcpy(abbr,uscript_getShortName(code));
   4012 
   4013         uprv_strcpy(newId,"[:");
   4014         uprv_strcat(newId,id);
   4015         uprv_strcat(newId,":];NFD");
   4016 
   4017         uprv_strcpy(newAbbrId,"[:");
   4018         uprv_strcat(newAbbrId,abbr);
   4019         uprv_strcat(newAbbrId,":];NFD");
   4020 
   4021         if(uprv_strcmp(newId,oldId)!=0){
   4022             Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
   4023             if(t==NULL || U_FAILURE(status)){
   4024                 errln((UnicodeString)"FAIL: Could not create " + id);
   4025             }
   4026             delete t;
   4027         }
   4028         if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
   4029             Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
   4030             if(t==NULL || U_FAILURE(status)){
   4031                 errln((UnicodeString)"FAIL: Could not create " + id);
   4032             }
   4033             delete t;
   4034         }
   4035         uprv_strcpy(oldId,newId);
   4036         uprv_strcpy(oldAbbrId, newAbbrId);
   4037 
   4038     }
   4039 
   4040 }
   4041 
   4042 #define TEST_TRANSLIT_ID(id, cls) { \
   4043   UErrorCode ec = U_ZERO_ERROR; \
   4044   Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); \
   4045   if (U_FAILURE(ec)) { \
   4046     dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(ec)); \
   4047   } else { \
   4048     if (t->getDynamicClassID() != cls::getStaticClassID()) { \
   4049       errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
   4050     } \
   4051     /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
   4052   } \
   4053   delete t; \
   4054 }
   4055 
   4056 #define TEST_TRANSLIT_RULE(rule, cls) { \
   4057   UErrorCode ec = U_ZERO_ERROR; \
   4058   UParseError pe; \
   4059   Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \
   4060   if (U_FAILURE(ec)) { \
   4061     errln("FAIL: Couldn't create " rule); \
   4062   } else { \
   4063     if (t->getDynamicClassID() != cls ::getStaticClassID()) { \
   4064       errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
   4065     } \
   4066     /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
   4067   } \
   4068   delete t; \
   4069 }
   4070 
   4071 void TransliteratorTest::TestBoilerplate() {
   4072     TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
   4073     TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
   4074     TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
   4075     TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
   4076     TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
   4077     TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
   4078     TEST_TRANSLIT_ID("Null", NullTransliterator);
   4079     TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
   4080     TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
   4081     TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
   4082     TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
   4083     TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
   4084     TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
   4085 }
   4086 
   4087 void TransliteratorTest::TestAlternateSyntax() {
   4088     // U+2206 == &
   4089     // U+2190 == <
   4090     // U+2192 == >
   4091     // U+2194 == <>
   4092     expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
   4093            "abc",
   4094            "xbz");
   4095     expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
   4096            CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
   4097            UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
   4098 }
   4099 
   4100 static const char* BEGIN_END_RULES[] = {
   4101     // [0]
   4102     "abc > xy;"
   4103     "aba > z;",
   4104 
   4105     // [1]
   4106 /*
   4107     "::BEGIN;"
   4108     "abc > xy;"
   4109     "::END;"
   4110     "::BEGIN;"
   4111     "aba > z;"
   4112     "::END;",
   4113 */
   4114     "", // test case commented out below, this is here to keep from messing up the indexes
   4115 
   4116     // [2]
   4117 /*
   4118     "abc > xy;"
   4119     "::BEGIN;"
   4120     "aba > z;"
   4121     "::END;",
   4122 */
   4123     "", // test case commented out below, this is here to keep from messing up the indexes
   4124 
   4125     // [3]
   4126 /*
   4127     "::BEGIN;"
   4128     "abc > xy;"
   4129     "::END;"
   4130     "aba > z;",
   4131 */
   4132     "", // test case commented out below, this is here to keep from messing up the indexes
   4133 
   4134     // [4]
   4135     "abc > xy;"
   4136     "::Null;"
   4137     "aba > z;",
   4138 
   4139     // [5]
   4140     "::Upper;"
   4141     "ABC > xy;"
   4142     "AB > x;"
   4143     "C > z;"
   4144     "::Upper;"
   4145     "XYZ > p;"
   4146     "XY > q;"
   4147     "Z > r;"
   4148     "::Upper;",
   4149 
   4150     // [6]
   4151     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
   4152     "$delim = [\\-$ws];"
   4153     "$ws $delim* > ' ';"
   4154     "'-' $delim* > '-';",
   4155 
   4156     // [7]
   4157     "::Null;"
   4158     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
   4159     "$delim = [\\-$ws];"
   4160     "$ws $delim* > ' ';"
   4161     "'-' $delim* > '-';",
   4162 
   4163     // [8]
   4164     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
   4165     "$delim = [\\-$ws];"
   4166     "$ws $delim* > ' ';"
   4167     "'-' $delim* > '-';"
   4168     "::Null;",
   4169 
   4170     // [9]
   4171     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
   4172     "$delim = [\\-$ws];"
   4173     "::Null;"
   4174     "$ws $delim* > ' ';"
   4175     "'-' $delim* > '-';",
   4176 
   4177     // [10]
   4178 /*
   4179     "::BEGIN;"
   4180     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
   4181     "$delim = [\\-$ws];"
   4182     "::END;"
   4183     "$ws $delim* > ' ';"
   4184     "'-' $delim* > '-';",
   4185 */
   4186     "", // test case commented out below, this is here to keep from messing up the indexes
   4187 
   4188     // [11]
   4189 /*
   4190     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
   4191     "$delim = [\\-$ws];"
   4192     "::BEGIN;"
   4193     "$ws $delim* > ' ';"
   4194     "'-' $delim* > '-';"
   4195     "::END;",
   4196 */
   4197     "", // test case commented out below, this is here to keep from messing up the indexes
   4198 
   4199     // [12]
   4200 /*
   4201     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
   4202     "$delim = [\\-$ws];"
   4203     "$ab = [ab];"
   4204     "::BEGIN;"
   4205     "$ws $delim* > ' ';"
   4206     "'-' $delim* > '-';"
   4207     "::END;"
   4208     "::BEGIN;"
   4209     "$ab { ' ' } $ab > '-';"
   4210     "c { ' ' > ;"
   4211     "::END;"
   4212     "::BEGIN;"
   4213     "'a-a' > a\\%|a;"
   4214     "::END;",
   4215 */
   4216     "", // test case commented out below, this is here to keep from messing up the indexes
   4217 
   4218     // [13]
   4219     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
   4220     "$delim = [\\-$ws];"
   4221     "$ab = [ab];"
   4222     "::Null;"
   4223     "$ws $delim* > ' ';"
   4224     "'-' $delim* > '-';"
   4225     "::Null;"
   4226     "$ab { ' ' } $ab > '-';"
   4227     "c { ' ' > ;"
   4228     "::Null;"
   4229     "'a-a' > a\\%|a;",
   4230 
   4231     // [14]
   4232 /*
   4233     "::[abc];"
   4234     "::BEGIN;"
   4235     "abc > xy;"
   4236     "::END;"
   4237     "::BEGIN;"
   4238     "aba > yz;"
   4239     "::END;"
   4240     "::Upper;",
   4241 */
   4242     "", // test case commented out below, this is here to keep from messing up the indexes
   4243 
   4244     // [15]
   4245     "::[abc];"
   4246     "abc > xy;"
   4247     "::Null;"
   4248     "aba > yz;"
   4249     "::Upper;",
   4250 
   4251     // [16]
   4252 /*
   4253     "::[abc];"
   4254     "::BEGIN;"
   4255     "abc <> xy;"
   4256     "::END;"
   4257     "::BEGIN;"
   4258     "aba <> yz;"
   4259     "::END;"
   4260     "::Upper(Lower);"
   4261     "::([XYZ]);"
   4262 */
   4263     "", // test case commented out below, this is here to keep from messing up the indexes
   4264 
   4265     // [17]
   4266     "::[abc];"
   4267     "abc <> xy;"
   4268     "::Null;"
   4269     "aba <> yz;"
   4270     "::Upper(Lower);"
   4271     "::([XYZ]);"
   4272 };
   4273 static const int32_t BEGIN_END_RULES_length = (int32_t)(sizeof(BEGIN_END_RULES) / sizeof(BEGIN_END_RULES[0]));
   4274 
   4275 /*
   4276 (This entire test is commented out below and will need some heavy revision when we re-add
   4277 the ::BEGIN/::END stuff)
   4278 static const char* BOGUS_BEGIN_END_RULES[] = {
   4279     // [7]
   4280     "::BEGIN;"
   4281     "abc > xy;"
   4282     "::BEGIN;"
   4283     "aba > z;"
   4284     "::END;"
   4285     "::END;",
   4286 
   4287     // [8]
   4288     "abc > xy;"
   4289     " aba > z;"
   4290     "::END;",
   4291 
   4292     // [9]
   4293     "::BEGIN;"
   4294     "::Upper;"
   4295     "::END;"
   4296 };
   4297 static const int32_t BOGUS_BEGIN_END_RULES_length = (int32_t)(sizeof(BOGUS_BEGIN_END_RULES) / sizeof(BOGUS_BEGIN_END_RULES[0]));
   4298 */
   4299 
   4300 static const char* BEGIN_END_TEST_CASES[] = {
   4301     // rules             input                   expected output
   4302     BEGIN_END_RULES[0],  "abc ababc aba",        "xy zbc z",
   4303 //    BEGIN_END_RULES[1],  "abc ababc aba",        "xy abxy z",
   4304 //    BEGIN_END_RULES[2],  "abc ababc aba",        "xy abxy z",
   4305 //    BEGIN_END_RULES[3],  "abc ababc aba",        "xy abxy z",
   4306     BEGIN_END_RULES[4],  "abc ababc aba",        "xy abxy z",
   4307     BEGIN_END_RULES[5],  "abccabaacababcbc",     "PXAARXQBR",
   4308 
   4309     BEGIN_END_RULES[6],  "e   e - e---e-  e",    "e e e-e-e",
   4310     BEGIN_END_RULES[7],  "e   e - e---e-  e",    "e e e-e-e",
   4311     BEGIN_END_RULES[8],  "e   e - e---e-  e",    "e e e-e-e",
   4312     BEGIN_END_RULES[9],  "e   e - e---e-  e",    "e e e-e-e",
   4313 //    BEGIN_END_RULES[10],  "e   e - e---e-  e",    "e e e-e-e",
   4314 //    BEGIN_END_RULES[11], "e   e - e---e-  e",    "e e e-e-e",
   4315 //    BEGIN_END_RULES[12], "e   e - e---e-  e",    "e e e-e-e",
   4316 //    BEGIN_END_RULES[12], "a    a    a    a",     "a%a%a%a",
   4317 //    BEGIN_END_RULES[12], "a a-b c b a",          "a%a-b cb-a",
   4318     BEGIN_END_RULES[13], "e   e - e---e-  e",    "e e e-e-e",
   4319     BEGIN_END_RULES[13], "a    a    a    a",     "a%a%a%a",
   4320     BEGIN_END_RULES[13], "a a-b c b a",          "a%a-b cb-a",
   4321 
   4322 //    BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
   4323     BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
   4324 //    BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
   4325     BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
   4326 };
   4327 static const int32_t BEGIN_END_TEST_CASES_length = (int32_t)(sizeof(BEGIN_END_TEST_CASES) / sizeof(BEGIN_END_TEST_CASES[0]));
   4328 
   4329 void TransliteratorTest::TestBeginEnd() {
   4330     // run through the list of test cases above
   4331     int32_t i = 0;
   4332     for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
   4333         expect((UnicodeString)"Test case #" + (i / 3),
   4334                UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
   4335                UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
   4336                UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
   4337     }
   4338 
   4339     // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
   4340     UParseError parseError;
   4341     UErrorCode status = U_ZERO_ERROR;
   4342     Transliterator* reversed  = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
   4343             UTRANS_REVERSE, parseError, status);
   4344     if (reversed == 0 || U_FAILURE(status)) {
   4345         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
   4346     } else {
   4347         expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
   4348     }
   4349     delete reversed;
   4350 
   4351     // finally, run through the list of syntactically-ill-formed rule sets above and make sure
   4352     // that all of them cause errors
   4353 /*
   4354 (commented out until we have the real ::BEGIN/::END stuff in place
   4355     for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
   4356         UParseError parseError;
   4357         UErrorCode status = U_ZERO_ERROR;
   4358         Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
   4359                 UTRANS_FORWARD, parseError, status);
   4360         if (!U_FAILURE(status)) {
   4361             delete t;
   4362             errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
   4363         }
   4364     }
   4365 */
   4366 }
   4367 
   4368 void TransliteratorTest::TestBeginEndToRules() {
   4369     // run through the same list of test cases we used above, but this time, instead of just
   4370     // instantiating a Transliterator from the rules and running the test against it, we instantiate
   4371     // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
   4372     // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
   4373     // to (i.e., does the same thing as) the original rule set
   4374     for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
   4375         UParseError parseError;
   4376         UErrorCode status = U_ZERO_ERROR;
   4377         Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
   4378                 UTRANS_FORWARD, parseError, status);
   4379         if (U_FAILURE(status)) {
   4380             reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
   4381         } else {
   4382             UnicodeString rules;
   4383             t->toRules(rules, TRUE);
   4384             Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
   4385                     UTRANS_FORWARD, parseError, status);
   4386             if (U_FAILURE(status)) {
   4387                 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
   4388                         parseError, status);
   4389                 delete t;
   4390             } else {
   4391                 expect(*t2,
   4392                        UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
   4393                        UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
   4394                 delete t;
   4395                 delete t2;
   4396             }
   4397         }
   4398     }
   4399 
   4400     // do the same thing for the reversible test case
   4401     UParseError parseError;
   4402     UErrorCode status = U_ZERO_ERROR;
   4403     Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
   4404             UTRANS_REVERSE, parseError, status);
   4405     if (U_FAILURE(status)) {
   4406         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
   4407     } else {
   4408         UnicodeString rules;
   4409         reversed->toRules(rules, FALSE);
   4410         Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
   4411                 parseError, status);
   4412         if (U_FAILURE(status)) {
   4413             reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
   4414                     parseError, status);
   4415             delete reversed;
   4416         } else {
   4417             expect(*reversed2,
   4418                    UnicodeString("xy XY XYZ yz YZ"),
   4419                    UnicodeString("xy abc xaba yz aba"));
   4420             delete reversed;
   4421             delete reversed2;
   4422         }
   4423     }
   4424 }
   4425 
   4426 void TransliteratorTest::TestRegisterAlias() {
   4427     UnicodeString longID("Lower;[aeiou]Upper");
   4428     UnicodeString shortID("Any-CapVowels");
   4429     UnicodeString reallyShortID("CapVowels");
   4430 
   4431     Transliterator::registerAlias(shortID, longID);
   4432 
   4433     UErrorCode err = U_ZERO_ERROR;
   4434     Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
   4435     if (U_FAILURE(err)) {
   4436         errln("Failed to instantiate transliterator with long ID");
   4437         Transliterator::unregister(shortID);
   4438         return;
   4439     }
   4440     Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
   4441     if (U_FAILURE(err)) {
   4442         errln("Failed to instantiate transliterator with short ID");
   4443         delete t1;
   4444         Transliterator::unregister(shortID);
   4445         return;
   4446     }
   4447 
   4448     if (t1->getID() != longID)
   4449         errln("Transliterator instantiated with long ID doesn't have long ID");
   4450     if (t2->getID() != reallyShortID)
   4451         errln("Transliterator instantiated with short ID doesn't have short ID");
   4452 
   4453     UnicodeString rules1;
   4454     UnicodeString rules2;
   4455 
   4456     t1->toRules(rules1, TRUE);
   4457     t2->toRules(rules2, TRUE);
   4458     if (rules1 != rules2)
   4459         errln("Alias transliterators aren't the same");
   4460 
   4461     delete t1;
   4462     delete t2;
   4463     Transliterator::unregister(shortID);
   4464 
   4465     t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
   4466     if (U_SUCCESS(err)) {
   4467         errln("Instantiation with short ID succeeded after short ID was unregistered");
   4468         delete t1;
   4469     }
   4470 
   4471     // try the same thing again, but this time with something other than
   4472     // an instance of CompoundTransliterator
   4473     UnicodeString realID("Latin-Greek");
   4474     UnicodeString fakeID("Latin-dlgkjdflkjdl");
   4475     Transliterator::registerAlias(fakeID, realID);
   4476 
   4477     err = U_ZERO_ERROR;
   4478     t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
   4479     if (U_FAILURE(err)) {
   4480         dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err));
   4481         Transliterator::unregister(realID);
   4482         return;
   4483     }
   4484     t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
   4485     if (U_FAILURE(err)) {
   4486         errln("Failed to instantiate transliterator with fake ID");
   4487         delete t1;
   4488         Transliterator::unregister(realID);
   4489         return;
   4490     }
   4491 
   4492     t1->toRules(rules1, TRUE);
   4493     t2->toRules(rules2, TRUE);
   4494     if (rules1 != rules2)
   4495         errln("Alias transliterators aren't the same");
   4496 
   4497     delete t1;
   4498     delete t2;
   4499     Transliterator::unregister(fakeID);
   4500 }
   4501 
   4502 void TransliteratorTest::TestRuleStripping() {
   4503     /*
   4504 #
   4505 \uE001>\u0C01; # SIGN
   4506     */
   4507     static const UChar rule[] = {
   4508         0x0023,0x0020,0x000D,0x000A,
   4509         0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
   4510     };
   4511     static const UChar expectedRule[] = {
   4512         0xE001,0x003E,0x0C01,0x003B,0
   4513     };
   4514     UChar result[sizeof(rule)/sizeof(rule[0])];
   4515     UErrorCode status = U_ZERO_ERROR;
   4516     int32_t len = utrans_stripRules(rule, (int32_t)(sizeof(rule)/sizeof(rule[0])), result, &status);
   4517     if (len != u_strlen(expectedRule)) {
   4518         errln("utrans_stripRules return len = %d", len);
   4519     }
   4520     if (u_strncmp(expectedRule, result, len) != 0) {
   4521         errln("utrans_stripRules did not return expected string");
   4522     }
   4523 }
   4524 
   4525 /**
   4526  * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
   4527  */
   4528 void TransliteratorTest::TestHalfwidthFullwidth(void) {
   4529     UParseError parseError;
   4530     UErrorCode status = U_ZERO_ERROR;
   4531     Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status);
   4532     Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status);
   4533     if (hf == 0 || fh == 0) {
   4534         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
   4535         delete hf;
   4536         delete fh;
   4537         return;
   4538     }
   4539 
   4540     // Array of 2n items
   4541     // Each item is
   4542     //   "hf"|"fh"|"both",
   4543     //   <Halfwidth>,
   4544     //   <Fullwidth>
   4545     const char* DATA[] = {
   4546         "both",
   4547         "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
   4548         "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
   4549     };
   4550     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
   4551 
   4552     for (int32_t i=0; i<DATA_length; i+=3) {
   4553         UnicodeString h = CharsToUnicodeString(DATA[i+1]);
   4554         UnicodeString f = CharsToUnicodeString(DATA[i+2]);
   4555         switch (*DATA[i]) {
   4556         case 0x68: //'h': // Halfwidth-Fullwidth only
   4557             expect(*hf, h, f);
   4558             break;
   4559         case 0x66: //'f': // Fullwidth-Halfwidth only
   4560             expect(*fh, f, h);
   4561             break;
   4562         case 0x62: //'b': // both directions
   4563             expect(*hf, h, f);
   4564             expect(*fh, f, h);
   4565             break;
   4566         }
   4567     }
   4568     delete hf;
   4569     delete fh;
   4570 }
   4571 
   4572 
   4573     /**
   4574      *  Test Thai.  The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
   4575      *              TODO: confirm that the expected results are correct.
   4576      *              For now, test just confirms that C++ and Java give identical results.
   4577      */
   4578 void TransliteratorTest::TestThai(void) {
   4579     UParseError parseError;
   4580     UErrorCode status = U_ZERO_ERROR;
   4581     Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
   4582     if (tr == 0) {
   4583         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
   4584         return;
   4585     }
   4586     if (U_FAILURE(status)) {
   4587         errln("FAIL: createInstance failed with %s", u_errorName(status));
   4588         return;
   4589     }
   4590     const char *thaiText =
   4591         "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
   4592         "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
   4593         "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
   4594         "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
   4595         "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
   4596         "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
   4597         "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
   4598         "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
   4599         "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
   4600         "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
   4601         "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
   4602         "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
   4603         "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
   4604         "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
   4605         "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
   4606         "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
   4607         "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
   4608         "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
   4609         "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
   4610         "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
   4611         "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
   4612         "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
   4613         "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
   4614         "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
   4615         " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
   4616         "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
   4617         "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
   4618         " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
   4619         "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
   4620         "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
   4621 
   4622     const char *latinText =
   4623         "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
   4624         "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
   4625         "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
   4626         "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
   4627         "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
   4628         " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
   4629         "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
   4630         "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
   4631         "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
   4632         "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
   4633         "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
   4634         "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
   4635         " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
   4636         "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
   4637         " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
   4638         "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
   4639         "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
   4640         "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
   4641 
   4642 
   4643     UnicodeString  xlitText(thaiText);
   4644     xlitText = xlitText.unescape();
   4645     tr->transliterate(xlitText);
   4646 
   4647     UnicodeString expectedText(latinText);
   4648     expectedText = expectedText.unescape();
   4649     expect(*tr, xlitText, expectedText);
   4650 
   4651     delete tr;
   4652 }
   4653 
   4654 
   4655 //======================================================================
   4656 // Support methods
   4657 //======================================================================
   4658 void TransliteratorTest::expectT(const UnicodeString& id,
   4659                                  const UnicodeString& source,
   4660                                  const UnicodeString& expectedResult) {
   4661     UErrorCode ec = U_ZERO_ERROR;
   4662     UParseError pe;
   4663     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
   4664     if (U_FAILURE(ec)) {
   4665         errln((UnicodeString)"FAIL: Could not create " + id + " -  " + u_errorName(ec));
   4666         delete t;
   4667         return;
   4668     }
   4669     expect(*t, source, expectedResult);
   4670     delete t;
   4671 }
   4672 
   4673 void TransliteratorTest::reportParseError(const UnicodeString& message,
   4674                                           const UParseError& parseError,
   4675                                           const UErrorCode& status) {
   4676     dataerrln(message +
   4677           /*", parse error " + parseError.code +*/
   4678           ", line " + parseError.line +
   4679           ", offset " + parseError.offset +
   4680           ", pre-context " + prettify(parseError.preContext, TRUE) +
   4681           ", post-context " + prettify(parseError.postContext,TRUE) +
   4682           ", Error: " + u_errorName(status));
   4683 }
   4684 
   4685 void TransliteratorTest::expect(const UnicodeString& rules,
   4686                                 const UnicodeString& source,
   4687                                 const UnicodeString& expectedResult,
   4688                                 UTransPosition *pos) {
   4689     expect("<ID>", rules, source, expectedResult, pos);
   4690 }
   4691 
   4692 void TransliteratorTest::expect(const UnicodeString& id,
   4693                                 const UnicodeString& rules,
   4694                                 const UnicodeString& source,
   4695                                 const UnicodeString& expectedResult,
   4696                                 UTransPosition *pos) {
   4697     UErrorCode status = U_ZERO_ERROR;
   4698     UParseError parseError;
   4699     Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
   4700     if (U_FAILURE(status)) {
   4701         reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
   4702     } else {
   4703         expect(*t, source, expectedResult, pos);
   4704     }
   4705     delete t;
   4706 }
   4707 
   4708 void TransliteratorTest::expect(const Transliterator& t,
   4709                                 const UnicodeString& source,
   4710                                 const UnicodeString& expectedResult,
   4711                                 const Transliterator& reverseTransliterator) {
   4712     expect(t, source, expectedResult);
   4713     expect(reverseTransliterator, expectedResult, source);
   4714 }
   4715 
   4716 void TransliteratorTest::expect(const Transliterator& t,
   4717                                 const UnicodeString& source,
   4718                                 const UnicodeString& expectedResult,
   4719                                 UTransPosition *pos) {
   4720     if (pos == 0) {
   4721         UnicodeString result(source);
   4722         t.transliterate(result);
   4723         expectAux(t.getID() + ":String", source, result, expectedResult);
   4724     }
   4725     UTransPosition index={0, 0, 0, 0};
   4726     if (pos != 0) {
   4727         index = *pos;
   4728     }
   4729 
   4730     UnicodeString rsource(source);
   4731     if (pos == 0) {
   4732         t.transliterate(rsource);
   4733     } else {
   4734         // Do it all at once -- below we do it incrementally
   4735         t.finishTransliteration(rsource, *pos);
   4736     }
   4737     expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
   4738 
   4739     // Test keyboard (incremental) transliteration -- this result
   4740     // must be the same after we finalize (see below).
   4741     UnicodeString log;
   4742     rsource.remove();
   4743     if (pos != 0) {
   4744         rsource = source;
   4745         formatInput(log, rsource, index);
   4746         log.append(" -> ");
   4747         UErrorCode status = U_ZERO_ERROR;
   4748         t.transliterate(rsource, index, status);
   4749         formatInput(log, rsource, index);
   4750     } else {
   4751         for (int32_t i=0; i<source.length(); ++i) {
   4752             if (i != 0) {
   4753                 log.append(" + ");
   4754             }
   4755             log.append(source.charAt(i)).append(" -> ");
   4756             UErrorCode status = U_ZERO_ERROR;
   4757             t.transliterate(rsource, index, source.charAt(i), status);
   4758             formatInput(log, rsource, index);
   4759         }
   4760     }
   4761 
   4762     // As a final step in keyboard transliteration, we must call
   4763     // transliterate to finish off any pending partial matches that
   4764     // were waiting for more input.
   4765     t.finishTransliteration(rsource, index);
   4766     log.append(" => ").append(rsource);
   4767 
   4768     expectAux(t.getID() + ":Keyboard", log,
   4769               rsource == expectedResult,
   4770               expectedResult);
   4771 }
   4772 
   4773 
   4774 /**
   4775  * @param appendTo result is appended to this param.
   4776  * @param input the string being transliterated
   4777  * @param pos the index struct
   4778  */
   4779 UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
   4780                                                const UnicodeString& input,
   4781                                                const UTransPosition& pos) {
   4782     // Output a string of the form aaa{bbb|ccc|ddd}eee, where
   4783     // the {} indicate the context start and limit, and the ||
   4784     // indicate the start and limit.
   4785     if (0 <= pos.contextStart &&
   4786         pos.contextStart <= pos.start &&
   4787         pos.start <= pos.limit &&
   4788         pos.limit <= pos.contextLimit &&
   4789         pos.contextLimit <= input.length()) {
   4790 
   4791         UnicodeString a, b, c, d, e;
   4792         input.extractBetween(0, pos.contextStart, a);
   4793         input.extractBetween(pos.contextStart, pos.start, b);
   4794         input.extractBetween(pos.start, pos.limit, c);
   4795         input.extractBetween(pos.limit, pos.contextLimit, d);
   4796         input.extractBetween(pos.contextLimit, input.length(), e);
   4797         appendTo.append(a).append((UChar)123/*{*/).append(b).
   4798             append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
   4799             append((UChar)125/*}*/).append(e);
   4800     } else {
   4801         appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
   4802                         pos.contextStart + ", s=" + pos.start + ", l=" +
   4803                         pos.limit + ", cl=" + pos.contextLimit + "} on " +
   4804                         input);
   4805     }
   4806     return appendTo;
   4807 }
   4808 
   4809 void TransliteratorTest::expectAux(const UnicodeString& tag,
   4810                                    const UnicodeString& source,
   4811                                    const UnicodeString& result,
   4812                                    const UnicodeString& expectedResult) {
   4813     expectAux(tag, source + " -> " + result,
   4814               result == expectedResult,
   4815               expectedResult);
   4816 }
   4817 
   4818 void TransliteratorTest::expectAux(const UnicodeString& tag,
   4819                                    const UnicodeString& summary, UBool pass,
   4820                                    const UnicodeString& expectedResult) {
   4821     if (pass) {
   4822         logln(UnicodeString("(")+tag+") " + prettify(summary));
   4823     } else {
   4824         dataerrln(UnicodeString("FAIL: (")+tag+") "
   4825               + prettify(summary)
   4826               + ", expected " + prettify(expectedResult));
   4827     }
   4828 }
   4829 
   4830 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
   4831